201 lines
7.0 KiB
Plaintext
201 lines
7.0 KiB
Plaintext
|
|
R Under development (unstable) (2024-02-06 r85866) -- "Unsuffered Consequences"
|
|
Copyright (C) 2024 The R Foundation for Statistical Computing
|
|
Platform: aarch64-unknown-linux-gnu
|
|
|
|
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
|
You are welcome to redistribute it under certain conditions.
|
|
Type 'license()' or 'licence()' for distribution details.
|
|
|
|
R is a collaborative project with many contributors.
|
|
Type 'contributors()' for more information and
|
|
'citation()' on how to cite R or R packages in publications.
|
|
|
|
Type 'demo()' for some demos, 'help()' for on-line help, or
|
|
'help.start()' for an HTML browser interface to help.
|
|
Type 'q()' to quit R.
|
|
|
|
> library(survival)
|
|
> options(na.action=na.exclude) # preserve missings
|
|
> options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type
|
|
> aeq <- function(x,y,...) all.equal(as.vector(x), as.vector(y),...)
|
|
>
|
|
> #
|
|
> # Test out the results for competing risks. Survfit does this directly as just
|
|
> # one case of the Aalen-Johansen, but also known as 'cumulative incidence'.
|
|
> #
|
|
> # First trivial test
|
|
> tdata <- data.frame(time=c(1,2,2,3,3,3,5,6),
|
|
+ status = c(0,1,0,1,0,1,0,1),
|
|
+ event = c(1,1,2,2,1,2,3,2),
|
|
+ grp = c(1,2,1,2,1,2,1,2),
|
|
+ id = 1:8)
|
|
> old <- survfit(Surv(time, status*event, type="mstate") ~1, tdata) #old style
|
|
> fit <- survfit(Surv(time, factor(status*event)) ~1, tdata)
|
|
>
|
|
> # test that the old (should be depricated) form gives the same answer
|
|
> indx <- match("call", names(fit))
|
|
> all.equal(unclass(old)[-indx], unclass(fit)[-indx])
|
|
[1] TRUE
|
|
>
|
|
> byhand <- function() {
|
|
+ #everyone starts in state 0
|
|
+ p1 <- c(1,0,0)
|
|
+
|
|
+ p2 <- c(6/7, 1/7, 0) # 0-1 transition at time 2
|
|
+ u2 <- matrix(rep(c(1/49, -1/49, 0), each=8), ncol=3) #leverage matrix at time 2
|
|
+ u2[1,] <- 0 #subject 1 is not present
|
|
+ u2[2,1:2] <- u2[2, 1:2] + c(-1/7, 1/7)
|
|
+
|
|
+ p3 <- c((6/7)*(3/5), 1/7, 12/35) # 0-2 transition at time 3, 5 at risk
|
|
+ h3 <- matrix(c(3/5, 0, 2/5, 0,1,0, 0,1,0), byrow=T, ncol=3) #hazard mat
|
|
+ u3 <- u2 %*% h3
|
|
+ u3[4:8,1] <- u3[4:8,1] + p2[1]*2/25
|
|
+ u3[4:8,3] <- u3[4:8,3] -p2[1]*2/25
|
|
+ u3[4,] <- u3[4,] + c(-p2[1]/5, 0, p2[1]/5)
|
|
+ u3[6,] <- u3[4,]
|
|
+
|
|
+ p6 <- c(0, 1/7, 6/7) # 0-2 at time 6, 1 at risk
|
|
+ h6 <- matrix(c(-1,0,1,0,1,0,0,1,0), byrow=T, ncol=3)
|
|
+ u6 <- cbind(0, u3[,2], -u3[,2])
|
|
+
|
|
+ V <- rbind(0, colSums(u2^2),
|
|
+ colSums(u3^2),
|
|
+ colSums(u3^2),
|
|
+ colSums(u6^2))
|
|
+ list(P=rbind(p1, p2, p3, p3, p6), u2=u2, u3=u3, u6=u6, V=V)
|
|
+ }
|
|
> bfit <- byhand()
|
|
> aeq(fit$pstate, bfit$P)
|
|
[1] TRUE
|
|
> aeq(fit$n.risk[,1], c(8,7,5,2,1))
|
|
[1] TRUE
|
|
> aeq(fit$n.event[,2:3], c(0,1,0,0,0, 0,0 ,2,0,1))
|
|
[1] TRUE
|
|
> aeq(fit$std.err, sqrt(bfit$V))
|
|
[1] TRUE
|
|
>
|
|
> # Check the influence directly, per row
|
|
> eps <- 1e-6
|
|
> deltaU <- array(0, dim= c(nrow(tdata), dim(fit$pstate)))
|
|
> deltaC <- array(0, dim= c(nrow(tdata), dim(fit$cumhaz)))
|
|
> deltaA <- deltaU
|
|
> auc <- function(fit) {
|
|
+ nr <- length(fit$time)
|
|
+ rbind(fit$p0*fit$time[1],
|
|
+ apply(diff(fit$time) * fit$pstate[-nr,], 2, cumsum))
|
|
+ }
|
|
> for (i in 1:nrow(tdata)) {
|
|
+ twt <- rep(1, nrow(tdata))
|
|
+ twt[i] <- twt[i] + eps
|
|
+ tfit <- survfit(Surv(time, factor(status*event)) ~1, tdata, id=id,
|
|
+ weights= twt)
|
|
+ deltaU[i,,] <- (tfit$pstate - fit$pstate)/eps # approx derivative
|
|
+ deltaC[i,,] <- (tfit$cumhaz - fit$cumhaz)/eps
|
|
+ deltaA[i,,] <- (auc(tfit) - auc(fit))/eps
|
|
+ }
|
|
> aeq(bfit$u2, deltaU[,2,], tol=eps)
|
|
[1] TRUE
|
|
> aeq(bfit$u3, deltaU[,3,], tol=eps)
|
|
[1] TRUE
|
|
> aeq(bfit$u6, deltaU[,5,], tol=eps)
|
|
[1] TRUE
|
|
>
|
|
> sqmean <- function(x) sqrt(sum(x^2))
|
|
> aeq(fit$std.chaz, apply(deltaC, 2:3, sqmean), tol=eps)
|
|
[1] TRUE
|
|
> aeq(fit$std.err, apply(deltaU, 2:3, sqmean), tol=eps)
|
|
[1] TRUE
|
|
> aeq(fit$std.auc, apply(deltaA, 2:3, sqmean), tol=eps)
|
|
[1] TRUE
|
|
>
|
|
> # Times purposely has values that are before the start, exact, intermediate
|
|
> # and after the end of the observed times
|
|
> sfit <- summary(fit, times=c(0, 1, 3.5, 6, 7), extend=TRUE)
|
|
> aeq(sfit$pstate, rbind(c(1,0,0), bfit$P[c(1,3,5,5),]))
|
|
[1] TRUE
|
|
> aeq(sfit$n.risk[,1], c(8,8, 2, 1, 0))
|
|
[1] TRUE
|
|
> aeq(sfit$n.event, matrix(c(0,0,0,0,0, 0,0,1,0,0, 0,0,2,1,0), ncol=3))
|
|
[1] TRUE
|
|
>
|
|
> #
|
|
> # For this we need the competing risks MGUS data set, first
|
|
> # event
|
|
> #
|
|
> tdata <- mgus1[mgus1$enum==1,]
|
|
> # Ensure the old-style call using "etype" works (backwards compatability)
|
|
> fit1 <- survfit(Surv(stop, status) ~ 1, etype=event, tdata)
|
|
> fit1b <-survfit(Surv(stop, event) ~1, tdata)
|
|
> indx <- match("call", names(fit1))
|
|
> all.equal(unclass(fit1)[-indx], unclass(fit1b)[-indx])
|
|
[1] TRUE
|
|
>
|
|
> # Now get the overall survival, and the hazard for progression
|
|
> fit2 <- survfit(Surv(stop, status) ~1, tdata) #overall to "first bad thing"
|
|
> fit3 <- survfit(Surv(stop, status*(event=='pcm')) ~1, tdata,
|
|
+ type='fleming')
|
|
> fit4 <- survfit(Surv(stop, status*(event=='death')) ~1, tdata,
|
|
+ type='fleming')
|
|
>
|
|
> aeq(fit1$n.risk[,1], fit2$n.risk)
|
|
[1] TRUE
|
|
> aeq(rowSums(fit1$n.event), fit2$n.event)
|
|
[1] TRUE
|
|
>
|
|
> # Classic CI formula
|
|
> # integral [hazard(t) S(t-0) dt], where S= "survival to first event"
|
|
> haz1 <- diff(c(0, -log(fit3$surv))) #Aalen hazard estimate for progression
|
|
> haz2 <- diff(c(0, -log(fit4$surv))) #Aalen estimate for death
|
|
> tsurv <- c(1, fit2$surv[-length(fit2$surv)]) #lagged survival
|
|
> ci1 <- cumsum(haz1 *tsurv)
|
|
> ci2 <- cumsum(haz2 *tsurv)
|
|
> aeq(cbind(ci1, ci2), fit1$pstate[,2:3])
|
|
[1] TRUE
|
|
>
|
|
> #
|
|
> # Now, make sure that it works for subgroups
|
|
> #
|
|
> fit1 <- survfit(Surv(stop, event) ~ sex, tdata)
|
|
> fit2 <- survfit(Surv(stop, event) ~ 1, tdata,
|
|
+ subset=(sex=='female'))
|
|
> fit3 <- survfit(Surv(stop, event) ~ 1, tdata,
|
|
+ subset=(sex=='male'))
|
|
>
|
|
> aeq(fit2$pstate, fit1$pstate[1:fit1$strata[1],])
|
|
[1] TRUE
|
|
> aeq(fit2$std, fit1$std[1:fit1$strata[1],])
|
|
[1] TRUE
|
|
> aeq(fit3$pstate, fit1$pstate[-(1:fit1$strata[1]),])
|
|
[1] TRUE
|
|
>
|
|
> # A second test of cumulative incidence
|
|
> # compare results to Bob Gray's functions
|
|
> # The file gray1 is the result of
|
|
> # library(cmprsk)
|
|
> # tstat <- ifelse(tdata$status==0, 0, 1+ (tdata$event=='death'))
|
|
> # gray1 <- cuminc(tdata$stop, tstat)
|
|
> load("gray1.rda")
|
|
> fit2 <- survfit(Surv(stop, event) ~ 1, tdata)
|
|
>
|
|
> if (FALSE) {
|
|
+ # lines of the two graphs should overlay
|
|
+ plot(gray1[[1]]$time, gray1[[1]]$est, type='l',
|
|
+ ylim=range(c(gray1[[1]]$est, gray1[[2]]$est)),
|
|
+ xlab="Time")
|
|
+ lines(gray1[[2]]$time, gray1[[2]]$est, lty=2)
|
|
+ matlines(fit2$time, fit2$pstate, col=2, lty=1:2, type='s')
|
|
+ }
|
|
> # To formally match these is a bit of a nuisance.
|
|
> # The cuminc function returns a full step function, and survfit only
|
|
> # the bottoms of the steps.
|
|
> temp1 <- tapply(gray1[[1]]$est, gray1[[1]]$time, max)[-1] #toss time 0
|
|
> indx1 <- match(names(temp1), fit2$time)
|
|
> aeq(temp1, fit2$pstate[indx1,2])
|
|
[1] TRUE
|
|
>
|
|
>
|
|
> proc.time()
|
|
user system elapsed
|
|
0.449 0.028 0.475
|