227 lines
7.4 KiB
Plaintext
227 lines
7.4 KiB
Plaintext
|
|
||
|
R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences"
|
||
|
Copyright (C) 2024 The R Foundation for Statistical Computing
|
||
|
Platform: aarch64-unknown-linux-gnu
|
||
|
|
||
|
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
||
|
You are welcome to redistribute it under certain conditions.
|
||
|
Type 'license()' or 'licence()' for distribution details.
|
||
|
|
||
|
R is a collaborative project with many contributors.
|
||
|
Type 'contributors()' for more information and
|
||
|
'citation()' on how to cite R or R packages in publications.
|
||
|
|
||
|
Type 'demo()' for some demos, 'help()' for on-line help, or
|
||
|
'help.start()' for an HTML browser interface to help.
|
||
|
Type 'q()' to quit R.
|
||
|
|
||
|
> library(survival)
|
||
|
> options(na.action=na.exclude) # preserve missings
|
||
|
> options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type
|
||
|
>
|
||
|
> #
|
||
|
> # Tests from the appendix of Therneau and Grambsch
|
||
|
> # b. Data set 1 and Efron estimate
|
||
|
> #
|
||
|
> test1 <- data.frame(time= c(9, 3,1,1,6,6,8),
|
||
|
+ status=c(1,NA,1,0,1,1,0),
|
||
|
+ x= c(0, 2,1,1,1,0,0))
|
||
|
>
|
||
|
> byhand <- function(beta, newx=0) {
|
||
|
+ r <- exp(beta)
|
||
|
+ loglik <- 2*beta - (log(3*r +3) + log((r+5)/2) + log(r+3))
|
||
|
+ u <- (30 + 23*r - r^3)/ ((r+1)*(r+3)*(r+5))
|
||
|
+ tfun <- function(x) x - x^2
|
||
|
+ imat <- tfun(r/(r+1)) + tfun(r/(r+5)) + tfun(r/(r+3))
|
||
|
+
|
||
|
+ # The matrix of weights, one row per obs, one col per time
|
||
|
+ # Time of 1, 6, 6+0 (second death), and 9
|
||
|
+ wtmat <- matrix(c(1,1,1,1,1,1,
|
||
|
+ 0,0,1,1,1,1,
|
||
|
+ 0,0,.5, .5, 1,1,
|
||
|
+ 0,0,0,0,0,1), ncol=4)
|
||
|
+ wtmat <- diag(c(r,r,r,1,1,1)) %*% wtmat
|
||
|
+
|
||
|
+ x <- c(1,1,1,0,0,0)
|
||
|
+ status <- c(1,0,1,1,0,1)
|
||
|
+ xbar <- colSums(wtmat*x)/ colSums(wtmat)
|
||
|
+ haz <- 1/ colSums(wtmat) # one death at each of the times
|
||
|
+
|
||
|
+ hazmat <- wtmat %*% diag(haz) #each subject's hazard over time
|
||
|
+ mart <- status - rowSums(hazmat)
|
||
|
+
|
||
|
+ a <- r+1; b<- r+3; d<- r+5 # 'c' in the book, 'd' here
|
||
|
+ score <- c((2*r + 3)/ (3*a^2),
|
||
|
+ -r/ (3*a^2),
|
||
|
+ (675+ r*(1305 +r*(756 + r*(-4 +r*(-79 -13*r)))))/(3*(a*b*d)^2),
|
||
|
+ r*(1/(3*a^2) - a/(2*b^2) - b/(2*d^2)),
|
||
|
+ 2*r*(177 + r*(282 +r*(182 + r*(50 + 5*r)))) /(3*(a*b*d)^2),
|
||
|
+ 2*r*(177 + r*(282 +r*(182 + r*(50 + 5*r)))) /(3*(a*b*d)^2))
|
||
|
+
|
||
|
+ # Schoenfeld residual
|
||
|
+ d <- mean(xbar[2:3])
|
||
|
+ scho <- c(1/(r+1), 1- d, 0- d , 0)
|
||
|
+
|
||
|
+ surv <- exp(-cumsum(haz)* exp(beta*newx))[c(1,3,4)]
|
||
|
+ varhaz.g <- cumsum(haz^2) # since all numerators are 1
|
||
|
+
|
||
|
+ varhaz.d <- cumsum((newx-xbar) * haz)
|
||
|
+
|
||
|
+ varhaz <- (varhaz.g + varhaz.d^2/ imat) * exp(2*beta*newx)
|
||
|
+
|
||
|
+ list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=haz,
|
||
|
+ mart=mart, score=score, var.g=varhaz.g, var.d=varhaz.d,
|
||
|
+ scho=scho, surv=surv, var=varhaz[c(1,3,4)])
|
||
|
+ }
|
||
|
>
|
||
|
>
|
||
|
> aeq <- function(x,y) all.equal(as.vector(x), as.vector(y))
|
||
|
>
|
||
|
> fit0 <-coxph(Surv(time, status) ~x, test1, iter=0)
|
||
|
> truth0 <- byhand(0,0)
|
||
|
> aeq(truth0$loglik, fit0$loglik[1])
|
||
|
[1] TRUE
|
||
|
> aeq(1/truth0$imat, fit0$var)
|
||
|
[1] TRUE
|
||
|
> aeq(truth0$mart, fit0$residuals[c(2:6,1)])
|
||
|
[1] TRUE
|
||
|
> aeq(resid(fit0), c(-3/4, NA, 5/6, -1/6, 5/12, 5/12, -3/4))
|
||
|
[1] TRUE
|
||
|
> aeq(truth0$scho, resid(fit0, 'schoen'))
|
||
|
[1] TRUE
|
||
|
> aeq(truth0$score, resid(fit0, 'score')[c(3:7,1)])
|
||
|
[1] TRUE
|
||
|
> sfit <- survfit(fit0, list(x=0), censor=FALSE)
|
||
|
> aeq(sfit$std.err^2, truth0$var)
|
||
|
[1] TRUE
|
||
|
> aeq(sfit$surv, truth0$surv)
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> fit <- coxph(Surv(time, status) ~x, test1, eps=1e-8, nocenter=NULL)
|
||
|
> aeq(round(fit$coefficients,6), 1.676857)
|
||
|
[1] TRUE
|
||
|
> truebeta <- log(cos(acos((45/23)*sqrt(3/23))/3) * 2* sqrt(23/3))
|
||
|
> truth <- byhand(truebeta, 0)
|
||
|
> aeq(truth$loglik, fit$loglik[2])
|
||
|
[1] TRUE
|
||
|
> aeq(1/truth$imat, fit$var)
|
||
|
[1] TRUE
|
||
|
> aeq(truth$mart, fit$residuals[c(2:6,1)])
|
||
|
[1] TRUE
|
||
|
> aeq(truth$scho, resid(fit, 'schoen'))
|
||
|
[1] TRUE
|
||
|
> aeq(truth$score, resid(fit, 'score')[c(3:7,1)])
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Per comments in the source code, the below is expected to fail for Efron
|
||
|
> # at the tied death times. (When predicting for new data, predict
|
||
|
> # treats a time in the new data set that exactly matches one in the original
|
||
|
> # as being just after the original, i.e., experiences the full hazard
|
||
|
> # jump there, in the same way that censors do.)
|
||
|
> expect <- predict(fit, type='expected', newdata=test1) #force recalc
|
||
|
> use <- !(test1$time==6 | is.na(test1$status))
|
||
|
> aeq(test1$status[use] - resid(fit)[use], expect[use])
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> sfit <- survfit(fit, list(x=0), censor=FALSE)
|
||
|
> aeq(sfit$surv, truth$surv)
|
||
|
[1] TRUE
|
||
|
> aeq(sfit$std.err^2, truth$var)
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> #
|
||
|
> # Done with the formal test, now print out lots of bits
|
||
|
> #
|
||
|
> resid(fit)
|
||
|
1 2 3 4 5 6 7
|
||
|
-0.3655434 NA 0.7191707 -0.2808293 -0.4383414 0.7310869 -0.3655434
|
||
|
> resid(fit, 'scor')
|
||
|
1 2 3 4 5 6 7
|
||
|
0.2208584 NA 0.1132780 -0.0442340 -0.1029199 -0.4078409 0.2208584
|
||
|
> resid(fit, 'scho')
|
||
|
1 6 6 9
|
||
|
0.157512 0.421244 -0.578756 0.000000
|
||
|
>
|
||
|
> predict(fit, type='lp')
|
||
|
[1] -0.8384287 NA 0.8384287 0.8384287 0.8384287 -0.8384287 -0.8384287
|
||
|
> predict(fit, type='risk')
|
||
|
[1] 0.4323894 NA 2.3127302 2.3127302 2.3127302 0.4323894 0.4323894
|
||
|
> predict(fit, type='expected')
|
||
|
1 2 3 4 5 6 7
|
||
|
1.3655434 NA 0.2808293 0.2808293 1.4383414 0.2689131 0.3655434
|
||
|
> predict(fit, type='terms')
|
||
|
x
|
||
|
1 -0.8384287
|
||
|
2 NA
|
||
|
3 0.8384287
|
||
|
4 0.8384287
|
||
|
5 0.8384287
|
||
|
6 -0.8384287
|
||
|
7 -0.8384287
|
||
|
> predict(fit, type='lp', se.fit=T)
|
||
|
$fit
|
||
|
1 2 3 4 5 6 7
|
||
|
-0.8384287 NA 0.8384287 0.8384287 0.8384287 -0.8384287 -0.8384287
|
||
|
|
||
|
$se.fit
|
||
|
1 2 3 4 5 6 7
|
||
|
0.6388078 NA 0.6388078 0.6388078 0.6388078 0.6388078 0.6388078
|
||
|
|
||
|
> predict(fit, type='risk', se.fit=T)
|
||
|
$fit
|
||
|
1 2 3 4 5 6 7
|
||
|
0.4323894 NA 2.3127302 2.3127302 2.3127302 0.4323894 0.4323894
|
||
|
|
||
|
$se.fit
|
||
|
1 2 3 4 5 6 7
|
||
|
0.4200565 NA 0.9714774 0.9714774 0.9714774 0.4200565 0.4200565
|
||
|
|
||
|
> predict(fit, type='expected', se.fit=T)
|
||
|
$fit
|
||
|
1 2 3 4 5 6 7
|
||
|
1.3655434 NA 0.2808293 0.2808293 1.4383414 0.2689131 0.3655434
|
||
|
|
||
|
$se.fit
|
||
|
[1] 1.0649293 NA 0.2864593 0.2864593 1.5922983 0.3661617 0.3661617
|
||
|
|
||
|
> predict(fit, type='terms', se.fit=T)
|
||
|
$fit
|
||
|
x
|
||
|
1 -0.8384287
|
||
|
2 NA
|
||
|
3 0.8384287
|
||
|
4 0.8384287
|
||
|
5 0.8384287
|
||
|
6 -0.8384287
|
||
|
7 -0.8384287
|
||
|
|
||
|
$se.fit
|
||
|
x
|
||
|
1 0.6388078
|
||
|
2 NA
|
||
|
3 0.6388078
|
||
|
4 0.6388078
|
||
|
5 0.6388078
|
||
|
6 0.6388078
|
||
|
7 0.6388078
|
||
|
|
||
|
>
|
||
|
> summary(survfit(fit))
|
||
|
Call: survfit(formula = fit)
|
||
|
|
||
|
time n.risk n.event survival std.err lower 95% CI upper 95% CI
|
||
|
1 6 1 0.8857 0.117 0.683036 1
|
||
|
6 4 2 0.4294 0.237 0.145743 1
|
||
|
9 1 1 0.0425 0.116 0.000198 1
|
||
|
> summary(survfit(fit, list(x=2)))
|
||
|
Call: survfit(formula = fit, newdata = list(x = 2))
|
||
|
|
||
|
time n.risk n.event survival std.err lower 95% CI upper 95% CI
|
||
|
1 6 1 2.23e-01 5.97e-01 1.16e-03 1
|
||
|
6 4 2 2.87e-05 5.69e-04 3.96e-22 1
|
||
|
9 1 1 1.08e-17 1.04e-15 1.07e-99 1
|
||
|
>
|
||
|
> proc.time()
|
||
|
user system elapsed
|
||
|
0.429 0.028 0.454
|