R Under development (unstable) (2020-02-28 r77869) -- "Unsuffered Consequences" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # Tests of the weighted Cox model > # > # Similar data set to test1, but add weights, > # a double-death/censor tied time > # a censored last subject > # The latter two are cases covered only feebly elsewhere. > # > # The data set testw2 has the same data, but done via replication > # > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > > testw1 <- data.frame(time= c(1,1,2,2,2,2,3,4,5), + status= c(1,0,1,1,1,0,0,1,0), + x= c(2,0,1,1,0,1,0,1,0), + wt = c(1,2,3,4,3,2,1,2,1)) > xx <- c(1,2,3,4,3,2,1,2,1) > testw2 <- data.frame(time= rep(c(1,1,2,2,2,2,3,4,5), xx), + status= rep(c(1,0,1,1,1,0,0,1,0), xx), + x= rep(c(2,0,1,1,0,1,0,1,0), xx), + id= rep(1:9, xx)) > indx <- match(1:9, testw2$id) > testw2 <- data.frame(time= rep(c(1,1,2,2,2,2,3,4,5), xx), + status= rep(c(1,0,1,1,1,0,0,1,0), xx), + x= rep(c(2,0,1,1,0,1,0,1,0), xx), + id= rep(1:9, xx)) > indx <- match(1:9, testw2$id) > > fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, + method='breslow', iter=0) > fit0b <- coxph(Surv(time, status) ~x, testw2, ties='breslow', iter=0) > fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='breslow') > fitb <- coxph(Surv(time, status) ~x, testw2, ties='breslow') > > texp <- function(beta) { # expected, Breslow estimate + r <- exp(beta) + temp <- cumsum(c(1/(r^2 + 11*r +7), 10/(11*r +5), 2/(2*r+1))) + c(r^2, 1,r,r,1,r,1,r,1)* temp[c(1,1,2,2,2,2,2,3,3)] + } > aeq(texp(0), c(1/19, 1/19, rep(103/152, 5), rep(613/456,2))) #verify texp() [1] TRUE > > xbar <- function(beta) { # xbar, Breslow estimate + r <- exp(beta) + temp <- r* rep(c(2*r + 11, 11/10, 1), c(2, 5, 2)) + temp * texp(beta) + } > > fit0 Call: coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, method = "breslow", iter = 0) coef exp(coef) se(coef) z p x 0.0000 1.0000 0.5858 0 1 Likelihood ratio test=0 on 1 df, p=1 n= 9, number of events= 5 > summary(fit) Call: coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, ties = "breslow") n= 9, number of events= 5 coef exp(coef) se(coef) z Pr(>|z|) x 0.8596 2.3621 0.7131 1.205 0.228 exp(coef) exp(-coef) lower .95 upper .95 x 2.362 0.4233 0.5839 9.556 Concordance= 0.637 (se = 0.161 ) Likelihood ratio test= 1.69 on 1 df, p=0.2 Wald test = 1.45 on 1 df, p=0.2 Score (logrank) test = 1.52 on 1 df, p=0.2 > aeq(resid(fit0), testw1$status - texp(0)) [1] TRUE > resid(fit0, type='score') 1 2 3 4 5 6 1.24653740 0.03601108 0.10056700 0.10056700 -0.22180142 -0.21193300 7 8 9 0.46569858 -0.10082189 0.91014302 > resid(fit0, type='scho') 1 2 2 2 4 1.3157895 0.3125000 0.3125000 -0.6875000 0.3333333 > > aeq(resid(fit0, type='mart'), (resid(fit0b, type='mart'))[indx]) [1] TRUE > aeq(resid(fit0, type='scor'), (resid(fit0b, type='scor'))[indx]) [1] TRUE > aeq(unique(resid(fit0, type='scho')), unique(resid(fit0b, type='scho'))) [1] TRUE > > > aeq(resid(fit, type='mart'), testw1$status - texp(fit$coef)) [1] TRUE > resid(fit, type='score') 1 2 3 4 5 6 0.88681615 0.02497653 0.03608964 0.03608964 -0.54297652 -0.12528780 7 8 9 0.29564605 -0.09476911 0.58400064 > resid(fit, type='scho') 1 2 2 2 4 1.0368337 0.1613774 0.1613774 -0.8386226 0.1746960 > aeq(resid(fit, type='mart'), (resid(fitb, type='mart'))[indx]) [1] TRUE > aeq(resid(fit, type='scor'), (resid(fitb, type='scor'))[indx]) [1] TRUE > aeq(unique(resid(fit, type='scho')), unique(resid(fitb, type='scho'))) [1] TRUE > rr1 <- resid(fit, type='mart') > rr2 <- resid(fit, type='mart', weighted=T) > aeq(rr2/rr1, testw1$wt) [1] TRUE > > rr1 <- resid(fit, type='score') > rr2 <- resid(fit, type='score', weighted=T) > aeq(rr2/rr1, testw1$wt) [1] TRUE > > fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='efron') > fit Call: coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, ties = "efron") coef exp(coef) se(coef) z p x 0.8726 2.3931 0.7126 1.225 0.221 Likelihood ratio test=1.75 on 1 df, p=0.1858 n= 9, number of events= 5 > resid(fit, type='mart') 1 2 3 4 5 6 0.85334536 -0.02560716 0.32265266 0.32265266 0.71696234 -1.07772629 7 8 9 -0.45034077 -0.90490339 -0.79598658 > resid(fit, type='score') 1 2 3 4 5 6 0.88116056 0.02477248 0.06057806 0.06057806 -0.59724033 -0.16737066 7 8 9 0.38040295 -0.13750290 0.66631324 > resid(fit, type='scho') 1 2 2 2 4 1.0325955 0.1621759 0.1621759 -0.8378241 0.1728229 > > # Tests of the weighted Cox model, AG form of the data > # Same solution as doweight1.s > # > testw3 <- data.frame(id = c( 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9), + begin= c( 0, 5, 0, 0,10,15, 0, 0,14, 0, 0, 0,23, 0), + time= c( 5,10,10,10,15,20,20,14,20,20,30,23,40,50), + status= c( 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0), + x= c( 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0), + wt = c( 1, 1, 2, 3, 3, 3, 4, 3, 3, 2, 1, 2, 2, 1)) > > fit0 <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, + ties='breslow', iter=0) > fit <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='breslow') > fit0 Call: coxph(formula = Surv(begin, time, status) ~ x, data = testw3, weights = wt, ties = "breslow", iter = 0) coef exp(coef) se(coef) z p x 0.0000 1.0000 0.5858 0 1 Likelihood ratio test=0 on 1 df, p=1 n= 14, number of events= 5 > summary(fit) Call: coxph(formula = Surv(begin, time, status) ~ x, data = testw3, weights = wt, ties = "breslow") n= 14, number of events= 5 coef exp(coef) se(coef) z Pr(>|z|) x 0.8596 2.3621 0.7131 1.205 0.228 exp(coef) exp(-coef) lower .95 upper .95 x 2.362 0.4233 0.5839 9.556 Concordance= 0.637 (se = 0.172 ) Likelihood ratio test= 1.69 on 1 df, p=0.2 Wald test = 1.45 on 1 df, p=0.2 Score (logrank) test = 1.52 on 1 df, p=0.2 > resid(fit0, type='mart', collapse=testw3$id) 1 2 3 4 5 6 0.94736842 -0.05263158 0.32236842 0.32236842 0.32236842 -0.67763158 7 8 9 -0.67763158 -0.34429825 -1.34429825 > resid(fit0, type='score', collapse=testw3$id) 1 2 3 4 5 6 1.24653740 0.03601108 0.10056700 0.10056700 -0.22180142 -0.21193300 7 8 9 0.46569858 -0.10082189 0.91014302 > resid(fit0, type='scho') 10 20 20 20 40 1.3157895 0.3125000 0.3125000 -0.6875000 0.3333333 > > resid(fit, type='mart', collapse=testw3$id) 1 2 3 4 5 6 0.85531186 -0.02593169 0.17636221 0.17636221 0.65131344 -0.82363779 7 8 9 -0.34868656 -0.64894181 -0.69807852 > resid(fit, type='score', collapse=testw3$id) 1 2 3 4 5 6 0.88681615 0.02497653 0.03608964 0.03608964 -0.54297652 -0.12528780 7 8 9 0.29564605 -0.09476911 0.58400064 > resid(fit, type='scho') 10 20 20 20 40 1.0368337 0.1613774 0.1613774 -0.8386226 0.1746960 > fit0 <- coxph(Surv(begin, time, status) ~x,testw3, weights=wt, iter=0) > resid(fit0, 'mart', collapse=testw3$id) 1 2 3 4 5 6 0.94736842 -0.05263158 0.44454887 0.44454887 0.44454887 -0.88126566 7 8 9 -0.88126566 -0.54793233 -1.54793233 > resid(coxph(Surv(begin, time, status) ~1, testw3, weights=wt) + , collapse=testw3$id) #Null model 1 2 3 4 5 6 0.94736842 -0.05263158 0.44454887 0.44454887 0.44454887 -0.88126566 7 8 9 -0.88126566 -0.54793233 -1.54793233 > > fit <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='efron') > fit Call: coxph(formula = Surv(begin, time, status) ~ x, data = testw3, weights = wt, ties = "efron") coef exp(coef) se(coef) z p x 0.8726 2.3931 0.7126 1.225 0.221 Likelihood ratio test=1.75 on 1 df, p=0.1858 n= 14, number of events= 5 > resid(fit, type='mart', collapse=testw3$id) 1 2 3 4 5 6 0.85334536 -0.02560716 0.32265266 0.32265266 0.71696234 -1.07772629 7 8 9 -0.45034077 -0.90490339 -0.79598658 > resid(fit, type='score', collapse=testw3$id) 1 2 3 4 5 6 0.88116056 0.02477248 0.06057806 0.06057806 -0.59724033 -0.16737066 7 8 9 0.38040295 -0.13750290 0.66631324 > resid(fit, type='scho') 10 20 20 20 40 1.0325955 0.1621759 0.1621759 -0.8378241 0.1728229 > # > # Check out the impact of weights on the dfbetas > # Am I computing them correctly? > # > wtemp <- rep(1,26) > wtemp[c(5,10,15)] <- 2:4 > fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp) > rr <- resid(fit, 'dfbeta') > > fit1 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, + subset=(-5)) > fit2 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, + subset=(-10)) > fit3 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, + subset=(-15)) > > # > # Effect of case weights on expected survival curves post Cox model > # > fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='breslow', + iter=0) > fit0b <- coxph(Surv(time, status) ~x, testw2, ties='breslow', iter=0) > > surv1 <- survfit(fit0, newdata=list(x=0)) > surv2 <- survfit(fit0b, newdata=list(x=0)) > aeq(surv1$surv, surv2$surv) [1] TRUE > # > # Check out the Efron approx. > # > > fit0 <- coxph(Surv(time, status) ~x,testw1, weights=wt, iter=0) > fit <- coxph(Surv(time, status) ~x,testw1, weights=wt) > resid(fit0, 'mart') 1 2 3 4 5 6 0.94736842 -0.05263158 0.44454887 0.44454887 0.44454887 -0.88126566 7 8 9 -0.88126566 -0.54793233 -1.54793233 > resid(coxph(Surv(time, status) ~1, testw1, weights=wt)) #Null model 1 2 3 4 5 6 0.94736842 -0.05263158 0.44454887 0.44454887 0.44454887 -0.88126566 7 8 9 -0.88126566 -0.54793233 -1.54793233 > > # lfun is the known log-likelihood for this data set, worked out in the > # appendix of Therneau and Grambsch > # ufun is the score vector and ifun the information matrix > lfun <- function(beta) { + r <- exp(beta) + a <- 7*r +3 + b <- 4*r +2 + 11*beta - ( log(r^2 + 11*r +7) + + (10/3)*(log(a+b) + log(2*a/3 +b) + log(a/3 +b)) + 2*log(2*r +1)) + } > aeq(fit0$log[1], lfun(0)) [1] TRUE > aeq(fit$log[2], lfun(fit$coef)) [1] TRUE > > ufun <- function(beta, efron=T) { #score statistic + r <- exp(beta) + xbar1 <- (2*r^2+11*r)/(r^2+11*r +7) + xbar2 <- 11*r/(11*r +5) + xbar3 <- 2*r/(2*r +1) + xbar2b<- 26*r/(26*r+12) + xbar2c<- 19*r/(19*r + 9) + temp <- 11 - (xbar1 + 2*xbar3) + if (efron) temp - (10/3)*(xbar2 + xbar2b + xbar2c) + else temp - 10*xbar2 + } > print(ufun(fit$coef) < 1e-4) # Should be true x TRUE > > ifun <- function(beta, efron=T) { # information matrix + r <- exp(beta) + xbar1 <- (2*r^2+11*r)/(r^2+11*r +7) + xbar2 <- 11*r/(11*r +5) + xbar3 <- 2*r/(2*r +1) + xbar2b<- 26*r/(26*r+12) + xbar2c<- 19*r/(19*r + 9) + temp <- ((4*r^2 + 11*r)/(r^2+11*r +7) - xbar1^2) + + 2*(xbar3 - xbar3^2) + if (efron) temp + (10/3)*((xbar2- xbar2^2) + (xbar2b - xbar2b^2) + + (xbar2c -xbar2c^2)) + else temp + 10 * (xbar2- xbar2^2) + } > > aeq(fit0$var, 1/ifun(0)) [1] TRUE > aeq(fit$var, 1/ifun(fit$coef)) [1] TRUE > > > > # Make sure that the weights pass through the residuals correctly > rr1 <- resid(fit, type='mart') > rr2 <- resid(fit, type='mart', weighted=T) > aeq(rr2/rr1, testw1$wt) [1] TRUE > rr1 <- resid(fit, type='score') > rr2 <- resid(fit, type='score', weighted=T) > aeq(rr2/rr1, testw1$wt) [1] TRUE > > # > # Look at the individual components > # > dt0 <- coxph.detail(fit0) > dt <- coxph.detail(fit) > aeq(sum(dt$score), ufun(fit$coef)) #score statistic [1] TRUE > aeq(sum(dt0$score), ufun(0)) [1] TRUE > aeq(dt0$hazard, c(1/19, (10/3)*(1/16 + 1/(6+20/3) + 1/(6+10/3)), 2/3)) [1] TRUE > > > > rm(fit, fit0, rr1, rr2, dt, dt0) > # > # Effect of weights on the robust variance > # > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0), + wt= c(3,0,1,1,1,1,1), + id= 1:7) > testx <- data.frame(time= c(4,4,4,1,1,2,2,3), + status=c(1,1,1,1,0,1,1,0), + x= c(0,0,0,1,1,1,0,0), + wt= c(1,1,1,1,1,1,1,1), + id= 1:8) > > fit1 <- coxph(Surv(time, status) ~x, cluster=id, test1, ties='breslow', + weights=wt) > fit2 <- coxph(Surv(time, status) ~x, cluster=id, testx, ties='breslow') > > db1 <- resid(fit1, 'dfbeta', weighted=F) > db1 <- db1[-2] #toss the missing > db2 <- resid(fit2, 'dfbeta') > aeq(db1, db2[3:8]) [1] TRUE > > W <- c(3,1,1,1,1,1) #Weights, after removal of the missing value > aeq(fit2$var, sum(db1*db1*W)) [1] TRUE > aeq(fit1$var, sum(db1*db1*W*W)) [1] TRUE > > > proc.time() user system elapsed 0.986 0.049 1.028