R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: aarch64-unknown-linux-gnu
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> options(na.action=na.exclude) # preserve missings
> options(contrasts=c('contr.treatment', 'contr.poly')) #ensure contrast type
> library(survival)
>
> #
> # The test data set from Turnbull, JASA 1974, 169-73.
> #
> # status 0=right censored
> # 1=exact
> # 2=left censored
> #
> aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...)
>
>
> turnbull <- data.frame( time =c( 1,1,1, 2,2,2, 3,3,3, 4,4,4),
+ status=c( 1,0,2, 1,0,2, 1,0,2, 1,0,2),
+ n =c(12,3,2, 6,2,4, 2,0,2, 3,3,5))
> #
> # Compute the K-M for the Turnbull data
> # via a slow EM calculation
> #
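> # (Each pass below is one EM step: the E-step spreads the weight of each
> # left-censored observation over the death times at or before it, in
> # proportion to the jumps of the current estimate; the M-step refits a
> # weighted Kaplan-Meier with that redistributed weight added as exact deaths.)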
>
> emsurv <- function(time, status, wt, verbose=T) {
+ left.cen <- (status==2)
+ if (!any(left.cen)) stop("No left censored data!")
+ if (!any(status==1))stop("Must have some exact death times")
+
+ tempy <- Surv(time[!left.cen], status[!left.cen])
+ ww <- wt[!left.cen]
+ tempx <- factor(rep(1, sum(!left.cen)))
+ tfit <- survfit(tempy~tempx, weights=ww)
+ if (verbose)
+ cat("Iteration 0, survival=", format(round(tfit$surv[tfit$n.event>0],3)),
+ "\n")
+
+ stimes <- tfit$time[tfit$n.event>0]
+ ltime <- time[left.cen]
+ lwt <- wt[left.cen]
+ tempx <- factor(rep(1, length(stimes) + sum(!left.cen)))
+ tempy <- Surv(c(time[!left.cen], stimes),
+ c(status[!left.cen], rep(1, length(stimes))))
+ for (iter in 1:4) {
+ wt2 <- stimes*0
+ ssurv <- tfit$surv[tfit$n.event>0]
+ sjump <- diff(c(1, ssurv))
+ for (j in 1:(length(ltime))) {
+ k <- sum(ltime[j]>=stimes) #index of the death time
+ if (k==0)
+ stop("Left censored observation before the first death")
+ wt2[1:k] <- wt2[1:k] + lwt[j]*sjump[1:k] /(ssurv[k]-1)
+ }
+ tfit <- survfit(tempy~tempx, weights=c(ww, wt2))
+ if (verbose) {
+ cat("Iteration", iter, "survival=",
+ format(round(tfit$surv[tfit$n.event>0],3)), "\n")
+ cat(" weights=", format(round(wt2,3)), "\n")
+ }
+ }
+ survfit(tempy ~ tempx, weights=c(ww, wt2), robust=FALSE)
+ }
>
> temp <-emsurv(turnbull$time, turnbull$status, turnbull$n)
Iteration 0, survival= 0.613 0.383 0.287 0.144
Iteration 1 survival= 0.549 0.303 0.214 0.094
weights= 7.856 3.477 0.828 0.839
Iteration 2 survival= 0.540 0.296 0.210 0.095
weights= 8.228 3.394 0.714 0.664
Iteration 3 survival= 0.538 0.295 0.210 0.095
weights= 8.315 3.356 0.690 0.638
Iteration 4 survival= 0.538 0.295 0.210 0.095
weights= 8.338 3.342 0.685 0.635
> print(summary(temp))
Call: survfit(formula = tempy ~ tempx, weights = c(ww, wt2), robust = FALSE)
time n.risk n.event survival std.err lower 95% CI upper 95% CI
1 44.00 20.34 0.5378 0.0752 0.4089 0.707
2 20.66 9.34 0.2946 0.0719 0.1827 0.475
3 9.32 2.68 0.2098 0.0673 0.1119 0.393
4 6.64 3.64 0.0948 0.0507 0.0333 0.270
> # First check, use the data from Turnbull, JASA 1974, 169-173.
>
> tdata <- data.frame(time =c(1,1,1,2,2,2,3,3,3,4,4,4),
+ status=rep(c(1,0,2),4),
+ n =c(12,3,2,6,2,4,2,0,2,3,3,5))
>
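> # (status uses the same coding as above: 0=right censored, 1=exact, 2=left censored)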
> tfit <- survfit(Surv(time, time, status, type='interval') ~1, tdata, weights=n)
> all.equal(round(tfit$surv,3), c(.538, .295, .210, .095))
[1] TRUE
>
>
> # Second check, compare to a reversed survival curve
> # This is not as simple a test as one might think, because left and right
> # censored observations are not treated symmetrically by the routine:
> # time <= y for left and time> y for right (this is to make the routine
> # correct for the common situation of panel data).
> # To get equivalence, make the left-censored observations happen just a bit
> # earlier. The left-continuous/right-continuous shift is also a bother.
> #
> test1 <- data.frame(time= c(9, 3,1,1,6,6,8),
+ status=c(1,NA,1,0,1,1,0),
+ x= c(0, 2,1,1,1,0,0))
> fit1 <- survfit(Surv(time, status) ~1, test1)
> temp <- ifelse(test1$status==0, 4.99,5) - test1$time
> fit2 <- survfit(Surv(temp, status, type='left') ~1, test1)
>
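> # fit2 lives on a reversed time axis, so its survival values are read in
> # reverse order and complemented before comparing with fit1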
> all.equal(round(fit1$surv[1:2],5), round(1-fit2$surv[3:2],5))
[1] TRUE
>
> rm(tdata, tfit, fit1, temp, fit2)
> #
> # Create a data set similar to the one provided by Al Zinsmeister
> # It is a hard test case for survfit.turnbull
> #
> time1 <- c(rep(0,100), rep(1,200), 100, 200, 210, 220,
+ rep(365,100), rep(366,5), 731:741)
>
> time2 <- c((1:100)*3, 10+1:100, rep(365:366, c(60,40)), NA, 500, NA, 450,
+ rep(730,90), rep(NA,10), c(528,571,691,730,731),
+ NA, 1095:1099, NA, 1400, 1200, 772, 1461)
>
> zfit <- survfit(Surv(time1, time2, type='interval2') ~1)
>
> #
> # There are 100 intervals of the form (0,x) where x is from 3 to 300,
> # and 200 more of the form (1,x) where x is from 11 to 366. These
> # lead to a mass point in the interval (1,3), which is placed at 2.
> # The starting estimate has far too little mass placed here, and it takes
> # the EM a long time to realize that most of the weight for the first 300
> # subjects goes here. With acceleration, it takes 16 iterations, without
> # it takes >40. (On Al's original data, without acceleration it still wasn't there after
> # 165 iters!)
> #
> # The next 4 obs give rise to potential jumps at 100.5, 200.5, 211.5, and
> # 221. However, the final estimate has no mass at all on any of these.
> # Assume mass of a,b, and c at 2, 100.5 and 365.5, and consider the
> # contributions:
> # 123 obs that overlap a only
> # 137 obs that overlap a and b
> # 40 obs that overlap a, b, c
> # 1 obs that overlap b, c
> # 108 obs that overlap c (200, 210, 220, 365, and 366 starting points)
> # For some trial values of a,b,c, compare the loglik to that of (a+b),0,c
> # First one: a^123 (a+b)^137 (a+b+c)^40 (b+c) c^108
> # Second: (a+b)^123 (a+b)^137 (a+b+c)^40 c c^108
> # Likelihood improves if (1 + b/a)^123 > 1 + b/c, which is true for almost
> # all a and c. In particular, at the solution a and c are approx .7 and
> # .18, respectively.
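> # (For instance, at a=.7, c=.18 with a small mass b=.01 the left side is
> # (1 + .01/.7)^123, about 5.7, while the right side is only about 1.06.)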
> #
> # The program can't see this coming, of course, and so iterates towards a
> # KM with epsilon-sized jumps at 100.5, 200.5, and 211.5. Whether these
> # intervals should be removed during iteration, as detected, is an open
> # question for me.
> #
> #
> # True solution: mass points at 2, 365.5, 408, and 756.5, of sizes a, b, c, d
> # Likelihood: a^260 (a+b)^40 (b+c)^92 (b+c+d)^12 c^5 d^11
> # Solution: a=0.6958, b=0.1674, c=0.1079, d=0.0289
>
> tfun <- function(x) {
+ if (length(x) ==3) x <- c(x, .03)
+ x <- x/sum(x) #make probabilities sum to 1
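+ # the exponents are the observation counts for each set of mass points,
+ # taken from the likelihood written out in the comments above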
+ loglik <- 260*log(x[1]) + 40*log(x[1]+x[2]) + 92*log(x[2] + x[3]) +
+ 12*log(x[2]+x[3]+x[4]) + 5*log(x[3]) + 11*log(x[4])
+ -loglik #find the max, not the min
+ }
>
> nfit <- nlminb(start=c(.7,.15, .1), tfun, lower=0, upper=1)
> nparm <- c(nfit$par, .03)
> nparm <- nparm / sum(nparm)
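> # jump sizes of the fitted curve at the four true mass points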
> zparm <- -diff(c(1, zfit$surv[match(c(2, 365.5, 408, 756.5), zfit$time)]))
> aeq(round(tfun(nparm),4), round(tfun(zparm),4))
[1] TRUE
> # .0001 is the tolerance in survfit.turnbull
>
> rm(tfun, nfit, nparm, zparm, time1, time2, zfit)
>
> proc.time()
user system elapsed
0.451 0.020 0.469