CircosHeatmap-aardio/dist/lib/r-library/survival/tests/ekm.Rout.save


R Under development (unstable) (2024-04-01 r86255) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(survival)
> aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...)
> 
> # test for the "extended KM", where subjects change arms midstream
> # (I don't like it statistically, but some use it).  
> 
> tdata <- aml
> tdata$id <- 1:nrow(tdata)
> tdata <- survSplit(Surv(time, status) ~ ., tdata, cut= c(9, 17, 30))
> tdata$trt <- rep(c(1,1,2,2,2), length=nrow(tdata))
> # different weights for different rows of the same subject = hardest case
> tdata$wt <-  rep(1:6, length= nrow(tdata))
> tdata$status[tdata$time==13] <- 1   # force at least 1 tied event
> 
> # not exported, but used in byhand
> if (!exists("survflag")) survflag <- survival:::survflag
> 
> byhand <- function(t1, t2, status, grp, id, wt, debug=FALSE) {
+     if (missing(wt)) wt <- rep(1, length(t1))
+     ugrp <- unique(grp)
+     ngrp <- length(ugrp)
+     out <- vector("list", ngrp)
+     names(out) <- ugrp
+     pos <- survflag(Surv(t1, t2, status), id, grp)
+     for (i in ugrp) { # create this curve
+         keep <- (grp ==i)
+         n <- sum(keep)
+         utime <- sort(unique(c(t1[keep], t2[keep])))
+         ntime <- length(utime)
+         nrisk <- ncensor <- nevent <- entry <- double(ntime)
+         surv <- cumhaz <- double(ntime)
+         U <- C <- matrix(0, n, ntime) # influence
+         U2 <- dN <- U  # portions of U, useful for debugging the AUC
+         utemp <- ctemp <- double(n)
+         km <- 1.0; chaz <- 0
+         for (j in 1:ntime) {
+             atrisk <- (keep & t1 < utime[j] &  t2 >= utime[j])
+             nrisk[j] <- sum(wt[atrisk])
+             nevent[j] <- sum(wt[keep & t2== utime[j]  & status ==1])
+             ncensor[j] <- sum(wt[keep & t2==utime[j]  & status==0 & pos >1])
+             entry[j] <- sum(wt[keep & t1== utime[j] & pos%%2 ==1])
+             if (nrisk[j] >0) {
+                 km <- km * (nrisk[j]- nevent[j])/ nrisk[j]
+                 chaz <- chaz + nevent[j]/nrisk[j]
+             }
+             surv[j] =km
+             cumhaz[j] =chaz
+ 
+             # influence
+             if (nrisk[j] > 0) {
+                 haz <- nevent[j]/nrisk[j]
+                 death <- (t2[keep]== utime[j]  & status[keep] ==1)
+                 temp <- double(n)
+                 temp[death] <- 1/nrisk[j]
+                 temp[atrisk[keep]] <- temp[atrisk[keep]] - haz/nrisk[j]
+                 ctemp <- ctemp + temp
+                 if (haz <1) utemp <- utemp - temp/(1-haz) else utemp <- 0
+                 dN[death,j] <- 1/(nrisk[j]* (1-haz))
+                 U2[atrisk[keep],j] <- haz/(nrisk[j] * (1-haz))
+             }
+             U[,j] <- utemp*km
+             C[,j] <- ctemp
+         }   
+         out[[i]] <- list(n.id = length(unique(id[keep])), n= n,
+                          time= utime, n.enter=entry, n.risk=nrisk, 
+                          n.event=nevent, n.censor=  ncensor, surv= surv,
+                          cumhaz = cumhaz, U=U, C=C, U2=U2, dN=dN)
+         }
+     out
+ }
> 
> true <- with(tdata, byhand(tstart, time, status, trt, id, wt))
> ekm <- survfit(Surv(tstart, time, status) ~ trt, tdata, id=id, entry=TRUE,
+                influence= TRUE, weights=wt)
> 
> aeq(ekm$n.id,    unlist(lapply(true, function(x) x$n.id)))
[1] TRUE
> aeq(ekm$n,       unlist(lapply(true, function(x) x$n)))
[1] TRUE
> aeq(ekm$time,    unlist(lapply(true, function(x) x$time)))
[1] TRUE
> aeq(ekm$n.risk,  unlist(lapply(true, function(x) x$n.risk)))
[1] TRUE
> aeq(ekm$n.enter, unlist(lapply(true, function(x) x$n.enter)))
[1] TRUE
> aeq(ekm$n.event, unlist(lapply(true, function(x) x$n.event)))
[1] TRUE
> aeq(ekm$n.censor,unlist(lapply(true, function(x) x$n.censor)))
[1] TRUE
> aeq(ekm$surv    ,unlist(lapply(true, function(x) x$surv)))
[1] TRUE
> 
> # The byhand function gives per-observation influence, ekm has per-subject,
> #  residuals can do either, but will fail with an error for this data when
> #  collapse=TRUE with a message "same id appears in multiple curves "
> rr <- residuals(ekm, times= c(9, 17, 30, 45), type="cumhaz")
> aeq(rr[tdata$trt==1,], true[[1]]$C[, match(c(9,17,30,45), true[[1]]$time)])
[1] TRUE
> aeq(rr[tdata$trt==2,], true[[2]]$C[, match(c(9,17,30,45), true[[2]]$time)])
[1] TRUE
> 
> rr <- residuals(ekm, times= c(9, 17, 30, 45), type= "pstate")
> aeq(rr[tdata$trt==1,], true[[1]]$U[, match(c(9,17,30,45), true[[1]]$time)])
[1] TRUE
> aeq(rr[tdata$trt==2,], true[[2]]$U[, match(c(9,17,30,45), true[[2]]$time)])
[1] TRUE
> 
> # Check influence returned by survfit
> tdata1 <- subset(tdata, trt==1)
> tdata2 <- subset(tdata, trt==2)
> inf1 <- rowsum(tdata1$wt* true[[1]]$U, tdata1$id, reorder=FALSE)
> inf2 <- rowsum(tdata2$wt* true[[2]]$U, tdata2$id, reorder=FALSE)
> aeq(inf1, ekm$influence.surv[[1]])
[1] TRUE
> aeq(inf2, ekm$influence.surv[[2]])
[1] TRUE
> 
> c1 <- rowsum(tdata1$wt* true[[1]]$C, tdata$id[tdata$trt==1], reorder=FALSE)
> c2 <- rowsum(tdata2$wt* true[[2]]$C, tdata$id[tdata$trt==2], reorder=FALSE)
> aeq(c1, ekm$influence.chaz[[1]])
[1] TRUE
> aeq(c2, ekm$influence.chaz[[2]])
[1] TRUE
> 
> # Look at the AUC
> t1 <- true[[1]]
> width <- diff(c(t1$time, 50)) 
> aucr <- width* t1$surv  # the rectangles that make up auc(50)
> ainf <- t1$U %*% diag(width) # row i= influence of obs i on each rectangle
> aucinf <- t(apply(ainf,1,cumsum))
> 
> rr <- residuals(ekm, times= c(t1$time[-1], 50), type= "auc")
> aeq(rr[tdata$trt==1,], aucinf)
[1] TRUE
> 
> # use factor(status) to force the multi-state code
> ekm2 <- survfit(Surv(tstart, time, factor(status)) ~ trt, tdata, id=id, 
+                 entry=TRUE, influence= TRUE, weights=wt)
> 
> aeq(ekm$n,        ekm2$n)
[1] TRUE
> aeq(ekm$time,     ekm2$time)
[1] TRUE
> aeq(ekm$n.risk,   ekm2$n.risk[,1])
[1] TRUE
> aeq(ekm$n.event,  ekm2$n.event[,2])
[1] TRUE
> aeq(ekm$n.censor, ekm2$n.censor[,1])
[1] TRUE
> aeq(ekm$n.enter,  ekm2$n.enter[,1])
[1] TRUE
> aeq(ekm$surv,     ekm2$pstate[,1])
[1] TRUE
> aeq(ekm$std.err,  ekm2$std.err[,1])
[1] TRUE
> aeq(ekm$cumhaz,   ekm2$cumhaz[,1])
[1] TRUE
> aeq(ekm$std.chaz, ekm2$std.chaz[,1])
[1] TRUE
> aeq(ekm$strata,   ekm2$strata)
[1] TRUE
> aeq(ekm$n.id,     ekm2$n.id)
[1] TRUE
> aeq(ekm$counts, 
+     ekm2$counts[,c("nrisk:1", "ntrans:1.2", "ncensor:1", "nenter:1")])
[1] TRUE
> aeq(ekm$influence.surv[[1]], ekm2$influence.pstate[[1]][,,1])
[1] TRUE
> aeq(ekm$influence.surv[[2]], ekm2$influence.pstate[[2]][,,1])
[1] TRUE
> 
> 
> proc.time()
   user  system elapsed 
  0.971   0.071   1.034
首次上传 2025-01-12 00:52:51 +08:00
			`R Under development (unstable) (2024-04-01 r86255) -- "Unsuffered Consequences"`
			`Copyright (C) 2024 The R Foundation for Statistical Computing`
			`Platform: x86_64-pc-linux-gnu`

			`R is free software and comes with ABSOLUTELY NO WARRANTY.`
			`You are welcome to redistribute it under certain conditions.`
			`Type 'license()' or 'licence()' for distribution details.`

			`R is a collaborative project with many contributors.`
			`Type 'contributors()' for more information and`
			`'citation()' on how to cite R or R packages in publications.`

			`Type 'demo()' for some demos, 'help()' for on-line help, or`
			`'help.start()' for an HTML browser interface to help.`
			`Type 'q()' to quit R.`

			`> library(survival)`
			`> aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...)`
			`>`
			`> # test for the "extended KM", where subjects change arms midstream`
			`> # (I don't like it statistically, but some use it).`
			`>`
			`> tdata <- aml`
			`> tdata$id <- 1:nrow(tdata)`
			`> tdata <- survSplit(Surv(time, status) ~ ., tdata, cut= c(9, 17, 30))`
			`> tdata$trt <- rep(c(1,1,2,2,2), length=nrow(tdata))`
			`> # different weights for different rows of the same subject = hardest case`
			`> tdata$wt <- rep(1:6, length= nrow(tdata))`
			`> tdata$status[tdata$time==13] <- 1 # force at least 1 tied event`
			`>`
			`> # not exported, but used in byhand`
			`> if (!exists("survflag")) survflag <- survival:::survflag`
			`>`
			`> byhand <- function(t1, t2, status, grp, id, wt, debug=FALSE) {`
			`+ if (missing(wt)) wt <- rep(1, length(t1))`
			`+ ugrp <- unique(grp)`
			`+ ngrp <- length(ugrp)`
			`+ out <- vector("list", ngrp)`
			`+ names(out) <- ugrp`
			`+ pos <- survflag(Surv(t1, t2, status), id, grp)`
			`+ for (i in ugrp) { # create this curve`
			`+ keep <- (grp ==i)`
			`+ n <- sum(keep)`
			`+ utime <- sort(unique(c(t1[keep], t2[keep])))`
			`+ ntime <- length(utime)`
			`+ nrisk <- ncensor <- nevent <- entry <- double(ntime)`
			`+ surv <- cumhaz <- double(ntime)`
			`+ U <- C <- matrix(0, n, ntime) # influence`
			`+ U2 <- dN <- U # portions of U, useful for debugging the AUC`
			`+ utemp <- ctemp <- double(n)`
			`+ km <- 1.0; chaz <- 0`
			`+ for (j in 1:ntime) {`
			`+ atrisk <- (keep & t1 < utime[j] & t2 >= utime[j])`
			`+ nrisk[j] <- sum(wt[atrisk])`
			`+ nevent[j] <- sum(wt[keep & t2== utime[j] & status ==1])`
			`+ ncensor[j] <- sum(wt[keep & t2==utime[j] & status==0 & pos >1])`
			`+ entry[j] <- sum(wt[keep & t1== utime[j] & pos%%2 ==1])`
			`+ if (nrisk[j] >0) {`
			`+ km <- km * (nrisk[j]- nevent[j])/ nrisk[j]`
			`+ chaz <- chaz + nevent[j]/nrisk[j]`
			`+ }`
			`+ surv[j] =km`
			`+ cumhaz[j] =chaz`
			`+`
			`+ # influence`
			`+ if (nrisk[j] > 0) {`
			`+ haz <- nevent[j]/nrisk[j]`
			`+ death <- (t2[keep]== utime[j] & status[keep] ==1)`
			`+ temp <- double(n)`
			`+ temp[death] <- 1/nrisk[j]`
			`+ temp[atrisk[keep]] <- temp[atrisk[keep]] - haz/nrisk[j]`
			`+ ctemp <- ctemp + temp`
			`+ if (haz <1) utemp <- utemp - temp/(1-haz) else utemp <- 0`
			`+ dN[death,j] <- 1/(nrisk[j]* (1-haz))`
			`+ U2[atrisk[keep],j] <- haz/(nrisk[j] * (1-haz))`
			`+ }`
			`+ U[,j] <- utemp*km`
			`+ C[,j] <- ctemp`
			`+ }`
			`+ out[[i]] <- list(n.id = length(unique(id[keep])), n= n,`
			`+ time= utime, n.enter=entry, n.risk=nrisk,`
			`+ n.event=nevent, n.censor= ncensor, surv= surv,`
			`+ cumhaz = cumhaz, U=U, C=C, U2=U2, dN=dN)`
			`+ }`
			`+ out`
			`+ }`
			`>`
			`> true <- with(tdata, byhand(tstart, time, status, trt, id, wt))`
			`> ekm <- survfit(Surv(tstart, time, status) ~ trt, tdata, id=id, entry=TRUE,`
			`+ influence= TRUE, weights=wt)`
			`>`
			`> aeq(ekm$n.id, unlist(lapply(true, function(x) x$n.id)))`
			`[1] TRUE`
			`> aeq(ekm$n, unlist(lapply(true, function(x) x$n)))`
			`[1] TRUE`
			`> aeq(ekm$time, unlist(lapply(true, function(x) x$time)))`
			`[1] TRUE`
			`> aeq(ekm$n.risk, unlist(lapply(true, function(x) x$n.risk)))`
			`[1] TRUE`
			`> aeq(ekm$n.enter, unlist(lapply(true, function(x) x$n.enter)))`
			`[1] TRUE`
			`> aeq(ekm$n.event, unlist(lapply(true, function(x) x$n.event)))`
			`[1] TRUE`
			`> aeq(ekm$n.censor,unlist(lapply(true, function(x) x$n.censor)))`
			`[1] TRUE`
			`> aeq(ekm$surv ,unlist(lapply(true, function(x) x$surv)))`
			`[1] TRUE`
			`>`
			`> # The byhand function gives per-observation influence, ekm has per-subject,`
			`> # residuals can do either, but will fail with an error for this data when`
			`> # collapse=TRUE with a message "same id appears in multiple curves "`
			`> rr <- residuals(ekm, times= c(9, 17, 30, 45), type="cumhaz")`
			`> aeq(rr[tdata$trt==1,], true[[1]]$C[, match(c(9,17,30,45), true[[1]]$time)])`
			`[1] TRUE`
			`> aeq(rr[tdata$trt==2,], true[[2]]$C[, match(c(9,17,30,45), true[[2]]$time)])`
			`[1] TRUE`
			`>`
			`> rr <- residuals(ekm, times= c(9, 17, 30, 45), type= "pstate")`
			`> aeq(rr[tdata$trt==1,], true[[1]]$U[, match(c(9,17,30,45), true[[1]]$time)])`
			`[1] TRUE`
			`> aeq(rr[tdata$trt==2,], true[[2]]$U[, match(c(9,17,30,45), true[[2]]$time)])`
			`[1] TRUE`
			`>`
			`> # Check influence returned by survfit`
			`> tdata1 <- subset(tdata, trt==1)`
			`> tdata2 <- subset(tdata, trt==2)`
			`> inf1 <- rowsum(tdata1$wt* true[[1]]$U, tdata1$id, reorder=FALSE)`
			`> inf2 <- rowsum(tdata2$wt* true[[2]]$U, tdata2$id, reorder=FALSE)`
			`> aeq(inf1, ekm$influence.surv[[1]])`
			`[1] TRUE`
			`> aeq(inf2, ekm$influence.surv[[2]])`
			`[1] TRUE`
			`>`
			`> c1 <- rowsum(tdata1$wt* true[[1]]$C, tdata$id[tdata$trt==1], reorder=FALSE)`
			`> c2 <- rowsum(tdata2$wt* true[[2]]$C, tdata$id[tdata$trt==2], reorder=FALSE)`
			`> aeq(c1, ekm$influence.chaz[[1]])`
			`[1] TRUE`
			`> aeq(c2, ekm$influence.chaz[[2]])`
			`[1] TRUE`
			`>`
			`> # Look at the AUC`
			`> t1 <- true[[1]]`
			`> width <- diff(c(t1$time, 50))`
			`> aucr <- width* t1$surv # the rectangles that make up auc(50)`
			`> ainf <- t1$U %*% diag(width) # row i= influence of obs i on each rectangle`
			`> aucinf <- t(apply(ainf,1,cumsum))`
			`>`
			`> rr <- residuals(ekm, times= c(t1$time[-1], 50), type= "auc")`
			`> aeq(rr[tdata$trt==1,], aucinf)`
			`[1] TRUE`
			`>`
			`> # use factor(status) to force the multi-state code`
			`> ekm2 <- survfit(Surv(tstart, time, factor(status)) ~ trt, tdata, id=id,`
			`+ entry=TRUE, influence= TRUE, weights=wt)`
			`>`
			`> aeq(ekm$n, ekm2$n)`
			`[1] TRUE`
			`> aeq(ekm$time, ekm2$time)`
			`[1] TRUE`
			`> aeq(ekm$n.risk, ekm2$n.risk[,1])`
			`[1] TRUE`
			`> aeq(ekm$n.event, ekm2$n.event[,2])`
			`[1] TRUE`
			`> aeq(ekm$n.censor, ekm2$n.censor[,1])`
			`[1] TRUE`
			`> aeq(ekm$n.enter, ekm2$n.enter[,1])`
			`[1] TRUE`
			`> aeq(ekm$surv, ekm2$pstate[,1])`
			`[1] TRUE`
			`> aeq(ekm$std.err, ekm2$std.err[,1])`
			`[1] TRUE`
			`> aeq(ekm$cumhaz, ekm2$cumhaz[,1])`
			`[1] TRUE`
			`> aeq(ekm$std.chaz, ekm2$std.chaz[,1])`
			`[1] TRUE`
			`> aeq(ekm$strata, ekm2$strata)`
			`[1] TRUE`
			`> aeq(ekm$n.id, ekm2$n.id)`
			`[1] TRUE`
			`> aeq(ekm$counts,`
			`+ ekm2$counts[,c("nrisk:1", "ntrans:1.2", "ncensor:1", "nenter:1")])`
			`[1] TRUE`
			`> aeq(ekm$influence.surv[[1]], ekm2$influence.pstate[[1]][,,1])`
			`[1] TRUE`
			`> aeq(ekm$influence.surv[[2]], ekm2$influence.pstate[[2]][,,1])`
			`[1] TRUE`
			`>`
			`>`
			`> proc.time()`
			`user system elapsed`
			`0.971 0.071 1.034`