R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: aarch64-unknown-linux-gnu

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(survival)
> # Test data set 1 for Fine-Gray regression
> fdata <- data.frame(time =c(1,2,3,4,4,4,5,5,6,8,8, 9,10,12),
+                     status=factor(c(1,2,0,1,0,0,2,1,0,0,2, 0,1 ,0), 0:2,
+                                   c("cen", "type1", "type2")),
+                     x =c(5,4,3,1,2,1,1,2,2,4,6,1,2, 0),
+                     id = 1:14)
> test1 <- finegray(Surv(time, status) ~., fdata, count="fgcount")
> test2 <- finegray(Surv(time, status) ~x, fdata, etype="type2")
>
> # When creating the censoring time distribution remember that
> # censors happen after deaths, so the distribution does not drop until
> # time 3+, 4+, 6+, 8+ and 9+
> csurv <- list(time=c(0, 3, 4, 6, 8, 9),
+               p = cumprod(c(1, 11/12, 8/10, 5/6, 3/4, 2/3)))
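> # As a check on those drops: the censoring risk sets at 3+, 4+, 6+, 8+ and 9+
> # contain 12, 10, 6, 4 and 3 subjects (later events plus current or later
> # censors), with 1, 2, 1, 1 and 1 censorings, giving the factors
> # 11/12, 8/10, 5/6, 3/4 and 2/3 used above.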
> #
> # For estimation of event type 1, the first subject of event type
> # 2 will have weights of csurv$p over (0,3], (3,4], (4,6], (6,8], (8,9]
> # and (9,12].  All that really matters, however, is the weight at times
> # 1, 4, 5, and 10, the points at which events of type 1 happen.
> #
> # The next subject of event type 2 occurs at time 5, and at time 10 will
> # have a weight of (9,12]/(4,5] = (5/6)(3/4)(2/3) = 5/12.  The last type 2
> # event, at time 8, has a weight of (3/4)(2/3) = 1/2 at time 10.
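> # In numbers csurv$p = 1, 11/12, 11/15, 11/18, 11/24, 11/36, so the two
> # ratios above are (11/36)/(11/15) = 5/12 and (11/36)/(11/18) = 1/2.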
>
> all.equal(test1$id, c(1, 2,2,2,2, 3:6, 7, 7, 8:11, 11, 12:14))
[1] TRUE
> twt <- c(1, csurv$p[c(1,2,3,6)], 1,1,1, 1, 1, 5/12, 1,1,1,
+          1, 1/2, 1,1,1)
> all.equal(test1$fgwt, twt)
[1] TRUE
> # extra obs will end at times found in csurv$time, or max(time)=12
> all.equal(test1$fgstop[test1$fgcount>0], c(4,6,12, 12,12))
[1] TRUE
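> # (Not exercised here: the weighted data set is what a Fine-Gray regression
> # would be fit from, e.g.
> #   coxph(Surv(fgstart, fgstop, fgstatus) ~ x, data=test1, weights=fgwt) )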
>
> #
> # Verify the data reproduces a multi-state curve
> # censoring times may be different in the two setups so only
> # compare at the event times
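> # (1 - KM) of the weighted data should match the cumulative incidence
> # (pstate) of the corresponding state from the multi-state fit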
> sfit <- survfit(Surv(time, status) ~1, fdata)
> sfit1<- survfit(Surv(fgstart, fgstop, fgstatus) ~1, test1, weights=fgwt)
> sfita<- sfit["type1"]
> i1 <- sfita$n.event > 0
> i2 <- sfit1$n.event > 0
> all.equal(sfita$pstate[i1], 1- sfit1$surv[i2])
[1] TRUE
>
> sfitb <- sfit["type2"]
> sfit2 <- survfit(Surv(fgstart, fgstop, fgstatus) ~1, test2, weights=fgwt)
> i1 <- sfitb$n.event > 0
> i2 <- sfit2$n.event > 0
> all.equal(sfitb$pstate[i1], 1- sfit2$surv[i2])
[1] TRUE
>
> # Test strata.  Stack two copies of fdata, with the two event types swapped
> # in the second copy.  The result should reprise test1 (rows 1-19) and
> # test2 (rows 20-38) in a single call.
> fdata2 <- rbind(fdata, fdata)
> fdata2$group <- rep(1:2, each=nrow(fdata))
> temp <- c(1,3,2)[as.numeric(fdata$status)]
> fdata2$status[fdata2$group==2] <- factor(temp, 1:3, levels(fdata$status))
> test3 <- finegray(Surv(time, status) ~ .+ strata(group), fdata2)
> vtemp <- c("fgstart", "fgstop", "fgstatus", "fgwt")
> all.equal(test3[1:19, vtemp], test1[,vtemp])
[1] TRUE
> all.equal(test3[20:38, vtemp], test2[,vtemp], check.attributes=FALSE)
[1] TRUE
>
> #
> # Test data set 2: use the larger MGUS data set
> # Time is in months which leads to lots of ties
> etime <- with(mgus2, ifelse(pstat==0, futime, ptime))
> event <- with(mgus2, ifelse(pstat==0, 2*death, 1))
> e2 <- factor(event, 0:2, c('censor', 'pcm', 'death'))
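> # event = 1 for progression to PCM (the endpoint of interest), 2 for death
> # without progression (the competing risk), and 0 for censored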
> edata <- finegray(Surv(etime, e2) ~ sex + id, mgus2, etype="pcm")
>
> # Build G(t) = the KM of the censoring distribution
> # An event at time x is not "at risk" for censoring at time x (Geskus 2011)
> tt <- sort(unique(etime)) # all the times
> ntime <- length(tt)
> nrisk <- nevent <- double(ntime)
> for (i in 1:ntime) {
+     nrisk[i] <- sum((etime > tt[i] & event >0) | (etime >= tt[i] & event==0))
+     nevent[i] <- sum(etime == tt[i] & event==0)
+ }
> G <- cumprod(1- nevent/nrisk)
>
> # The weight is defined as w(t)= G(t-)/G(s-) where s is the event time
> # for a subject who experiences an endpoint other than the one of interest
> type2 <- event[edata$id]==2 # the rows to be expanded
> # These rows are copied over as is: endpoint 1 and censors
> all(edata$fgstop[!type2] == etime[edata$id[!type2]])
[1] TRUE
> all(edata$fgstart[!type2] ==0)
[1] TRUE
> all(edata$fgwt[!type2] ==1)
[1] TRUE
>
> tdata <- edata[type2,] # expanded rows
> first <- match(tdata$id, tdata$id) # points to the first row for each subject
> Gwt <- c(1, G)[match(tdata$fgstop, tt)] # G(t-)
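> # c(1, G) indexed by the position of fgstop in tt gives the element of G for
> # the previous time point, i.e. G(t-), since G[i] is the value at tt[i]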
> all.equal(tdata$fgwt, Gwt/Gwt[first])
[1] TRUE
>
> # Test data 3, left truncation.
> # Ties are assumed to be ordered as event, censor, entry
> # H(t) = truncation distribution, and is calculated on a reverse time scale
> # Since there is only one row per subject, every obs is a "start" event.
> # Per equations 5 and 6 of Geskus both G and H are right continuous functions
> # (the value at t - epsilon is different from the value at t).
> fdata <- data.frame(time1 = c(0,0,0,3,2,0,0,1,0,7,5, 0, 0, 0),
+                     time2 = c(1,2,3,4,4,4,5,5,6,8,8, 9,10,12),
+                     status= c(1,2,0,1,0,0,2,1,0,0,2, 0, 1 ,0),
+                     x = c(5,4,3,1,2,1,1,2,2,4,6, 1, 2, 0),
+                     id = 1:14)
> tt <- sort(unique(c(fdata$time1, fdata$time2)))
> ntime <- length(tt)
> Grisk <- Gevent <- double(ntime)
> Hrisk <- Hevent <- double(ntime)
> for (i in 1:ntime) {
+     Grisk[i] <- with(fdata, sum((time2 > tt[i] & status >0 & time1 < tt[i]) |
+                                 (time2 >= tt[i] & status ==0 & time1 < tt[i])))
+     Gevent[i]<- with(fdata, sum(time2 == tt[i] & status==0))
+     Hrisk[i] <- with(fdata, sum(time2 > tt[i] & time1 <= tt[i]))
+     Hevent[i]<- with(fdata, sum(time1 == tt[i]))
+ }
> G <- cumprod(1- Gevent/pmax(1,Grisk))
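> # Check G against survfit: subtracting .1 from the event times removes those
> # subjects from the censoring risk set at their own time, so that censoring
> # is the "event" and deaths of either type are not at risk at time t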
> G2 <- survfit(Surv(time1, time2 - .1*(status !=0), status==0) ~1, fdata)
> all.equal(G2$surv[G2$n.event>0], G[Gevent>0])
[1] TRUE
>
> H <- double(ntime)
> # The loop below uses the definition of equation 6 in Geskus
> for (i in 1:ntime)
+     H[i] <- prod((1- Hevent/pmax(1, Hrisk))[-(i:1)])
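> # [-(i:1)] drops elements 1 through i, so H[i] is the product of the factors
> # at the time points after tt[i]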
> H2 <- rev(cumprod(rev(1 - Hevent/pmax(1, Hrisk)))) #alternate form
> H3 <- survfit(Surv(-time2, -time1, rep(1,14)) ~1, fdata) # alternate 3
> # c(0,H) = H(t-), H2 = H(t-) already due to the time reversal
> i2 <- sort(match(unique(fdata$time1), tt)) # time points in H3
> all.equal(c(0, H), c(H2, 1))
[1] TRUE
> all.equal(H2[i2], rev(H3$surv))
[1] TRUE
>
> fg <- finegray(Surv(time1, time2, factor(status, 0:2)) ~ x, id=id, fdata)
> stat2 <- !is.na(match(fg$id, fdata$id[fdata$status==2])) # expanded ids
> all(fg$fgwt[!stat2] ==1) # ordinary rows are left alone
[1] TRUE
> all(fg$fgstart[!stat2] == fdata$time1[fdata$status !=2])
[1] TRUE
> all(fg$fgstop[!stat2] == fdata$time2[fdata$status !=2])
[1] TRUE
>
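> # With delayed entry the weight for an expanded row at time t becomes
> # w(t) = G(t-)H(t-) / {G(s-)H(s-)}, s = the subject's competing event time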
> tdata <- fg[stat2,]
> index <- match(tdata$id, tdata$id) # points to the first row for each
> Gwt <- c(1, G)[match(tdata$fgstop, tt)] # G(t-)
> Hwt <- c(0, H)[match(tdata$fgstop, tt)] # H(t-)
> all.equal(tdata$fgwt, Gwt*Hwt/(Gwt*Hwt)[index])
[1] TRUE
>
> #
> # Test data 4: mgus2 data on age scale
> # The answer is incorrect due to roundoff, but consistent
> #
> start <- mgus2$age # age in years
> end <- start + etime/12 # etime in months
> tt <- sort(unique(c(start, end))) # all the times
> ntime <- length(tt)
> Grisk <- Gevent <- double(ntime)
> Hrisk <- Hevent <- double(ntime)
> for (i in 1:ntime) {
+     Grisk[i] <- sum(((end > tt[i] & event >0) | (end >= tt[i] & event==0)) &
+                     (tt[i] > start))
+     Gevent[i] <- sum(end == tt[i] & event==0)
+     Hrisk[i] <- sum(start <= tt[i] & end > tt[i])
+     Hevent[i] <- sum(start == tt[i])
+ }
> G <- cumprod(1 - Gevent/pmax(1, Grisk)) # pmax to avoid 0/0
> H <- rev(cumprod(rev(1-Hevent/pmax(1,Hrisk))))
> H <- c(H[-1], 1) # make it right continuous
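> # dropping the first element shifts the reverse-time product so that H[i] is
> # the product of the factors after tt[i], the same form as the H of test 3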
>
> wdata <- finegray(Surv(start, end, e2) ~ ., id=id, mgus2, timefix=FALSE)
> type2 <- event[wdata$id]==2 # the rows to be expanded
> tdata <- wdata[type2,]
> first <- match(tdata$id, tdata$id)
>
> Gwt <- c(1, G)[match(tdata$fgstop, tt)] # G(t-)
> Hwt <- c(0, H)[match(tdata$fgstop, tt)] # H(t-)
> all.equal(tdata$fgwt, (Gwt/Gwt[first]) * (Hwt / Hwt[first]))
[1] TRUE
>
>
> proc.time()
   user  system elapsed
  0.486   0.024   0.507