R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: aarch64-unknown-linux-gnu

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(survival)
> aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...)
> #
> # Test the multi-state version of the CI curve
> #
> tdata <- data.frame(id=c(1,1,1,1, 2,2,2, 3,3, 4,4,4,4, 5, 6, 6),
+                     time1=c(0, 10,20,30, 0, 5, 15,  0, 20, 0, 6,18,34, 0, 0,15),
+                     time2=c(10,20,30,40, 5, 15,25, 20, 22, 6,18,34,50,10,15,20),
+                     status=c(1,1,1,1, 1,1,1, 1,0, 1,1,1,0,0,1,0),
+                     event= letters[c(1,2,3,4, 2,4,3, 2,2, 3,1,2,2,1, 1,1)],
+                     wt = c(2,2,2,2, 1,1,1, 3,3, 1,1,1,1, 2, 1,1),
+                     stringsAsFactors=TRUE)
> tdata$stat2 <- factor(tdata$status * as.numeric(tdata$event),
+                       labels=c("censor", levels(tdata$event)))
>          
> fit <- survfit(Surv(time1, time2, stat2) ~1, id=id, weights=wt, tdata,
+                influence=TRUE)
> 
> # The exact figures for testci2.
> # The subject data of  id, weight, (transition time, transition)
> 
> #1: 2 (10, 0->a)  (20, a->b)  (30, b->c)  (40, c->d)  no data after 40=censored
> #2: 1 ( 5, 0->b)  (15, b->d)  (25, d->c)  no data after 25 implies censored then
> #3: 3 (20, 0->b)  (22, censor)
> #4: 1 ( 6, 0->c)  (18, c->a)  (34, a->b)  (50, censor)
> #5: 2 (10, censor)
> #6: 1 (15, 0->a)  (20, censor)
> 
> # Each line below follows a subject through time as a (state, rdist weight) pair
> #  using the redistribute to the right algorithm.
> # RDR algorithm: at each censoring (or last fu) a subject's weight is put into
> #  a "pool" for that state and their weight goes to zero. The pool is
> #  dynamically shared between all members of the state proportional to their
> #  original case weight, when someone leaves they take their portion of the
> #  pool to the new state.
> 
> # Table of case weights and state, blank is weight of zero
> #    time   5    6   10   15    18    20     25    30    34    40    50
> # -----------------------------------------------------------------------
> # id, wt
> #  1, 2     -    -    a    a      a    b      b     c     c     d    
> #  2, 1     b    b    b    d      d    d      c    
> #  3, 3     -    -    -    -      -    b      
> #  4, 1     -    c    c    c      a    a      a     a     b     b     b
> #  5, 2     -    -    -
> #  6, 1     -    -    -    a      a    a      
> 
> # Pool weights
> #         10  10+   15    18    20    20+   22+   25   25+   30   34    40   40+
> #    -     0   2    3/2  3/2    0 
> #    a     0   0    1/2  1/2    1/4   5/4   5/4   5/4  5/4   5/4
> #    b     0   0     0    0     7/4   7/4  19/4  19/4 19/4        5/4   5/4  5/4
> #    c     0   0     0    0     0                      1    23/4 23/4
> #    d     0   0     0    0     0                                      23/4 31/4
> 
> # fit$pstate for time i and state j = total weight at that time/state in the
> #  above table (original weight + redistrib), divided by 10.
> 
> # time            5  6   10    15    18    20     25    30    34    40    50
> truth <- matrix(c(0, 0,   2,    3,    4,    2,     1,    1,    0,    0,    0,
+                   1, 1,   1,    0,    0,    5,     2,    0,    1,    1,    1,
+                   0, 1,   1,    1,    0,    0,     1,    2,    2,    0,    0,
+                   0, 0,   0,    1,    1,    1,     0,    0,    0,    2,    0) +
+                 c(0, 0,   0,   .5,   .5,   1/4,   5/4,  5/4,   0,    0,    0,
+                   0, 0,   0,    0,    0,   7/4,  19/4,   0,   5/4,   5/4,  5/4,
+                   0, 0,   0,    0,    0,    0,     0,  23/4, 23/4,   0,    0, 
+                   0, 0,   0,    0,    0,    0,     0,    0,    0,  23/4,  31/4),
+                 ncol=4)
> truth <- truth[c(1:6, 6:11),]/10  #the explicit censor at 22
> 
> #dimnames(truth) <- list(c(5, 6, 10, 15, 18, 20, 25, 30, 34, 40, 50),
> #                        c('a', 'b', 'c', 'd')
> aeq(truth, fit$pstate[,2:5])
[1] TRUE
> 
> # Test the dfbetas
> # It was a big surprise, but the epsilon where a finite difference approx to
> #  the derivative is most accurate is around 1e-7 = approx sqrt(precision).
> # Smaller eps makes the approximate derivative worse.
> # There is a now a formal test in mstate.R, not approximate.
> 
> # compute the per observation influence first
> n <- nrow(tdata)     
> U <- array(0, dim=c(n, dim(fit$pstate)))
> eps <- sqrt(.Machine$double.eps)
> n <- nrow(tdata)     
> for (i in 1:n) {
+     twt <- tdata$wt
+     twt[i] <- twt[i] + eps
+     tfit <- survfit(Surv(time1, time2, stat2) ~ 1, id=id, tdata,
+                     weights=twt)
+     U[i,,] <- (tfit$pstate - fit$pstate)/eps  #finite difference approx
+ }
> dfbeta <- rowsum(tdata$wt*matrix(U,nrow=n), tdata$id) # per subject
> dfbeta <- array(dfbeta, dim=c(6,12,5))
> aeq(dfbeta, fit$influence, tolerance= eps*10)
[1] TRUE
> 
> aeq(fit$std.err, sqrt(apply(fit$influence.pstate^2, 2:3, sum)))
[1] TRUE
> 
> if (FALSE) {
+     # a plot of the data that helped during creation of the example
+     plot(c(0,50), c(1,6), type='n', xlab='time', ylab='subject')
+     with(tdata, segments(time1, id, time2, id))
+     with(tdata, text(time2, id, as.numeric(stat2)-1, cex=1.5, col=2))
+ }
> 
> if (FALSE) {
+     # The following lines test out 4 error messages in the routine
+     #
+     # Gap in follow-up time, id 2
+     survfit(Surv(c(0,5,9,0,5,0), c(5,9,12, 4, 6, 3), factor(c(0,0,1,1,0,2))) ~1,
+             id=c(1,1,1,2,2,3))
+     # mismatched weights
+     survfit(Surv(c(0,5,9,0,5,0), c(5,9,12, 5, 6, 3), factor(c(0,0,1,1,0,2))) ~1,
+             id=c(1,1,1,2,2,3), weights=c(1,1,2,1,1,4))
+     # in two groups at once
+      survfit(Surv(c(0,5,9,0,5,0), c(5,9,12, 5, 6, 3), factor(c(0,0,1,1,0,2))) ~
+                c(1,1,2,1,1,2), id=c(1,1,1,2,2,3)) 
+     # state change that isn't a state change (went from 1 to 1)
+         survfit(Surv(c(0,5,9,0,5,0), c(5,9,12, 5, 6, 3), factor(c(0,1,1,1,0,2))) ~1,
+             id=c(1,1,1,2,2,3))
+ }
> 
> # Check the start.time option
> #
> # Later work showed this test has to be false.  At time 0 everyone starts in
> # state (s0), but by time 20 many have shifted to another.  fit2 picks up at
> # the right place, but because there is no istate varaible, fit2x starts
> # everyone in (s0) at time 20.  There is no way for survfit to know.
> if (FALSE) {
+ fit2 <- survfit(Surv(time1, time2, stat2) ~1, id=id, weights=wt, tdata,
+                 start.time=20)
+ data2 <- subset(tdata, time2>= 20)
+ fit2x <- survfit(Surv(time1, time2, stat2) ~1, id=id, weights=wt, data2)
+ 
+ ii <- names(fit2)[!(names(fit2) %in%  c("call", "start.time"))]
+ all.equal(unclass(fit2)[ii], unclass(fit2x)[ii])
+ }
> 
> proc.time()
   user  system elapsed 
  0.423   0.024   0.444