R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # Tests of pseudovalues, by calculating directly from survfit and residuals > # this assumes that residuals.survfit is correct > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > mdata <- mgus2 > temp <- ifelse(mdata$pstat==1, 1, 2*mdata$death) > mdata$event <- factor(temp, 0:2, c("censor", "pcm", "death")) > mdata$etime <- ifelse(mdata$pstat==1, mdata$ptime, mdata$futime) > mdata <- subset(mdata, etime > 12) # remove first year > tvec <- c(10, 100, 200, 365) > > # Single endpoint, one curve > fit1 <- survfit(Surv(ptime, pstat) ~1, mdata) > # a time point before first event, after last event, at an event time, > # and between event times > rr1 <- resid(fit1, tvec) > aeq(colSums(rr1), rep(0,4)) [1] TRUE > sv1 <- summary(fit1, time=tvec, extend=TRUE)$surv > > # one time point > ps1a <- pseudo(fit1, time=100) > aeq(ps1a, sv1[2] + fit1$n*rr1[,2]) [1] TRUE > # multiple > ps1b <- pseudo(fit1, time=tvec) > aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1) [1] TRUE > > # Single endpoint, multiple curves > fit2 <- survfit(Surv(futime, death) ~ sex, mdata) > rr2 <- resid(fit2, time=tvec) > aeq(colSums(rr2), rep(0,4)) [1] TRUE > sv2 <- summary(fit2, time=tvec, extend=TRUE)$surv > sv2 <- t(matrix(sv2, ncol=2)) # row 1= female, row2 = male > > # residuals are the same as for separate models > fit2a <- survfit(Surv(futime, death) ~1, mdata, subset=( sex=='F')) > fit2b <- survfit(Surv(futime, death) ~1, mdata, subset= (sex=='M')) > fem <- (mdata$sex=='F') > rr2a <- resid(fit2a, times=tvec) > rr2b <- resid(fit2b, times=tvec) > aeq(rr2a, rr2[fem,]) # row names won't be equal [1] TRUE > aeq(rr2b, rr2[!fem,]) [1] TRUE > > # one time point > ps2a <- pseudo(fit2a, time=100) > aeq(ps2a, sv2[1,2] + fit2a$n[1]* rr2a[,2]) [1] TRUE > ps2b <- pseudo(fit2b, time=100) > aeq(ps2b, sv2[2,2] + fit2b$n[1]* rr2b[,2]) [1] TRUE > > # overall psuedo are the same as for separate models > # (each row of mdata belongs to a single curve) > ps2c <- pseudo(fit2, time=100) > aeq(ps2c[ fem], ps2a) [1] TRUE > aeq(ps2c[!fem], ps2b) [1] TRUE > > # multiple time points > ps2d <- pseudo(fit2a, times=tvec) > aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a) [1] TRUE > ps2e <- pseudo(fit2b, times=tvec) > aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b) [1] TRUE > > ps2f <- pseudo(fit2, times=tvec) > aeq(ps2d, ps2f[ fem,]) [1] TRUE > aeq(ps2e, ps2f[!fem,]) [1] TRUE > > # Repeat the process for a multi-state model > fit3 <- survfit(Surv(etime, event) ~ sex, mdata) > fit3a <- survfit(Surv(etime, event) ~1, mdata, subset= (sex=='F')) > fit3b <- survfit(Surv(etime, event) ~1, mdata, subset= (sex=='M')) > rr3 <- resid(fit3, times=tvec) > aeq(apply(rr3, 2:3, sum), matrix(0,3,4)) # resids sum to 0 for each state & time [1] TRUE > rr3a <- resid(fit3a, times=tvec) > rr3b <- resid(fit3b, times=tvec) > aeq(rr3[fem,,], rr3a) [1] TRUE > aeq(rr3[!fem,,], rr3b) [1] TRUE > > ps3 <- pseudo(fit3, times=tvec) > ps3a <- pseudo(fit3a, times=tvec) > ps3b <- pseudo(fit3b, times=tvec) > aeq(ps3[ fem,,], ps3a) [1] TRUE > aeq(ps3[!fem,,], ps3b) [1] TRUE > > sv3 <- summary(fit3, times=tvec, extend=TRUE)$pstate > sv3 <- array(sv3, dim=c(4,2,3)) #times, curve, order > # ps3a has dimensions (number obs in fit3a, 3 states, 4 timepoints) > # to each of the 3x4 combinations we need to add the value of the > # survival curve at that time. A loop is easiest > temp1 <- array(0, dim= dim(rr3a)) > temp2 <- array(0, dim= dim(rr3b)) > for (i in 1:3) { # each of the 3 states + for (j in 1:4) { # each of the 4 times + temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j] + temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j] + } + } > aeq(temp1, ps3a) [1] TRUE > aeq(temp2, ps3b) [1] TRUE > > ########################### > # All again, just the same, for cumulative hazards > # Though there are 2 of them, vs 3 states. > # > rr1 <- resid(fit1, tvec, type="cumhaz") > aeq(colSums(rr1), rep(0,4)) [1] TRUE > sv1 <- summary(fit1, time=tvec, extend=TRUE)$cumhaz > > # one time point > ps1a <- pseudo(fit1, time=100, type="cumhaz") > aeq(ps1a, sv1[2] + fit1$n*rr1[,2]) [1] TRUE > # multiple > ps1b <- pseudo(fit1, time=tvec, type="cumhaz") > aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1) [1] TRUE > > # Single endpoint, multiple curves > fit2 <- survfit(Surv(futime, death) ~ sex, mdata) > rr2 <- resid(fit2, time=tvec, type="cumhaz") > aeq(colSums(rr2), rep(0,4)) [1] TRUE > sv2 <- summary(fit2, time=tvec, extend=TRUE)$cumhaz > sv2 <- t(matrix(sv2, ncol=2)) # row 1= female, row2 = male > > # residuals are the same as for separate models > rr2a <- resid(fit2a, times=tvec, type= "cumhaz") > rr2b <- resid(fit2b, times=tvec, type= "cumhaz") > aeq(rr2a, rr2[fem,]) [1] TRUE > aeq(rr2b, rr2[!fem,]) [1] TRUE > > # one time point > ps2a <- pseudo(fit2a, time=100, type="cumhaz") > aeq(ps2a, sv2[1,2] + fit2a$n[1]* rr2a[,2]) [1] TRUE > ps2b <- pseudo(fit2b, time=100, type="cumhaz") > aeq(ps2b, sv2[2,2] + fit2b$n[1]* rr2b[,2]) [1] TRUE > > # overall psuedo are the same as for separate models > # (each row of mdata belongs to a single curve) > ps2c <- pseudo(fit2, time=100, type="cumhaz") > aeq(ps2c[ fem], ps2a) [1] TRUE > aeq(ps2c[!fem], ps2b) [1] TRUE > > # multiple time points > ps2d <- pseudo(fit2a, times=tvec, type="cumhaz") > aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a) [1] TRUE > ps2e <- pseudo(fit2b, times=tvec, type= "cumhaz") > aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b) [1] TRUE > > ps2f <- pseudo(fit2, times=tvec, type="cumhaz") > aeq(ps2d, ps2f[ fem,]) [1] TRUE > aeq(ps2e, ps2f[!fem,]) [1] TRUE > > # Repeat the process for a multi-state model > rr3 <- resid(fit3, times=tvec, type="cumhaz") > aeq(apply(rr3, 2:3, sum), matrix(0, 2,4)) [1] TRUE > rr3a <- resid(fit3a, times=tvec, type="cumhaz") > rr3b <- resid(fit3b, times=tvec, type="cumhaz") > aeq(rr3[fem,,], rr3a) [1] TRUE > aeq(rr3[!fem,,], rr3b) [1] TRUE > > ps3 <- pseudo(fit3, times=tvec, type="cumhaz") > ps3a <- pseudo(fit3a, times=tvec, type="cumhaz") > ps3b <- pseudo(fit3b, times=tvec, type="cumhaz") > aeq(ps3[ fem,,], ps3a) [1] TRUE > aeq(ps3[!fem,,], ps3b) [1] TRUE > > sv3 <- summary(fit3, times=tvec, extend=TRUE)$cumhaz > sv3 <- array(sv3, dim=c(4,2,2)) #times, curve, hazard > # ps3a has dimensions (number obs in fit3a, 4 timepoints, 3 states) > # to each of the 4x3 combinations we need to add the value of the > # survival curve at that time. A loop is easiest > temp1 <- array(0, dim= dim(rr3a)) > temp2 <- array(0, dim= dim(rr3b)) > for (i in 1:2) { # each of the 2 hazard + for (j in 1:4) { # each of the 4 timepoints + temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j] + temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j] + } + } > aeq(temp1, ps3a) [1] TRUE > aeq(temp2, ps3b) [1] TRUE > > ################################################# > # Last, one more time with AUC > # A bit more bother, since summary.survfit only returns AUC for one time > # value at a time. It also does not like times before the first event > # > tvec <- tvec[2:4] > > rr1 <- resid(fit1, tvec, type="auc") > aeq(colSums(rr1), rep(0,3)) [1] TRUE > afun <- function(fit, times) { + ntime <- length(times) + if (length(fit$strata)) xfun <- function(x) x$table[, "rmean"] + else xfun <- function(x) x$table["rmean"] + + temp <- xfun(summary(fit, rmean=times[1])) + if (ntime==1) return(temp) + + result <- matrix(0, ntime, length(temp)) + result[1,] <- temp + for (i in 2:ntime) + result[i,] <- xfun(summary(fit, rmean=times[i])) + drop(result) + } > > sv1 <- afun(fit1, tvec) > > # one time point > ps1a <- pseudo(fit1, time=tvec[1], type="auc") > aeq(ps1a, sv1[1] + fit1$n*rr1[,1]) [1] TRUE > # multiple > ps1b <- pseudo(fit1, time=tvec, type="auc") > aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1) [1] TRUE > > # Single endpoint, multiple curves > rr2 <- resid(fit2, time=tvec, type="auc") > sv2 <- t(afun(fit2, tvec)) > aeq(colSums(rr2), rep(0,3)) [1] TRUE > > # residuals are the same as for separate models > rr2a <- resid(fit2a, times=tvec, type= "auc") > rr2b <- resid(fit2b, times=tvec, type= "auc") > aeq(rr2a, rr2[fem,]) [1] TRUE > aeq(rr2b, rr2[!fem,]) [1] TRUE > > # one time point > ps2a <- pseudo(fit2a, time=100, type="auc") > aeq(ps2a, sv2[1,1] + fit2a$n[1]* rr2a[,1]) [1] TRUE > ps2b <- pseudo(fit2b, time=100, type="auc") > aeq(ps2b, sv2[2,1] + fit2b$n[1]* rr2b[,1]) [1] TRUE > > # overall psuedo are the same as for separate models > # (each row of mdata belongs to a single curve) > ps2c <- pseudo(fit2, time=100, type="auc") > aeq(ps2c[ fem], ps2a) [1] TRUE > aeq(ps2c[!fem], ps2b) [1] TRUE > > # multiple time points > ps2d <- pseudo(fit2a, times=tvec, type="auc") > aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a) [1] TRUE > ps2e <- pseudo(fit2b, times=tvec, type= "auc") > aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b) [1] TRUE > > ps2f <- pseudo(fit2, times=tvec, type="auc") > aeq(ps2d, ps2f[ fem,]) [1] TRUE > aeq(ps2e, ps2f[!fem,]) [1] TRUE > > # Repeat the process for a multi-state model > rr3 <- resid(fit3, times=tvec, type="auc") > aeq(apply(rr3, 2:3, sum), matrix(0, 3,3)) [1] TRUE > rr3a <- resid(fit3a, times=tvec, type="auc") > rr3b <- resid(fit3b, times=tvec, type="auc") > aeq(rr3[fem,,], rr3a) [1] TRUE > aeq(rr3[!fem,,], rr3b) [1] TRUE > > ps3 <- pseudo(fit3, times=tvec, type="auc") > ps3a <- pseudo(fit3a, times=tvec, type="auc") > ps3b <- pseudo(fit3b, times=tvec, type="auc") > aeq(ps3[ fem,,], ps3a) [1] TRUE > aeq(ps3[!fem,,], ps3b) [1] TRUE > > sv3 <- rbind(summary(fit3, rmean=tvec[1])$table[,"rmean"], + summary(fit3, rmean=tvec[2])$table[,"rmean"], + summary(fit3, rmean=tvec[3])$table[,"rmean"]) > sv3 <- array(sv3, dim=c(3,2,3)) #times, curve, state > # ps3a has dimensions (number obs in fit3a, 4 timepoints, 3 states) > # to each of the 4x3 combinations we need to add the value of the > # survival curve at that time. A loop is easiest > temp1 <- array(0, dim= dim(rr3a)) > temp2 <- array(0, dim= dim(rr3b)) > for (i in 1:3) { # each of the 3 states + for (j in 1:3) { # each of the 3 times + temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j] + temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j] + } + } > aeq(temp1, ps3a) [1] TRUE > aeq(temp2, ps3b) [1] TRUE > > # > # a data set with a missing value, and with a group that has only one obs > # a good test of edge cases > # > lfit1 <- survfit(Surv(time, status) ~ ph.ecog, lung) > # This will warn about points beyond the curve; ph.ecog==3 has a single point > # at time=118, and it will have one fewer obs than the data > p1 <- pseudo(lfit1, times=c(100, 200)) Warning message: In pseudo(lfit1, times = c(100, 200)) : requested time points are beyond the end of one or more curves > aeq(dim(p1), c(nrow(lung)-1, 2)) [1] TRUE > > > # This will have rows that match the data > lfit2 <- survfit(Surv(time, status) ~ ph.ecog, lung, na.action= na.exclude) > p2 <- pseudo(lfit2, time=c(100, 200)) Warning message: In pseudo(lfit2, time = c(100, 200)) : requested time points are beyond the end of one or more curves > aeq(dim(p2), c(nrow(lung), 2)) [1] TRUE > all(is.na(p2[is.na(lung$ph.ecog)])) # a row of missing was inserted [1] TRUE > > row3 <- which(!is.na(lung$ph.ecog) & lung$ph.ecog ==3) # the singleton row > all(p2[row3,] == c(1, 0)) [1] TRUE > > > proc.time() user system elapsed 0.622 0.024 0.643