297 lines
9.7 KiB
R
297 lines
9.7 KiB
R
# Tests of pseudovalues, by calculating directly from survfit and residuals
|
|
# this assumes that residuals.survfit is correct
|
|
library(survival)
|
|
aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...)
|
|
|
|
mdata <- mgus2
|
|
temp <- ifelse(mdata$pstat==1, 1, 2*mdata$death)
|
|
mdata$event <- factor(temp, 0:2, c("censor", "pcm", "death"))
|
|
mdata$etime <- ifelse(mdata$pstat==1, mdata$ptime, mdata$futime)
|
|
mdata <- subset(mdata, etime > 12) # remove first year
|
|
tvec <- c(10, 100, 200, 365)
|
|
|
|
# Single endpoint, one curve
|
|
fit1 <- survfit(Surv(ptime, pstat) ~1, mdata)
|
|
# a time point before first event, after last event, at an event time,
|
|
# and between event times
|
|
rr1 <- resid(fit1, tvec)
|
|
aeq(colSums(rr1), rep(0,4))
|
|
sv1 <- summary(fit1, time=tvec, extend=TRUE)$surv
|
|
|
|
# one time point
|
|
ps1a <- pseudo(fit1, time=100)
|
|
aeq(ps1a, sv1[2] + fit1$n*rr1[,2])
|
|
# multiple
|
|
ps1b <- pseudo(fit1, time=tvec)
|
|
aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1)
|
|
|
|
# Single endpoint, multiple curves
|
|
fit2 <- survfit(Surv(futime, death) ~ sex, mdata)
|
|
rr2 <- resid(fit2, time=tvec)
|
|
aeq(colSums(rr2), rep(0,4))
|
|
sv2 <- summary(fit2, time=tvec, extend=TRUE)$surv
|
|
sv2 <- t(matrix(sv2, ncol=2)) # row 1= female, row2 = male
|
|
|
|
# residuals are the same as for separate models
|
|
fit2a <- survfit(Surv(futime, death) ~1, mdata, subset=( sex=='F'))
|
|
fit2b <- survfit(Surv(futime, death) ~1, mdata, subset= (sex=='M'))
|
|
fem <- (mdata$sex=='F')
|
|
rr2a <- resid(fit2a, times=tvec)
|
|
rr2b <- resid(fit2b, times=tvec)
|
|
aeq(rr2a, rr2[fem,]) # row names won't be equal
|
|
aeq(rr2b, rr2[!fem,])
|
|
|
|
# one time point
|
|
ps2a <- pseudo(fit2a, time=100)
|
|
aeq(ps2a, sv2[1,2] + fit2a$n[1]* rr2a[,2])
|
|
ps2b <- pseudo(fit2b, time=100)
|
|
aeq(ps2b, sv2[2,2] + fit2b$n[1]* rr2b[,2])
|
|
|
|
# overall psuedo are the same as for separate models
|
|
# (each row of mdata belongs to a single curve)
|
|
ps2c <- pseudo(fit2, time=100)
|
|
aeq(ps2c[ fem], ps2a)
|
|
aeq(ps2c[!fem], ps2b)
|
|
|
|
# multiple time points
|
|
ps2d <- pseudo(fit2a, times=tvec)
|
|
aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a)
|
|
ps2e <- pseudo(fit2b, times=tvec)
|
|
aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b)
|
|
|
|
ps2f <- pseudo(fit2, times=tvec)
|
|
aeq(ps2d, ps2f[ fem,])
|
|
aeq(ps2e, ps2f[!fem,])
|
|
|
|
# Repeat the process for a multi-state model
|
|
fit3 <- survfit(Surv(etime, event) ~ sex, mdata)
|
|
fit3a <- survfit(Surv(etime, event) ~1, mdata, subset= (sex=='F'))
|
|
fit3b <- survfit(Surv(etime, event) ~1, mdata, subset= (sex=='M'))
|
|
rr3 <- resid(fit3, times=tvec)
|
|
aeq(apply(rr3, 2:3, sum), matrix(0,3,4)) # resids sum to 0 for each state & time
|
|
rr3a <- resid(fit3a, times=tvec)
|
|
rr3b <- resid(fit3b, times=tvec)
|
|
aeq(rr3[fem,,], rr3a)
|
|
aeq(rr3[!fem,,], rr3b)
|
|
|
|
ps3 <- pseudo(fit3, times=tvec)
|
|
ps3a <- pseudo(fit3a, times=tvec)
|
|
ps3b <- pseudo(fit3b, times=tvec)
|
|
aeq(ps3[ fem,,], ps3a)
|
|
aeq(ps3[!fem,,], ps3b)
|
|
|
|
sv3 <- summary(fit3, times=tvec, extend=TRUE)$pstate
|
|
sv3 <- array(sv3, dim=c(4,2,3)) #times, curve, order
|
|
# ps3a has dimensions (number obs in fit3a, 3 states, 4 timepoints)
|
|
# to each of the 3x4 combinations we need to add the value of the
|
|
# survival curve at that time. A loop is easiest
|
|
temp1 <- array(0, dim= dim(rr3a))
|
|
temp2 <- array(0, dim= dim(rr3b))
|
|
for (i in 1:3) { # each of the 3 states
|
|
for (j in 1:4) { # each of the 4 times
|
|
temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j]
|
|
temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j]
|
|
}
|
|
}
|
|
aeq(temp1, ps3a)
|
|
aeq(temp2, ps3b)
|
|
|
|
###########################
|
|
# All again, just the same, for cumulative hazards
|
|
# Though there are 2 of them, vs 3 states.
|
|
#
|
|
rr1 <- resid(fit1, tvec, type="cumhaz")
|
|
aeq(colSums(rr1), rep(0,4))
|
|
sv1 <- summary(fit1, time=tvec, extend=TRUE)$cumhaz
|
|
|
|
# one time point
|
|
ps1a <- pseudo(fit1, time=100, type="cumhaz")
|
|
aeq(ps1a, sv1[2] + fit1$n*rr1[,2])
|
|
# multiple
|
|
ps1b <- pseudo(fit1, time=tvec, type="cumhaz")
|
|
aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1)
|
|
|
|
# Single endpoint, multiple curves
|
|
fit2 <- survfit(Surv(futime, death) ~ sex, mdata)
|
|
rr2 <- resid(fit2, time=tvec, type="cumhaz")
|
|
aeq(colSums(rr2), rep(0,4))
|
|
sv2 <- summary(fit2, time=tvec, extend=TRUE)$cumhaz
|
|
sv2 <- t(matrix(sv2, ncol=2)) # row 1= female, row2 = male
|
|
|
|
# residuals are the same as for separate models
|
|
rr2a <- resid(fit2a, times=tvec, type= "cumhaz")
|
|
rr2b <- resid(fit2b, times=tvec, type= "cumhaz")
|
|
aeq(rr2a, rr2[fem,])
|
|
aeq(rr2b, rr2[!fem,])
|
|
|
|
# one time point
|
|
ps2a <- pseudo(fit2a, time=100, type="cumhaz")
|
|
aeq(ps2a, sv2[1,2] + fit2a$n[1]* rr2a[,2])
|
|
ps2b <- pseudo(fit2b, time=100, type="cumhaz")
|
|
aeq(ps2b, sv2[2,2] + fit2b$n[1]* rr2b[,2])
|
|
|
|
# overall psuedo are the same as for separate models
|
|
# (each row of mdata belongs to a single curve)
|
|
ps2c <- pseudo(fit2, time=100, type="cumhaz")
|
|
aeq(ps2c[ fem], ps2a)
|
|
aeq(ps2c[!fem], ps2b)
|
|
|
|
# multiple time points
|
|
ps2d <- pseudo(fit2a, times=tvec, type="cumhaz")
|
|
aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a)
|
|
ps2e <- pseudo(fit2b, times=tvec, type= "cumhaz")
|
|
aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b)
|
|
|
|
ps2f <- pseudo(fit2, times=tvec, type="cumhaz")
|
|
aeq(ps2d, ps2f[ fem,])
|
|
aeq(ps2e, ps2f[!fem,])
|
|
|
|
# Repeat the process for a multi-state model
|
|
rr3 <- resid(fit3, times=tvec, type="cumhaz")
|
|
aeq(apply(rr3, 2:3, sum), matrix(0, 2,4))
|
|
rr3a <- resid(fit3a, times=tvec, type="cumhaz")
|
|
rr3b <- resid(fit3b, times=tvec, type="cumhaz")
|
|
aeq(rr3[fem,,], rr3a)
|
|
aeq(rr3[!fem,,], rr3b)
|
|
|
|
ps3 <- pseudo(fit3, times=tvec, type="cumhaz")
|
|
ps3a <- pseudo(fit3a, times=tvec, type="cumhaz")
|
|
ps3b <- pseudo(fit3b, times=tvec, type="cumhaz")
|
|
aeq(ps3[ fem,,], ps3a)
|
|
aeq(ps3[!fem,,], ps3b)
|
|
|
|
sv3 <- summary(fit3, times=tvec, extend=TRUE)$cumhaz
|
|
sv3 <- array(sv3, dim=c(4,2,2)) #times, curve, hazard
|
|
# ps3a has dimensions (number obs in fit3a, 4 timepoints, 3 states)
|
|
# to each of the 4x3 combinations we need to add the value of the
|
|
# survival curve at that time. A loop is easiest
|
|
temp1 <- array(0, dim= dim(rr3a))
|
|
temp2 <- array(0, dim= dim(rr3b))
|
|
for (i in 1:2) { # each of the 2 hazard
|
|
for (j in 1:4) { # each of the 4 timepoints
|
|
temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j]
|
|
temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j]
|
|
}
|
|
}
|
|
aeq(temp1, ps3a)
|
|
aeq(temp2, ps3b)
|
|
|
|
#################################################
|
|
# Last, one more time with AUC
|
|
# A bit more bother, since summary.survfit only returns AUC for one time
|
|
# value at a time. It also does not like times before the first event
|
|
#
|
|
tvec <- tvec[2:4]
|
|
|
|
rr1 <- resid(fit1, tvec, type="auc")
|
|
aeq(colSums(rr1), rep(0,3))
|
|
afun <- function(fit, times) {
|
|
ntime <- length(times)
|
|
if (length(fit$strata)) xfun <- function(x) x$table[, "rmean"]
|
|
else xfun <- function(x) x$table["rmean"]
|
|
|
|
temp <- xfun(summary(fit, rmean=times[1]))
|
|
if (ntime==1) return(temp)
|
|
|
|
result <- matrix(0, ntime, length(temp))
|
|
result[1,] <- temp
|
|
for (i in 2:ntime)
|
|
result[i,] <- xfun(summary(fit, rmean=times[i]))
|
|
drop(result)
|
|
}
|
|
|
|
sv1 <- afun(fit1, tvec)
|
|
|
|
# one time point
|
|
ps1a <- pseudo(fit1, time=tvec[1], type="auc")
|
|
aeq(ps1a, sv1[1] + fit1$n*rr1[,1])
|
|
# multiple
|
|
ps1b <- pseudo(fit1, time=tvec, type="auc")
|
|
aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1)
|
|
|
|
# Single endpoint, multiple curves
|
|
rr2 <- resid(fit2, time=tvec, type="auc")
|
|
sv2 <- t(afun(fit2, tvec))
|
|
aeq(colSums(rr2), rep(0,3))
|
|
|
|
# residuals are the same as for separate models
|
|
rr2a <- resid(fit2a, times=tvec, type= "auc")
|
|
rr2b <- resid(fit2b, times=tvec, type= "auc")
|
|
aeq(rr2a, rr2[fem,])
|
|
aeq(rr2b, rr2[!fem,])
|
|
|
|
# one time point
|
|
ps2a <- pseudo(fit2a, time=100, type="auc")
|
|
aeq(ps2a, sv2[1,1] + fit2a$n[1]* rr2a[,1])
|
|
ps2b <- pseudo(fit2b, time=100, type="auc")
|
|
aeq(ps2b, sv2[2,1] + fit2b$n[1]* rr2b[,1])
|
|
|
|
# overall psuedo are the same as for separate models
|
|
# (each row of mdata belongs to a single curve)
|
|
ps2c <- pseudo(fit2, time=100, type="auc")
|
|
aeq(ps2c[ fem], ps2a)
|
|
aeq(ps2c[!fem], ps2b)
|
|
|
|
# multiple time points
|
|
ps2d <- pseudo(fit2a, times=tvec, type="auc")
|
|
aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a)
|
|
ps2e <- pseudo(fit2b, times=tvec, type= "auc")
|
|
aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b)
|
|
|
|
ps2f <- pseudo(fit2, times=tvec, type="auc")
|
|
aeq(ps2d, ps2f[ fem,])
|
|
aeq(ps2e, ps2f[!fem,])
|
|
|
|
# Repeat the process for a multi-state model
|
|
rr3 <- resid(fit3, times=tvec, type="auc")
|
|
aeq(apply(rr3, 2:3, sum), matrix(0, 3,3))
|
|
rr3a <- resid(fit3a, times=tvec, type="auc")
|
|
rr3b <- resid(fit3b, times=tvec, type="auc")
|
|
aeq(rr3[fem,,], rr3a)
|
|
aeq(rr3[!fem,,], rr3b)
|
|
|
|
ps3 <- pseudo(fit3, times=tvec, type="auc")
|
|
ps3a <- pseudo(fit3a, times=tvec, type="auc")
|
|
ps3b <- pseudo(fit3b, times=tvec, type="auc")
|
|
aeq(ps3[ fem,,], ps3a)
|
|
aeq(ps3[!fem,,], ps3b)
|
|
|
|
sv3 <- rbind(summary(fit3, rmean=tvec[1])$table[,"rmean"],
|
|
summary(fit3, rmean=tvec[2])$table[,"rmean"],
|
|
summary(fit3, rmean=tvec[3])$table[,"rmean"])
|
|
sv3 <- array(sv3, dim=c(3,2,3)) #times, curve, state
|
|
# ps3a has dimensions (number obs in fit3a, 4 timepoints, 3 states)
|
|
# to each of the 4x3 combinations we need to add the value of the
|
|
# survival curve at that time. A loop is easiest
|
|
temp1 <- array(0, dim= dim(rr3a))
|
|
temp2 <- array(0, dim= dim(rr3b))
|
|
for (i in 1:3) { # each of the 3 states
|
|
for (j in 1:3) { # each of the 3 times
|
|
temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j]
|
|
temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j]
|
|
}
|
|
}
|
|
aeq(temp1, ps3a)
|
|
aeq(temp2, ps3b)
|
|
|
|
#
|
|
# a data set with a missing value, and with a group that has only one obs
|
|
# a good test of edge cases
|
|
#
|
|
lfit1 <- survfit(Surv(time, status) ~ ph.ecog, lung)
|
|
# This will warn about points beyond the curve; ph.ecog==3 has a single point
|
|
# at time=118, and it will have one fewer obs than the data
|
|
p1 <- pseudo(lfit1, times=c(100, 200))
|
|
aeq(dim(p1), c(nrow(lung)-1, 2))
|
|
|
|
|
|
# This will have rows that match the data
|
|
lfit2 <- survfit(Surv(time, status) ~ ph.ecog, lung, na.action= na.exclude)
|
|
p2 <- pseudo(lfit2, time=c(100, 200))
|
|
aeq(dim(p2), c(nrow(lung), 2))
|
|
all(is.na(p2[is.na(lung$ph.ecog)])) # a row of missing was inserted
|
|
|
|
row3 <- which(!is.na(lung$ph.ecog) & lung$ph.ecog ==3) # the singleton row
|
|
all(p2[row3,] == c(1, 0))
|
|
|