268 lines
10 KiB
Plaintext
268 lines
10 KiB
Plaintext
|
|
||
|
R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences"
|
||
|
Copyright (C) 2024 The R Foundation for Statistical Computing
|
||
|
Platform: aarch64-unknown-linux-gnu
|
||
|
|
||
|
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
||
|
You are welcome to redistribute it under certain conditions.
|
||
|
Type 'license()' or 'licence()' for distribution details.
|
||
|
|
||
|
R is a collaborative project with many contributors.
|
||
|
Type 'contributors()' for more information and
|
||
|
'citation()' on how to cite R or R packages in publications.
|
||
|
|
||
|
Type 'demo()' for some demos, 'help()' for on-line help, or
|
||
|
'help.start()' for an HTML browser interface to help.
|
||
|
Type 'q()' to quit R.
|
||
|
|
||
|
> library(survival)
|
||
|
> options(na.action=na.exclude) # preserve missings
|
||
|
> options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type
|
||
|
>
|
||
|
> #
|
||
|
> # Tests for the condordance function.
|
||
|
> #
|
||
|
> aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...)
|
||
|
>
|
||
|
> grank <- function(x, time, grp, wt) {
|
||
|
+ if (all(wt==1)) unlist(tapply(x, grp, rank))
|
||
|
+ else unlist(tapply(1:length(x), grp, function(i) {
|
||
|
+ xx <- x[i] # x and wts for this subset of the data
|
||
|
+ ww <- wt[i]
|
||
|
+ temp <- outer(xx, xx, function(a, b) sign(b-a))
|
||
|
+ colSums(ww*temp)/2
|
||
|
+ }))
|
||
|
+ }
|
||
|
>
|
||
|
> # a Cox model using iter=0, ties='breslow' and the above function has a score
|
||
|
> # statistic which is U=(C-D)/2 and score test U^2/H, where H is the Cox model
|
||
|
> # information matrix, with fit$var=1/H. The concordance is U+ 1/2.
|
||
|
> # Pull out the Somers' d and its variance
|
||
|
> phget <- function(fit) {
|
||
|
+ c(d = 2*sqrt(fit$score/fit$var), v= 4/fit$var)
|
||
|
+ }
|
||
|
> fscale <- function(fit) {
|
||
|
+ if (is.matrix(fit$count)) temp <- colSums(fit$count) else temp <- fit$count
|
||
|
+ npair <- sum(temp[1:3])
|
||
|
+ c(d = abs(temp[1]-temp[2]), v=4*fit$cvar*npair^2)
|
||
|
+ }
|
||
|
>
|
||
|
> # Concordance by brute force. O(n^2) algorithm, but ok for n<500 or so
|
||
|
> allpair <- function(time, status, x, wt, all=FALSE) {
|
||
|
+ n <- length(time)
|
||
|
+ if (missing(wt)) wt <- rep(1, length(x))
|
||
|
+ count <- sapply(which(status==1), function(i) {
|
||
|
+ atrisk <- (time > time[i]) | (time==time[i] & status==0)
|
||
|
+ temp <- tapply(wt[atrisk], factor(sign(x[i] -x[atrisk]), c(-1, 1, 0)),
|
||
|
+ sum)
|
||
|
+ tiedtime <- (time==time[i] & status ==1 & (1:n)>i)
|
||
|
+ ties <- tapply(wt[tiedtime], factor(x[tiedtime]==x[i],
|
||
|
+ c(FALSE, TRUE)),sum)
|
||
|
+ wt[i]* c(ifelse(is.na(temp), 0, temp), ifelse(is.na(ties), 0, ties))
|
||
|
+ })
|
||
|
+ rownames(count) <- c("concordant", "discordant", "tied.x", "tied.y",
|
||
|
+ "tied.xy")
|
||
|
+ if (all) {
|
||
|
+ colnames(count) <- time[status==1]
|
||
|
+ t(count)
|
||
|
+ }
|
||
|
+ else rowSums(count)
|
||
|
+ }
|
||
|
>
|
||
|
> # leverage by brute force
|
||
|
> leverage <- function(time, status, x, wt, eps=1e-5) {
|
||
|
+ if (missing(wt)) wt <- rep(1, length(x))
|
||
|
+ toss <- is.na(time + status + x +wt)
|
||
|
+ if (any(toss)) {
|
||
|
+ time <- time[!toss]
|
||
|
+ status <- status[!toss]
|
||
|
+ x <- x[!toss]
|
||
|
+ wt <- wt[!toss]
|
||
|
+ }
|
||
|
+ n <- length(time)
|
||
|
+ influence <- matrix(0, n, 5)
|
||
|
+ t2 <- time + eps*(status==0)
|
||
|
+ for (i in 1:n) {
|
||
|
+ if (status[i] ==0) comparable <- (time<=time[i] & status==1)
|
||
|
+ else comparable <- ifelse(status==0, time >= time[i], time!=time[i])
|
||
|
+ temp <- sign((x[i]-x[comparable])*(t2[i] - t2[comparable]))
|
||
|
+ influence[i,1:3] <-tapply(wt[comparable],factor(temp, c(1,-1,0)), sum)
|
||
|
+ if (status[i]==1) {
|
||
|
+ tied <- (time==time[i] & status==1 & (1:n)!= i)
|
||
|
+ if (any(tied)) {
|
||
|
+ itemp<- tapply(wt[tied], factor(x[tied]==x[i],
|
||
|
+ c(FALSE, TRUE)), sum)
|
||
|
+ influence[i,4:5] <- itemp
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+ dimnames(influence) <- list(as.character(Surv(time, status)),
|
||
|
+ c("concord", "discord", "tie.x", "tie.y",
|
||
|
+ "tie.xy"))
|
||
|
+ ifelse(is.na(influence), 0, influence)
|
||
|
+ }
|
||
|
>
|
||
|
> tdata <- aml[aml$x=='Maintained', c("time", "status")]
|
||
|
> tdata$x <- c(1,6,2,7,3,7,3,8,4,4,5)
|
||
|
> tdata$wt <- c(1,2,3,2,1,2,3,4,3,2,1)
|
||
|
>
|
||
|
> fit <- concordance(Surv(time, status) ~x, tdata, influence=2)
|
||
|
> aeq(fit$count, with(tdata, allpair(time, status, x)))
|
||
|
[1] TRUE
|
||
|
> aeq(fit$influence, with(tdata, leverage(time, status, x)))
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> cfit <- coxph(Surv(time, status) ~ tt(x), tdata, tt=grank, ties='breslow',
|
||
|
+ iter=0, x=T)
|
||
|
> aeq(phget(cfit), fscale(fit)) # agree with Cox model
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Test 2: Lots of ties
|
||
|
> tempy <- Surv(c(1,2,2,2,3,4,4,4,5,2), c(1,0,1,0,1,0,1,1,0,1))
|
||
|
> tempx <- c(5,5,4,4,3,3,7,6,5,4)
|
||
|
> fit2 <- concordance(tempy ~ tempx, influence=2)
|
||
|
> aeq(fit2$count, allpair(tempy[,1], tempy[,2], tempx))
|
||
|
[1] TRUE
|
||
|
> aeq(fit2$influence, leverage(tempy[,1], tempy[,2], tempx))
|
||
|
[1] TRUE
|
||
|
> cfit2 <- coxph(tempy ~ tt(tempx), tt=grank, ties="breslow", iter=0)
|
||
|
> aeq(phget(cfit2), fscale(fit2)) # agree with Cox model
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Bigger data
|
||
|
> cox3 <- coxph(Surv(time, status) ~ age + sex + ph.ecog, lung)
|
||
|
> fit3 <- concordance(Surv(time, status) ~ predict(cox3), lung, influence=2)
|
||
|
> aeq(fit3$count, allpair(lung$time, lung$status-1,predict(cox3)))
|
||
|
[1] TRUE
|
||
|
> aeq(fit3$influence, leverage(lung$time, lung$status-1,predict(cox3)))
|
||
|
[1] TRUE
|
||
|
> cfit3 <- coxph(Surv(time, status) ~ tt(predict(cox3)), tt=grank,
|
||
|
+ ties="breslow", iter=0, data=lung)
|
||
|
> aeq(phget(cfit3), fscale(fit3)) # agree with Cox model
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # More ties
|
||
|
> fit4 <- concordance(Surv(time, status) ~ ph.ecog, lung, influence=2)
|
||
|
> fit4b <- concordance(Surv(time, status) ~ ph.ecog, lung, reverse=TRUE)
|
||
|
> aeq(fit4$count, allpair(lung$time, lung$status-1, lung$ph.ecog))
|
||
|
[1] TRUE
|
||
|
> aeq(fit4b$count, c(8392, 4258, 7137, 21, 7))
|
||
|
[1] TRUE
|
||
|
> cfit4 <- coxph(Surv(time, status) ~ tt(ph.ecog), lung,
|
||
|
+ iter=0, method='breslow', tt=grank)
|
||
|
> aeq(phget(cfit4), fscale(fit4)) # agree with Cox model
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Case weights
|
||
|
> fit5 <- concordance(Surv(time, status) ~ x, tdata, weights=wt, influence=2)
|
||
|
> fit6 <- concordance(Surv(time, status) ~x, tdata[rep(1:11,tdata$wt),])
|
||
|
> aeq(fit5$count, with(tdata, allpair(time, status, x, wt)))
|
||
|
[1] TRUE
|
||
|
> aeq(fit5$count, c(91, 70, 7, 0, 0)) # checked by hand
|
||
|
[1] TRUE
|
||
|
> aeq(fit5$count[1:3], fit6$count[1:3]) #spurious "tied.xy" values, ignore
|
||
|
[1] TRUE
|
||
|
> aeq(fit5$var[2], fit6$var[2])
|
||
|
[1] TRUE
|
||
|
> aeq(fit5$influence, with(tdata, leverage(time, status, x, wt)))
|
||
|
[1] TRUE
|
||
|
> cfit5 <- coxph(Surv(time, status) ~ tt(x), tdata, weights=wt,
|
||
|
+ iter=0, method='breslow', tt=grank)
|
||
|
> aeq(phget(cfit5), fscale(fit5)) # agree with Cox model
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Start, stop simplest cases
|
||
|
> fit6 <- concordance(Surv(rep(0,11), time, status) ~ x, tdata)
|
||
|
> aeq(fit6$count, fit$count)
|
||
|
[1] TRUE
|
||
|
> aeq(fit6$var, fit$var)
|
||
|
[1] TRUE
|
||
|
> fit7 <- concordance(Surv(rep(0,11), time, status) ~ x, tdata, weights=wt)
|
||
|
> aeq(fit7$count, fit5$count)
|
||
|
[1] TRUE
|
||
|
> aeq(fit7$var, fit5$var)
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Multiple intervals for some, but same risk sets as tdata
|
||
|
> tdata2 <- data.frame(time1=c(0,3, 5, 6,7, 0, 4,17, 7, 0,16, 2, 0,
|
||
|
+ 0,9, 5),
|
||
|
+ time2=c(3,9, 13, 7,13, 18, 17,23, 28, 16,31, 34, 45,
|
||
|
+ 9,48, 60),
|
||
|
+ status=c(0,1, 1, 0,0, 1, 0,1, 0, 0,1, 1, 0, 0,1, 0),
|
||
|
+ x = c(1,1, 6, 2,2, 7, 3,3, 7, 3,3, 8, 4, 4,4, 5),
|
||
|
+ wt= c(1,1, 2, 3,3, 2, 1,1, 2, 3,3, 4, 3, 2,2, 1),
|
||
|
+ id= c(1,1, 2, 3,3, 4, 5,5, 6, 7,7, 8, 9, 10,10, 11))
|
||
|
> fit8 <- concordance(Surv(time1, time2, status) ~x, cluster=id, tdata2,
|
||
|
+ weights=wt, influence=2)
|
||
|
> aeq(fit5$count, fit8$count)
|
||
|
[1] TRUE
|
||
|
> # influence has one row per obs, so the next line is false: mismatched lengths
|
||
|
> # aeq(fit5$influence, fit8$influence)
|
||
|
> aeq(fit5$var, fit8$var)
|
||
|
[1] TRUE
|
||
|
> cfit8 <- coxph(Surv(time1, time2, status) ~ tt(x), tdata2, weights=wt,
|
||
|
+ iter=0, method='breslow', tt=grank)
|
||
|
> aeq(phget(cfit8), fscale(fit8)) # agree with Cox model
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Stratified
|
||
|
> tdata3 <- data.frame(time1=c(tdata2$time1, rep(0, nrow(lung))),
|
||
|
+ time2=c(tdata2$time2, lung$time),
|
||
|
+ status = c(tdata2$status, lung$status -1),
|
||
|
+ x = c(tdata2$x, lung$ph.ecog),
|
||
|
+ wt= c(tdata2$wt, rep(1, nrow(lung))),
|
||
|
+ grp=rep(1:2, c(nrow(tdata2), nrow(lung))),
|
||
|
+ id = c(tdata2$id, 100+ 1:nrow(lung)))
|
||
|
> fit9 <- concordance(Surv(time1, time2, status) ~x + strata(grp), cluster=id,
|
||
|
+ data=tdata3, weights=wt, influence=2)
|
||
|
> aeq(fit9$count, rbind(fit8$count, fit4$count))
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # check out case weights, strata, and grouped jackknife;
|
||
|
> # force several ties in x, y, and xy (with missing values too for good measure).
|
||
|
> tdata <- subset(lung, select=-c(meal.cal, wt.loss, sex, age))
|
||
|
> tdata$wt <- rep(1:25, length.out=nrow(tdata))/10
|
||
|
> tdata$time <- ceiling(tdata$time/30) # force ties in y
|
||
|
> tfit <- coxph(Surv(time, status) ~ ph.ecog + pat.karno + strata(inst)
|
||
|
+ + cluster(inst), tdata, weights=wt)
|
||
|
> tdata$tpred <- predict(tfit)
|
||
|
> cm4 <- concordance(tfit, influence=3, keepstrata=TRUE)
|
||
|
> cm5 <- concordance(Surv(time, status) ~ tpred + strata(inst) + cluster(inst),
|
||
|
+ data=tdata, weights=wt, reverse=TRUE, influence=3,
|
||
|
+ keepstrata=TRUE)
|
||
|
> all.equal(cm4[1:6], cm5[1:6]) # call and na.action won't match
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> u.inst <- sort(unique(tdata$inst))
|
||
|
> temp <- matrix(0, length(u.inst), 5)
|
||
|
> for (i in 1:length(u.inst)) {
|
||
|
+ temp[i,] <- with(subset(tdata, inst==u.inst[i]),
|
||
|
+ allpair(time, status-1, -tpred, wt))
|
||
|
+ }
|
||
|
> aeq(temp, cm4$count)
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> eps <- 1e-6
|
||
|
> keep <- (1:nrow(tdata))[-tfit$na.action] # the obs that are not tossed
|
||
|
> lmat <- matrix(0., length(keep), 5)
|
||
|
> for (i in 1:length(keep)) {
|
||
|
+ wt2 <- tdata$wt
|
||
|
+ wt2[keep[i]] <- wt2[keep[i]] + eps
|
||
|
+
|
||
|
+ test <- concordance(Surv(time, status) ~ predict(tfit) + strata(inst),
|
||
|
+ data=tdata, weights=wt2, group=group, reverse=TRUE,
|
||
|
+ keepstrata=TRUE)
|
||
|
+ lmat[i,] <- colSums(test$count - cm4$count)/eps
|
||
|
+ }
|
||
|
> aeq(lmat, cm4$influence, tolerance=eps)
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Check that keepstrata gives the correct sum
|
||
|
> cm4b <- concordance(tfit, keepstrata=FALSE)
|
||
|
> aeq(cm4b$count, colSums(cm4$count))
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> proc.time()
|
||
|
user system elapsed
|
||
|
1.246 0.027 1.271
|