CircosHeatmap-aardio/dist/lib/r/site-library/rpart/tests/testall.R

# 
# This is a set of tests, with printout, that tries
#  to exercise all of the splitting rules
# It's less formal than the others in this directory, but covers
#  more.
# For all of the cross-validations I set xgroups explicitly, so as
#  to avoid any changes in random number allocation of the groups.

library(rpart)
require(survival)
options(digits=4)  #avoid trivial rounding changes across R versions
set.seed(10)
# 
#
# Survival, using the stagec data
#
#   Time to progression in years
#   status  1=progressed, 0= censored
#   age
#   early endocrine therapy   1=no 2=yes
#   % of cells in g2 phase, from flow cytometry
#   tumor grade (Farrow) 1,2,3,4
#   Gleason score (competing grading system)
#   ploidy
xgroup <- rep(1:10, length=nrow(stagec))
fit1 <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,
		stagec, method="poisson",
              control=rpart.control(usesurrogate=0, cp=0, xval=xgroup))
fit1
summary(fit1)

fit2 <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,
		stagec, xval=xgroup)
fit2
summary(fit2)


#
# Continuous y variable
#  Use deterministic xgroups: the first group is the 1st, 11th, 21st, etc
#  smallest obs, the second is 2, 12, 22, ...

mystate <- data.frame(state.x77, region=factor(state.region))
names(mystate) <- c("population","income" , "illiteracy","life" ,
       "murder", "hs.grad", "frost",     "area",      "region")

xvals <- 1:nrow(mystate)
xvals[order(mystate$income)] <- rep(1:10, length=nrow(mystate))

fit4 <- rpart(income ~ population + region + illiteracy +life + murder +
			hs.grad + frost , mystate,
		   control=rpart.control(minsplit=10, xval=xvals))

summary(fit4)


#
# Check out xpred.rpart
#
meany <- mean(mystate$income)
xpr <- xpred.rpart(fit4, xval=xvals)
xpr2 <- (xpr - mystate$income)^2
risk0 <- mean((mystate$income - meany)^2)
xpmean <- as.vector(apply(xpr2, 2, mean))   #kill the names
all.equal(xpmean/risk0, as.vector(fit4$cptable[,'xerror']))

xpstd <- as.vector(apply((sweep(xpr2, 2, xpmean))^2, 2, sum))
xpstd <- sqrt(xpstd)/(50*risk0)
all.equal(xpstd, as.vector(fit4$cptable[,'xstd']))

#
# recreate subset #3 of the xval
#
tfit4 <- rpart(income ~ population + region + illiteracy +life + murder +
			hs.grad + frost , mystate,  subset=(xvals!=3),
		   control=rpart.control(minsplit=10, xval=0))
tpred <- predict(tfit4, mystate[xvals==3,])
all.equal(tpred, xpr[xvals==3,ncol(xpr)])

# How much does this differ from the "real" formula, more complex,
#   found on page 309 of Breiman et al. ?
#xtemp <- (xpr2/outer(rep(1,50),xpmean)) -  ((mystate$income - meany)^2)/risk0
#real.se<- xpmean* sqrt(apply(xtemp^2,2,sum))/(risk0*50)


# Simple yes/no classification model
fit5 <- rpart(factor(pgstat) ~  age + eet + g2+grade+gleason +ploidy,
	  stagec, xval=xgroup)

fit5

fit6 <- rpart(factor(pgstat) ~  age + eet + g2+grade+gleason +ploidy,
		stagec, parm=list(prior=c(.5,.5)), xval=xgroup)
summary(fit6)
#
# Fit a classification model to the car data.
#  Now, since Reliability is an ordered category, this model doesn't
# make a lot of statistical sense, but it does test out some
# areas of the code that nothing else does
#
xcar <- rep(1:8, length=nrow(cu.summary))
carfit <- rpart(Reliability ~ Price + Country + Mileage + Type,
		   method='class', data=cu.summary, xval=xcar)

summary(carfit)
首次上传 2025-01-12 00:52:51 +08:00			`#`
			`# This is a set of tests, with printout, that tries`
			`# to exercise all of the splitting rules`
			`# It's less formal than the others in this directory, but covers`
			`# more.`
			`# For all of the cross-validations I set xgroups explicitly, so as`
			`# to avoid any changes in random number allocation of the groups.`

			`library(rpart)`
			`require(survival)`
			`options(digits=4) #avoid trivial rounding changes across R versions`
			`set.seed(10)`
			`#`
			`#`
			`# Survival, using the stagec data`
			`#`
			`# Time to progression in years`
			`# status 1=progressed, 0= censored`
			`# age`
			`# early endocrine therapy 1=no 2=yes`
			`# % of cells in g2 phase, from flow cytometry`
			`# tumor grade (Farrow) 1,2,3,4`
			`# Gleason score (competing grading system)`
			`# ploidy`
			`xgroup <- rep(1:10, length=nrow(stagec))`
			`fit1 <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,`
			`stagec, method="poisson",`
			`control=rpart.control(usesurrogate=0, cp=0, xval=xgroup))`
			`fit1`
			`summary(fit1)`

			`fit2 <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,`
			`stagec, xval=xgroup)`
			`fit2`
			`summary(fit2)`


			`#`
			`# Continuous y variable`
			`# Use deterministic xgroups: the first group is the 1st, 11th, 21st, etc`
			`# smallest obs, the second is 2, 12, 22, ...`

			`mystate <- data.frame(state.x77, region=factor(state.region))`
			`names(mystate) <- c("population","income" , "illiteracy","life" ,`
			`"murder", "hs.grad", "frost", "area", "region")`

			`xvals <- 1:nrow(mystate)`
			`xvals[order(mystate$income)] <- rep(1:10, length=nrow(mystate))`

			`fit4 <- rpart(income ~ population + region + illiteracy +life + murder +`
			`hs.grad + frost , mystate,`
			`control=rpart.control(minsplit=10, xval=xvals))`

			`summary(fit4)`


			`#`
			`# Check out xpred.rpart`
			`#`
			`meany <- mean(mystate$income)`
			`xpr <- xpred.rpart(fit4, xval=xvals)`
			`xpr2 <- (xpr - mystate$income)^2`
			`risk0 <- mean((mystate$income - meany)^2)`
			`xpmean <- as.vector(apply(xpr2, 2, mean)) #kill the names`
			`all.equal(xpmean/risk0, as.vector(fit4$cptable[,'xerror']))`

			`xpstd <- as.vector(apply((sweep(xpr2, 2, xpmean))^2, 2, sum))`
			`xpstd <- sqrt(xpstd)/(50*risk0)`
			`all.equal(xpstd, as.vector(fit4$cptable[,'xstd']))`

			`#`
			`# recreate subset #3 of the xval`
			`#`
			`tfit4 <- rpart(income ~ population + region + illiteracy +life + murder +`
			`hs.grad + frost , mystate, subset=(xvals!=3),`
			`control=rpart.control(minsplit=10, xval=0))`
			`tpred <- predict(tfit4, mystate[xvals==3,])`
			`all.equal(tpred, xpr[xvals==3,ncol(xpr)])`

			`# How much does this differ from the "real" formula, more complex,`
			`# found on page 309 of Breiman et al. ?`
			`#xtemp <- (xpr2/outer(rep(1,50),xpmean)) - ((mystate$income - meany)^2)/risk0`
			`#real.se<- xpmean* sqrt(apply(xtemp^2,2,sum))/(risk0*50)`


			`# Simple yes/no classification model`
			`fit5 <- rpart(factor(pgstat) ~ age + eet + g2+grade+gleason +ploidy,`
			`stagec, xval=xgroup)`

			`fit5`

			`fit6 <- rpart(factor(pgstat) ~ age + eet + g2+grade+gleason +ploidy,`
			`stagec, parm=list(prior=c(.5,.5)), xval=xgroup)`
			`summary(fit6)`
			`#`
			`# Fit a classification model to the car data.`
			`# Now, since Reliability is an ordered category, this model doesn't`
			`# make a lot of statistical sense, but it does test out some`
			`# areas of the code that nothing else does`
			`#`
			`xcar <- rep(1:8, length=nrow(cu.summary))`
			`carfit <- rpart(Reliability ~ Price + Country + Mileage + Type,`
			`method='class', data=cu.summary, xval=xcar)`

			`summary(carfit)`