2025-01-12 04:36:52 +08:00

105 lines
2.4 KiB
R

#-*- R -*-
## Script from Fourth Edition of `Modern Applied Statistics with S'
# Chapter 9 Tree-based Methods
library(MASS)
pdf(file="ch09.pdf", width=8, height=6, pointsize=9)
options(digits=5)
library(rpart)
# Figure 9.3
shuttle.rp <- rpart(use ~ ., data=shuttle, minbucket=0, xval = 0,
maxsurrogate = 0, cp = 0, subset = 1:253)
post(shuttle.rp, horizontal = FALSE, height=10, width=8, title = "",
pointsize = 8, pretty = 0)
# 9.3 Implementation in rpart
set.seed(123)
cpus.rp <- rpart(log10(perf) ~ ., cpus[ , 2:8], cp = 1e-3)
cpus.rp
print(cpus.rp, cp = 0.01) # default pruning
plot(cpus.rp, uniform = TRUE)
text(cpus.rp, digits = 3)
printcp(cpus.rp)
plotcp(cpus.rp)
cpus.rp1 <- prune(cpus.rp, cp = 0.006)
print(cpus.rp1, digits = 3)
plot(cpus.rp1, branch = 0.4, uniform = TRUE)
text(cpus.rp1, digits = 3)
# for figure 9.2
cpus.rp2 <- prune(cpus.rp, cp = 0.03)
post(cpus.rp2, horizontal = FALSE, title = "", digits=4, pointsize=18)
set.seed(123)
fgl.rp <- rpart(type ~ ., fgl, cp = 0.001)
plotcp(fgl.rp)
printcp(fgl.rp)
fgl.rp2 <- prune(fgl.rp, cp = 0.02)
plot(fgl.rp2, uniform = TRUE)
text(fgl.rp2, use.n = TRUE)
fgl.rp2
summary(fgl.rp2)
set.seed(123)
fgl.rp3 <- rpart(type ~ ., fgl, cp = 0.001,
parms = list(split="information"))
plotcp(fgl.rp3)
printcp(fgl.rp3)
fgl.rp4 <- prune(fgl.rp3, cp = 0.03)
plot(fgl.rp4, uniform = TRUE); text(fgl.rp4, use.n = TRUE)
plot(cpus.rp, branch = 0.6, compress = TRUE, uniform = TRUE)
text(cpus.rp, digits = 3, all = TRUE, use.n = TRUE)
# 9.3 Implementation in tree
library(tree)
## the stopping criteria differ slightly between R and S-PLUS
cpus.ltr <- tree(log10(perf) ~ ., data = cpus[, 2:8], mindev = 0.005)
summary(cpus.ltr)
cpus.ltr
plot(cpus.ltr, type="u"); text(cpus.ltr)
par(mfrow = c(1, 2), pty = "s")
set.seed(321)
plot(cv.tree(cpus.ltr, , prune.tree))
cpus.ltr1 <- prune.tree(cpus.ltr, best = 10)
plot(cpus.ltr1, type = "u")
text(cpus.ltr1, digits = 3)
par(mfrow = c(1, 1), pty = "m")
fgl.tr <- tree(type ~ ., fgl)
summary(fgl.tr)
plot(fgl.tr)
text(fgl.tr, all = TRUE, cex = 0.5)
par(mfrow = c(1, 2), pty = "s")
set.seed(123)
fgl.cv <- cv.tree(fgl.tr,, prune.misclass)
for(i in 2:5) fgl.cv$dev <- fgl.cv$dev +
cv.tree(fgl.tr,, prune.misclass)$dev
fgl.cv$dev <- fgl.cv$dev/5
fgl.cv
plot(fgl.cv)
fgl.tr1 <- prune.misclass(fgl.tr, best = 9)
plot(fgl.tr1, type = "u")
text(fgl.tr1, all = TRUE)
# End of ch09