168 lines
7.2 KiB
Plaintext
168 lines
7.2 KiB
Plaintext
|
|
R Under development (unstable) (2019-04-05 r76323) -- "Unsuffered Consequences"
|
|
Copyright (C) 2019 The R Foundation for Statistical Computing
|
|
Platform: x86_64-pc-linux-gnu (64-bit)
|
|
|
|
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
|
You are welcome to redistribute it under certain conditions.
|
|
Type 'license()' or 'licence()' for distribution details.
|
|
|
|
R is a collaborative project with many contributors.
|
|
Type 'contributors()' for more information and
|
|
'citation()' on how to cite R or R packages in publications.
|
|
|
|
Type 'demo()' for some demos, 'help()' for on-line help, or
|
|
'help.start()' for an HTML browser interface to help.
|
|
Type 'q()' to quit R.
|
|
|
|
> #
|
|
> # The treble test for classification trees
|
|
> #
|
|
> #
|
|
> library(rpart)
|
|
> set.seed(10)
|
|
>
|
|
> xgrp <- rep(1:10,length=nrow(cu.summary))
|
|
> carfit <- rpart(Country ~ Reliability + Price + Mileage + Type,
|
|
+ method='class', data=cu.summary,
|
|
+ control=rpart.control(xval=xgrp))
|
|
>
|
|
> carfit2 <- rpart(Country ~ Reliability + Price + Mileage + Type,
|
|
+ method='class', data=cu.summary,
|
|
+ weight=rep(3,nrow(cu.summary)),
|
|
+ control=rpart.control(xval=xgrp))
|
|
>
|
|
> all.equal(carfit$frame$wt, carfit2$frame$wt/3)
|
|
[1] TRUE
|
|
> all.equal(carfit$frame$dev, carfit2$frame$dev/3)
|
|
[1] TRUE
|
|
> all.equal(carfit$frame[,5:7], carfit2$frame[,5:7])
|
|
[1] TRUE
|
|
> all.equal(carfit$frame$yval2[,12:21], carfit2$frame$yval2[,12:21])
|
|
[1] TRUE
|
|
> all.equal(carfit[c('where', 'csplit')],
|
|
+ carfit2[c('where', 'csplit')])
|
|
[1] TRUE
|
|
> xx <- carfit2$splits
|
|
> xx[,'improve'] <- xx[,'improve'] / ifelse(xx[,5]> 0,1,3) # surrogate?
|
|
> all.equal(xx, carfit$splits)
|
|
[1] TRUE
|
|
> all.equal(as.vector(carfit$cptable),
|
|
+ as.vector(carfit2$cptable%*% diag(c(1,1,1,1,sqrt(3)))))
|
|
[1] TRUE
|
|
>
|
|
> summary(carfit2)
|
|
Call:
|
|
rpart(formula = Country ~ Reliability + Price + Mileage + Type,
|
|
data = cu.summary, weights = rep(3, nrow(cu.summary)), method = "class",
|
|
control = rpart.control(xval = xgrp))
|
|
n= 117
|
|
|
|
CP nsplit rel error xerror xstd
|
|
1 0.23529412 0 1.0000000 1.0000000 0.04530958
|
|
2 0.02941176 1 0.7647059 0.8088235 0.04583685
|
|
3 0.02205882 3 0.7058824 0.9117647 0.04583685
|
|
4 0.01000000 5 0.6617647 0.9558824 0.04563477
|
|
|
|
Variable importance
|
|
Reliability Price Type
|
|
49 30 21
|
|
|
|
Node number 1: 117 observations, complexity param=0.2352941
|
|
predicted class=USA expected loss=0.5811966 P(node) =1
|
|
class counts: 3 3 6 33 93 27 15 9 15 147
|
|
probabilities: 0.009 0.009 0.017 0.094 0.265 0.077 0.043 0.026 0.043 0.419
|
|
left son=2 (33 obs) right son=3 (84 obs)
|
|
Primary splits:
|
|
Reliability splits as RRRLL, improve=44.777630, (32 missing)
|
|
Type splits as LRRLRR, improve=11.246350, (0 missing)
|
|
Price < 6923 to the left, improve=10.448720, (0 missing)
|
|
Mileage < 22.5 to the right, improve= 6.084489, (57 missing)
|
|
Surrogate splits:
|
|
Type splits as RRRLRR, agree=0.706, adj=0.138, (32 split)
|
|
Price < 6665 to the left, agree=0.682, adj=0.069, (0 split)
|
|
|
|
Node number 2: 33 observations, complexity param=0.02205882
|
|
predicted class=Japan expected loss=0.4545455 P(node) =0.2820513
|
|
class counts: 3 0 0 3 54 27 3 3 0 6
|
|
probabilities: 0.030 0.000 0.000 0.030 0.545 0.273 0.030 0.030 0.000 0.061
|
|
left son=4 (9 obs) right son=5 (24 obs)
|
|
Primary splits:
|
|
Price < 13202 to the right, improve=7.621212, (0 missing)
|
|
Type splits as R-LRLL, improve=5.621212, (0 missing)
|
|
Reliability splits as RRRRL, improve=5.406404, (4 missing)
|
|
Mileage < 23.5 to the left, improve=3.915966, (9 missing)
|
|
Surrogate splits:
|
|
Type splits as R-LRLL, agree=0.879, adj=0.556, (0 split)
|
|
|
|
Node number 3: 84 observations, complexity param=0.02941176
|
|
predicted class=USA expected loss=0.4404762 P(node) =0.7179487
|
|
class counts: 0 3 6 30 39 0 12 6 15 141
|
|
probabilities: 0.000 0.012 0.024 0.119 0.155 0.000 0.048 0.024 0.060 0.560
|
|
left son=6 (30 obs) right son=7 (54 obs)
|
|
Primary splits:
|
|
Price < 16668.5 to the right, improve=10.403170, (0 missing)
|
|
Type splits as RRRLRR, improve= 6.025974, (0 missing)
|
|
Mileage < 22.5 to the right, improve= 3.132611, (48 missing)
|
|
Reliability splits as RLLLL, improve= 2.195489, (28 missing)
|
|
Surrogate splits:
|
|
Type splits as RLLRRR, agree=0.714, adj=0.2, (0 split)
|
|
|
|
Node number 4: 9 observations
|
|
predicted class=Japan expected loss=0.1111111 P(node) =0.07692308
|
|
class counts: 0 0 0 3 24 0 0 0 0 0
|
|
probabilities: 0.000 0.000 0.000 0.111 0.889 0.000 0.000 0.000 0.000 0.000
|
|
|
|
Node number 5: 24 observations, complexity param=0.02205882
|
|
predicted class=Japan expected loss=0.5833333 P(node) =0.2051282
|
|
class counts: 3 0 0 0 30 27 3 3 0 6
|
|
probabilities: 0.042 0.000 0.000 0.000 0.417 0.375 0.042 0.042 0.000 0.083
|
|
left son=10 (7 obs) right son=11 (17 obs)
|
|
Primary splits:
|
|
Price < 7038 to the left, improve=4.836134, (0 missing)
|
|
Type splits as R--LR-, improve=1.966667, (0 missing)
|
|
Mileage < 30.5 to the right, improve=1.716667, (6 missing)
|
|
|
|
Node number 6: 30 observations, complexity param=0.02941176
|
|
predicted class=USA expected loss=0.6333333 P(node) =0.2564103
|
|
class counts: 0 3 3 24 12 0 0 0 15 33
|
|
probabilities: 0.000 0.033 0.033 0.267 0.133 0.000 0.000 0.000 0.167 0.367
|
|
left son=12 (10 obs) right son=13 (20 obs)
|
|
Primary splits:
|
|
Type splits as LRR-L-, improve=6.6000000, (0 missing)
|
|
Price < 30511 to the right, improve=5.3739130, (0 missing)
|
|
Reliability splits as RRLLL, improve=0.3333333, (12 missing)
|
|
Surrogate splits:
|
|
Price < 19422.5 to the left, agree=0.767, adj=0.3, (0 split)
|
|
|
|
Node number 7: 54 observations
|
|
predicted class=USA expected loss=0.3333333 P(node) =0.4615385
|
|
class counts: 0 0 3 6 27 0 12 6 0 108
|
|
probabilities: 0.000 0.000 0.019 0.037 0.167 0.000 0.074 0.037 0.000 0.667
|
|
|
|
Node number 10: 7 observations
|
|
predicted class=Japan expected loss=0.2857143 P(node) =0.05982906
|
|
class counts: 0 0 0 0 15 3 3 0 0 0
|
|
probabilities: 0.000 0.000 0.000 0.000 0.714 0.143 0.143 0.000 0.000 0.000
|
|
|
|
Node number 11: 17 observations
|
|
predicted class=Japan/USA expected loss=0.5294118 P(node) =0.1452991
|
|
class counts: 3 0 0 0 15 24 0 3 0 6
|
|
probabilities: 0.059 0.000 0.000 0.000 0.294 0.471 0.000 0.059 0.000 0.118
|
|
|
|
Node number 12: 10 observations
|
|
predicted class=Germany expected loss=0.5 P(node) =0.08547009
|
|
class counts: 0 0 0 15 3 0 0 0 9 3
|
|
probabilities: 0.000 0.000 0.000 0.500 0.100 0.000 0.000 0.000 0.300 0.100
|
|
|
|
Node number 13: 20 observations
|
|
predicted class=USA expected loss=0.5 P(node) =0.1709402
|
|
class counts: 0 3 3 9 9 0 0 0 6 30
|
|
probabilities: 0.000 0.050 0.050 0.150 0.150 0.000 0.000 0.000 0.100 0.500
|
|
|
|
>
|
|
>
|
|
> proc.time()
|
|
user system elapsed
|
|
0.114 0.017 0.125
|