85 lines
3.2 KiB
Plaintext
85 lines
3.2 KiB
Plaintext
|
|
||
|
R Under development (unstable) (2019-04-05 r76323) -- "Unsuffered Consequences"
|
||
|
Copyright (C) 2019 The R Foundation for Statistical Computing
|
||
|
Platform: x86_64-pc-linux-gnu (64-bit)
|
||
|
|
||
|
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
||
|
You are welcome to redistribute it under certain conditions.
|
||
|
Type 'license()' or 'licence()' for distribution details.
|
||
|
|
||
|
R is a collaborative project with many contributors.
|
||
|
Type 'contributors()' for more information and
|
||
|
'citation()' on how to cite R or R packages in publications.
|
||
|
|
||
|
Type 'demo()' for some demos, 'help()' for on-line help, or
|
||
|
'help.start()' for an HTML browser interface to help.
|
||
|
Type 'q()' to quit R.
|
||
|
|
||
|
> #
|
||
|
> # Treble test for class trees with 2 outcomes
|
||
|
> #
|
||
|
> # fit1 and fit1b failed equality because .7 and .3 are not easily represented
|
||
|
> # in binary. Thus a complelxity param was 4e-17 (basically 0, but enough
|
||
|
> # to cause a split where it shouldn't be). Eric Lunde 2005-08-03
|
||
|
> library(rpart)
|
||
|
> control <- rpart.control(maxsurrogate=0, cp=1e-15, xval=0)
|
||
|
> set.seed(10)
|
||
|
>
|
||
|
> fit1 <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis,
|
||
|
+ control=control,
|
||
|
+ parms=list(prior=c(.7,.3),
|
||
|
+ loss=matrix(c(0,1,2,0),nrow=2,ncol=2)))
|
||
|
> wts <- rep(3, nrow(kyphosis))
|
||
|
> fit1b <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis,
|
||
|
+ control=control,
|
||
|
+ weights=wts,
|
||
|
+ parms=list(prior=c(.7,.3),
|
||
|
+ loss=matrix(c(0,1,2,0),nrow=2,ncol=2)))
|
||
|
> fit1b$frame$wt <- fit1b$frame$wt/3
|
||
|
> fit1b$frame$dev <- fit1b$frame$dev/3
|
||
|
> fit1b$frame$yval2[,2:3] <- fit1b$frame$yval2[,2:3]/3
|
||
|
> fit1b$splits[,3] <- fit1b$splits[,3]/3
|
||
|
> fit1b$variable.importance <- fit1b$variable.importance/3
|
||
|
> all.equal(fit1[-3], fit1b[-3]) #all but the "call"
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> # Now for a set of non-equal weights
|
||
|
> nn <- nrow(kyphosis)
|
||
|
> pseudo <- double(nn)
|
||
|
> pseudo[1] <- pi/6
|
||
|
> for (i in 2:nn) pseudo[i] <- 4*pseudo[i-1]*(1-pseudo[i-1])
|
||
|
>
|
||
|
> wts <- rep(1:7, length=nn)
|
||
|
> temp <- rep(1:nn, wts) #row replicates
|
||
|
> xgrp <- rep(1:10, length=nn)[order(pseudo)]
|
||
|
> xgrp2<- rep(xgrp, wts)
|
||
|
>
|
||
|
> # The cp value stops one last split where the two predictors are
|
||
|
> # completely equal in importance (perfect surrogates), but the
|
||
|
> # weighted and unweighted pick a different one due to round off error
|
||
|
> tempc <- rpart.control(minsplit=2, xval=xgrp2, maxsurrogate=0, cp=.039)
|
||
|
> # Direct: replicate rows in the data set, and use unweighted
|
||
|
> fit2 <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis[temp,],
|
||
|
+ control=tempc,
|
||
|
+ parms=list(prior=c(.7,.3),
|
||
|
+ loss=matrix(c(0,1,2,0),nrow=2,ncol=2)))
|
||
|
> # Weighted
|
||
|
> tempc <- rpart.control(minsplit=2, xval=xgrp, maxsurrogate=0, cp=.039)
|
||
|
> fit2b <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis,
|
||
|
+ control=tempc, weights=wts,
|
||
|
+ parms=list(prior=c(.7,.3),
|
||
|
+ loss=matrix(c(0,1,2,0),nrow=2,ncol=2)))
|
||
|
>
|
||
|
> all.equal(fit2$frame[,-2], fit2b$frame[,-2]) # the "n" component won't match
|
||
|
[1] TRUE
|
||
|
> all.equal(fit2$cptable, fit2b$cptable)
|
||
|
[1] TRUE
|
||
|
> all.equal(fit2$splits[,-1],fit2b$splits[,-1])
|
||
|
[1] TRUE
|
||
|
> all.equal(fit2$csplit, fit2b$csplit)
|
||
|
[1] TRUE
|
||
|
>
|
||
|
> proc.time()
|
||
|
user system elapsed
|
||
|
0.130 0.020 0.144
|