CircosHeatmap-aardio/dist/lib/r-library/rpart/tests/treble.Rout.save


R Under development (unstable) (2019-04-05 r76323) -- "Unsuffered Consequences"
Copyright (C) 2019 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> #
> # Simplest weight test: treble the weights
> #
> #  By using the unshrunken estimates the weights will nearly cancel
> #   out:  frame$wt, frame$dev, frame$yval2, and improvement will all
> #   be threefold larger, other things will be the same.
> # The improvement is the splits matrix, column 3, rows with n>0.  Other
> #   rows are surrogate splits.
> library(rpart)
> require(survival)
Loading required package: survival
> set.seed(10)
> 
> tempc <- rpart.control(maxsurrogate=0, cp=0, xval=0)
> fit1 <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,
+                 stagec, control=tempc,
+                 method='poisson', parms=list(shrink=0))
> wts <- rep(3, nrow(stagec))
> fit1b <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,
+                 stagec, control= tempc, parms=list(shrink=0),
+                 method='poisson', weights=wts)
> fit1b$frame$wt   <- fit1b$frame$wt/3
> fit1b$frame$dev  <- fit1b$frame$dev/3
> fit1b$frame$yval2[,2] <- fit1b$frame$yval2[,2]/3
> fit1b$splits[,3] <- fit1b$splits[,3]/3
> zz <- match(c("call", "variable.importance"), names(fit1))
> all.equal(fit1[-zz], fit1b[-zz])   #all but the "call" and importance
[1] TRUE
> all.equal(fit1b$variable.importance/fit1$variable.importance, rep(3,4),
+           check.attributes = FALSE)
[1] TRUE
> 
> #
> # Compare a pair of multiply weighted fits
> #  In this one, the lengths of where and y won't match
> # I have to set minsplit to the smallest possible, because otherwise
> #  the replicated data set will sometimes have enough "n" to split, but
> #  the weighted one won't.  Use of CP keeps the degenerate splits
> #  (n=2, several covariates with exactly the same improvement) at bay.
> # For larger trees, the weighted split will sometimes have fewer
> #  surrogates, because of the "at least two obs" rule.
> #
> # Create a reproducable psuedo random order using the logisic attractor
> pseudo <- double(nrow(stagec))
> pseudo[1] <- pi/4
> for (i in 2:nrow(stagec)) pseudo[i] <- 4*pseudo[i-1]*(1 - pseudo[i-1])
> 
> wts <- rep(1:5, length=nrow(stagec))
> temp <- rep(1:nrow(stagec), wts)             #row replicates
> xgrp <- rep(1:10, length=146)[order(pseudo)]
> xgrp2<- rep(xgrp, wts)
> #  Direct: replicate rows in the data set, and use unweighted
> fit2 <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,
+ 	      control=rpart.control(minsplit=2, xval=xgrp2, cp=.025),
+ 	      data=stagec[temp,], method='poisson')
> 
> #  Weighted
> fit2b<- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,
+ 	      control=rpart.control(minsplit=2, xval=xgrp, cp=.025),
+ 	      data=stagec, method='poisson', weight=wts)
> 
> all.equal(fit2$frame[-2],  fit2b$frame[-2])  # the "n" component won't match
[1] TRUE
> all.equal(fit2$cptable,    fit2b$cptable)
[1] TRUE
> #all.equal(fit2$splits[,-1],fit2b$splits[,-1]) #fails
> toss <- c(49, 64)
> all.equal(fit2$splits[-toss,-1],fit2b$splits[-toss,-1]) #ok
[1] TRUE
> all.equal(fit2$csplit,    fit2b$csplit)
[1] TRUE
> # Line 49 is a surrogate split in a group whose 2 smallest ages are
> #  47 and 48.  The weighted fit won't split there because it wants to
> #  send at least 2 obs to the left; the replicate fit thinks that there
> #  are several 47's.
> 
> 
> 
> 
> 
> 
> proc.time()
   user  system elapsed 
  0.706   0.053   0.753
首次上传 2025-01-12 00:52:51 +08:00
			`R Under development (unstable) (2019-04-05 r76323) -- "Unsuffered Consequences"`
			`Copyright (C) 2019 The R Foundation for Statistical Computing`
			`Platform: x86_64-pc-linux-gnu (64-bit)`

			`R is free software and comes with ABSOLUTELY NO WARRANTY.`
			`You are welcome to redistribute it under certain conditions.`
			`Type 'license()' or 'licence()' for distribution details.`

			`R is a collaborative project with many contributors.`
			`Type 'contributors()' for more information and`
			`'citation()' on how to cite R or R packages in publications.`

			`Type 'demo()' for some demos, 'help()' for on-line help, or`
			`'help.start()' for an HTML browser interface to help.`
			`Type 'q()' to quit R.`

			`> #`
			`> # Simplest weight test: treble the weights`
			`> #`
			`> # By using the unshrunken estimates the weights will nearly cancel`
			`> # out: frame$wt, frame$dev, frame$yval2, and improvement will all`
			`> # be threefold larger, other things will be the same.`
			`> # The improvement is the splits matrix, column 3, rows with n>0. Other`
			`> # rows are surrogate splits.`
			`> library(rpart)`
			`> require(survival)`
			`Loading required package: survival`
			`> set.seed(10)`
			`>`
			`> tempc <- rpart.control(maxsurrogate=0, cp=0, xval=0)`
			`> fit1 <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,`
			`+ stagec, control=tempc,`
			`+ method='poisson', parms=list(shrink=0))`
			`> wts <- rep(3, nrow(stagec))`
			`> fit1b <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,`
			`+ stagec, control= tempc, parms=list(shrink=0),`
			`+ method='poisson', weights=wts)`
			`> fit1b$frame$wt <- fit1b$frame$wt/3`
			`> fit1b$frame$dev <- fit1b$frame$dev/3`
			`> fit1b$frame$yval2[,2] <- fit1b$frame$yval2[,2]/3`
			`> fit1b$splits[,3] <- fit1b$splits[,3]/3`
			`> zz <- match(c("call", "variable.importance"), names(fit1))`
			`> all.equal(fit1[-zz], fit1b[-zz]) #all but the "call" and importance`
			`[1] TRUE`
			`> all.equal(fit1b$variable.importance/fit1$variable.importance, rep(3,4),`
			`+ check.attributes = FALSE)`
			`[1] TRUE`
			`>`
			`> #`
			`> # Compare a pair of multiply weighted fits`
			`> # In this one, the lengths of where and y won't match`
			`> # I have to set minsplit to the smallest possible, because otherwise`
			`> # the replicated data set will sometimes have enough "n" to split, but`
			`> # the weighted one won't. Use of CP keeps the degenerate splits`
			`> # (n=2, several covariates with exactly the same improvement) at bay.`
			`> # For larger trees, the weighted split will sometimes have fewer`
			`> # surrogates, because of the "at least two obs" rule.`
			`> #`
			`> # Create a reproducable psuedo random order using the logisic attractor`
			`> pseudo <- double(nrow(stagec))`
			`> pseudo[1] <- pi/4`
			`> for (i in 2:nrow(stagec)) pseudo[i] <- 4pseudo[i-1](1 - pseudo[i-1])`
			`>`
			`> wts <- rep(1:5, length=nrow(stagec))`
			`> temp <- rep(1:nrow(stagec), wts) #row replicates`
			`> xgrp <- rep(1:10, length=146)[order(pseudo)]`
			`> xgrp2<- rep(xgrp, wts)`
			`> # Direct: replicate rows in the data set, and use unweighted`
			`> fit2 <- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,`
			`+ control=rpart.control(minsplit=2, xval=xgrp2, cp=.025),`
			`+ data=stagec[temp,], method='poisson')`
			`>`
			`> # Weighted`
			`> fit2b<- rpart(Surv(pgtime, pgstat) ~ age + eet + g2+grade+gleason +ploidy,`
			`+ control=rpart.control(minsplit=2, xval=xgrp, cp=.025),`
			`+ data=stagec, method='poisson', weight=wts)`
			`>`
			`> all.equal(fit2$frame[-2], fit2b$frame[-2]) # the "n" component won't match`
			`[1] TRUE`
			`> all.equal(fit2$cptable, fit2b$cptable)`
			`[1] TRUE`
			`> #all.equal(fit2$splits[,-1],fit2b$splits[,-1]) #fails`
			`> toss <- c(49, 64)`
			`> all.equal(fit2$splits[-toss,-1],fit2b$splits[-toss,-1]) #ok`
			`[1] TRUE`
			`> all.equal(fit2$csplit, fit2b$csplit)`
			`[1] TRUE`
			`> # Line 49 is a surrogate split in a group whose 2 smallest ages are`
			`> # 47 and 48. The weighted fit won't split there because it wants to`
			`> # send at least 2 obs to the left; the replicate fit thinks that there`
			`> # are several 47's.`
			`>`
			`>`
			`>`
			`>`
			`>`
			`>`
			`> proc.time()`
			`user system elapsed`
			`0.706 0.053 0.753`