CircosHeatmap-aardio/dist/lib/r-library/rpart/tests/priors.Rout.save


R Under development (unstable) (2019-04-05 r76323) -- "Unsuffered Consequences"
Copyright (C) 2019 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> #
> # A hard-core test of losses and priors
> #  Simple data set where I know what the answers must be
> #
> library(rpart)
> aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...)
> 
> dummy <- c(3,1,4,1,5,9,2,6,5,3,5,8,9,7,9)/5
> pdata <- data.frame(y=factor(rep(1:3, 5)),
+                     x1 = 1:15,
+                     x2 = c(1:6, 1:6, 1:3),
+                     x3 = (rep(1:3, 5) + dummy)*10)
> 
> pdata$x3[c(1,5,10)] <- NA
> pdata$y[15] <- 1     # make things unbalanced
> 
> set.seed(10)
> pfit <- rpart(y ~ x1 + x2 + x3, pdata,
+               cp=0, xval=0, minsplit=5, maxdepth=1,
+               parms=list(prior=c(.2, .3, .5), 
+                          loss =matrix(c(0,2,2,2,0,6,1,1,0), 3,3,byrow=T)))
> 
> #
> # See section 12.1 of the report for these numbers
> #
> 
> ntot   <- c(6,5,4)
> phat   <- c(6,5,4)/15   # observed class probabilities
> prior  <- c(.2, .3, .5) # priors
> aprior <- c(4,12,5)/21  # altered priors
> lmat   <- matrix(c(0,1,2, 2,0,1, 2,6,0), ncol=3)  #loss matrix
> 
> gini <- function(p) 1-sum(p^2)
> loss <- function(n, class)  sum(n * lmat[,class])
> 
> phat <- function(n, ntot=c(6,5,4), prior=c(.2, .3, .5)) {
+     n*prior/ntot
+     }
> 
> # Are the losses correct?
> # Class counts for the two children are (4,4,0) and (2,1,4), when
> #   using surrogates
> aeq(pfit$frame$dev/15, c(loss(prior,2), loss(phat(c(4,4,0)),2), 
+                                  loss(phat(c(2,1,4)),3)))
[1] TRUE
> # Node probabilities?
> aeq(pfit$frame$yval2[,8] , 
+     c(1, sum(phat(c(4,4,0))), sum(phat(c(2,1,4)))))
[1] TRUE
> 
> aeq(pfit$frame$yval2[,5:7] , rbind(prior, 
+                                    phat(c(4,4,0))/ sum(phat(c(4,4,0))), 
+                                    phat(c(2,1,4))/ sum(phat(c(2,1,4)))))
[1] TRUE
> 
> # Now the node and class probs, under altered priors
> phat2 <- function(n, ntot=c(6,5,4), prior=aprior) {
+     n*prior/ntot
+     }
> 
> # Use these to create the gini losses, base data, and for the best
> #  splits on variables 1, 2, 3
> gfun <- function(n) {  #The gini loss for a node, given the counts
+     temp <- phat2(n)
+     sum(temp) * gini(temp/sum(temp))
+     }
>            
> # These are in order x3, x2, x1 (best split to worst)
> # Note that for x3, missing values cause the "parent" to be viewed as
> #   having 12 obs instead of 15.
> # Each line is gini(parent) - gini(children)
> aeq(pfit$splits[1:3, 3],
+     15* c(gfun(c(4,4,4)) - (gfun(c(3,4,0)) +  gfun(c(1,0,4))),
+           gfun(c(6,5,4)) - (gfun(c(6,5,2)) +  gfun(c(0,0,2))),
+           gfun(c(6,5,4)) - (gfun(c(4,4,4)) +  gfun(c(2,1,0)))))
[1] TRUE
> 
> proc.time()
   user  system elapsed 
  0.129   0.012   0.135
首次上传 2025-01-12 00:52:51 +08:00
			`R Under development (unstable) (2019-04-05 r76323) -- "Unsuffered Consequences"`
			`Copyright (C) 2019 The R Foundation for Statistical Computing`
			`Platform: x86_64-pc-linux-gnu (64-bit)`

			`R is free software and comes with ABSOLUTELY NO WARRANTY.`
			`You are welcome to redistribute it under certain conditions.`
			`Type 'license()' or 'licence()' for distribution details.`

			`R is a collaborative project with many contributors.`
			`Type 'contributors()' for more information and`
			`'citation()' on how to cite R or R packages in publications.`

			`Type 'demo()' for some demos, 'help()' for on-line help, or`
			`'help.start()' for an HTML browser interface to help.`
			`Type 'q()' to quit R.`

			`> #`
			`> # A hard-core test of losses and priors`
			`> # Simple data set where I know what the answers must be`
			`> #`
			`> library(rpart)`
			`> aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...)`
			`>`
			`> dummy <- c(3,1,4,1,5,9,2,6,5,3,5,8,9,7,9)/5`
			`> pdata <- data.frame(y=factor(rep(1:3, 5)),`
			`+ x1 = 1:15,`
			`+ x2 = c(1:6, 1:6, 1:3),`
			`+ x3 = (rep(1:3, 5) + dummy)*10)`
			`>`
			`> pdata$x3[c(1,5,10)] <- NA`
			`> pdata$y[15] <- 1 # make things unbalanced`
			`>`
			`> set.seed(10)`
			`> pfit <- rpart(y ~ x1 + x2 + x3, pdata,`
			`+ cp=0, xval=0, minsplit=5, maxdepth=1,`
			`+ parms=list(prior=c(.2, .3, .5),`
			`+ loss =matrix(c(0,2,2,2,0,6,1,1,0), 3,3,byrow=T)))`
			`>`
			`> #`
			`> # See section 12.1 of the report for these numbers`
			`> #`
			`>`
			`> ntot <- c(6,5,4)`
			`> phat <- c(6,5,4)/15 # observed class probabilities`
			`> prior <- c(.2, .3, .5) # priors`
			`> aprior <- c(4,12,5)/21 # altered priors`
			`> lmat <- matrix(c(0,1,2, 2,0,1, 2,6,0), ncol=3) #loss matrix`
			`>`
			`> gini <- function(p) 1-sum(p^2)`
			`> loss <- function(n, class) sum(n * lmat[,class])`
			`>`
			`> phat <- function(n, ntot=c(6,5,4), prior=c(.2, .3, .5)) {`
			`+ n*prior/ntot`
			`+ }`
			`>`
			`> # Are the losses correct?`
			`> # Class counts for the two children are (4,4,0) and (2,1,4), when`
			`> # using surrogates`
			`> aeq(pfit$frame$dev/15, c(loss(prior,2), loss(phat(c(4,4,0)),2),`
			`+ loss(phat(c(2,1,4)),3)))`
			`[1] TRUE`
			`> # Node probabilities?`
			`> aeq(pfit$frame$yval2[,8] ,`
			`+ c(1, sum(phat(c(4,4,0))), sum(phat(c(2,1,4)))))`
			`[1] TRUE`
			`>`
			`> aeq(pfit$frame$yval2[,5:7] , rbind(prior,`
			`+ phat(c(4,4,0))/ sum(phat(c(4,4,0))),`
			`+ phat(c(2,1,4))/ sum(phat(c(2,1,4)))))`
			`[1] TRUE`
			`>`
			`> # Now the node and class probs, under altered priors`
			`> phat2 <- function(n, ntot=c(6,5,4), prior=aprior) {`
			`+ n*prior/ntot`
			`+ }`
			`>`
			`> # Use these to create the gini losses, base data, and for the best`
			`> # splits on variables 1, 2, 3`
			`> gfun <- function(n) { #The gini loss for a node, given the counts`
			`+ temp <- phat2(n)`
			`+ sum(temp) * gini(temp/sum(temp))`
			`+ }`
			`>`
			`> # These are in order x3, x2, x1 (best split to worst)`
			`> # Note that for x3, missing values cause the "parent" to be viewed as`
			`> # having 12 obs instead of 15.`
			`> # Each line is gini(parent) - gini(children)`
			`> aeq(pfit$splits[1:3, 3],`
			`+ 15* c(gfun(c(4,4,4)) - (gfun(c(3,4,0)) + gfun(c(1,0,4))),`
			`+ gfun(c(6,5,4)) - (gfun(c(6,5,2)) + gfun(c(0,0,2))),`
			`+ gfun(c(6,5,4)) - (gfun(c(4,4,4)) + gfun(c(2,1,0)))))`
			`[1] TRUE`
			`>`
			`> proc.time()`
			`user system elapsed`
			`0.129 0.012 0.135`