CircosHeatmap-aardio/dist/lib/r-library/MASS/scripts/ch05.R

#-*- R -*-

## Script from Fourth Edition of `Modern Applied Statistics with S'

# Chapter 5   Univariate Statistics

# for later use, from section 5.6
perm.t.test <- function(d) {
# ttest is function(x) mean(x)/sqrt(var(x)/length(x))
    binary.v <- function(x, digits) {
       if(missing(digits)) {
           mx <- max(x)
           digits <- if(mx > 0) 1 + floor(log(mx, base = 2)) else 1
       }
       ans <- 0:(digits - 1)
       lx <- length(x)
       x <- rep(x, rep(digits, lx))
       x <- (x %/% 2^ans) %% 2
       dim(x) <- c(digits, lx)
       x
    }
    digits <- length(d)
    n <- 2^digits
    x <- d * 2 * (binary.v(1:n, digits) - 0.5)
    mx <- matrix(1/digits, 1, digits) %*% x
    s <- matrix(1/(digits - 1), 1, digits)
    vx <- s %*% (x - matrix(mx, digits, n, byrow=TRUE))^2
    as.vector(mx/sqrt(vx/digits))
}

library(MASS)
options(width=65, digits=5, height=9999)
library(lattice)
pdf(file="ch05.pdf", width=8, height=6, pointsize=9)

rm(A, B) # precautionary clear-out
attach(shoes)
tperm <- perm.t.test(B - A) # see section 5.6
detach()

# from ch04
if(!exists("fgl.df")) {
fgl0 <- fgl[ ,-10] # omit type.
fgl.df <- data.frame(type = rep(fgl$type, 9),
  y = as.vector(as.matrix(fgl0)),
  meas = factor(rep(1:9, each = 214), labels = names(fgl0)))
  invisible()
}


# 5.1  Probability distributions


x <- rt(250, df = 9)
par(pty = "s")
qqnorm(x)
qqline(x)
par(pty = "m")

x <- rgamma(100, shape = 5, rate = 0.1)
fitdistr(x, "gamma")
x2 <- rt(250, df = 9)
fitdistr(x2, "t", df = 9)
fitdistr(x2, "t")


# 5.2  Generating random data

contam <- rnorm( 100, 0, (1 + 2*rbinom(100, 1, 0.05)) )


# 5.3  Data summaries

par(mfrow=c(2,3))
hist(geyser$duration, "scott", xlab="duration")
hist(chem, "scott")
hist(tperm, "scott")
hist(geyser$duration, "FD", xlab="duration")
hist(chem, "FD")
hist(tperm, "FD")
par(mfrow=c(1,1))

swiss.fertility <- swiss[, 1]
stem(swiss.fertility)
stem(chem)
stem(abbey)
stem(abbey, scale = 0.4) ## use scale = 0.4 in R


par(mfrow = c(1,2))
boxplot(chem, sub = "chem", range = 0.5)
boxplot(abbey, sub = "abbey")
par(mfrow = c(1,1))

bwplot(type ~ y | meas, data = fgl.df, scales = list(x="free"),
  strip = function(...) strip.default(..., style=1), xlab = "")


# 5.4  Classical univariate statistics

attach(shoes)
t.test(A, mu = 10)
t.test(A)$conf.int

wilcox.test(A, mu = 10)

var.test(A, B)

t.test(A, B, var.equal = TRUE)

t.test(A, B, var.equal = FALSE)

wilcox.test(A, B)

t.test(A, B, paired = TRUE)

wilcox.test(A, B, paired = TRUE)
detach()

par(mfrow = c(1, 2))
truehist(tperm, xlab = "diff")
x <- seq(-4,4, 0.1)
lines(x, dt(x,9))
#cdf.compare(tperm, distribution = "t", df = 9)
sres <- c(sort(tperm), 4)
yres <- (0:1024)/1024
plot(sres, yres, type="S", xlab="diff", ylab="")
lines(x, pt(x,9), lty=3)

legend(-5, 1.05, c("Permutation dsn","t_9 cdf"), lty = c(1,3))
par(mfrow = c(1, 1))


# 5.5  Robust summaries

# Figure 5.7 was obtained by
x <- seq(-10, 10, len=500)
y <- dt(x, 25, log = TRUE)
z <- -diff(y)/diff(x)
plot(x[-1], z, type = "l", xlab = "", ylab = "psi")
y2 <-  dt(x, 5, log = TRUE)
z2 <- -diff(y2)/diff(x)
lines(x[-1], z2, lty = 2)


sort(chem)
mean(chem)
median(chem)
#location.m(chem)
#location.m(chem, psi.fun="huber")
mad(chem)
#scale.tau(chem)
#scale.tau(chem, center=3.68)
unlist(huber(chem))
unlist(hubers(chem))
fitdistr(chem, "t", list(m = 3, s = 0.5), df = 5)

sort(abbey)
mean(abbey)
median(abbey)
#location.m(abbey)
#location.m(abbey, psi.fun="huber")
unlist(hubers(abbey))
unlist(hubers(abbey, k = 2))
unlist(hubers(abbey, k = 1))
fitdistr(abbey, "t", list(m = 12, s = 5), df = 10)


# 5.6  Density estimation

# Figure 5.8
attach(geyser)
par(mfrow=c(2,3))
truehist(duration, h=0.5, x0=0.0, xlim=c(0, 6), ymax=0.7)
truehist(duration, h=0.5, x0=0.1, xlim=c(0, 6), ymax=0.7)
truehist(duration, h=0.5, x0=0.2, xlim=c(0, 6), ymax=0.7)
truehist(duration, h=0.5, x0=0.3, xlim=c(0, 6), ymax=0.7)
truehist(duration, h=0.5, x0=0.4, xlim=c(0, 6), ymax=0.7)

breaks <- seq(0, 5.9, 0.1)
counts <- numeric(length(breaks))
for(i in (0:4)) counts[i+(1:55)] <- counts[i+(1:55)] +
    rep(hist(duration, breaks=0.1*i + seq(0, 5.5, 0.5),
    prob=TRUE, plot=FALSE)$density, rep(5,11))
plot(breaks+0.05, counts/5, type="l", xlab="duration",
    ylab="averaged", bty="n", xlim=c(0, 6), ylim=c(0, 0.7))
detach()


attach(geyser)
truehist(duration, nbins = 15, xlim = c(0.5, 6), ymax = 1.2)
lines(density(duration, width = "nrd"))

truehist(duration, nbins = 15, xlim = c(0.5, 6), ymax = 1.2)
lines(density(duration, width = "SJ", n = 256), lty = 3)
lines(density(duration, n = 256, width = "SJ-dpi"), lty = 1)
detach()

gal <- galaxies/1000
plot(x = c(0, 40), y = c(0, 0.3), type = "n", bty = "l",
    xlab = "velocity of galaxy (1000km/s)", ylab = "density")
rug(gal)
lines(density(gal, width = "SJ-dpi", n = 256), lty = 1)
lines(density(gal, width = "SJ", n = 256), lty = 3)
library(polspline)
x <- seq(5, 40, length = 500)
lines(x, doldlogspline(x, oldlogspline(gal)), lty = 2)


geyser2 <- data.frame(as.data.frame(geyser)[-1, ],
                      pduration = geyser$duration[-299])
attach(geyser2)
par(mfrow = c(2, 2))
plot(pduration, waiting, xlim = c(0.5, 6), ylim = c(40, 110),
   xlab = "previous duration", ylab = "waiting")
f1 <- kde2d(pduration, waiting, n = 50, lims=c(0.5, 6, 40, 110))
image(f1, zlim = c(0, 0.075),
     xlab = "previous duration", ylab = "waiting")
f2 <- kde2d(pduration, waiting, n = 50, lims=c(0.5, 6, 40, 110),
  h = c(width.SJ(duration), width.SJ(waiting)) )
image(f2, zlim = c(0, 0.075),
      xlab = "previous duration", ylab = "waiting")
persp(f2,  phi = 30, theta = 20, d = 5,
      xlab = "previous duration", ylab = "waiting", zlab = "")
detach()

density(gal, n = 1, from = 20.833, to = 20.834, width = "SJ")$y
1/(2 * sqrt(length(gal)) * 0.13)

set.seed(101)
m <- 1000
res <- numeric(m)
for (i in 1:m) res[i] <- median(sample(gal, replace = TRUE))
mean(res - median(gal))
sqrt(var(res))

truehist(res, h = 0.1)
lines(density(res, width = "SJ-dpi", n = 256))
quantile(res, p = c(0.025, 0.975))
x <- seq(19.5, 22.5, length = 500)
lines(x, doldlogspline(x, oldlogspline(res)), lty = 3)

library(boot)
set.seed(101)
gal.boot <- boot(gal, function(x, i) median(x[i]), R = 1000)
gal.boot

boot.ci(gal.boot, conf = c(0.90, 0.95),
         type = c("norm","basic","perc","bca"))
plot(gal.boot)

if(FALSE) { # bootstrap() is an S-PLUS function
gal.bt <- bootstrap(gal, median, seed = 101, B = 1000)
summary(gal.bt)
plot(gal.bt)
qqnorm(gal.bt)

limits.emp(gal.bt)
limits.bca(gal.bt)
}

sim.gen  <- function(data, mle) {
 n <- length(data)
 data[sample(n, replace = TRUE)]  + mle*rnorm(n)
}
gal.boot2 <- boot(gal, median, R = 1000,
 sim = "parametric", ran.gen = sim.gen, mle = 0.5)
boot.ci(gal.boot2, conf = c(0.90, 0.95),
         type = c("norm","basic","perc"))

attach(shoes)
t.test(B - A)
shoes.boot <- boot(B - A, function(x,i) mean(x[i]), R = 1000)
boot.ci(shoes.boot, type = c("norm", "basic", "perc", "bca"))
mean.fun <- function(d, i) {
 n <- length(i)
 c(mean(d[i]), (n-1)*var(d[i])/n^2)
}
shoes.boot2 <- boot(B - A, mean.fun, R = 1000)
boot.ci(shoes.boot2, type = "stud")
detach()

# End of ch05
首次上传 2025-01-12 00:52:51 +08:00			`#-- R --`

			## Script from Fourth Edition of `Modern Applied Statistics with S'

			`# Chapter 5 Univariate Statistics`

			`# for later use, from section 5.6`
			`perm.t.test <- function(d) {`
			`# ttest is function(x) mean(x)/sqrt(var(x)/length(x))`
			`binary.v <- function(x, digits) {`
			`if(missing(digits)) {`
			`mx <- max(x)`
			`digits <- if(mx > 0) 1 + floor(log(mx, base = 2)) else 1`
			`}`
			`ans <- 0:(digits - 1)`
			`lx <- length(x)`
			`x <- rep(x, rep(digits, lx))`
			`x <- (x %/% 2^ans) %% 2`
			`dim(x) <- c(digits, lx)`
			`x`
			`}`
			`digits <- length(d)`
			`n <- 2^digits`
			`x <- d * 2 * (binary.v(1:n, digits) - 0.5)`
			`mx <- matrix(1/digits, 1, digits) %*% x`
			`s <- matrix(1/(digits - 1), 1, digits)`
			`vx <- s %*% (x - matrix(mx, digits, n, byrow=TRUE))^2`
			`as.vector(mx/sqrt(vx/digits))`
			`}`

			`library(MASS)`
			`options(width=65, digits=5, height=9999)`
			`library(lattice)`
			`pdf(file="ch05.pdf", width=8, height=6, pointsize=9)`

			`rm(A, B) # precautionary clear-out`
			`attach(shoes)`
			`tperm <- perm.t.test(B - A) # see section 5.6`
			`detach()`

			`# from ch04`
			`if(!exists("fgl.df")) {`
			`fgl0 <- fgl[ ,-10] # omit type.`
			`fgl.df <- data.frame(type = rep(fgl$type, 9),`
			`y = as.vector(as.matrix(fgl0)),`
			`meas = factor(rep(1:9, each = 214), labels = names(fgl0)))`
			`invisible()`
			`}`


			`# 5.1 Probability distributions`


			`x <- rt(250, df = 9)`
			`par(pty = "s")`
			`qqnorm(x)`
			`qqline(x)`
			`par(pty = "m")`

			`x <- rgamma(100, shape = 5, rate = 0.1)`
			`fitdistr(x, "gamma")`
			`x2 <- rt(250, df = 9)`
			`fitdistr(x2, "t", df = 9)`
			`fitdistr(x2, "t")`


			`# 5.2 Generating random data`

			`contam <- rnorm( 100, 0, (1 + 2*rbinom(100, 1, 0.05)) )`


			`# 5.3 Data summaries`

			`par(mfrow=c(2,3))`
			`hist(geyser$duration, "scott", xlab="duration")`
			`hist(chem, "scott")`
			`hist(tperm, "scott")`
			`hist(geyser$duration, "FD", xlab="duration")`
			`hist(chem, "FD")`
			`hist(tperm, "FD")`
			`par(mfrow=c(1,1))`

			`swiss.fertility <- swiss[, 1]`
			`stem(swiss.fertility)`
			`stem(chem)`
			`stem(abbey)`
			`stem(abbey, scale = 0.4) ## use scale = 0.4 in R`


			`par(mfrow = c(1,2))`
			`boxplot(chem, sub = "chem", range = 0.5)`
			`boxplot(abbey, sub = "abbey")`
			`par(mfrow = c(1,1))`

			`bwplot(type ~ y \| meas, data = fgl.df, scales = list(x="free"),`
			`strip = function(...) strip.default(..., style=1), xlab = "")`


			`# 5.4 Classical univariate statistics`

			`attach(shoes)`
			`t.test(A, mu = 10)`
			`t.test(A)$conf.int`

			`wilcox.test(A, mu = 10)`

			`var.test(A, B)`

			`t.test(A, B, var.equal = TRUE)`

			`t.test(A, B, var.equal = FALSE)`

			`wilcox.test(A, B)`

			`t.test(A, B, paired = TRUE)`

			`wilcox.test(A, B, paired = TRUE)`
			`detach()`

			`par(mfrow = c(1, 2))`
			`truehist(tperm, xlab = "diff")`
			`x <- seq(-4,4, 0.1)`
			`lines(x, dt(x,9))`
			`#cdf.compare(tperm, distribution = "t", df = 9)`
			`sres <- c(sort(tperm), 4)`
			`yres <- (0:1024)/1024`
			`plot(sres, yres, type="S", xlab="diff", ylab="")`
			`lines(x, pt(x,9), lty=3)`

			`legend(-5, 1.05, c("Permutation dsn","t_9 cdf"), lty = c(1,3))`
			`par(mfrow = c(1, 1))`


			`# 5.5 Robust summaries`

			`# Figure 5.7 was obtained by`
			`x <- seq(-10, 10, len=500)`
			`y <- dt(x, 25, log = TRUE)`
			`z <- -diff(y)/diff(x)`
			`plot(x[-1], z, type = "l", xlab = "", ylab = "psi")`
			`y2 <- dt(x, 5, log = TRUE)`
			`z2 <- -diff(y2)/diff(x)`
			`lines(x[-1], z2, lty = 2)`


			`sort(chem)`
			`mean(chem)`
			`median(chem)`
			`#location.m(chem)`
			`#location.m(chem, psi.fun="huber")`
			`mad(chem)`
			`#scale.tau(chem)`
			`#scale.tau(chem, center=3.68)`
			`unlist(huber(chem))`
			`unlist(hubers(chem))`
			`fitdistr(chem, "t", list(m = 3, s = 0.5), df = 5)`

			`sort(abbey)`
			`mean(abbey)`
			`median(abbey)`
			`#location.m(abbey)`
			`#location.m(abbey, psi.fun="huber")`
			`unlist(hubers(abbey))`
			`unlist(hubers(abbey, k = 2))`
			`unlist(hubers(abbey, k = 1))`
			`fitdistr(abbey, "t", list(m = 12, s = 5), df = 10)`


			`# 5.6 Density estimation`

			`# Figure 5.8`
			`attach(geyser)`
			`par(mfrow=c(2,3))`
			`truehist(duration, h=0.5, x0=0.0, xlim=c(0, 6), ymax=0.7)`
			`truehist(duration, h=0.5, x0=0.1, xlim=c(0, 6), ymax=0.7)`
			`truehist(duration, h=0.5, x0=0.2, xlim=c(0, 6), ymax=0.7)`
			`truehist(duration, h=0.5, x0=0.3, xlim=c(0, 6), ymax=0.7)`
			`truehist(duration, h=0.5, x0=0.4, xlim=c(0, 6), ymax=0.7)`

			`breaks <- seq(0, 5.9, 0.1)`
			`counts <- numeric(length(breaks))`
			`for(i in (0:4)) counts[i+(1:55)] <- counts[i+(1:55)] +`
			`rep(hist(duration, breaks=0.1*i + seq(0, 5.5, 0.5),`
			`prob=TRUE, plot=FALSE)$density, rep(5,11))`
			`plot(breaks+0.05, counts/5, type="l", xlab="duration",`
			`ylab="averaged", bty="n", xlim=c(0, 6), ylim=c(0, 0.7))`
			`detach()`


			`attach(geyser)`
			`truehist(duration, nbins = 15, xlim = c(0.5, 6), ymax = 1.2)`
			`lines(density(duration, width = "nrd"))`

			`truehist(duration, nbins = 15, xlim = c(0.5, 6), ymax = 1.2)`
			`lines(density(duration, width = "SJ", n = 256), lty = 3)`
			`lines(density(duration, n = 256, width = "SJ-dpi"), lty = 1)`
			`detach()`

			`gal <- galaxies/1000`
			`plot(x = c(0, 40), y = c(0, 0.3), type = "n", bty = "l",`
			`xlab = "velocity of galaxy (1000km/s)", ylab = "density")`
			`rug(gal)`
			`lines(density(gal, width = "SJ-dpi", n = 256), lty = 1)`
			`lines(density(gal, width = "SJ", n = 256), lty = 3)`
			`library(polspline)`
			`x <- seq(5, 40, length = 500)`
			`lines(x, doldlogspline(x, oldlogspline(gal)), lty = 2)`


			`geyser2 <- data.frame(as.data.frame(geyser)[-1, ],`
			`pduration = geyser$duration[-299])`
			`attach(geyser2)`
			`par(mfrow = c(2, 2))`
			`plot(pduration, waiting, xlim = c(0.5, 6), ylim = c(40, 110),`
			`xlab = "previous duration", ylab = "waiting")`
			`f1 <- kde2d(pduration, waiting, n = 50, lims=c(0.5, 6, 40, 110))`
			`image(f1, zlim = c(0, 0.075),`
			`xlab = "previous duration", ylab = "waiting")`
			`f2 <- kde2d(pduration, waiting, n = 50, lims=c(0.5, 6, 40, 110),`
			`h = c(width.SJ(duration), width.SJ(waiting)) )`
			`image(f2, zlim = c(0, 0.075),`
			`xlab = "previous duration", ylab = "waiting")`
			`persp(f2, phi = 30, theta = 20, d = 5,`
			`xlab = "previous duration", ylab = "waiting", zlab = "")`
			`detach()`

			`density(gal, n = 1, from = 20.833, to = 20.834, width = "SJ")$y`
			`1/(2 * sqrt(length(gal)) * 0.13)`

			`set.seed(101)`
			`m <- 1000`
			`res <- numeric(m)`
			`for (i in 1:m) res[i] <- median(sample(gal, replace = TRUE))`
			`mean(res - median(gal))`
			`sqrt(var(res))`

			`truehist(res, h = 0.1)`
			`lines(density(res, width = "SJ-dpi", n = 256))`
			`quantile(res, p = c(0.025, 0.975))`
			`x <- seq(19.5, 22.5, length = 500)`
			`lines(x, doldlogspline(x, oldlogspline(res)), lty = 3)`

			`library(boot)`
			`set.seed(101)`
			`gal.boot <- boot(gal, function(x, i) median(x[i]), R = 1000)`
			`gal.boot`

			`boot.ci(gal.boot, conf = c(0.90, 0.95),`
			`type = c("norm","basic","perc","bca"))`
			`plot(gal.boot)`

			`if(FALSE) { # bootstrap() is an S-PLUS function`
			`gal.bt <- bootstrap(gal, median, seed = 101, B = 1000)`
			`summary(gal.bt)`
			`plot(gal.bt)`
			`qqnorm(gal.bt)`

			`limits.emp(gal.bt)`
			`limits.bca(gal.bt)`
			`}`

			`sim.gen <- function(data, mle) {`
			`n <- length(data)`
			`data[sample(n, replace = TRUE)] + mle*rnorm(n)`
			`}`
			`gal.boot2 <- boot(gal, median, R = 1000,`
			`sim = "parametric", ran.gen = sim.gen, mle = 0.5)`
			`boot.ci(gal.boot2, conf = c(0.90, 0.95),`
			`type = c("norm","basic","perc"))`

			`attach(shoes)`
			`t.test(B - A)`
			`shoes.boot <- boot(B - A, function(x,i) mean(x[i]), R = 1000)`
			`boot.ci(shoes.boot, type = c("norm", "basic", "perc", "bca"))`
			`mean.fun <- function(d, i) {`
			`n <- length(i)`
			`c(mean(d[i]), (n-1)*var(d[i])/n^2)`
			`}`
			`shoes.boot2 <- boot(B - A, mean.fun, R = 1000)`
			`boot.ci(shoes.boot2, type = "stud")`
			`detach()`

			`# End of ch05`