2025-01-12 00:52:51 +08:00

225 lines
6.9 KiB
R

#-*- R -*-
## Script from Fourth Edition of `Modern Applied Statistics with S'
# Chapter 11 Exploratory Multivariate Analysis
library(MASS)
pdf(file="ch11.pdf", width=8, height=6, pointsize=9)
options(width=65, digits=5)
# 11.1 Visualization methods
# ir <- rbind(iris[,,1], iris[,,2], iris[,,3])
ir <- rbind(iris3[,,1], iris3[,,2], iris3[,,3])
ir.species <- factor(c(rep("s", 50), rep("c", 50), rep("v", 50)))
(ir.pca <- princomp(log(ir), cor = TRUE))
summary(ir.pca)
plot(ir.pca)
ir.pc <- predict(ir.pca)
eqscplot(ir.pc[, 1:2], type = "n",
xlab = "first principal component",
ylab = "second principal component")
text(ir.pc[, 1:2], labels = as.character(ir.species),
col = 3 + unclass(ir.species))
lcrabs <- log(crabs[, 4:8])
crabs.grp <- factor(c("B", "b", "O", "o")[rep(1:4, each = 50)])
(lcrabs.pca <- princomp(lcrabs))
loadings(lcrabs.pca)
lcrabs.pc <- predict(lcrabs.pca)
dimnames(lcrabs.pc) <- list(NULL, paste("PC", 1:5, sep = ""))
if(FALSE) { ## needs interaction with XGobi, or, better, rggobi
## Both have been withdrawn for R.
library(xgobi)
xgobi(lcrabs, colors = c("SkyBlue", "SlateBlue", "Orange",
"Red")[rep(1:4, each = 50)])
xgobi(lcrabs, glyphs = 12 + 5*rep(0:3, each = 50, 4))
library(rggobi)
g <- ggobi(lcrabs)
d <- displays(g)[[1]]
pmode(d) <- "2D Tour"
crabs.grp <- factor(c("B", "b", "O", "o")[rep(1:4, each = 50)])
glyph_colour(g$lcrabs) <- crabs.grp
colorscheme(g) <- "Paired 4"
}
ir.scal <- cmdscale(dist(ir), k = 2, eig = TRUE)
ir.scal$points[, 2] <- -ir.scal$points[, 2]
eqscplot(ir.scal$points, type = "n")
text(ir.scal$points, labels = as.character(ir.species),
col = 3 + unclass(ir.species), cex = 0.8)
distp <- dist(ir)
dist2 <- dist(ir.scal$points)
sum((distp - dist2)^2)/sum(distp^2)
ir.sam <- sammon(dist(ir[-143,]))
eqscplot(ir.sam$points, type = "n")
text(ir.sam$points, labels = as.character(ir.species[-143]),
col = 3 + unclass(ir.species), cex = 0.8)
ir.iso <- isoMDS(dist(ir[-143,]))
eqscplot(ir.iso$points, type = "n")
text(ir.iso$points, labels = as.character(ir.species[-143]),
col = 3 + unclass(ir.species), cex = 0.8)
cr.scale <- 0.5 * log(crabs$CL * crabs$CW)
slcrabs <- lcrabs - cr.scale
cr.means <- matrix(0, 2, 5)
cr.means[1,] <- colMeans(slcrabs[crabs$sex == "F", ])
cr.means[2,] <- colMeans(slcrabs[crabs$sex == "M", ])
dslcrabs <- slcrabs - cr.means[as.numeric(crabs$sex), ]
lcrabs.sam <- sammon(dist(dslcrabs))
eqscplot(lcrabs.sam$points, type = "n", xlab = "", ylab = "")
text(lcrabs.sam$points, labels = as.character(crabs.grp))
fgl.iso <- isoMDS(dist(as.matrix(fgl[-40, -10])))
eqscplot(fgl.iso$points, type = "n", xlab = "", ylab = "", axes = FALSE)
# either
# for(i in seq(along = levels(fgl$type))) {
# set <- fgl$type[-40] == levels(fgl$type)[i]
# points(fgl.iso$points[set,], pch = 18, cex = 0.6, col = 2 + i)}
# key(text = list(levels(fgl$type), col = 3:8))
# or
text(fgl.iso$points,
labels = c("F", "N", "V", "C", "T", "H")[fgl$type[-40]],
cex = 0.6)
fgl.iso3 <- isoMDS(dist(as.matrix(fgl[-40, -10])), k = 3)
# S: brush(fgl.iso3$points)
fgl.col <- c("SkyBlue", "SlateBlue", "Orange", "Orchid",
"Green", "HotPink")[fgl$type]
# xgobi(fgl.iso3$points, colors = fgl.col)
library(class)
gr <- somgrid(topo = "hexagonal")
crabs.som <- batchSOM(lcrabs, gr, c(4, 4, 2, 2, 1, 1, 1, 0, 0))
plot(crabs.som)
bins <- as.numeric(knn1(crabs.som$code, lcrabs, 0:47))
plot(crabs.som$grid, type = "n")
symbols(crabs.som$grid$pts[, 1], crabs.som$grid$pts[, 2],
circles = rep(0.4, 48), inches = FALSE, add = TRUE)
text(crabs.som$grid$pts[bins, ] + rnorm(400, 0, 0.1),
as.character(crabs.grp))
crabs.som2 <- SOM(lcrabs, gr); plot(crabs.som2)
state <- state.x77[, 2:7]; row.names(state) <- state.abb
biplot(princomp(state, cor = TRUE), pc.biplot = TRUE, cex = 0.7,
expand = 0.8)
library(fastICA)
nICA <- 4
crabs.ica <- fastICA(crabs[, 4:8], nICA)
Z <- crabs.ica$S
par(mfrow = c(2, nICA))
for(i in 1:nICA) boxplot(Z[, i] ~ crabs.grp)
par(mfrow = c(1, 1))
# S: stars(state.x77[, c(7, 4, 6, 2, 5, 3)], byrow = TRUE)
stars(state.x77[, c(7, 4, 6, 2, 5, 3)])
parcoord(state.x77[, c(7, 4, 6, 2, 5, 3)])
parcoord(log(ir)[, c(3, 4, 2, 1)], col = 1 + (0:149)%/%50)
# 11.2 Cluster analysis
swiss.x <- as.matrix(swiss[,-1])
library(cluster)
# S: h <- hclust(dist(swiss.x), method = "connected")
h <- hclust(dist(swiss.x), method = "single")
plot(h)
cutree(h, 3)
# S: plclust( clorder(h, cutree(h, 3) ))
pltree(diana(swiss.x))
par(mfrow = c(1, 1))
h <- hclust(dist(swiss.x), method = "average")
initial <- tapply(swiss.x, list(rep(cutree(h, 3),
ncol(swiss.x)), col(swiss.x)), mean)
dimnames(initial) <- list(NULL, dimnames(swiss.x)[[2]])
km <- kmeans(swiss.x, initial)
(swiss.pca <- princomp(swiss.x))
swiss.px <- predict(swiss.pca)
dimnames(km$centers)[[2]] <- dimnames(swiss.x)[[2]]
swiss.centers <- predict(swiss.pca, km$centers)
eqscplot(swiss.px[, 1:2], type = "n",
xlab = "first principal component",
ylab = "second principal component")
text(swiss.px[, 1:2], labels = km$cluster)
points(swiss.centers[,1:2], pch = 3, cex = 3)
if(interactive()) identify(swiss.px[, 1:2], cex = 0.5)
swiss.pam <- pam(swiss.px, 3)
summary(swiss.pam)
eqscplot(swiss.px[, 1:2], type = "n",
xlab = "first principal component",
ylab = "second principal component")
text(swiss.px[,1:2], labels = swiss.pam$clustering)
points(swiss.pam$medoid[,1:2], pch = 3, cex = 3)
fanny(swiss.px, 3)
## From the on-line Errata:
##
## `The authors of mclust have chosen to re-use the name for a
## completely incompatible package. We can no longer recommend its
## use, and the code given in the first printing does not work in R's
## mclust-2.x.'
##
## And later mclust was given a restrictive licence, so this example
## has been removed. Finally in 2012 it was given an OpenSource licence.
# 11.3 Factor analysis
ability.FA <- factanal(covmat = ability.cov, factors = 1)
ability.FA
(ability.FA <- update(ability.FA, factors = 2))
#summary(ability.FA)
round(loadings(ability.FA) %*% t(loadings(ability.FA)) +
diag(ability.FA$uniq), 3)
if(require("GPArotation")) {
# loadings(rotate(ability.FA, rotation = "oblimin"))
L <- loadings(ability.FA)
print(oblirot <- oblimin(L))
par(pty = "s")
eqscplot(L, xlim = c(0,1), ylim = c(0,1))
if(interactive()) identify(L, dimnames(L)[[1]])
naxes <- oblirot$Th
arrows(rep(0, 2), rep(0, 2), naxes[,1], naxes[,2])
}
# 11.4 Discrete multivariate analysis
caith <- as.matrix(caith)
names(dimnames(caith)) <- c("eyes", "hair")
mosaicplot(caith, color = TRUE)
House <- xtabs(Freq ~ Type + Infl + Cont + Sat, housing)
mosaicplot(House, color = TRUE)
corresp(caith)
caith2 <- caith
dimnames(caith2)[[2]] <- c("F", "R", "M", "D", "B")
par(mfcol = c(1, 3))
plot(corresp(caith2, nf = 2)); title("symmetric")
plot(corresp(caith2, nf = 2), type = "rows"); title("rows")
plot(corresp(caith2, nf = 2), type = "col"); title("columns")
par(mfrow = c(1, 1))
farms.mca <- mca(farms, abbrev = TRUE) # Use levels as names
plot(farms.mca, cex = rep(0.7, 2))
# End of ch11