200 lines
3.8 KiB
R
200 lines
3.8 KiB
R
#-*- R -*-
|
|
|
|
## Script from Fourth Edition of `Modern Applied Statistics with S'
|
|
|
|
# Chapter 2 Data Manipulation
|
|
|
|
library(MASS)
|
|
options(width=65, digits=5, height=9999)
|
|
|
|
-2:2
|
|
|
|
powers.of.pi <- pi^(-2:2)
|
|
powers.of.pi
|
|
class(powers.of.pi)
|
|
|
|
print(powers.of.pi)
|
|
summary(powers.of.pi)
|
|
|
|
# rm(powers.of.pi)
|
|
|
|
powers.of.pi[5]
|
|
|
|
names(powers.of.pi) <- -2:2
|
|
powers.of.pi
|
|
powers.of.pi["2"]
|
|
class(powers.of.pi)
|
|
|
|
as.vector(powers.of.pi)
|
|
names(powers.of.pi) <- NULL
|
|
powers.of.pi
|
|
|
|
citizen <- factor(c("uk", "us", "no", "au", "uk", "us", "us"))
|
|
citizen
|
|
|
|
unclass(citizen)
|
|
|
|
citizen[5:7]
|
|
|
|
citizen <- factor(c("uk", "us", "no", "au", "uk", "us", "us"),
|
|
levels = c("us", "fr", "no", "au", "uk"))
|
|
citizen
|
|
|
|
income <- ordered(c("Mid", "Hi", "Lo", "Mid", "Lo", "Hi", "Lo"))
|
|
income
|
|
|
|
as.numeric(income)
|
|
|
|
inc <- ordered(c("Mid", "Hi", "Lo", "Mid", "Lo", "Hi", "Lo"),
|
|
levels = c("Lo", "Mid", "Hi"))
|
|
inc
|
|
|
|
erupt <- cut(geyser$duration, breaks = 0:6)
|
|
erupt <- ordered(erupt, labels=levels(erupt))
|
|
erupt
|
|
|
|
painters
|
|
row.names(painters)
|
|
|
|
summary(painters) # try it!
|
|
|
|
attach(painters)
|
|
School
|
|
detach("painters")
|
|
|
|
mymat <- matrix(1:30, 3, 10)
|
|
mymat
|
|
|
|
myarr <- mymat
|
|
dim(myarr) <- c(3, 5, 2)
|
|
class(myarr)
|
|
myarr
|
|
dim(myarr)
|
|
|
|
dimnames(myarr) <- list(letters[1:3], NULL, c("(i)", "(ii)"))
|
|
myarr
|
|
|
|
newvar <- NA
|
|
class(NA)
|
|
|
|
newvar > 3
|
|
|
|
x <- c(pi, 4, 5)
|
|
x[2] <- NA
|
|
x
|
|
class(x)
|
|
|
|
is.na(x)
|
|
|
|
1/0
|
|
|
|
x <- c(-1, 0, 1)/0
|
|
x
|
|
is.na(x)
|
|
x > Inf
|
|
|
|
x <- c(2.9, 3.1, 3.4, 3.4, 3.7, 3.7, 2.8, 2.5)
|
|
|
|
|
|
|
|
letters[1:3]
|
|
letters[c(1:3,3:1)]
|
|
|
|
longitude <- state.center$x
|
|
names(longitude) <- state.name
|
|
longitude[c("Hawaii", "Alaska")]
|
|
|
|
myarr[1, 2:4, ]
|
|
myarr[1, 2:4, , drop = FALSE]
|
|
|
|
attach(painters)
|
|
painters[Colour >= 17, ]
|
|
|
|
painters[Colour >= 15 & Composition > 10, ]
|
|
painters[Colour >= 15 & School != "D", ]
|
|
|
|
painters[is.element(School, c("A", "B", "D")), ]
|
|
painters[School %in% c("A", "B", "D"), ] ## R only
|
|
|
|
painters[cbind(1:nrow(painters), ifelse(Colour > Expression, 3, 4))]
|
|
|
|
painters[grep("io$", row.names(painters)), ]
|
|
|
|
detach("painters")
|
|
|
|
m <- 30
|
|
fglsub1 <- fgl[sort(sample(1:nrow(fgl), m)), ]
|
|
|
|
fglsub2 <- fgl[rbinom(nrow(fgl), 1, 0.1) == 1, ]
|
|
|
|
fglsub3 <- fgl[seq(1, nrow(fgl), by = 10), ]
|
|
|
|
painters[sort.list(row.names(painters)), ]
|
|
|
|
|
|
lcrabs <- crabs # make a copy
|
|
lcrabs[, 4:8] <- log(crabs[, 4:8])
|
|
|
|
scrabs <- crabs # make a copy
|
|
scrabs[, 4:8] <- lapply(scrabs[, 4:8], scale)
|
|
## or to just centre the variables
|
|
scrabs[, 4:8] <- lapply(scrabs[, 4:8], scale, scale = FALSE)
|
|
|
|
scrabs <- crabs # make a copy
|
|
scrabs[ ] <- lapply(scrabs,
|
|
function(x) {if(is.numeric(x)) scale(x) else x})
|
|
|
|
sapply(crabs, is.numeric)
|
|
|
|
by(crabs[, 4:8], list(crabs$sp, crabs$sex), summary)
|
|
|
|
aggregate(crabs[, 4:8], by = list(sp=crabs$sp, sex=crabs$sex),
|
|
median)
|
|
|
|
authors <- data.frame(
|
|
surname = c("Tukey", "Venables", "Tierney", "Ripley", "McNeil"),
|
|
nationality = c("US", "Australia", "US", "UK", "Australia"),
|
|
deceased = c("yes", rep("no", 4)))
|
|
books <- data.frame(
|
|
name = c("Tukey", "Venables", "Tierney",
|
|
"Ripley", "Ripley", "McNeil", "R Core"),
|
|
title = c("Exploratory Data Analysis",
|
|
"Modern Applied Statistics ...",
|
|
"LISP-STAT",
|
|
"Spatial Statistics", "Stochastic Simulation",
|
|
"Interactive Data Analysis",
|
|
"An Introduction to R"))
|
|
|
|
authors
|
|
books
|
|
|
|
merge(authors, books, by.x = "surname", by.y = "name")
|
|
|
|
attach(quine)
|
|
table(Age)
|
|
table(Sex, Age)
|
|
|
|
tab <- xtabs(~ Sex + Age, quine)
|
|
unclass(tab)
|
|
|
|
tapply(Days, Age, mean)
|
|
|
|
tapply(Days, Age, mean, trim = 0.1)
|
|
|
|
tapply(Days, list(Sex, Age), mean)
|
|
|
|
tapply(Days, list(Sex, Age),
|
|
function(x) sqrt(var(x)/length(x)))
|
|
|
|
quineFO <- quine[sapply(quine, is.factor)]
|
|
|
|
#tab <- do.call("table", quineFO)
|
|
tab <- table(quineFO)
|
|
|
|
QuineF <- expand.grid(lapply(quineFO, levels))
|
|
|
|
QuineF$Freq <- as.vector(tab)
|
|
QuineF
|
|
|
|
# End of ch02
|