2025-01-12 04:36:52 +08:00

185 lines
5.8 KiB
R

#-*- R -*-
## Script from Fourth Edition of `Modern Applied Statistics with S'
# Chapter 7 Generalized Linear Models
library(MASS)
options(width=65, digits=5, height=9999)
pdf(file="ch07.pdf", width=8, height=6, pointsize=9)
options(contrasts = c("contr.treatment", "contr.poly"))
ax.1 <- glm(Postwt ~ Prewt + Treat + offset(Prewt),
family = gaussian, data = anorexia)
summary(ax.1)
# 7.2 Binomial data
options(contrasts = c("contr.treatment", "contr.poly"))
ldose <- rep(0:5, 2)
numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16)
sex <- factor(rep(c("M", "F"), c(6, 6)))
SF <- cbind(numdead, numalive = 20 - numdead)
budworm.lg <- glm(SF ~ sex*ldose, family = binomial)
summary(budworm.lg)
plot(c(1,32), c(0,1), type = "n", xlab = "dose",
ylab = "prob", log = "x")
text(2^ldose, numdead/20, labels = as.character(sex))
ld <- seq(0, 5, 0.1)
lines(2^ld, predict(budworm.lg, data.frame(ldose = ld,
sex = factor(rep("M", length(ld)), levels = levels(sex))),
type = "response"), col = 3)
lines(2^ld, predict(budworm.lg, data.frame(ldose = ld,
sex = factor(rep("F", length(ld)), levels = levels(sex))),
type = "response"), lty = 2, col = 2)
budworm.lgA <- update(budworm.lg, . ~ sex * I(ldose - 3))
summary(budworm.lgA, cor = F)$coefficients
anova(update(budworm.lg, . ~ . + sex * I(ldose^2)),
test = "Chisq")
budworm.lg0 <- glm(SF ~ sex + ldose - 1, family = binomial)
summary(budworm.lg0, cor = F)$coefficients
dose.p(budworm.lg0, cf = c(1,3), p = 1:3/4)
dose.p(update(budworm.lg0, family = binomial(link = probit)),
cf = c(1, 3), p = 1:3/4)
options(contrasts = c("contr.treatment", "contr.poly"))
attach(birthwt)
race <- factor(race, labels = c("white", "black", "other"))
table(ptl)
ptd <- factor(ptl > 0)
table(ftv)
ftv <- factor(ftv)
levels(ftv)[-(1:2)] <- "2+"
table(ftv) # as a check
bwt <- data.frame(low = factor(low), age, lwt, race,
smoke = (smoke > 0), ptd, ht = (ht > 0), ui = (ui > 0), ftv)
detach(); rm(race, ptd, ftv)
birthwt.glm <- glm(low ~ ., family = binomial, data = bwt)
summary(birthwt.glm)
birthwt.step <- stepAIC(birthwt.glm, trace = FALSE)
birthwt.step$anova
birthwt.step2 <- stepAIC(birthwt.glm, ~ .^2 + I(scale(age)^2)
+ I(scale(lwt)^2), trace = FALSE)
birthwt.step2$anova
summary(birthwt.step2)$coef
table(bwt$low, predict(birthwt.step2) > 0)
## R has a similar gam() in package gam and a different gam() in package mgcv
if(require(gam)) {
attach(bwt)
age1 <- age*(ftv=="1"); age2 <- age*(ftv=="2+")
birthwt.gam <- gam(low ~ s(age) + s(lwt) + smoke + ptd +
ht + ui + ftv + s(age1) + s(age2) + smoke:ui, binomial,
bwt, bf.maxit=25)
print(summary(birthwt.gam))
print(table(low, predict(birthwt.gam) > 0))
par(mfrow = c(2, 2))
if(interactive()) plot(birthwt.gam, ask = TRUE, se = TRUE)
par(mfrow = c(1, 1))
detach()
}
library(mgcv)
attach(bwt)
age1 <- age*(ftv=="1"); age2 <- age*(ftv=="2+")
(birthwt.gam <- gam(low ~ s(age) + s(lwt) + smoke + ptd +
ht + ui + ftv + s(age1) + s(age2) + smoke:ui, binomial, bwt))
table(low, predict(birthwt.gam) > 0)
par(mfrow = c(2, 2))
plot(birthwt.gam, se = TRUE)
par(mfrow = c(1, 1))
detach()
# 7.3 Poisson models
names(housing)
house.glm0 <- glm(Freq ~ Infl*Type*Cont + Sat,
family = poisson, data = housing)
summary(house.glm0)
addterm(house.glm0, ~. + Sat:(Infl+Type+Cont), test = "Chisq")
house.glm1 <- update(house.glm0, . ~ . + Sat:(Infl+Type+Cont))
summary(house.glm1)
1 - pchisq(deviance(house.glm1), house.glm1$df.resid)
dropterm(house.glm1, test = "Chisq")
addterm(house.glm1, ~. + Sat:(Infl+Type+Cont)^2, test = "Chisq")
hnames <- lapply(housing[, -5], levels) # omit Freq
house.pm <- predict(house.glm1, expand.grid(hnames),
type = "response") # poisson means
house.pm <- matrix(house.pm, ncol = 3, byrow = TRUE,
dimnames = list(NULL, hnames[[1]]))
house.pr <- house.pm/drop(house.pm %*% rep(1, 3))
cbind(expand.grid(hnames[-1]), round(house.pr, 2))
loglm(Freq ~ Infl*Type*Cont + Sat*(Infl+Type+Cont),
data = housing)
library(nnet)
(house.mult <- multinom(Sat ~ Infl + Type + Cont,
weights = Freq, data = housing))
house.mult2 <- multinom(Sat ~ Infl*Type*Cont,
weights = Freq, data = housing)
anova(house.mult, house.mult2, test = "none")
house.pm <- predict(house.mult, expand.grid(hnames[-1]),
type = "probs")
cbind(expand.grid(hnames[-1]), round(house.pm, 2))
house.cpr <- apply(house.pr, 1, cumsum)
logit <- function(x) log(x/(1-x))
house.ld <- logit(house.cpr[2, ]) - logit(house.cpr[1, ])
sort(drop(house.ld))
mean(.Last.value)
house.plr <- polr(Sat ~ Infl + Type + Cont,
data = housing, weights = Freq)
house.plr
house.pr1 <- predict(house.plr, expand.grid(hnames[-1]),
type = "probs")
cbind(expand.grid(hnames[-1]), round(house.pr1, 2))
Fr <- matrix(housing$Freq, ncol = 3, byrow = TRUE)
2 * sum(Fr * log(house.pr/house.pr1))
house.plr2 <- stepAIC(house.plr, ~.^2)
house.plr2$anova
# 7.4 A negative binomial family
glm(Days ~ .^4, family = poisson, data = quine)
quine.nb <- glm(Days ~ .^4, family = negative.binomial(2), data = quine)
quine.nb0 <- update(quine.nb, . ~ Sex/(Age + Eth*Lrn))
anova(quine.nb0, quine.nb, test = "Chisq")
quine.nb <- glm.nb(Days ~ .^4, data = quine)
quine.nb2 <- stepAIC(quine.nb)
quine.nb2$anova
dropterm(quine.nb2, test = "Chisq")
quine.nb3 <-
update(quine.nb2, . ~ . - Eth:Age:Lrn - Sex:Age:Lrn)
anova(quine.nb2, quine.nb3)
c(theta = quine.nb2$theta, SE = quine.nb2$SE)
par(mfrow = c(2,2), pty = "m")
rs <- resid(quine.nb2, type = "deviance")
plot(predict(quine.nb2), rs, xlab = "Linear predictors",
ylab = "Deviance residuals")
abline(h = 0, lty = 2)
qqnorm(rs, ylab = "Deviance residuals")
qqline(rs)
par(mfrow = c(1,1))
# End of ch07