418 lines
18 KiB
Plaintext
Raw Normal View History

2025-01-12 00:52:51 +08:00
R Under development (unstable) (2022-03-19 r81942) -- "Unsuffered Consequences"
Copyright (C) 2022 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> ## tests of the simulate.lm method, added Feb 2009
>
> options(digits = 5)
>
> ## cases should be named
> hills <- readRDS("hills.rds") # copied from package MASS
> fit1 <- lm(time ~ dist, data = hills)
> set.seed(1)
> simulate(fit1, nsim = 3)
sim_1 sim_2 sim_3
Greenmantle 3.4841 7.7039 25.4746
Carnethy 48.8068 37.2737 30.9745
Craig Dunain 28.4665 43.9584 57.3295
Ben Rha 89.4727 79.5895 38.9971
Ben Lomond 68.3785 77.0326 36.7858
Goatfell 45.4299 58.5197 67.6189
Bens of Jura 138.1736 123.3906 119.6004
Cairnpapple 59.8758 59.0504 45.1641
Scolty 48.3017 47.9202 38.2951
Traprain 39.0478 31.3974 33.3777
Lairig Ghru 258.5807 214.2935 217.0639
Dollar 44.5912 44.0871 34.1140
Lomonds 61.9013 89.6352 97.8082
Cairn Table 0.9461 42.9001 14.7382
Eildon Two 55.0951 50.2295 44.4990
Cairngorm 77.5672 86.4083 85.1081
Seven Hills 111.4626 99.5722 133.0006
Knock Hill 38.9856 26.9579 14.0804
Black Hill 49.0344 10.1091 40.0303
Creag Beag 52.8285 69.5738 46.3069
Kildcon Hill 38.4895 59.6709 9.3243
Meall Ant-Suidhe 39.9240 16.9877 48.4197
Half Ben Nevis 46.6300 24.3056 68.2987
Cow Hill -27.8787 23.1894 25.7935
N Berwick Law 32.5197 17.4555 51.8170
Creag Dubh 27.3610 76.4071 39.6261
Burnswark 42.0330 44.3590 19.6667
Largo Law 7.4616 50.5758 25.3716
Criffel 39.7654 49.8660 24.8692
Acmony 45.1519 21.9790 27.3645
Ben Nevis 105.5773 82.2313 66.0840
Knockfarrel 43.0908 9.1228 45.9825
Two Breweries 152.8438 174.3537 126.9294
Cockleroi 31.5726 35.7046 35.7999
Moffat Chase 134.2882 205.1244 148.7057
>
> ## and weights should be taken into account
> fit2 <- lm(time ~ -1 + dist + climb, hills[-18, ], weight = 1/dist^2)
> coef(summary(fit2))
Estimate Std. Error t value Pr(>|t|)
dist 4.8999847 0.4737032 10.3440 9.8468e-12
climb 0.0084718 0.0016869 5.0221 1.8636e-05
> set.seed(1); ( ys <- simulate(fit2, nsim = 3) )
sim_1 sim_2 sim_3
Greenmantle 15.754 13.355 18.247
Carnethy 51.988 47.396 67.247
Craig Dunain 30.614 34.000 40.672
Ben Rha 58.825 42.959 36.719
Ben Lomond 68.579 76.460 71.455
Goatfell 55.088 71.287 53.926
Bens of Jura 151.910 138.573 116.292
Cairnpapple 41.841 34.234 38.413
Scolty 34.958 35.733 28.443
Traprain 32.564 39.177 34.915
Lairig Ghru 209.113 130.333 157.652
Dollar 43.936 36.921 37.675
Lomonds 57.642 69.616 58.280
Cairn Table 16.646 39.532 32.599
Eildon Two 41.230 34.111 41.536
Cairngorm 73.841 85.681 54.935
Seven Hills 86.948 94.364 97.870
Black Hill 35.952 27.000 32.437
Creag Beag 37.808 34.432 39.509
Kildcon Hill 19.520 12.910 16.075
Meall Ant-Suidhe 33.970 36.271 31.514
Half Ben Nevis 54.038 63.231 50.087
Cow Hill 17.615 16.486 16.037
N Berwick Law 12.152 15.778 24.416
Creag Dubh 39.714 39.457 42.478
Burnswark 35.747 35.141 41.549
Largo Law 31.552 47.902 42.693
Criffel 34.452 46.350 51.317
Acmony 25.679 33.145 20.575
Ben Nevis 91.620 86.634 78.946
Knockfarrel 44.906 28.781 25.088
Two Breweries 129.888 136.598 121.358
Cockleroi 31.482 18.866 25.682
Moffat Chase 138.983 177.836 141.436
> for(i in seq_len(3))
+ print(coef(summary(update(fit2, ys[, i] ~ .))))
Estimate Std. Error t value Pr(>|t|)
dist 4.8759333 0.4295826 11.3504 9.3646e-13
climb 0.0091824 0.0015298 6.0023 1.0781e-06
Estimate Std. Error t value Pr(>|t|)
dist 4.6969341 0.4406227 10.66 4.6417e-12
climb 0.0099795 0.0015691 6.36 3.8442e-07
Estimate Std. Error t value Pr(>|t|)
dist 4.8215499 0.420077 11.4778 7.0162e-13
climb 0.0090388 0.001496 6.0422 9.6065e-07
> ## should be identical to glm(*, gaussian):
> fit2. <- glm(time ~ -1 + dist + climb, family=gaussian, data=hills[-18, ],
+ weight = 1/dist^2)
> set.seed(1); ys. <- simulate(fit2., nsim = 3)
> stopifnot(all.equal(ys, ys.))
>
> ## Poisson fit
> load("anorexia.rda") # copied from package MASS
> fit3 <- glm(Postwt ~ Prewt + Treat + offset(Prewt),
+ family = gaussian, data = anorexia)
> coef(summary(fit3))
Estimate Std. Error t value Pr(>|t|)
(Intercept) 49.77111 13.39096 3.7168 0.00041011
Prewt -0.56554 0.16118 -3.5087 0.00080343
TreatCont -4.09707 1.89349 -2.1638 0.03399931
TreatFT 4.56306 2.13334 2.1389 0.03603508
> set.seed(1)
> simulate(fit3, nsim = 8)
sim_1 sim_2 sim_3 sim_4 sim_5 sim_6 sim_7 sim_8
1 76.364 84.997 72.948 78.581 83.762 62.645 70.046 87.655
2 85.796 77.997 79.276 75.769 91.529 93.684 84.340 95.638
3 79.726 76.809 100.122 90.039 82.835 81.123 89.696 75.979
4 88.956 79.858 77.946 77.512 80.451 74.824 76.441 76.082
5 81.905 76.512 70.629 67.511 81.309 78.424 85.830 87.696
6 78.312 84.045 72.589 84.052 74.084 88.309 83.858 76.262
7 87.004 84.121 86.744 79.204 96.013 88.336 79.083 65.958
8 83.454 74.188 78.173 75.923 79.240 82.265 82.812 71.771
9 84.710 76.723 78.472 72.621 86.034 76.696 77.664 73.942
10 77.605 78.792 73.251 92.318 86.401 70.223 92.105 80.067
11 89.938 87.609 69.008 77.078 79.035 76.676 79.261 76.571
12 86.931 73.579 76.708 73.007 82.077 86.150 90.162 85.826
13 76.661 85.140 87.974 82.372 87.232 75.252 82.427 78.048
14 64.151 81.929 75.270 81.442 72.297 79.125 58.615 82.216
15 84.154 83.722 66.643 69.424 90.060 68.155 66.771 73.749
16 78.944 77.135 92.302 59.098 76.581 79.200 76.298 87.563
17 82.577 85.272 85.657 78.221 94.233 83.589 84.343 77.545
18 89.624 84.902 81.372 87.019 93.590 82.020 66.690 85.066
19 87.943 78.426 89.599 81.795 82.791 81.068 88.923 76.038
20 84.445 88.729 86.486 79.615 84.259 92.607 76.083 81.752
21 89.233 90.918 78.499 86.734 75.671 88.142 77.567 82.487
22 87.800 87.229 97.737 74.063 84.597 90.098 71.487 70.588
23 80.777 91.330 78.478 87.911 87.540 73.815 70.112 79.251
24 65.463 83.242 69.404 79.307 80.036 80.492 79.738 87.581
25 81.411 68.177 76.078 82.021 73.917 85.144 80.640 81.841
26 83.949 80.341 85.789 91.557 79.765 83.947 69.702 85.341
27 83.658 76.200 100.851 86.305 84.495 69.886 77.737 76.425
28 76.394 83.353 87.395 80.525 94.118 89.063 90.396 94.816
29 81.843 80.851 88.369 93.295 81.802 71.887 82.018 85.732
30 88.574 85.951 85.119 71.700 84.813 79.997 100.768 82.505
31 93.966 78.128 82.154 80.683 75.454 93.724 93.178 95.943
32 87.591 89.411 88.065 86.524 91.757 92.604 92.463 82.937
33 93.707 86.434 96.498 89.842 100.128 98.619 91.036 93.118
34 82.545 95.253 97.402 90.041 93.367 85.060 84.870 91.865
35 75.353 89.964 92.132 85.913 90.648 84.194 80.037 89.165
36 81.849 91.097 93.174 87.587 71.698 78.295 89.128 82.603
37 83.949 89.381 78.108 86.214 90.064 97.816 97.030 83.781
38 88.111 100.264 95.391 86.797 91.708 88.839 96.085 91.003
39 92.769 80.657 86.627 89.946 82.627 80.103 79.418 88.676
40 88.333 79.786 72.769 91.006 84.197 89.045 71.711 83.137
41 79.035 90.178 83.819 63.414 74.154 87.681 79.418 89.384
42 82.934 80.161 83.594 88.698 89.442 97.930 87.778 84.242
43 90.825 84.515 96.182 88.577 83.679 81.754 95.389 81.075
44 89.716 83.090 80.486 82.864 74.882 83.104 76.630 89.581
45 83.067 85.640 84.871 94.510 85.309 84.969 90.416 72.509
46 81.416 84.405 79.890 83.637 95.874 83.731 87.982 89.088
47 89.853 90.757 86.073 85.324 84.976 84.750 95.640 90.776
48 88.370 81.770 85.813 88.991 88.121 80.944 82.813 81.438
49 83.831 81.084 79.509 96.615 91.220 94.676 82.122 76.819
50 94.065 97.289 93.711 89.801 87.947 83.049 79.914 85.160
51 88.740 84.464 77.532 83.016 83.503 83.253 82.351 96.777
52 80.127 83.145 77.085 76.100 80.701 88.951 81.871 79.209
53 88.863 85.784 96.540 84.173 91.644 94.332 102.886 70.212
54 76.995 89.849 77.787 78.317 77.455 79.488 101.948 90.544
55 97.743 87.230 90.618 85.936 89.461 84.197 86.580 84.245
56 104.562 90.479 88.083 93.494 88.722 94.396 83.459 87.177
57 87.962 85.768 93.382 84.580 74.720 97.627 76.757 82.044
58 84.412 89.435 103.483 110.184 81.867 89.945 95.289 91.540
59 94.153 90.597 101.249 91.266 96.569 80.198 82.567 95.071
60 91.060 87.893 89.693 99.889 90.667 103.929 107.945 87.902
61 105.676 92.626 72.970 72.943 94.523 98.931 82.737 84.683
62 87.470 77.149 105.173 92.915 100.915 82.787 88.520 95.397
63 100.074 97.400 99.915 86.075 105.545 94.806 121.849 93.533
64 86.419 75.502 90.001 92.642 90.950 73.946 78.485 85.108
65 84.122 87.208 89.215 92.087 91.960 93.284 91.455 84.941
66 91.104 86.100 93.346 86.942 88.441 101.037 82.062 96.070
67 77.408 85.453 88.856 99.244 101.014 78.578 92.429 83.066
68 98.275 87.651 90.984 83.155 92.209 82.608 81.955 93.975
69 91.681 77.253 87.819 86.560 82.422 86.137 91.151 96.234
70 108.553 101.603 83.831 86.407 92.306 88.639 91.321 90.129
71 95.016 80.079 98.591 87.035 78.307 77.509 83.441 97.618
72 87.308 89.028 102.868 98.858 90.900 95.758 92.341 99.148
>
> ## two-column binomial fit
> ldose <- rep(0:5, 2)
> numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16)
> sex <- factor(rep(c("M", "F"), c(6, 6)))
> SF <- cbind(numdead, numalive = 20 - numdead)
> fit4 <- glm(SF ~ sex + ldose - 1, family = binomial)
> coef(summary(fit4))
Estimate Std. Error z value Pr(>|z|)
sexF -3.4732 0.46852 -7.4130 1.2344e-13
sexM -2.3724 0.38551 -6.1539 7.5579e-10
ldose 1.0642 0.13108 8.1190 4.7015e-16
> set.seed(1)
> ( ys <- simulate(fit4, nsim = 3) )
sim_1.numdead sim_1.numalive sim_2.numdead sim_2.numalive sim_3.numdead
1 1 19 2 18 1
2 4 16 4 16 4
3 9 11 10 10 4
4 11 9 14 6 15
5 19 1 17 3 16
6 18 2 16 4 20
7 2 18 0 20 0
8 2 18 3 17 2
9 5 15 7 13 4
10 5 15 7 13 7
11 15 5 13 7 12
12 19 1 19 1 17
sim_3.numalive
1 19
2 16
3 16
4 5
5 4
6 0
7 20
8 18
9 16
10 13
11 8
12 3
> for(i in seq_len(3))
+ print(coef(summary(update(fit4, ys[, i] ~ .))))
Estimate Std. Error z value Pr(>|z|)
sexF -3.3079 0.45218 -7.3154 2.5656e-13
sexM -2.5067 0.39240 -6.3880 1.6812e-10
ldose 1.0482 0.12869 8.1454 3.7800e-16
Estimate Std. Error z value Pr(>|z|)
sexF -2.84478 0.40661 -6.9963 2.6289e-12
sexM -2.11845 0.35818 -5.9145 3.3286e-09
ldose 0.90935 0.11578 7.8541 4.0273e-15
Estimate Std. Error z value Pr(>|z|)
sexF -3.9838 0.52156 -7.6384 2.1996e-14
sexM -2.8717 0.43047 -6.6712 2.5376e-11
ldose 1.1487 0.14102 8.1459 3.7655e-16
>
> ## same via proportions
> fit5 <- glm(numdead/20 ~ sex + ldose - 1, family = binomial,
+ weights = rep(20, 12))
> set.seed(1)
> ( ys <- simulate(fit5, nsim = 3) )
sim_1 sim_2 sim_3
1 0.05 0.10 0.05
2 0.20 0.20 0.20
3 0.45 0.50 0.20
4 0.55 0.70 0.75
5 0.95 0.85 0.80
6 0.90 0.80 1.00
7 0.10 0.00 0.00
8 0.10 0.15 0.10
9 0.25 0.35 0.20
10 0.25 0.35 0.35
11 0.75 0.65 0.60
12 0.95 0.95 0.85
> for(i in seq_len(3))
+ print(coef(summary(update(fit5, ys[, i] ~ .))))
Estimate Std. Error z value Pr(>|z|)
sexF -3.3079 0.45218 -7.3154 2.5656e-13
sexM -2.5067 0.39240 -6.3880 1.6812e-10
ldose 1.0482 0.12869 8.1454 3.7800e-16
Estimate Std. Error z value Pr(>|z|)
sexF -2.84478 0.40661 -6.9963 2.6289e-12
sexM -2.11845 0.35818 -5.9145 3.3286e-09
ldose 0.90935 0.11578 7.8541 4.0273e-15
Estimate Std. Error z value Pr(>|z|)
sexF -3.9838 0.52156 -7.6384 2.1996e-14
sexM -2.8717 0.43047 -6.6712 2.5376e-11
ldose 1.1487 0.14102 8.1459 3.7655e-16
>
>
> ## factor binomial fit
> load("birthwt.rda") # copied from package MASS
> bwt <- with(birthwt, {
+ race <- factor(race, labels = c("white", "black", "other"))
+ table(ptl)
+ ptd <- factor(ptl > 0)
+ table(ftv)
+ ftv <- factor(ftv)
+ levels(ftv)[-(1:2)] <- "2+"
+ data.frame(low = factor(low), age, lwt, race,
+ smoke = (smoke > 0), ptd, ht = (ht > 0), ui = (ui > 0), ftv)
+ })
> fit6 <- glm(low ~ ., family = binomial, data = bwt)
> coef(summary(fit6))
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.823019 1.2447143 0.66121 0.5084769
age -0.037234 0.0387024 -0.96207 0.3360159
lwt -0.015653 0.0070804 -2.21075 0.0270532
raceblack 1.192413 0.5359646 2.22480 0.0260948
raceother 0.740685 0.4617443 1.60410 0.1086916
smokeTRUE 0.755528 0.4250166 1.77764 0.0754623
ptdTRUE 1.343763 0.4806207 2.79589 0.0051757
htTRUE 1.913166 0.7207369 2.65446 0.0079436
uiTRUE 0.680195 0.4643403 1.46486 0.1429580
ftv1 -0.436380 0.4793936 -0.91027 0.3626779
ftv2+ 0.179009 0.4563778 0.39224 0.6948827
> set.seed(1)
> ys <- simulate(fit6, nsim = 3)
> ys[1:10, ]
sim_1 sim_2 sim_3
1 0 0 0
2 0 0 0
3 0 0 0
4 1 0 1
5 0 1 1
6 1 0 1
7 1 0 0
8 0 0 0
9 0 0 0
10 0 0 1
> for(i in seq_len(3))
+ print(coef(summary(update(fit6, ys[, i] ~ .))))
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.284354 1.2297301 -0.23123 0.817134
age -0.011485 0.0389166 -0.29513 0.767897
lwt -0.012314 0.0069041 -1.78353 0.074500
raceblack 0.844051 0.5514350 1.53064 0.125857
raceother 0.565993 0.4665978 1.21302 0.225122
smokeTRUE 1.024337 0.4253416 2.40827 0.016028
ptdTRUE 1.578045 0.4887012 3.22906 0.001242
htTRUE 1.171556 0.7060321 1.65935 0.097045
uiTRUE 0.384214 0.4835840 0.79451 0.426897
ftv1 -0.790699 0.5271049 -1.50008 0.133594
ftv2+ 0.522245 0.4498448 1.16094 0.245665
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.916922 1.433819 0.63950 5.2250e-01
age 0.034662 0.041752 0.83019 4.0643e-01
lwt -0.036299 0.009577 -3.79021 1.5052e-04
raceblack 2.588992 0.631372 4.10058 4.1211e-05
raceother 1.073046 0.522576 2.05338 4.0036e-02
smokeTRUE 0.710930 0.478870 1.48460 1.3765e-01
ptdTRUE 0.679588 0.503299 1.35027 1.7693e-01
htTRUE 1.988224 0.801811 2.47967 1.3151e-02
uiTRUE 1.080176 0.489033 2.20880 2.7189e-02
ftv1 -0.418153 0.549009 -0.76165 4.4627e-01
ftv2+ 0.657962 0.500605 1.31433 1.8873e-01
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.4927579 1.2779154 1.168119 0.24275859
age -0.0986485 0.0415094 -2.376532 0.01747625
lwt -0.0088426 0.0070635 -1.251885 0.21061194
raceblack 0.2106382 0.5622160 0.374657 0.70791549
raceother 0.8608985 0.4814556 1.788116 0.07375727
smokeTRUE 0.7884781 0.4449757 1.771958 0.07640158
ptdTRUE 1.9686209 0.5435116 3.622040 0.00029229
htTRUE 1.7835637 0.7279712 2.450047 0.01428375
uiTRUE 1.4816743 0.5139743 2.882779 0.00394184
ftv1 -0.7388479 0.5090637 -1.451386 0.14667244
ftv2+ -0.0097438 0.4670024 -0.020865 0.98335371
>
> ## This requires MASS::gamma.shape
> if(!require("MASS", quietly = TRUE)) {
+ message("skipping tests requiring the MASS package")
+ q()
+ }
>
> ## gamma fit, from example(glm)
> clotting <- data.frame(u = c(5,10,15,20,30,40,60,80,100),
+ lot1 = c(118,58,42,35,27,25,21,19,18))
> fit7 <- glm(lot1 ~ log(u), data = clotting, family = Gamma)
> coef(summary(fit7))
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.016554 0.00092755 -17.847 4.2791e-07
log(u) 0.015343 0.00041496 36.975 2.7512e-09
> set.seed(1)
> ( ys <- simulate(fit7, nsim = 3) )
sim_1 sim_2 sim_3
1 119.451 118.552 123.140
2 56.310 51.798 48.747
3 42.193 39.456 40.843
4 34.581 33.905 35.580
5 26.208 26.971 27.210
6 25.476 25.840 24.437
7 22.287 22.151 21.375
8 20.206 20.503 19.626
9 18.224 19.094 17.952
> for(i in seq_len(3))
+ print(coef(summary(update(fit7, ys[, i] ~ .))))
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.016008 0.00077887 -20.553 1.6197e-07
log(u) 0.015044 0.00034611 43.465 8.9084e-10
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.015483 0.00050102 -30.903 9.5910e-09
log(u) 0.014935 0.00021999 67.889 3.9547e-11
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.016850 0.00075120 -22.431 8.8554e-08
log(u) 0.015569 0.00033659 46.256 5.7704e-10
>
>
> proc.time()
user system elapsed
0.597 0.096 0.670