2025-01-12 04:36:52 +08:00

366 lines
11 KiB
Plaintext

R Under development (unstable) (2023-04-19 r84284) -- "Unsuffered Consequences"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: aarch64-apple-darwin22.4.0 (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> #### Some examples of the KS and Wilcoxon tests
>
> ### ------ Kolmogorov Smirnov (KS) --------------
>
> ## unrealistic one of PR#14561
> ds1 <- c(1.7,2,3,3,4,4,5,5,6,6)
> ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216)
Asymptotic one-sample Kolmogorov-Smirnov test
data: ds1
D = 0.274, p-value = 0.4407
alternative hypothesis: two-sided
Warning message:
In ks.test.default(ds1, "pnorm", mean = 3.3, sd = 1.55216) :
ties should not be present for the one-sample Kolmogorov-Smirnov test
> # how on earth can sigma = 1.55216 be known?
>
> # R >= 2.14.0 allows the equally invalid
> ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE)
Exact one-sample Kolmogorov-Smirnov test
data: ds1
D = 0.274, p-value = 0.3715
alternative hypothesis: two-sided
Warning message:
In ks.test.default(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE) :
ties should not be present for the one-sample Kolmogorov-Smirnov test
>
> ## Try out the effects of rounding
> set.seed(123)
> ds2 <- rnorm(1000)
> ks.test(ds2, "pnorm") # exact = FALSE is default for n = 1000
Asymptotic one-sample Kolmogorov-Smirnov test
data: ds2
D = 0.019416, p-value = 0.8452
alternative hypothesis: two-sided
> ks.test(ds2, "pnorm", exact = TRUE)
Exact one-sample Kolmogorov-Smirnov test
data: ds2
D = 0.019416, p-value = 0.8379
alternative hypothesis: two-sided
> ## next two are still close
> ks.test(round(ds2, 2), "pnorm")
Asymptotic one-sample Kolmogorov-Smirnov test
data: round(ds2, 2)
D = 0.019169, p-value = 0.856
alternative hypothesis: two-sided
Warning message:
In ks.test.default(round(ds2, 2), "pnorm") :
ties should not be present for the one-sample Kolmogorov-Smirnov test
> ks.test(round(ds2, 2), "pnorm", exact = TRUE)
Exact one-sample Kolmogorov-Smirnov test
data: round(ds2, 2)
D = 0.019169, p-value = 0.8489
alternative hypothesis: two-sided
Warning message:
In ks.test.default(round(ds2, 2), "pnorm", exact = TRUE) :
ties should not be present for the one-sample Kolmogorov-Smirnov test
> # now D has doubled, but p-values remain similar (if very different from ds2)
> ks.test(round(ds2, 1), "pnorm")
Asymptotic one-sample Kolmogorov-Smirnov test
data: round(ds2, 1)
D = 0.03674, p-value = 0.1344
alternative hypothesis: two-sided
Warning message:
In ks.test.default(round(ds2, 1), "pnorm") :
ties should not be present for the one-sample Kolmogorov-Smirnov test
> ks.test(round(ds2, 1), "pnorm", exact = TRUE)
Exact one-sample Kolmogorov-Smirnov test
data: round(ds2, 1)
D = 0.03674, p-value = 0.1311
alternative hypothesis: two-sided
Warning message:
In ks.test.default(round(ds2, 1), "pnorm", exact = TRUE) :
ties should not be present for the one-sample Kolmogorov-Smirnov test
>
>
> ### ------ Wilkoxon (Mann Whitney) --------------
>
> options(nwarnings = 1000)
> (alts <- setNames(, eval(formals(stats:::wilcox.test.default)$alternative)))
two.sided less greater
"two.sided" "less" "greater"
> x0 <- 0:4
> (x.set <- list(s0 = lapply(x0, function(m) 0:m),
+ s. = lapply(x0, function(m) c(1e-9, seq_len(m)))))
$s0
$s0[[1]]
[1] 0
$s0[[2]]
[1] 0 1
$s0[[3]]
[1] 0 1 2
$s0[[4]]
[1] 0 1 2 3
$s0[[5]]
[1] 0 1 2 3 4
$s.
$s.[[1]]
[1] 1e-09
$s.[[2]]
[1] 1e-09 1e+00
$s.[[3]]
[1] 1e-09 1e+00 2e+00
$s.[[4]]
[1] 1e-09 1e+00 2e+00 3e+00
$s.[[5]]
[1] 1e-09 1e+00 2e+00 3e+00 4e+00
> stats <- setNames(nm = c("statistic", "p.value", "conf.int", "estimate"))
>
> ## Even with conf.int = TRUE, do not want errors :
> RR <-
+ lapply(x.set, ## for all data sets
+ function(xs)
+ lapply(alts, ## for all three alternatives
+ function(alt)
+ lapply(xs, function(x)
+ ## try(
+ wilcox.test(x, exact=TRUE, conf.int=TRUE, alternative = alt)
+ ## )
+ )))
There were 52 warnings (use warnings() to see them)
> length(ww <- warnings()) # 52 (or 43 for x0 <- 0:3)
[1] 52
> unique(ww) # 4 different ones
Warning messages:
1: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... :
cannot compute exact p-value with zeroes
2: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... :
cannot compute exact confidence interval with zeroes
3: cannot compute confidence interval when all observations are zero or tied
4: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... :
requested conf.level not achievable
>
> cc <- lapply(RR, function(A) lapply(A, function(bb) lapply(bb, class)))
> table(unlist(cc))
htest
30
> ## in R <= 3.3.1, with try( .. ) above, we got
> ## htest try-error
> ## 23 7
> uc <- unlist(cc[["s0"]]); noquote(names(uc)[uc != "htest"]) ## these 7 cases :
character(0)
> ## two.sided1 two.sided2 two.sided3
> ## less1 less2
> ## greater1 greater2
>
> ##--- How close are the stats of (0:m) to those of (eps, 1:m) ------------
>
> ## a version that still works with above try(.) and errors there:
> getC <- function(L, C) if(inherits(L,"try-error")) c(L) else L[[C]]
> stR <- lapply(stats, function(COMP)
+ lapply(RR, function(A)
+ lapply(A, function(bb)
+ lapply(bb, getC, C=COMP) )))
>
> ## a) P-value
> pv <- stR[["p.value"]]
> ## only the first is NaN, all others in [0,1]:
> sapply(pv$s0, unlist)
two.sided less greater
[1,] NaN 1.0000000 1.00000000
[2,] 1.0000000 0.9772499 0.50000000
[3,] 0.3710934 0.9631809 0.18554668
[4,] 0.1814492 0.9693156 0.09072460
[5,] 0.1003482 0.9776951 0.05017412
> sapply(pv$s., unlist) # not really close, but ..
two.sided less greater
[1,] 1.0000 1 0.50000
[2,] 0.5000 1 0.25000
[3,] 0.2500 1 0.12500
[4,] 0.1250 1 0.06250
[5,] 0.0625 1 0.03125
>
> pv$s0$two.sided[1] <- 1 ## artificially
> stopifnot(all.equal(pv$s0, pv$s., tol = 0.5 + 1e-6), # seen 0.5
+ ## "less" are close:
+ all.equal(unlist(pv[[c("s0","less")]]),
+ unlist(pv[[c("s.","less")]]), tol = 0.03),
+ 0 <= unlist(pv), unlist(pv) <= 1) # <- no further NA ..
> ## b)
> sapply(stR[["statistic"]], unlist)
s0 s.
two.sided.V 0 1
two.sided.V 1 3
two.sided.V 3 6
two.sided.V 6 10
two.sided.V 10 15
less.V 0 1
less.V 1 3
less.V 3 6
less.V 6 10
less.V 10 15
greater.V 0 1
greater.V 1 3
greater.V 3 6
greater.V 6 10
greater.V 10 15
> ## Conf.int.:
> ## c)
> sapply(stR[["estimate" ]], unlist)
s0 s.
two.sided1 NaN 1.0e-09
two.sided.midrange 1.0 5.0e-01
two.sided.(pseudo)median 1.5 1.0e+00
two.sided.(pseudo)median 2.0 1.5e+00
two.sided.(pseudo)median 2.5 2.0e+00
less1 NaN 1.0e-09
less.midrange 1.0 5.0e-01
less.(pseudo)median 1.5 1.0e+00
less.(pseudo)median 2.0 1.5e+00
less.(pseudo)median 2.5 2.0e+00
greater1 NaN 1.0e-09
greater.midrange 1.0 5.0e-01
greater.(pseudo)median 1.5 1.0e+00
greater.(pseudo)median 2.0 1.5e+00
greater.(pseudo)median 2.5 2.0e+00
> ## d) confidence interval
> formatCI <- function(ci)
+ sprintf("[%g, %g] (%g%%)", ci[[1]], ci[[2]],
+ round(100*attr(ci,"conf.level")))
> nx <- length(x0)
> noquote(vapply(stR[["conf.int"]], function(ss)
+ vapply(ss, function(alt) vapply(alt, formatCI, ""), character(nx)),
+ matrix("", nx, length(alts))))
, , s0
two.sided less greater
[1,] [NaN, NaN] (0%) [-Inf, NaN] (0%) [NaN, Inf] (0%)
[2,] [NaN, NaN] (0%) [-Inf, NaN] (0%) [NaN, Inf] (0%)
[3,] [1.5, 1.5] (0%) [-Inf, 1.5] (0%) [1.50001, Inf] (20%)
[4,] [1.00007, 2.99993] (60%) [-Inf, 2.00002] (60%) [1.00009, Inf] (80%)
[5,] [1.00006, 3.99994] (80%) [-Inf, 3.00002] (80%) [1.00009, Inf] (90%)
, , s.
two.sided less greater
[1,] [1e-09, 1e-09] (0%) [-Inf, 1e-09] (50%) [1e-09, Inf] (50%)
[2,] [1e-09, 1] (50%) [-Inf, 1] (75%) [1e-09, Inf] (75%)
[3,] [1e-09, 2] (75%) [-Inf, 2] (87%) [1e-09, Inf] (87%)
[4,] [1e-09, 3] (88%) [-Inf, 3] (95%) [1e-09, Inf] (95%)
[5,] [1e-09, 4] (95%) [-Inf, 4] (95%) [1e-09, Inf] (95%)
>
>
> ##-------- 2-sample tests (working unchanged) ------------------
>
> R2 <- lapply(alts, ## for all three alternatives
+ function(alt)
+ lapply(seq_along(x0), function(k)
+ wilcox.test(x = x.set$s0[[k]], y = x.set$s.[[k]],
+ exact=TRUE, conf.int=TRUE, alternative = alt)))
There were 27 warnings (use warnings() to see them)
> length(w2 <- warnings()) # 27
[1] 27
> unique(w2) # 3 different ones
Warning messages:
1: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... :
Requested conf.level not achievable
2: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... :
cannot compute exact p-value with ties
3: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... :
cannot compute exact confidence intervals with ties
>
> table(uc2 <- unlist(c2 <- lapply(R2, function(A) lapply(A, class))))
htest
15
> stopifnot(uc2 == "htest")
>
> stR2 <- lapply(stats,
+ function(COMP)
+ lapply(R2, function(A) lapply(A, getC, C=COMP)))
>
> lapply(stats[-3], ## -3: "conf.int" separately
+ function(ST) sapply(stR2[[ST]], unlist))
$statistic
two.sided less greater
W 0.0 0.0 0.0
W 1.5 1.5 1.5
W 4.0 4.0 4.0
W 7.5 7.5 7.5
W 12.0 12.0 12.0
$p.value
two.sided less greater
[1,] 1 0.5 1.0000000
[2,] 1 0.5 0.7928919
[3,] 1 0.5 0.6734524
[4,] 1 0.5 0.6156105
[5,] 1 0.5 0.5837406
$estimate
two.sided less greater
difference in location -1.000000e-09 -1.000000e-09 -1.000000e-09
difference in location -4.467848e-05 -4.467848e-05 -4.467848e-05
difference in location -4.692131e-05 -4.692131e-05 -4.692131e-05
difference in location 1.937902e-05 1.937902e-05 1.937902e-05
difference in location 3.741417e-05 3.741417e-05 3.741417e-05
>
> noquote(sapply(stR2[["conf.int"]], function(.) vapply(., formatCI, "")))
two.sided less greater
[1,] [-1e-09, -1e-09] (0%) [-Inf, -1e-09] (50%) [-1e-09, Inf] (50%)
[2,] [-1, 1] (95%) [-Inf, 1] (95%) [-1, Inf] (95%)
[3,] [-2, 2] (95%) [-Inf, 2] (95%) [-2, Inf] (95%)
[4,] [-3, 3] (95%) [-Inf, 2.00005] (95%) [-2.00003, Inf] (95%)
[5,] [-2.99998, 2.99996] (95%) [-Inf, 2.00005] (95%) [-2.00006, Inf] (95%)
>
>
> proc.time()
user system elapsed
0.130 0.017 0.145