366 lines
11 KiB
Plaintext
366 lines
11 KiB
Plaintext
|
|
R Under development (unstable) (2023-04-19 r84284) -- "Unsuffered Consequences"
|
|
Copyright (C) 2023 The R Foundation for Statistical Computing
|
|
Platform: aarch64-apple-darwin22.4.0 (64-bit)
|
|
|
|
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
|
You are welcome to redistribute it under certain conditions.
|
|
Type 'license()' or 'licence()' for distribution details.
|
|
|
|
R is a collaborative project with many contributors.
|
|
Type 'contributors()' for more information and
|
|
'citation()' on how to cite R or R packages in publications.
|
|
|
|
Type 'demo()' for some demos, 'help()' for on-line help, or
|
|
'help.start()' for an HTML browser interface to help.
|
|
Type 'q()' to quit R.
|
|
|
|
> #### Some examples of the KS and Wilcoxon tests
|
|
>
|
|
> ### ------ Kolmogorov Smirnov (KS) --------------
|
|
>
|
|
> ## unrealistic one of PR#14561
|
|
> ds1 <- c(1.7,2,3,3,4,4,5,5,6,6)
|
|
> ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216)
|
|
|
|
Asymptotic one-sample Kolmogorov-Smirnov test
|
|
|
|
data: ds1
|
|
D = 0.274, p-value = 0.4407
|
|
alternative hypothesis: two-sided
|
|
|
|
Warning message:
|
|
In ks.test.default(ds1, "pnorm", mean = 3.3, sd = 1.55216) :
|
|
ties should not be present for the one-sample Kolmogorov-Smirnov test
|
|
> # how on earth can sigma = 1.55216 be known?
|
|
>
|
|
> # R >= 2.14.0 allows the equally invalid
|
|
> ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE)
|
|
|
|
Exact one-sample Kolmogorov-Smirnov test
|
|
|
|
data: ds1
|
|
D = 0.274, p-value = 0.3715
|
|
alternative hypothesis: two-sided
|
|
|
|
Warning message:
|
|
In ks.test.default(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE) :
|
|
ties should not be present for the one-sample Kolmogorov-Smirnov test
|
|
>
|
|
> ## Try out the effects of rounding
|
|
> set.seed(123)
|
|
> ds2 <- rnorm(1000)
|
|
> ks.test(ds2, "pnorm") # exact = FALSE is default for n = 1000
|
|
|
|
Asymptotic one-sample Kolmogorov-Smirnov test
|
|
|
|
data: ds2
|
|
D = 0.019416, p-value = 0.8452
|
|
alternative hypothesis: two-sided
|
|
|
|
> ks.test(ds2, "pnorm", exact = TRUE)
|
|
|
|
Exact one-sample Kolmogorov-Smirnov test
|
|
|
|
data: ds2
|
|
D = 0.019416, p-value = 0.8379
|
|
alternative hypothesis: two-sided
|
|
|
|
> ## next two are still close
|
|
> ks.test(round(ds2, 2), "pnorm")
|
|
|
|
Asymptotic one-sample Kolmogorov-Smirnov test
|
|
|
|
data: round(ds2, 2)
|
|
D = 0.019169, p-value = 0.856
|
|
alternative hypothesis: two-sided
|
|
|
|
Warning message:
|
|
In ks.test.default(round(ds2, 2), "pnorm") :
|
|
ties should not be present for the one-sample Kolmogorov-Smirnov test
|
|
> ks.test(round(ds2, 2), "pnorm", exact = TRUE)
|
|
|
|
Exact one-sample Kolmogorov-Smirnov test
|
|
|
|
data: round(ds2, 2)
|
|
D = 0.019169, p-value = 0.8489
|
|
alternative hypothesis: two-sided
|
|
|
|
Warning message:
|
|
In ks.test.default(round(ds2, 2), "pnorm", exact = TRUE) :
|
|
ties should not be present for the one-sample Kolmogorov-Smirnov test
|
|
> # now D has doubled, but p-values remain similar (if very different from ds2)
|
|
> ks.test(round(ds2, 1), "pnorm")
|
|
|
|
Asymptotic one-sample Kolmogorov-Smirnov test
|
|
|
|
data: round(ds2, 1)
|
|
D = 0.03674, p-value = 0.1344
|
|
alternative hypothesis: two-sided
|
|
|
|
Warning message:
|
|
In ks.test.default(round(ds2, 1), "pnorm") :
|
|
ties should not be present for the one-sample Kolmogorov-Smirnov test
|
|
> ks.test(round(ds2, 1), "pnorm", exact = TRUE)
|
|
|
|
Exact one-sample Kolmogorov-Smirnov test
|
|
|
|
data: round(ds2, 1)
|
|
D = 0.03674, p-value = 0.1311
|
|
alternative hypothesis: two-sided
|
|
|
|
Warning message:
|
|
In ks.test.default(round(ds2, 1), "pnorm", exact = TRUE) :
|
|
ties should not be present for the one-sample Kolmogorov-Smirnov test
|
|
>
|
|
>
|
|
> ### ------ Wilkoxon (Mann Whitney) --------------
|
|
>
|
|
> options(nwarnings = 1000)
|
|
> (alts <- setNames(, eval(formals(stats:::wilcox.test.default)$alternative)))
|
|
two.sided less greater
|
|
"two.sided" "less" "greater"
|
|
> x0 <- 0:4
|
|
> (x.set <- list(s0 = lapply(x0, function(m) 0:m),
|
|
+ s. = lapply(x0, function(m) c(1e-9, seq_len(m)))))
|
|
$s0
|
|
$s0[[1]]
|
|
[1] 0
|
|
|
|
$s0[[2]]
|
|
[1] 0 1
|
|
|
|
$s0[[3]]
|
|
[1] 0 1 2
|
|
|
|
$s0[[4]]
|
|
[1] 0 1 2 3
|
|
|
|
$s0[[5]]
|
|
[1] 0 1 2 3 4
|
|
|
|
|
|
$s.
|
|
$s.[[1]]
|
|
[1] 1e-09
|
|
|
|
$s.[[2]]
|
|
[1] 1e-09 1e+00
|
|
|
|
$s.[[3]]
|
|
[1] 1e-09 1e+00 2e+00
|
|
|
|
$s.[[4]]
|
|
[1] 1e-09 1e+00 2e+00 3e+00
|
|
|
|
$s.[[5]]
|
|
[1] 1e-09 1e+00 2e+00 3e+00 4e+00
|
|
|
|
|
|
> stats <- setNames(nm = c("statistic", "p.value", "conf.int", "estimate"))
|
|
>
|
|
> ## Even with conf.int = TRUE, do not want errors :
|
|
> RR <-
|
|
+ lapply(x.set, ## for all data sets
|
|
+ function(xs)
|
|
+ lapply(alts, ## for all three alternatives
|
|
+ function(alt)
|
|
+ lapply(xs, function(x)
|
|
+ ## try(
|
|
+ wilcox.test(x, exact=TRUE, conf.int=TRUE, alternative = alt)
|
|
+ ## )
|
|
+ )))
|
|
There were 52 warnings (use warnings() to see them)
|
|
> length(ww <- warnings()) # 52 (or 43 for x0 <- 0:3)
|
|
[1] 52
|
|
> unique(ww) # 4 different ones
|
|
Warning messages:
|
|
1: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... :
|
|
cannot compute exact p-value with zeroes
|
|
2: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... :
|
|
cannot compute exact confidence interval with zeroes
|
|
3: cannot compute confidence interval when all observations are zero or tied
|
|
4: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... :
|
|
requested conf.level not achievable
|
|
>
|
|
> cc <- lapply(RR, function(A) lapply(A, function(bb) lapply(bb, class)))
|
|
> table(unlist(cc))
|
|
|
|
htest
|
|
30
|
|
> ## in R <= 3.3.1, with try( .. ) above, we got
|
|
> ## htest try-error
|
|
> ## 23 7
|
|
> uc <- unlist(cc[["s0"]]); noquote(names(uc)[uc != "htest"]) ## these 7 cases :
|
|
character(0)
|
|
> ## two.sided1 two.sided2 two.sided3
|
|
> ## less1 less2
|
|
> ## greater1 greater2
|
|
>
|
|
> ##--- How close are the stats of (0:m) to those of (eps, 1:m) ------------
|
|
>
|
|
> ## a version that still works with above try(.) and errors there:
|
|
> getC <- function(L, C) if(inherits(L,"try-error")) c(L) else L[[C]]
|
|
> stR <- lapply(stats, function(COMP)
|
|
+ lapply(RR, function(A)
|
|
+ lapply(A, function(bb)
|
|
+ lapply(bb, getC, C=COMP) )))
|
|
>
|
|
> ## a) P-value
|
|
> pv <- stR[["p.value"]]
|
|
> ## only the first is NaN, all others in [0,1]:
|
|
> sapply(pv$s0, unlist)
|
|
two.sided less greater
|
|
[1,] NaN 1.0000000 1.00000000
|
|
[2,] 1.0000000 0.9772499 0.50000000
|
|
[3,] 0.3710934 0.9631809 0.18554668
|
|
[4,] 0.1814492 0.9693156 0.09072460
|
|
[5,] 0.1003482 0.9776951 0.05017412
|
|
> sapply(pv$s., unlist) # not really close, but ..
|
|
two.sided less greater
|
|
[1,] 1.0000 1 0.50000
|
|
[2,] 0.5000 1 0.25000
|
|
[3,] 0.2500 1 0.12500
|
|
[4,] 0.1250 1 0.06250
|
|
[5,] 0.0625 1 0.03125
|
|
>
|
|
> pv$s0$two.sided[1] <- 1 ## artificially
|
|
> stopifnot(all.equal(pv$s0, pv$s., tol = 0.5 + 1e-6), # seen 0.5
|
|
+ ## "less" are close:
|
|
+ all.equal(unlist(pv[[c("s0","less")]]),
|
|
+ unlist(pv[[c("s.","less")]]), tol = 0.03),
|
|
+ 0 <= unlist(pv), unlist(pv) <= 1) # <- no further NA ..
|
|
> ## b)
|
|
> sapply(stR[["statistic"]], unlist)
|
|
s0 s.
|
|
two.sided.V 0 1
|
|
two.sided.V 1 3
|
|
two.sided.V 3 6
|
|
two.sided.V 6 10
|
|
two.sided.V 10 15
|
|
less.V 0 1
|
|
less.V 1 3
|
|
less.V 3 6
|
|
less.V 6 10
|
|
less.V 10 15
|
|
greater.V 0 1
|
|
greater.V 1 3
|
|
greater.V 3 6
|
|
greater.V 6 10
|
|
greater.V 10 15
|
|
> ## Conf.int.:
|
|
> ## c)
|
|
> sapply(stR[["estimate" ]], unlist)
|
|
s0 s.
|
|
two.sided1 NaN 1.0e-09
|
|
two.sided.midrange 1.0 5.0e-01
|
|
two.sided.(pseudo)median 1.5 1.0e+00
|
|
two.sided.(pseudo)median 2.0 1.5e+00
|
|
two.sided.(pseudo)median 2.5 2.0e+00
|
|
less1 NaN 1.0e-09
|
|
less.midrange 1.0 5.0e-01
|
|
less.(pseudo)median 1.5 1.0e+00
|
|
less.(pseudo)median 2.0 1.5e+00
|
|
less.(pseudo)median 2.5 2.0e+00
|
|
greater1 NaN 1.0e-09
|
|
greater.midrange 1.0 5.0e-01
|
|
greater.(pseudo)median 1.5 1.0e+00
|
|
greater.(pseudo)median 2.0 1.5e+00
|
|
greater.(pseudo)median 2.5 2.0e+00
|
|
> ## d) confidence interval
|
|
> formatCI <- function(ci)
|
|
+ sprintf("[%g, %g] (%g%%)", ci[[1]], ci[[2]],
|
|
+ round(100*attr(ci,"conf.level")))
|
|
> nx <- length(x0)
|
|
> noquote(vapply(stR[["conf.int"]], function(ss)
|
|
+ vapply(ss, function(alt) vapply(alt, formatCI, ""), character(nx)),
|
|
+ matrix("", nx, length(alts))))
|
|
, , s0
|
|
|
|
two.sided less greater
|
|
[1,] [NaN, NaN] (0%) [-Inf, NaN] (0%) [NaN, Inf] (0%)
|
|
[2,] [NaN, NaN] (0%) [-Inf, NaN] (0%) [NaN, Inf] (0%)
|
|
[3,] [1.5, 1.5] (0%) [-Inf, 1.5] (0%) [1.50001, Inf] (20%)
|
|
[4,] [1.00007, 2.99993] (60%) [-Inf, 2.00002] (60%) [1.00009, Inf] (80%)
|
|
[5,] [1.00006, 3.99994] (80%) [-Inf, 3.00002] (80%) [1.00009, Inf] (90%)
|
|
|
|
, , s.
|
|
|
|
two.sided less greater
|
|
[1,] [1e-09, 1e-09] (0%) [-Inf, 1e-09] (50%) [1e-09, Inf] (50%)
|
|
[2,] [1e-09, 1] (50%) [-Inf, 1] (75%) [1e-09, Inf] (75%)
|
|
[3,] [1e-09, 2] (75%) [-Inf, 2] (87%) [1e-09, Inf] (87%)
|
|
[4,] [1e-09, 3] (88%) [-Inf, 3] (95%) [1e-09, Inf] (95%)
|
|
[5,] [1e-09, 4] (95%) [-Inf, 4] (95%) [1e-09, Inf] (95%)
|
|
|
|
>
|
|
>
|
|
> ##-------- 2-sample tests (working unchanged) ------------------
|
|
>
|
|
> R2 <- lapply(alts, ## for all three alternatives
|
|
+ function(alt)
|
|
+ lapply(seq_along(x0), function(k)
|
|
+ wilcox.test(x = x.set$s0[[k]], y = x.set$s.[[k]],
|
|
+ exact=TRUE, conf.int=TRUE, alternative = alt)))
|
|
There were 27 warnings (use warnings() to see them)
|
|
> length(w2 <- warnings()) # 27
|
|
[1] 27
|
|
> unique(w2) # 3 different ones
|
|
Warning messages:
|
|
1: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... :
|
|
Requested conf.level not achievable
|
|
2: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... :
|
|
cannot compute exact p-value with ties
|
|
3: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... :
|
|
cannot compute exact confidence intervals with ties
|
|
>
|
|
> table(uc2 <- unlist(c2 <- lapply(R2, function(A) lapply(A, class))))
|
|
|
|
htest
|
|
15
|
|
> stopifnot(uc2 == "htest")
|
|
>
|
|
> stR2 <- lapply(stats,
|
|
+ function(COMP)
|
|
+ lapply(R2, function(A) lapply(A, getC, C=COMP)))
|
|
>
|
|
> lapply(stats[-3], ## -3: "conf.int" separately
|
|
+ function(ST) sapply(stR2[[ST]], unlist))
|
|
$statistic
|
|
two.sided less greater
|
|
W 0.0 0.0 0.0
|
|
W 1.5 1.5 1.5
|
|
W 4.0 4.0 4.0
|
|
W 7.5 7.5 7.5
|
|
W 12.0 12.0 12.0
|
|
|
|
$p.value
|
|
two.sided less greater
|
|
[1,] 1 0.5 1.0000000
|
|
[2,] 1 0.5 0.7928919
|
|
[3,] 1 0.5 0.6734524
|
|
[4,] 1 0.5 0.6156105
|
|
[5,] 1 0.5 0.5837406
|
|
|
|
$estimate
|
|
two.sided less greater
|
|
difference in location -1.000000e-09 -1.000000e-09 -1.000000e-09
|
|
difference in location -4.467848e-05 -4.467848e-05 -4.467848e-05
|
|
difference in location -4.692131e-05 -4.692131e-05 -4.692131e-05
|
|
difference in location 1.937902e-05 1.937902e-05 1.937902e-05
|
|
difference in location 3.741417e-05 3.741417e-05 3.741417e-05
|
|
|
|
>
|
|
> noquote(sapply(stR2[["conf.int"]], function(.) vapply(., formatCI, "")))
|
|
two.sided less greater
|
|
[1,] [-1e-09, -1e-09] (0%) [-Inf, -1e-09] (50%) [-1e-09, Inf] (50%)
|
|
[2,] [-1, 1] (95%) [-Inf, 1] (95%) [-1, Inf] (95%)
|
|
[3,] [-2, 2] (95%) [-Inf, 2] (95%) [-2, Inf] (95%)
|
|
[4,] [-3, 3] (95%) [-Inf, 2.00005] (95%) [-2.00003, Inf] (95%)
|
|
[5,] [-2.99998, 2.99996] (95%) [-Inf, 2.00005] (95%) [-2.00006, Inf] (95%)
|
|
>
|
|
>
|
|
> proc.time()
|
|
user system elapsed
|
|
0.130 0.017 0.145
|