| |
| R version 3.6.2 Patched (2020-02-12 r77795) -- "Dark and Stormy Night" |
| Copyright (C) 2020 The R Foundation for Statistical Computing |
| Platform: x86_64-pc-linux-gnu (64-bit) |
| |
| R is free software and comes with ABSOLUTELY NO WARRANTY. |
| You are welcome to redistribute it under certain conditions. |
| Type 'license()' or 'licence()' for distribution details. |
| |
| R is a collaborative project with many contributors. |
| Type 'contributors()' for more information and |
| 'citation()' on how to cite R or R packages in publications. |
| |
| Type 'demo()' for some demos, 'help()' for on-line help, or |
| 'help.start()' for an HTML browser interface to help. |
| Type 'q()' to quit R. |
| |
| > #### Some examples of the KS and Wilcoxon tests |
| > |
| > ### ------ Kolmogorov Smirnov (KS) -------------- |
| > |
| > ## unrealistic one of PR#14561 |
| > ds1 <- c(1.7,2,3,3,4,4,5,5,6,6) |
| > ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216) |
| |
| One-sample Kolmogorov-Smirnov test |
| |
| data: ds1 |
| D = 0.274, p-value = 0.4407 |
| alternative hypothesis: two-sided |
| |
| Warning message: |
| In ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216) : |
| ties should not be present for the Kolmogorov-Smirnov test |
| > # how on earth can sigma = 1.55216 be known? |
| > |
| > # R >= 2.14.0 allows the equally invalid |
| > ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE) |
| |
| One-sample Kolmogorov-Smirnov test |
| |
| data: ds1 |
| D = 0.274, p-value = 0.3715 |
| alternative hypothesis: two-sided |
| |
| Warning message: |
| In ks.test(ds1, "pnorm", mean = 3.3, sd = 1.55216, exact = TRUE) : |
| ties should not be present for the Kolmogorov-Smirnov test |
| > |
| > ## Try out the effects of rounding |
| > set.seed(123) |
| > ds2 <- rnorm(1000) |
| > ks.test(ds2, "pnorm") # exact = FALSE is default for n = 1000 |
| |
| One-sample Kolmogorov-Smirnov test |
| |
| data: ds2 |
| D = 0.019416, p-value = 0.8452 |
| alternative hypothesis: two-sided |
| |
| > ks.test(ds2, "pnorm", exact = TRUE) |
| |
| One-sample Kolmogorov-Smirnov test |
| |
| data: ds2 |
| D = 0.019416, p-value = 0.8379 |
| alternative hypothesis: two-sided |
| |
| > ## next two are still close |
| > ks.test(round(ds2, 2), "pnorm") |
| |
| One-sample Kolmogorov-Smirnov test |
| |
| data: round(ds2, 2) |
| D = 0.019169, p-value = 0.856 |
| alternative hypothesis: two-sided |
| |
| Warning message: |
| In ks.test(round(ds2, 2), "pnorm") : |
| ties should not be present for the Kolmogorov-Smirnov test |
| > ks.test(round(ds2, 2), "pnorm", exact = TRUE) |
| |
| One-sample Kolmogorov-Smirnov test |
| |
| data: round(ds2, 2) |
| D = 0.019169, p-value = 0.8489 |
| alternative hypothesis: two-sided |
| |
| Warning message: |
| In ks.test(round(ds2, 2), "pnorm", exact = TRUE) : |
| ties should not be present for the Kolmogorov-Smirnov test |
| > # now D has doubled, but p-values remain similar (if very different from ds2) |
| > ks.test(round(ds2, 1), "pnorm") |
| |
| One-sample Kolmogorov-Smirnov test |
| |
| data: round(ds2, 1) |
| D = 0.03674, p-value = 0.1344 |
| alternative hypothesis: two-sided |
| |
| Warning message: |
| In ks.test(round(ds2, 1), "pnorm") : |
| ties should not be present for the Kolmogorov-Smirnov test |
| > ks.test(round(ds2, 1), "pnorm", exact = TRUE) |
| |
| One-sample Kolmogorov-Smirnov test |
| |
| data: round(ds2, 1) |
| D = 0.03674, p-value = 0.1311 |
| alternative hypothesis: two-sided |
| |
| Warning message: |
| In ks.test(round(ds2, 1), "pnorm", exact = TRUE) : |
| ties should not be present for the Kolmogorov-Smirnov test |
| > |
| > |
| > ### ------ Wilkoxon (Mann Whitney) -------------- |
| > |
| > options(nwarnings = 1000) |
| > (alts <- setNames(, eval(formals(stats:::wilcox.test.default)$alternative))) |
| two.sided less greater |
| "two.sided" "less" "greater" |
| > x0 <- 0:4 |
| > (x.set <- list(s0 = lapply(x0, function(m) 0:m), |
| + s. = lapply(x0, function(m) c(1e-9, seq_len(m))))) |
| $s0 |
| $s0[[1]] |
| [1] 0 |
| |
| $s0[[2]] |
| [1] 0 1 |
| |
| $s0[[3]] |
| [1] 0 1 2 |
| |
| $s0[[4]] |
| [1] 0 1 2 3 |
| |
| $s0[[5]] |
| [1] 0 1 2 3 4 |
| |
| |
| $s. |
| $s.[[1]] |
| [1] 1e-09 |
| |
| $s.[[2]] |
| [1] 1e-09 1e+00 |
| |
| $s.[[3]] |
| [1] 1e-09 1e+00 2e+00 |
| |
| $s.[[4]] |
| [1] 1e-09 1e+00 2e+00 3e+00 |
| |
| $s.[[5]] |
| [1] 1e-09 1e+00 2e+00 3e+00 4e+00 |
| |
| |
| > stats <- setNames(nm = c("statistic", "p.value", "conf.int", "estimate")) |
| > |
| > ## Even with conf.int = TRUE, do not want errors : |
| > RR <- |
| + lapply(x.set, ## for all data sets |
| + function(xs) |
| + lapply(alts, ## for all three alternatives |
| + function(alt) |
| + lapply(xs, function(x) |
| + ## try( |
| + wilcox.test(x, exact=TRUE, conf.int=TRUE, alternative = alt) |
| + ## ) |
| + ))) |
| There were 52 warnings (use warnings() to see them) |
| > length(ww <- warnings()) # 52 (or 43 for x0 <- 0:3) |
| [1] 52 |
| > unique(ww) # 4 different ones |
| Warning messages: |
| 1: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... : |
| cannot compute exact p-value with zeroes |
| 2: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... : |
| cannot compute exact confidence interval with zeroes |
| 3: cannot compute confidence interval when all observations are zero or tied |
| 4: In wilcox.test.default(x, exact = TRUE, conf.int = TRUE, ... : |
| requested conf.level not achievable |
| > |
| > cc <- lapply(RR, function(A) lapply(A, function(bb) lapply(bb, class))) |
| > table(unlist(cc)) |
| |
| htest |
| 30 |
| > ## in R <= 3.3.1, with try( .. ) above, we got |
| > ## htest try-error |
| > ## 23 7 |
| > uc <- unlist(cc[["s0"]]); noquote(names(uc)[uc != "htest"]) ## these 7 cases : |
| character(0) |
| > ## two.sided1 two.sided2 two.sided3 |
| > ## less1 less2 |
| > ## greater1 greater2 |
| > |
| > ##--- How close are the stats of (0:m) to those of (eps, 1:m) ------------ |
| > |
| > ## a version that still works with above try(.) and errors there: |
| > getC <- function(L, C) if(inherits(L,"try-error")) c(L) else L[[C]] |
| > stR <- lapply(stats, function(COMP) |
| + lapply(RR, function(A) |
| + lapply(A, function(bb) |
| + lapply(bb, getC, C=COMP) ))) |
| > |
| > ## a) P-value |
| > pv <- stR[["p.value"]] |
| > ## only the first is NaN, all others in [0,1]: |
| > sapply(pv$s0, unlist) |
| two.sided less greater |
| [1,] NaN 1.0000000 1.00000000 |
| [2,] 1.0000000 0.9772499 0.50000000 |
| [3,] 0.3710934 0.9631809 0.18554668 |
| [4,] 0.1814492 0.9693156 0.09072460 |
| [5,] 0.1003482 0.9776951 0.05017412 |
| > sapply(pv$s., unlist) # not really close, but .. |
| two.sided less greater |
| [1,] 1.0000 1 0.50000 |
| [2,] 0.5000 1 0.25000 |
| [3,] 0.2500 1 0.12500 |
| [4,] 0.1250 1 0.06250 |
| [5,] 0.0625 1 0.03125 |
| > |
| > pv$s0$two.sided[1] <- 1 ## artificially |
| > stopifnot(all.equal(pv$s0, pv$s., tol = 0.5 + 1e-6), # seen 0.5 |
| + ## "less" are close: |
| + all.equal(unlist(pv[[c("s0","less")]]), |
| + unlist(pv[[c("s.","less")]]), tol = 0.03), |
| + 0 <= unlist(pv), unlist(pv) <= 1) # <- no further NA .. |
| > ## b) |
| > sapply(stR[["statistic"]], unlist) |
| s0 s. |
| two.sided.V 0 1 |
| two.sided.V 1 3 |
| two.sided.V 3 6 |
| two.sided.V 6 10 |
| two.sided.V 10 15 |
| less.V 0 1 |
| less.V 1 3 |
| less.V 3 6 |
| less.V 6 10 |
| less.V 10 15 |
| greater.V 0 1 |
| greater.V 1 3 |
| greater.V 3 6 |
| greater.V 6 10 |
| greater.V 10 15 |
| > ## Conf.int.: |
| > ## c) |
| > sapply(stR[["estimate" ]], unlist) |
| s0 s. |
| two.sided1 NaN 1.0e-09 |
| two.sided.midrange 1.0 5.0e-01 |
| two.sided.(pseudo)median 1.5 1.0e+00 |
| two.sided.(pseudo)median 2.0 1.5e+00 |
| two.sided.(pseudo)median 2.5 2.0e+00 |
| less1 NaN 1.0e-09 |
| less.midrange 1.0 5.0e-01 |
| less.(pseudo)median 1.5 1.0e+00 |
| less.(pseudo)median 2.0 1.5e+00 |
| less.(pseudo)median 2.5 2.0e+00 |
| greater1 NaN 1.0e-09 |
| greater.midrange 1.0 5.0e-01 |
| greater.(pseudo)median 1.5 1.0e+00 |
| greater.(pseudo)median 2.0 1.5e+00 |
| greater.(pseudo)median 2.5 2.0e+00 |
| > ## d) confidence interval |
| > formatCI <- function(ci) |
| + sprintf("[%g, %g] (%g%%)", ci[[1]], ci[[2]], |
| + round(100*attr(ci,"conf.level"))) |
| > nx <- length(x0) |
| > noquote(vapply(stR[["conf.int"]], function(ss) |
| + vapply(ss, function(alt) vapply(alt, formatCI, ""), character(nx)), |
| + matrix("", nx, length(alts)))) |
| , , s0 |
| |
| two.sided less greater |
| [1,] [NaN, NaN] (0%) [-Inf, NaN] (0%) [NaN, Inf] (0%) |
| [2,] [NaN, NaN] (0%) [-Inf, NaN] (0%) [NaN, Inf] (0%) |
| [3,] [1.5, 1.5] (0%) [-Inf, 1.5] (0%) [1.50001, Inf] (20%) |
| [4,] [1.00007, 2.99993] (60%) [-Inf, 2.00002] (60%) [1.00009, Inf] (80%) |
| [5,] [1.00006, 3.99994] (80%) [-Inf, 3.00002] (80%) [1.00009, Inf] (90%) |
| |
| , , s. |
| |
| two.sided less greater |
| [1,] [1e-09, 1e-09] (0%) [-Inf, 1e-09] (50%) [1e-09, Inf] (50%) |
| [2,] [1e-09, 1] (50%) [-Inf, 1] (75%) [1e-09, Inf] (75%) |
| [3,] [1e-09, 2] (75%) [-Inf, 2] (87%) [1e-09, Inf] (87%) |
| [4,] [1e-09, 3] (88%) [-Inf, 3] (95%) [1e-09, Inf] (95%) |
| [5,] [1e-09, 4] (95%) [-Inf, 4] (95%) [1e-09, Inf] (95%) |
| |
| > |
| > |
| > ##-------- 2-sample tests (working unchanged) ------------------ |
| > |
| > R2 <- lapply(alts, ## for all three alternatives |
| + function(alt) |
| + lapply(seq_along(x0), function(k) |
| + wilcox.test(x = x.set$s0[[k]], y = x.set$s.[[k]], |
| + exact=TRUE, conf.int=TRUE, alternative = alt))) |
| There were 27 warnings (use warnings() to see them) |
| > length(w2 <- warnings()) # 27 |
| [1] 27 |
| > unique(w2) # 3 different ones |
| Warning messages: |
| 1: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... : |
| Requested conf.level not achievable |
| 2: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... : |
| cannot compute exact p-value with ties |
| 3: In wilcox.test.default(x = x.set$s0[[k]], y = x.set$s.[[k]], ... : |
| cannot compute exact confidence intervals with ties |
| > |
| > table(uc2 <- unlist(c2 <- lapply(R2, function(A) lapply(A, class)))) |
| |
| htest |
| 15 |
| > stopifnot(uc2 == "htest") |
| > |
| > stR2 <- lapply(stats, |
| + function(COMP) |
| + lapply(R2, function(A) lapply(A, getC, C=COMP))) |
| > |
| > lapply(stats[-3], ## -3: "conf.int" separately |
| + function(ST) sapply(stR2[[ST]], unlist)) |
| $statistic |
| two.sided less greater |
| W 0.0 0.0 0.0 |
| W 1.5 1.5 1.5 |
| W 4.0 4.0 4.0 |
| W 7.5 7.5 7.5 |
| W 12.0 12.0 12.0 |
| |
| $p.value |
| two.sided less greater |
| [1,] 1 0.5 1.0000000 |
| [2,] 1 0.5 0.7928919 |
| [3,] 1 0.5 0.6734524 |
| [4,] 1 0.5 0.6156105 |
| [5,] 1 0.5 0.5837406 |
| |
| $estimate |
| two.sided less greater |
| difference in location -1.000000e-09 -1.000000e-09 -1.000000e-09 |
| difference in location -4.467848e-05 -4.467848e-05 -4.467848e-05 |
| difference in location -4.692131e-05 -4.692131e-05 -4.692131e-05 |
| difference in location 1.937902e-05 1.937902e-05 1.937902e-05 |
| difference in location 3.741417e-05 3.741417e-05 3.741417e-05 |
| |
| > |
| > noquote(sapply(stR2[["conf.int"]], function(.) vapply(., formatCI, ""))) |
| two.sided less greater |
| [1,] [-1e-09, -1e-09] (0%) [-Inf, -1e-09] (50%) [-1e-09, Inf] (50%) |
| [2,] [-1, 1] (95%) [-Inf, 1] (95%) [-1, Inf] (95%) |
| [3,] [-2, 2] (95%) [-Inf, 2] (95%) [-2, Inf] (95%) |
| [4,] [-3, 3] (95%) [-Inf, 2.00005] (95%) [-2.00003, Inf] (95%) |
| [5,] [-2.99998, 2.99996] (95%) [-Inf, 2.00005] (95%) [-2.00006, Inf] (95%) |
| > |
| > |
| > proc.time() |
| user system elapsed |
| 0.374 0.047 0.400 |