blob: 3d2070ce887e68a9124b8329dea6d36756b69173 [file] [log] [blame]
# File src/library/stats/R/kruskal.test.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2015 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
kruskal.test <- function(x, ...) UseMethod("kruskal.test")
kruskal.test.default <-
function(x, g, ...)
{
if (is.list(x)) {
if (length(x) < 2L)
stop("'x' must be a list with at least 2 elements")
if (!missing(g))
warning("'x' is a list, so ignoring argument 'g'")
DNAME <- deparse(substitute(x))
x <- lapply(x, function(u) u <- u[complete.cases(u)])
if (!all(sapply(x, is.numeric)))
warning("some elements of 'x' are not numeric and will be coerced to numeric")
k <- length(x)
l <- lengths(x)
if (any(l == 0L))
stop("all groups must contain data")
g <- factor(rep.int(seq_len(k), l))
x <- unlist(x)
}
else {
if (length(x) != length(g))
stop("'x' and 'g' must have the same length")
DNAME <- paste(deparse(substitute(x)), "and",
deparse(substitute(g)))
OK <- complete.cases(x, g)
x <- x[OK]
g <- g[OK]
g <- factor(g)
k <- nlevels(g)
if (k < 2L)
stop("all observations are in the same group")
}
n <- length(x)
if (n < 2L)
stop("not enough observations")
r <- rank(x)
TIES <- table(x)
STATISTIC <- sum(tapply(r, g, "sum")^2 / tapply(r, g, "length"))
## keep as n+1 to avoid (implausible) integer overflows
STATISTIC <- ((12 * STATISTIC / (n * (n + 1)) - 3 * (n + 1)) /
(1 - sum(TIES^3 - TIES) / (n^3 - n)))
PARAMETER <- k - 1L
PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
names(STATISTIC) <- "Kruskal-Wallis chi-squared"
names(PARAMETER) <- "df"
RVAL <- list(statistic = STATISTIC,
parameter = PARAMETER,
p.value = PVAL,
method = "Kruskal-Wallis rank sum test",
data.name = DNAME)
class(RVAL) <- "htest"
return(RVAL)
}
kruskal.test.formula <-
function(formula, data, subset, na.action, ...)
{
if(missing(formula) || (length(formula) != 3L))
stop("'formula' missing or incorrect")
m <- match.call(expand.dots = FALSE)
if(is.matrix(eval(m$data, parent.frame())))
m$data <- as.data.frame(data)
## need stats:: for non-standard evaluation
m[[1L]] <- quote(stats::model.frame)
mf <- eval(m, parent.frame())
if(length(mf) > 2L)
stop("'formula' should be of the form response ~ group")
DNAME <- paste(names(mf), collapse = " by ")
names(mf) <- NULL
y <- do.call("kruskal.test", as.list(mf))
y$data.name <- DNAME
y
}