blob: df2e85f276e346fabf51cdc38113de209d187780 [file] [log] [blame]
# File src/library/stats/R/mantelhaen.test.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2018 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
mantelhaen.test <-
function(x, y = NULL, z = NULL,
alternative = c("two.sided", "less", "greater"),
correct = TRUE, exact = FALSE, conf.level = 0.95)
{
DNAME <- deparse(substitute(x))
if(is.array(x)) {
if(length(dim(x)) == 3L) {
if(anyNA(x)) stop("NAs are not allowed")
if(any(dim(x) < 2L)) stop("each dimension in table must be >= 2")
}
else
stop("'x' must be a 3-dimensional array")
}
else {
if(is.null(y)) stop("if 'x' is not an array, 'y' must be given")
if(is.null(z)) stop("if 'x' is not an array, 'z' must be given")
if(any(diff(c(length(x), length(y), length(z))) != 0L ))
stop("'x', 'y', and 'z' must have the same length")
DNAME <- paste(DNAME, "and", deparse(substitute(y)), "and",
deparse(substitute(z)))
OK <- complete.cases(x, y, z)
x <- factor(x[OK])
y <- factor(y[OK])
if((nlevels(x) < 2L) || (nlevels(y) < 2L))
stop("'x' and 'y' must have at least 2 levels")
else
x <- table(x, y, z[OK])
}
if(any(apply(x, 3L, sum) < 2))
stop("sample size in each stratum must be > 1")
I <- dim(x)[1L]
J <- dim(x)[2L]
K <- dim(x)[3L]
if((I == 2) && (J == 2)) {
## 2 x 2 x K case
alternative <- match.arg(alternative)
if(!missing(conf.level) &&
(length(conf.level) != 1 || !is.finite(conf.level) ||
conf.level < 0 || conf.level > 1))
stop("'conf.level' must be a single number between 0 and 1")
NVAL <- c("common odds ratio" = 1)
if(!exact) {
## Classical Mantel-Haenszel 2 x 2 x K test
s.x <- apply(x, c(1L, 3L), sum)
s.y <- apply(x, c(2L, 3L), sum)
n <- as.double(apply(x, 3L, sum)) # avoid overflows below
DELTA <- sum(x[1, 1, ] - s.x[1, ] * s.y[1, ] / n)
YATES <- if(correct && (abs(DELTA) >= .5)) .5 else 0
STATISTIC <- ((abs(DELTA) - YATES)^2 /
sum(apply(rbind(s.x, s.y), 2L, prod)
/ (n^2 * (n - 1))))
PARAMETER <- 1
if (alternative == "two.sided")
PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
else {
z <- sign(DELTA) * sqrt(STATISTIC)
PVAL <- pnorm(z, lower.tail = (alternative == "less"))
}
names(STATISTIC) <- "Mantel-Haenszel X-squared"
names(PARAMETER) <- "df"
METHOD <- paste("Mantel-Haenszel chi-squared test",
if(YATES) "with" else "without",
"continuity correction")
s.diag <- sum(x[1L, 1L, ] * x[2L, 2L, ] / n)
s.offd <- sum(x[1L, 2L, ] * x[2L, 1L, ] / n)
## Mantel-Haenszel (1959) estimate of the common odds ratio.
ESTIMATE <- s.diag / s.offd
## Robins et al. (1986) estimate of the standard deviation
## of the log of the Mantel-Haenszel estimator.
sd <-
sqrt( sum((x[1L,1L,] + x[2L,2L,]) * x[1L,1L,] * x[2L,2L,]
/ n^2)
/ (2 * s.diag^2)
+ sum(( (x[1L,1L,] + x[2L,2L,]) * x[1L,2L,] * x[2L,1L,]
+ (x[1L,2L,] + x[2L,1L,]) * x[1L,1L,] * x[2L,2L,])
/ n^2)
/ (2 * s.diag * s.offd)
+ sum((x[1L,2L,] + x[2L,1L,]) * x[1L,2L,] * x[2L,1L,]
/ n^2)
/ (2 * s.offd^2))
CINT <-
switch(alternative,
less = c(0, ESTIMATE * exp(qnorm(conf.level) * sd)),
greater = c(ESTIMATE * exp(qnorm(conf.level,
lower.tail = FALSE) * sd), Inf),
two.sided = {
ESTIMATE * exp(c(1, -1) *
qnorm((1 - conf.level) / 2) * sd)
})
RVAL <- list(statistic = STATISTIC,
parameter = PARAMETER,
p.value = PVAL)
}
else {
## Exact inference for the 2 x 2 x k case can be carried out
## conditional on the strata margins, similar to the case
## for Fisher's exact test (k = 1). Again, the distribution
## of S (in our case, sum(x[2, 1, ]) to be consistent with
## the notation in Mehta et al. (1985), is of the form
## P(S = s) \propto d(s) * or^s, lo <= s <= hi
## where or is the common odds ratio in the k tables (and
## d(.) is a product hypergeometric distribution).
METHOD <- paste("Exact conditional test of independence",
"in 2 x 2 x k tables")
mn <- apply(x, c(2L, 3L), sum)
m <- mn[1L, ]
n <- mn[2L, ]
t <- apply(x, c(1L, 3L), sum)[1L, ]
s <- sum(x[1L, 1L, ])
lo <- sum(pmax(0, t - n))
hi <- sum(pmin(m, t))
support <- lo : hi
## Density of the *central* product hypergeometric
## distribution on its support: store for once as this is
## needed quite a bit.
dc <- .Call(C_d2x2xk, K, m, n, t, hi - lo + 1L)
logdc <- log(dc)
dn2x2xk <- function(ncp) {
## Does not work for boundary values for ncp (0, Inf)
## but it does not need to.
if(ncp == 1) return(dc)
d <- logdc + log(ncp) * support
d <- exp(d - max(d)) # beware of overflow
d / sum(d)
}
mn2x2xk <- function(ncp) {
if(ncp == 0)
return(lo)
if(ncp == Inf)
return(hi)
sum(support * dn2x2xk(ncp))
}
pn2x2xk <- function(q, ncp = 1, upper.tail = FALSE) {
if(ncp == 0) {
as.numeric(if(upper.tail) q <= lo else q >= lo)
}
else if(ncp == Inf)
as.numeric(if(upper.tail) q <= hi else q >= hi)
else {
d <- dn2x2xk(ncp)
sum(d[if(upper.tail) support >= q else support <= q])
}
}
## Determine the p-value.
PVAL <-
switch(alternative,
less = pn2x2xk(s, 1),
greater = pn2x2xk(s, 1, upper.tail = TRUE),
two.sided = {
## Note that we need a little fuzz.
relErr <- 1 + 10 ^ (-7)
d <- dc # same as dn2x2xk(1)
sum(d[d <= d[s - lo + 1] * relErr])
})
## Determine the MLE for ncp by solving E(S) = s, where the
## expectation is with respect to the above distribution.
mle <- function(x) {
if(x == lo)
return(0)
if(x == hi)
return(Inf)
mu <- mn2x2xk(1)
if(mu > x)
uniroot(function(t) mn2x2xk(t) - x,
c(0, 1))$root
else if(mu < x)
1 / uniroot(function(t) mn2x2xk(1/t) - x,
c(.Machine$double.eps, 1))$root
else
1
}
ESTIMATE <- mle(s)
## Determine confidence intervals for the odds ratio.
ncp.U <- function(x, alpha) {
if(x == hi)
return(Inf)
p <- pn2x2xk(x, 1)
if(p < alpha)
uniroot(function(t) pn2x2xk(x, t) - alpha,
c(0, 1))$root
else if(p > alpha)
1 / uniroot(function(t) pn2x2xk(x, 1/t) - alpha,
c(.Machine$double.eps, 1))$root
else
1
}
ncp.L <- function(x, alpha) {
if(x == lo)
return(0)
p <- pn2x2xk(x, 1, upper.tail = TRUE)
if(p > alpha)
uniroot(function(t)
pn2x2xk(x, t, upper.tail = TRUE) - alpha,
c(0, 1))$root
else if (p < alpha)
1 / uniroot(function(t)
pn2x2xk(x, 1/t, upper.tail = TRUE) - alpha,
c(.Machine$double.eps, 1))$root
else
1
}
CINT <- switch(alternative,
less = c(0, ncp.U(s, 1 - conf.level)),
greater = c(ncp.L(s, 1 - conf.level), Inf),
two.sided = {
alpha <- (1 - conf.level) / 2
c(ncp.L(s, alpha), ncp.U(s, alpha))
})
STATISTIC <- c(S = s)
RVAL <- list(statistic = STATISTIC,
p.value = PVAL)
}
names(ESTIMATE) <- names(NVAL)
attr(CINT, "conf.level") <- conf.level
RVAL <- c(RVAL,
list(conf.int = CINT,
estimate = ESTIMATE,
null.value = NVAL,
alternative = alternative))
}
else {
## Generalized Cochran-Mantel-Haenszel I x J x K test
## Agresti (1990), pages 234--235.
## Agresti (2002), pages 295ff.
## Note that n in the reference is in column-major order.
## (Thanks to Torsten Hothorn for spotting this.)
df <- (I - 1) * (J - 1)
n <- m <- double(length = df)
V <- matrix(0, nrow = df, ncol = df)
for (k in 1 : K) {
f <- x[ , , k] # frequencies in stratum k
ntot <- as.double(sum(f)) # n_{..k} (and avoid overflow)
rowsums <- rowSums(f)[-I] # n_{i.k}, i = 1 to I-1
colsums <- colSums(f)[-J] # n_{.jk}, j = 1 to J-1
n <- n + c(f[-I, -J])
m <- m + c(outer(rowsums, colsums, "*")) / ntot
V <- V + (kronecker(diag(ntot * colsums, nrow = J - 1L)
- outer(colsums, colsums),
diag(ntot * rowsums, nrow = I - 1L)
- outer(rowsums, rowsums))
/ (ntot^2 * (ntot - 1)))
}
n <- n - m
STATISTIC <- c(crossprod(n, qr.solve(V, n)))
PARAMETER <- df
PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
names(STATISTIC) <- "Cochran-Mantel-Haenszel M^2"
names(PARAMETER) <- "df"
METHOD <- "Cochran-Mantel-Haenszel test"
RVAL <- list(statistic = STATISTIC,
parameter = PARAMETER,
p.value = PVAL)
}
structure(c(RVAL,
list(method = METHOD,
data.name = DNAME)),
class = "htest")
}