blob: 7f3e0d80dcef4d9ba8601d94d77bd25bd1b1c620 [file] [log] [blame]
# File src/library/stats/R/prop.test.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2012 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
prop.test <-
function(x, n, p = NULL, alternative = c("two.sided", "less", "greater"),
conf.level = 0.95, correct = TRUE)
{
DNAME <- deparse(substitute(x))
if (is.table(x) && length(dim(x)) == 1L) {
if (dim(x) != 2L)
stop("table 'x' should have 2 entries")
l <- 1
n <- sum(x)
x <- x[1L]
}
else if (is.matrix(x)) {
if (ncol(x) != 2L)
stop("'x' must have 2 columns")
l <- nrow(x)
n <- rowSums(x)
x <- x[, 1L]
}
else {
DNAME <- paste(DNAME, "out of", deparse(substitute(n)))
if ((l <- length(x)) != length(n))
stop("'x' and 'n' must have the same length")
}
OK <- complete.cases(x, n)
x <- x[OK]
n <- n[OK]
if ((k <- length(x)) < 1L)
stop("not enough data")
if (any(n <= 0))
stop("elements of 'n' must be positive")
if (any(x < 0))
stop("elements of 'x' must be nonnegative")
if (any(x > n))
stop("elements of 'x' must not be greater than those of 'n'")
if (is.null(p) && (k == 1))
p <- .5
if (!is.null(p)) {
DNAME <- paste0(DNAME, ", null ",
if(k == 1) "probability " else "probabilities ",
deparse(substitute(p)))
if (length(p) != l)
stop("'p' must have the same length as 'x' and 'n'")
p <- p[OK]
if (any((p <= 0) | (p >= 1)))
stop("elements of 'p' must be in (0,1)")
}
alternative <- match.arg(alternative)
if (k > 2 || (k == 2) && !is.null(p))
alternative <- "two.sided"
if ((length(conf.level) != 1L) || is.na(conf.level) ||
(conf.level <= 0) || (conf.level >= 1))
stop("'conf.level' must be a single number between 0 and 1")
correct <- as.logical(correct)
ESTIMATE <- setNames(x/n,
if (k == 1) "p" else paste("prop", 1L:l)[OK])
NVAL <- p
CINT <- NULL
YATES <- if(correct && (k <= 2)) .5 else 0
if (k == 1) {
z <- qnorm(if(alternative == "two.sided")
(1 + conf.level) / 2 else conf.level)
YATES <- min(YATES, abs(x - n * p))
z22n <- z^2 / (2 * n)
p.c <- ESTIMATE + YATES / n
p.u <- if(p.c >= 1) 1 else (p.c + z22n
+ z * sqrt(p.c * (1 - p.c) / n + z22n / (2 * n))) / (1+2*z22n)
p.c <- ESTIMATE - YATES / n
p.l <- if(p.c <= 0) 0 else (p.c + z22n
- z * sqrt(p.c * (1 - p.c) / n + z22n / (2 * n))) / (1+2*z22n)
CINT <- switch(alternative,
"two.sided" = c(max(p.l, 0), min(p.u, 1)),
"greater" = c(max(p.l, 0), 1),
"less" = c(0, min(p.u, 1)))
}
else if ((k == 2) & is.null(p)) {
DELTA <- ESTIMATE[1L] - ESTIMATE[2L]
YATES <- min(YATES, abs(DELTA) / sum(1/n))
WIDTH <- (switch(alternative,
"two.sided" = qnorm((1 + conf.level) / 2),
qnorm(conf.level))
* sqrt(sum(ESTIMATE * (1 - ESTIMATE) / n))
+ YATES * sum(1/n))
CINT <- switch(alternative,
"two.sided" = c(max(DELTA - WIDTH, -1),
min(DELTA + WIDTH, 1)),
"greater" = c(max(DELTA - WIDTH, -1), 1),
"less" = c(-1, min(DELTA + WIDTH, 1)))
}
if (!is.null(CINT))
attr(CINT, "conf.level") <- conf.level
METHOD <- paste(if(k == 1) "1-sample proportions test" else
paste0(k, "-sample test for ",
if(is.null(p)) "equality of" else "given",
" proportions"),
if(YATES) "with" else "without",
"continuity correction")
if (is.null(p)) {
p <- sum(x)/sum(n)
PARAMETER <- k - 1
}
else {
PARAMETER <- k
names(NVAL) <- names(ESTIMATE)
}
names(PARAMETER) <- "df"
x <- cbind(x, n - x)
E <- cbind(n * p, n * (1 - p))
if (any(E < 5))
warning("Chi-squared approximation may be incorrect")
STATISTIC <- sum((abs(x - E) - YATES)^2 / E)
names(STATISTIC) <- "X-squared"
if (alternative == "two.sided")
PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
else {
if (k == 1)
z <- sign(ESTIMATE - p) * sqrt(STATISTIC)
else
z <- sign(DELTA) * sqrt(STATISTIC)
PVAL <- pnorm(z, lower.tail = (alternative == "less"))
}
RVAL <- list(statistic = STATISTIC,
parameter = PARAMETER,
p.value = as.numeric(PVAL),
estimate = ESTIMATE,
null.value = NVAL,
conf.int = CINT,
alternative = alternative,
method = METHOD,
data.name = DNAME)
class(RVAL) <- "htest"
return(RVAL)
}