src/library/stats/R/prop.test.R - R - Git at Google

 #  File src/library/stats/R/prop.test.R
 #  Part of the R package, https://www.R-project.org
 #
 #  Copyright (C) 1995-2012 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
 #  the Free Software Foundation; either version 2 of the License, or
 #  (at your option) any later version.
 #
 #  This program is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #  GNU General Public License for more details.
 #
 #  A copy of the GNU General Public License is available at
 #  https://www.R-project.org/Licenses/

 prop.test <-
 function(x, n, p = NULL, alternative = c("two.sided", "less", "greater"),
          conf.level = 0.95, correct = TRUE)
 {
     DNAME <- deparse(substitute(x))

     if (is.table(x) && length(dim(x)) == 1L) {
         if (dim(x) != 2L)
             stop("table 'x' should have 2 entries")
         l <- 1
         n <- sum(x)
         x <- x[1L]
     }
     else if (is.matrix(x)) {
 	if (ncol(x) != 2L)
 	    stop("'x' must have 2 columns")
 	l <- nrow(x)
 	n <- rowSums(x)
 	x <- x[, 1L]
     }
     else {
 	DNAME <- paste(DNAME, "out of", deparse(substitute(n)))
 	if ((l <- length(x)) != length(n))
 	    stop("'x' and 'n' must have the same length")
     }

     OK <- complete.cases(x, n)
     x <- x[OK]
     n <- n[OK]
     if ((k <- length(x)) < 1L)
 	stop("not enough data")
     if (any(n <= 0))
 	stop("elements of 'n' must be positive")
     if (any(x < 0))
 	stop("elements of 'x' must be nonnegative")
     if (any(x > n))
 	stop("elements of 'x' must not be greater than those of 'n'")

     if (is.null(p) && (k == 1))
 	p <- .5
     if (!is.null(p)) {
 	DNAME <- paste0(DNAME, ", null ",
 		       if(k == 1) "probability " else "probabilities ",
 		       deparse(substitute(p)))
 	if (length(p) != l)
 	    stop("'p' must have the same length as 'x' and 'n'")
 	p <- p[OK]
 	if (any((p <= 0) | (p >= 1)))
 	    stop("elements of 'p' must be in (0,1)")
     }

     alternative <- match.arg(alternative)
     if (k > 2 || (k == 2) && !is.null(p))
 	alternative <- "two.sided"

     if ((length(conf.level) != 1L) || is.na(conf.level) ||
 	(conf.level <= 0) || (conf.level >= 1))
 	stop("'conf.level' must be a single number between 0 and 1")

     correct <- as.logical(correct)

     ESTIMATE <- setNames(x/n,
 			 if (k == 1) "p" else paste("prop", 1L:l)[OK])
     NVAL <- p
     CINT <- NULL
     YATES <- if(correct && (k <= 2)) .5 else 0

     if (k == 1) {
 	z <- qnorm(if(alternative == "two.sided")
 		   (1 + conf.level) / 2 else conf.level)
 	YATES <- min(YATES, abs(x - n * p))
         z22n <- z^2 / (2 * n)
 	p.c <- ESTIMATE + YATES / n
 	p.u <- if(p.c >= 1) 1 else (p.c + z22n
                   + z * sqrt(p.c * (1 - p.c) / n + z22n / (2 * n))) / (1+2*z22n)
 	p.c <- ESTIMATE - YATES / n
 	p.l <- if(p.c <= 0) 0 else (p.c + z22n
                   - z * sqrt(p.c * (1 - p.c) / n + z22n / (2 * n))) / (1+2*z22n)
 	CINT <- switch(alternative,
 		       "two.sided" = c(max(p.l, 0), min(p.u, 1)),
 		       "greater" = c(max(p.l, 0), 1),
 		       "less" = c(0, min(p.u, 1)))
     }
     else if ((k == 2) & is.null(p)) {
 	DELTA <- ESTIMATE[1L] - ESTIMATE[2L]
 	YATES <- min(YATES, abs(DELTA) / sum(1/n))
 	WIDTH <- (switch(alternative,
 			 "two.sided" = qnorm((1 + conf.level) / 2),
 			 qnorm(conf.level))
 		  * sqrt(sum(ESTIMATE * (1 - ESTIMATE) / n))
 		  + YATES * sum(1/n))
 	CINT <- switch(alternative,
 		       "two.sided" = c(max(DELTA - WIDTH, -1),
 		       min(DELTA + WIDTH, 1)),
 		       "greater" = c(max(DELTA - WIDTH, -1), 1),
 		       "less" = c(-1, min(DELTA + WIDTH, 1)))
     }
     if (!is.null(CINT))
 	attr(CINT, "conf.level") <- conf.level

     METHOD <- paste(if(k == 1) "1-sample proportions test" else
                     paste0(k, "-sample test for ",
                            if(is.null(p)) "equality of" else "given",
                            " proportions"),
 		    if(YATES) "with" else "without",
 		    "continuity correction")

     if (is.null(p)) {
 	p <- sum(x)/sum(n)
 	PARAMETER <- k - 1
     }
     else {
 	PARAMETER <- k
 	names(NVAL) <- names(ESTIMATE)
     }
     names(PARAMETER) <- "df"

     x <- cbind(x, n - x)
     E <- cbind(n * p, n * (1 - p))
     if (any(E < 5))
 	warning("Chi-squared approximation may be incorrect")
     STATISTIC <- sum((abs(x - E) - YATES)^2 / E)
     names(STATISTIC) <- "X-squared"

     if (alternative == "two.sided")
 	PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
     else {
 	if (k == 1)
 	    z <- sign(ESTIMATE - p) * sqrt(STATISTIC)
 	else
 	    z <- sign(DELTA) * sqrt(STATISTIC)
 	PVAL <- pnorm(z, lower.tail = (alternative == "less"))
     }

     RVAL <- list(statistic = STATISTIC,
 		 parameter = PARAMETER,
 		 p.value = as.numeric(PVAL),
 		 estimate = ESTIMATE,
 		 null.value = NVAL,
 		 conf.int = CINT,
 		 alternative = alternative,
 		 method = METHOD,
 		 data.name = DNAME)
     class(RVAL) <- "htest"
     return(RVAL)
 }
	# File src/library/stats/R/prop.test.R
	# Part of the R package, https://www.R-project.org
	#
	# Copyright (C) 1995-2012 The R Core Team
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 2 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# A copy of the GNU General Public License is available at
	# https://www.R-project.org/Licenses/

	prop.test <-
	function(x, n, p = NULL, alternative = c("two.sided", "less", "greater"),
	conf.level = 0.95, correct = TRUE)
	{
	DNAME <- deparse(substitute(x))

	if (is.table(x) && length(dim(x)) == 1L) {
	if (dim(x) != 2L)
	stop("table 'x' should have 2 entries")
	l <- 1
	n <- sum(x)
	x <- x[1L]
	}
	else if (is.matrix(x)) {
	if (ncol(x) != 2L)
	stop("'x' must have 2 columns")
	l <- nrow(x)
	n <- rowSums(x)
	x <- x[, 1L]
	}
	else {
	DNAME <- paste(DNAME, "out of", deparse(substitute(n)))
	if ((l <- length(x)) != length(n))
	stop("'x' and 'n' must have the same length")
	}

	OK <- complete.cases(x, n)
	x <- x[OK]
	n <- n[OK]
	if ((k <- length(x)) < 1L)
	stop("not enough data")
	if (any(n <= 0))
	stop("elements of 'n' must be positive")
	if (any(x < 0))
	stop("elements of 'x' must be nonnegative")
	if (any(x > n))
	stop("elements of 'x' must not be greater than those of 'n'")

	if (is.null(p) && (k == 1))
	p <- .5
	if (!is.null(p)) {
	DNAME <- paste0(DNAME, ", null ",
	if(k == 1) "probability " else "probabilities ",
	deparse(substitute(p)))
	if (length(p) != l)
	stop("'p' must have the same length as 'x' and 'n'")
	p <- p[OK]
	if (any((p <= 0) \| (p >= 1)))
	stop("elements of 'p' must be in (0,1)")
	}

	alternative <- match.arg(alternative)
	if (k > 2 \|\| (k == 2) && !is.null(p))
	alternative <- "two.sided"

	if ((length(conf.level) != 1L) \|\| is.na(conf.level) \|\|
	(conf.level <= 0) \|\| (conf.level >= 1))
	stop("'conf.level' must be a single number between 0 and 1")

	correct <- as.logical(correct)

	ESTIMATE <- setNames(x/n,
	if (k == 1) "p" else paste("prop", 1L:l)[OK])
	NVAL <- p
	CINT <- NULL
	YATES <- if(correct && (k <= 2)) .5 else 0

	if (k == 1) {
	z <- qnorm(if(alternative == "two.sided")
	(1 + conf.level) / 2 else conf.level)
	YATES <- min(YATES, abs(x - n * p))
	z22n <- z^2 / (2 * n)
	p.c <- ESTIMATE + YATES / n
	p.u <- if(p.c >= 1) 1 else (p.c + z22n
	+ z * sqrt(p.c * (1 - p.c) / n + z22n / (2 * n))) / (1+2*z22n)
	p.c <- ESTIMATE - YATES / n
	p.l <- if(p.c <= 0) 0 else (p.c + z22n
	- z * sqrt(p.c * (1 - p.c) / n + z22n / (2 * n))) / (1+2*z22n)
	CINT <- switch(alternative,
	"two.sided" = c(max(p.l, 0), min(p.u, 1)),
	"greater" = c(max(p.l, 0), 1),
	"less" = c(0, min(p.u, 1)))
	}
	else if ((k == 2) & is.null(p)) {
	DELTA <- ESTIMATE[1L] - ESTIMATE[2L]
	YATES <- min(YATES, abs(DELTA) / sum(1/n))
	WIDTH <- (switch(alternative,
	"two.sided" = qnorm((1 + conf.level) / 2),
	qnorm(conf.level))
	* sqrt(sum(ESTIMATE * (1 - ESTIMATE) / n))
	+ YATES * sum(1/n))
	CINT <- switch(alternative,
	"two.sided" = c(max(DELTA - WIDTH, -1),
	min(DELTA + WIDTH, 1)),
	"greater" = c(max(DELTA - WIDTH, -1), 1),
	"less" = c(-1, min(DELTA + WIDTH, 1)))
	}
	if (!is.null(CINT))
	attr(CINT, "conf.level") <- conf.level

	METHOD <- paste(if(k == 1) "1-sample proportions test" else
	paste0(k, "-sample test for ",
	if(is.null(p)) "equality of" else "given",
	" proportions"),
	if(YATES) "with" else "without",
	"continuity correction")

	if (is.null(p)) {
	p <- sum(x)/sum(n)
	PARAMETER <- k - 1
	}
	else {
	PARAMETER <- k
	names(NVAL) <- names(ESTIMATE)
	}
	names(PARAMETER) <- "df"

	x <- cbind(x, n - x)
	E <- cbind(n * p, n * (1 - p))
	if (any(E < 5))
	warning("Chi-squared approximation may be incorrect")
	STATISTIC <- sum((abs(x - E) - YATES)^2 / E)
	names(STATISTIC) <- "X-squared"

	if (alternative == "two.sided")
	PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
	else {
	if (k == 1)
	z <- sign(ESTIMATE - p) * sqrt(STATISTIC)
	else
	z <- sign(DELTA) * sqrt(STATISTIC)
	PVAL <- pnorm(z, lower.tail = (alternative == "less"))
	}

	RVAL <- list(statistic = STATISTIC,
	parameter = PARAMETER,
	p.value = as.numeric(PVAL),
	estimate = ESTIMATE,
	null.value = NVAL,
	conf.int = CINT,
	alternative = alternative,
	method = METHOD,
	data.name = DNAME)
	class(RVAL) <- "htest"
	return(RVAL)
	}