src/library/stats/R/cor.test.R - R - Git at Google

 #  File src/library/stats/R/cor.test.R
 #  Part of the R package, https://www.R-project.org
 #
 #  Copyright (C) 1995-2016 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
 #  the Free Software Foundation; either version 2 of the License, or
 #  (at your option) any later version.
 #
 #  This program is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #  GNU General Public License for more details.
 #
 #  A copy of the GNU General Public License is available at
 #  https://www.R-project.org/Licenses/

 cor.test <- function(x, ...) UseMethod("cor.test")

 cor.test.default <-
 function(x, y, alternative = c("two.sided", "less", "greater"),
          method = c("pearson", "kendall", "spearman"), exact = NULL,
          conf.level = 0.95, continuity = FALSE, ...)
 {
     alternative <- match.arg(alternative)
     method <- match.arg(method)
     DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))

     if(length(x) != length(y))
 	stop("'x' and 'y' must have the same length")
     if(!is.numeric(x)) stop("'x' must be a numeric vector")
     if(!is.numeric(y)) stop("'y' must be a numeric vector")
     OK <- complete.cases(x, y)
     x <- x[OK]
     y <- y[OK]
     n <- length(x)

     NVAL <- 0
     conf.int <- FALSE

     if(method == "pearson") {
 	if(n < 3L)
 	    stop("not enough finite observations")
 	method <- "Pearson's product-moment correlation"
 	names(NVAL) <- "correlation"
 	r <- cor(x, y)
         df <- n - 2L
 	ESTIMATE <- c(cor = r)
 	PARAMETER <- c(df = df)
 	STATISTIC <- c(t = sqrt(df) * r / sqrt(1 - r^2))
         if(n > 3) { ## confidence int.
             if(!missing(conf.level) &&
                (length(conf.level) != 1 || !is.finite(conf.level) ||
                 conf.level < 0 || conf.level > 1))
                 stop("'conf.level' must be a single number between 0 and 1")
             conf.int <- TRUE
             z <- atanh(r)
             sigma <- 1 / sqrt(n - 3)
             cint <-
                 switch(alternative,
                        less = c(-Inf, z + sigma * qnorm(conf.level)),
                        greater = c(z - sigma * qnorm(conf.level), Inf),
                        two.sided = z +
                        c(-1, 1) * sigma * qnorm((1 + conf.level) / 2))
             cint <- tanh(cint)
             attr(cint, "conf.level") <- conf.level
         }
 	PVAL <- switch(alternative,
 		       "less" = pt(STATISTIC, df),
 		       "greater" = pt(STATISTIC, df, lower.tail=FALSE),
 		       "two.sided" = 2 * min(pt(STATISTIC, df),
 					     pt(STATISTIC, df, lower.tail=FALSE)))
     }
     else {
 	if(n < 2)
 	    stop("not enough finite observations")
 	PARAMETER <- NULL
 	TIES <- (min(length(unique(x)), length(unique(y))) < n)
 	if(method == "kendall") {
 	    method <- "Kendall's rank correlation tau"
 	    names(NVAL) <- "tau"
 	    r <- cor(x,y, method = "kendall")
             ESTIMATE <- c(tau = r)

             if(!is.finite(ESTIMATE)) {  # all x or all y the same
                 ESTIMATE[] <- NA
                 STATISTIC <- c(T = NA)
                 PVAL <- NA
             }
             else {
                 if(is.null(exact))
                     exact <- (n < 50)
                 if(exact && !TIES) {
                     q <- round((r + 1) * n * (n - 1) / 4)
                     STATISTIC <- c(T = q)
                     pkendall <- function(q, n) .Call(C_pKendall, q, n)
                     PVAL <-
                         switch(alternative,
                                "two.sided" = {
                                    if(q > n * (n - 1) / 4)
                                        p <- 1 - pkendall(q - 1, n)
                                    else
                                        p <- pkendall(q, n)
                                    min(2 * p, 1)
                                },
                                "greater" = 1 - pkendall(q - 1, n),
                                "less" = pkendall(q, n))
                 } else {
                     xties <- table(x[duplicated(x)]) + 1
                     yties <- table(y[duplicated(y)]) + 1
                     T0 <- n * (n - 1)/2
                     T1 <- sum(xties * (xties - 1))/2
                     T2 <- sum(yties * (yties - 1))/2
                     S <- r * sqrt((T0 - T1) * (T0 - T2))
                     v0 <- n * (n - 1) * (2 * n + 5)
                     vt <- sum(xties * (xties - 1) * (2 * xties + 5))
                     vu <- sum(yties * (yties - 1) * (2 * yties + 5))
                     v1 <- sum(xties * (xties - 1)) * sum(yties * (yties - 1))
                     v2 <- sum(xties * (xties - 1) * (xties - 2)) *
                         sum(yties * (yties - 1) * (yties - 2))

                     var_S <- (v0 - vt - vu) / 18 +
                         v1 / (2 * n * (n - 1)) +
                             v2 / (9 * n * (n - 1) * (n - 2))

                     if(exact && TIES)
                         warning("Cannot compute exact p-value with ties")
                     if (continuity) S <- sign(S) * (abs(S) - 1)
                     STATISTIC <- c(z = S / sqrt(var_S))
 		    PVAL <- switch(alternative,
 				   "less" = pnorm(STATISTIC),
 				   "greater" = pnorm(STATISTIC, lower.tail=FALSE),
 				   "two.sided" = 2 * min(pnorm(STATISTIC),
 							 pnorm(STATISTIC, lower.tail=FALSE)))
                 }
             }
 	} else {
 	    method <- "Spearman's rank correlation rho"
             if (is.null(exact))
                 exact <- TRUE
 	    names(NVAL) <- "rho"
 	    r <- cor(rank(x), rank(y))
 	    ESTIMATE <- c(rho = r)
             if(!is.finite(ESTIMATE)) {  # all x or all y the same
                 ESTIMATE[] <- NA
                 STATISTIC <- c(S = NA)
                 PVAL <- NA
             }
             else {
                 ## Use the test statistic S = sum(rank(x) - rank(y))^2
                 ## and AS 89 for obtaining better p-values than via the
                 ## simple normal approximation.
                 ## In the case of no ties, S = (1-rho) * (n^3-n)/6.
                 pspearman <- function(q, n, lower.tail = TRUE) {
                     if(n <= 1290 && exact) # n*(n^2 - 1) does not overflow
                         .Call(C_pRho, round(q) + 2*lower.tail, n, lower.tail)
 		    else { # for large n: asymptotic t_{n-2}
                         den <- (n*(n^2-1))/6 # careful for overflow
                         ## Kendall et all (1939) p. 260
                         if (continuity) den <- den + 1
 			r <- 1 - q/den
 			pt(r / sqrt((1 - r^2)/(n-2)), df = n-2,
 			   lower.tail = !lower.tail)
 		    }
                 }
                 q <- (n^3 - n) * (1 - r) / 6
                 STATISTIC <- c(S = q)
                 if(TIES && exact){
                     exact <- FALSE
                     warning("Cannot compute exact p-value with ties")
                 }
                 PVAL <-
                     switch(alternative,
                            "two.sided" = {
                                p <- if(q > (n^3 - n) / 6)
                                    pspearman(q, n, lower.tail = FALSE)
                                else
 				   pspearman(q, n, lower.tail = TRUE)
 			       min(2 * p, 1)
 			   },
 			   "greater" = pspearman(q, n, lower.tail = TRUE),
 			   "less" = pspearman(q, n, lower.tail = FALSE))
             }
         }
     }

     RVAL <- list(statistic = STATISTIC,
                  parameter = PARAMETER,
                  p.value = as.numeric(PVAL),
                  estimate = ESTIMATE,
                  null.value = NVAL,
                  alternative = alternative,
                  method = method,
                  data.name = DNAME)
     if(conf.int)
         RVAL <- c(RVAL, list(conf.int = cint))
     class(RVAL) <- "htest"
     RVAL
 }

 cor.test.formula <-
 function(formula, data, subset, na.action, ...)
 {
     if(missing(formula)
        || !inherits(formula, "formula")
        || length(formula) != 2L)
         stop("'formula' missing or invalid")
     m <- match.call(expand.dots = FALSE)
     if(is.matrix(eval(m$data, parent.frame())))
         m$data <- as.data.frame(data)
     ## need stats:: for non-standard evaluation
     m[[1L]] <- quote(stats::model.frame)
     m$... <- NULL
     mf <- eval(m, environment(formula))
     if(length(mf) != 2L)
         stop("invalid formula")
     DNAME <- paste(names(mf), collapse = " and ")
     names(mf) <- c("x", "y")
     y <- do.call("cor.test", c(mf, list(...)))
     y$data.name <- DNAME
     y
 }
	# File src/library/stats/R/cor.test.R
	# Part of the R package, https://www.R-project.org
	#
	# Copyright (C) 1995-2016 The R Core Team
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 2 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# A copy of the GNU General Public License is available at
	# https://www.R-project.org/Licenses/

	cor.test <- function(x, ...) UseMethod("cor.test")

	cor.test.default <-
	function(x, y, alternative = c("two.sided", "less", "greater"),
	method = c("pearson", "kendall", "spearman"), exact = NULL,
	conf.level = 0.95, continuity = FALSE, ...)
	{
	alternative <- match.arg(alternative)
	method <- match.arg(method)
	DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))

	if(length(x) != length(y))
	stop("'x' and 'y' must have the same length")
	if(!is.numeric(x)) stop("'x' must be a numeric vector")
	if(!is.numeric(y)) stop("'y' must be a numeric vector")
	OK <- complete.cases(x, y)
	x <- x[OK]
	y <- y[OK]
	n <- length(x)

	NVAL <- 0
	conf.int <- FALSE

	if(method == "pearson") {
	if(n < 3L)
	stop("not enough finite observations")
	method <- "Pearson's product-moment correlation"
	names(NVAL) <- "correlation"
	r <- cor(x, y)
	df <- n - 2L
	ESTIMATE <- c(cor = r)
	PARAMETER <- c(df = df)
	STATISTIC <- c(t = sqrt(df) * r / sqrt(1 - r^2))
	if(n > 3) { ## confidence int.
	if(!missing(conf.level) &&
	(length(conf.level) != 1 \|\| !is.finite(conf.level) \|\|
	conf.level < 0 \|\| conf.level > 1))
	stop("'conf.level' must be a single number between 0 and 1")
	conf.int <- TRUE
	z <- atanh(r)
	sigma <- 1 / sqrt(n - 3)
	cint <-
	switch(alternative,
	less = c(-Inf, z + sigma * qnorm(conf.level)),
	greater = c(z - sigma * qnorm(conf.level), Inf),
	two.sided = z +
	c(-1, 1) * sigma * qnorm((1 + conf.level) / 2))
	cint <- tanh(cint)
	attr(cint, "conf.level") <- conf.level
	}
	PVAL <- switch(alternative,
	"less" = pt(STATISTIC, df),
	"greater" = pt(STATISTIC, df, lower.tail=FALSE),
	"two.sided" = 2 * min(pt(STATISTIC, df),
	pt(STATISTIC, df, lower.tail=FALSE)))
	}
	else {
	if(n < 2)
	stop("not enough finite observations")
	PARAMETER <- NULL
	TIES <- (min(length(unique(x)), length(unique(y))) < n)
	if(method == "kendall") {
	method <- "Kendall's rank correlation tau"
	names(NVAL) <- "tau"
	r <- cor(x,y, method = "kendall")
	ESTIMATE <- c(tau = r)

	if(!is.finite(ESTIMATE)) { # all x or all y the same
	ESTIMATE[] <- NA
	STATISTIC <- c(T = NA)
	PVAL <- NA
	}
	else {
	if(is.null(exact))
	exact <- (n < 50)
	if(exact && !TIES) {
	q <- round((r + 1) * n * (n - 1) / 4)
	STATISTIC <- c(T = q)
	pkendall <- function(q, n) .Call(C_pKendall, q, n)
	PVAL <-
	switch(alternative,
	"two.sided" = {
	if(q > n * (n - 1) / 4)
	p <- 1 - pkendall(q - 1, n)
	else
	p <- pkendall(q, n)
	min(2 * p, 1)
	},
	"greater" = 1 - pkendall(q - 1, n),
	"less" = pkendall(q, n))
	} else {
	xties <- table(x[duplicated(x)]) + 1
	yties <- table(y[duplicated(y)]) + 1
	T0 <- n * (n - 1)/2
	T1 <- sum(xties * (xties - 1))/2
	T2 <- sum(yties * (yties - 1))/2
	S <- r * sqrt((T0 - T1) * (T0 - T2))
	v0 <- n * (n - 1) * (2 * n + 5)
	vt <- sum(xties * (xties - 1) * (2 * xties + 5))
	vu <- sum(yties * (yties - 1) * (2 * yties + 5))
	v1 <- sum(xties * (xties - 1)) * sum(yties * (yties - 1))
	v2 <- sum(xties * (xties - 1) * (xties - 2)) *
	sum(yties * (yties - 1) * (yties - 2))

	var_S <- (v0 - vt - vu) / 18 +
	v1 / (2 * n * (n - 1)) +
	v2 / (9 * n * (n - 1) * (n - 2))

	if(exact && TIES)
	warning("Cannot compute exact p-value with ties")
	if (continuity) S <- sign(S) * (abs(S) - 1)
	STATISTIC <- c(z = S / sqrt(var_S))
	PVAL <- switch(alternative,
	"less" = pnorm(STATISTIC),
	"greater" = pnorm(STATISTIC, lower.tail=FALSE),
	"two.sided" = 2 * min(pnorm(STATISTIC),
	pnorm(STATISTIC, lower.tail=FALSE)))
	}
	}
	} else {
	method <- "Spearman's rank correlation rho"
	if (is.null(exact))
	exact <- TRUE
	names(NVAL) <- "rho"
	r <- cor(rank(x), rank(y))
	ESTIMATE <- c(rho = r)
	if(!is.finite(ESTIMATE)) { # all x or all y the same
	ESTIMATE[] <- NA
	STATISTIC <- c(S = NA)
	PVAL <- NA
	}
	else {
	## Use the test statistic S = sum(rank(x) - rank(y))^2
	## and AS 89 for obtaining better p-values than via the
	## simple normal approximation.
	## In the case of no ties, S = (1-rho) * (n^3-n)/6.
	pspearman <- function(q, n, lower.tail = TRUE) {
	if(n <= 1290 && exact) # n*(n^2 - 1) does not overflow
	.Call(C_pRho, round(q) + 2*lower.tail, n, lower.tail)
	else { # for large n: asymptotic t_{n-2}
	den <- (n*(n^2-1))/6 # careful for overflow
	## Kendall et all (1939) p. 260
	if (continuity) den <- den + 1
	r <- 1 - q/den
	pt(r / sqrt((1 - r^2)/(n-2)), df = n-2,
	lower.tail = !lower.tail)
	}
	}
	q <- (n^3 - n) * (1 - r) / 6
	STATISTIC <- c(S = q)
	if(TIES && exact){
	exact <- FALSE
	warning("Cannot compute exact p-value with ties")
	}
	PVAL <-
	switch(alternative,
	"two.sided" = {
	p <- if(q > (n^3 - n) / 6)
	pspearman(q, n, lower.tail = FALSE)
	else
	pspearman(q, n, lower.tail = TRUE)
	min(2 * p, 1)
	},
	"greater" = pspearman(q, n, lower.tail = TRUE),
	"less" = pspearman(q, n, lower.tail = FALSE))
	}
	}
	}

	RVAL <- list(statistic = STATISTIC,
	parameter = PARAMETER,
	p.value = as.numeric(PVAL),
	estimate = ESTIMATE,
	null.value = NVAL,
	alternative = alternative,
	method = method,
	data.name = DNAME)
	if(conf.int)
	RVAL <- c(RVAL, list(conf.int = cint))
	class(RVAL) <- "htest"
	RVAL
	}

	cor.test.formula <-
	function(formula, data, subset, na.action, ...)
	{
	if(missing(formula)
	\|\| !inherits(formula, "formula")
	\|\| length(formula) != 2L)
	stop("'formula' missing or invalid")
	m <- match.call(expand.dots = FALSE)
	if(is.matrix(eval(m$data, parent.frame())))
	m$data <- as.data.frame(data)
	## need stats:: for non-standard evaluation
	m[[1L]] <- quote(stats::model.frame)
	m$... <- NULL
	mf <- eval(m, environment(formula))
	if(length(mf) != 2L)
	stop("invalid formula")
	DNAME <- paste(names(mf), collapse = " and ")
	names(mf) <- c("x", "y")
	y <- do.call("cor.test", c(mf, list(...)))
	y$data.name <- DNAME
	y
	}