src/library/stats/R/cor.R - R - Git at Google

 #  File src/library/stats/R/cor.R
 #  Part of the R package, https://www.R-project.org
 #
 #  Copyright (C) 1995-2012 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
 #  the Free Software Foundation; either version 2 of the License, or
 #  (at your option) any later version.
 #
 #  This program is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #  GNU General Public License for more details.
 #
 #  A copy of the GNU General Public License is available at
 #  https://www.R-project.org/Licenses/

 #### cor() , cov() and var() : Based on the same C code

 cor <- function(x, y = NULL, use = "everything",
                 method = c("pearson", "kendall", "spearman"))
 {
     na.method <-
 	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
 		      "everything", "na.or.complete"))
     if(is.na(na.method)) stop("invalid 'use' argument")
     method <- match.arg(method)
     if(is.data.frame(y)) y <- as.matrix(y)
     if(is.data.frame(x)) x <- as.matrix(x)
     if(!is.matrix(x) && is.null(y))
         stop("supply both 'x' and 'y' or a matrix-like 'x'")
     ## non-atomic x should not be 'numeric', but in case a method messes up
     ## allow logicals for back-compatibility (package mice).
     if(!(is.numeric(x) || is.logical(x))) stop("'x' must be numeric")
     stopifnot(is.atomic(x))
     if(!is.null(y)) {
         if(!(is.numeric(y) || is.logical(y))) stop("'y' must be numeric")
         stopifnot(is.atomic(y))
     }

     ## Rank transform
     Rank <- function(u) {
         ## take care not to drop dims on a 0- or 1-row matrix
         if(length(u) == 0L) u
         else if(is.matrix(u)) {
             if(nrow(u) > 1L) apply(u, 2L, rank, na.last="keep") else row(u)
         } else rank(u, na.last="keep")
     }
     if(method == "pearson")
         .Call(C_cor, x, y, na.method, FALSE)
     else if (na.method %in% c(2L, 5L)) { ## "complete.obs" / "na.or.complete"
         if (is.null(y)) {
             .Call(C_cor, Rank(na.omit(x)), NULL, na.method,
                   method == "kendall")
         } else {
             nas <- attr(na.omit(cbind(x,y)), "na.action")
             dropNA <- function(x, nas) {
                 if(length(nas)) {
                     if (is.matrix(x)) x[-nas, , drop = FALSE] else x[-nas]
                 } else x
             }
             .Call(C_cor, Rank(dropNA(x, nas)), Rank(dropNA(y, nas)),
                   na.method, method == "kendall")
         }
     } else if (na.method != 3L) {
         ## i.e., 1 or 4, i.e. "all.obs" or "everything":
 	x <- Rank(x)
 	if(!is.null(y)) y <- Rank(y)
         .Call(C_cor, x, y, na.method, method == "kendall")
     }
     else { # rank correlations and "pairwise.complete.obs"; the hard case
          ## Based on contribution from Shigenobu Aoki.
          ## matrix
          if (is.null(y)) {
              ncy <- ncx <- ncol(x)
              if(ncx == 0) stop("'x' is empty")
              r <- matrix(0, nrow = ncx, ncol = ncy)
              ## 2.6.0 assumed the diagonal was 1, but not so for all NAs,
              ## nor single non-NA pairs.
              for (i in seq_len(ncx)) {
                  for (j in seq_len(i)) {
                      x2 <- x[,i]
                      y2 <- x[,j]
                      ok <- complete.cases(x2, y2)
                      x2 <- rank(x2[ok])
                      y2 <- rank(y2[ok])
                      ## we've removed all NAs
                      r[i, j] <- if(any(ok)) .Call(C_cor, x2, y2, 1L, method == "kendall") else NA
                  }
              }
              r <- r + t(r) - diag(diag(r))
 	     rownames(r) <- colnames(x)
 	     colnames(r) <- colnames(x)
              r
          }
          ## vector/matrix x vector/matrix
          else {
              if(length(x) == 0L || length(y) == 0L)
                  stop("both 'x' and 'y' must be non-empty")
              matrix_result <- is.matrix(x) || is.matrix(y)
 	     if (!is.matrix(x)) x <- matrix(x, ncol=1L)
 	     if (!is.matrix(y)) y <- matrix(y, ncol=1L)
              ncx <- ncol(x)
              ncy <- ncol(y)
              r <- matrix(0, nrow = ncx, ncol = ncy)
              for (i in seq_len(ncx)) {
                  for (j in seq_len(ncy)) {
                      x2 <- x[,i]
                      y2 <- y[,j]
                      ok <- complete.cases(x2, y2)
                      x2 <- rank(x2[ok])
                      y2 <- rank(y2[ok])
                      r[i, j] <- if(any(ok)) .Call(C_cor, x2, y2, 1L, method == "kendall") else NA
                  }
              }
 	     rownames(r) <- colnames(x)
 	     colnames(r) <- colnames(y)
              if(matrix_result) r else drop(r)
          }
      }
 }

 cov <- function(x, y = NULL, use = "everything",
                 method = c("pearson", "kendall", "spearman"))
 {
     na.method <-
 	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
 		      "everything", "na.or.complete"))
     if(is.na(na.method)) stop("invalid 'use' argument")
     method <- match.arg(method)
     if(is.data.frame(y)) y <- as.matrix(y)
     if(is.data.frame(x)) x <- as.matrix(x)
     if(!is.matrix(x) && is.null(y))
         stop("supply both 'x' and 'y' or a matrix-like 'x'")
     ## non-atomic x should not be 'numeric', but in case a method messes up
     stopifnot(is.numeric(x) || is.logical(x), is.atomic(x))
     if(!is.null(y)) stopifnot(is.numeric(y) || is.logical(y), is.atomic(y))

     ## Rank transform
     Rank <- function(u) {
         ## take care not to drop dims on a 0- or 1-row matrix
         if(length(u) == 0L) u
         else if(is.matrix(u)) {
             if(nrow(u) > 1L) apply(u, 2L, rank, na.last="keep") else row(u)
         } else rank(u, na.last="keep")
     }

     if(method == "pearson")
 	.Call(C_cov, x, y, na.method, method == "kendall")
     else if (na.method %in% c(2L, 5L)) { ## "complete.obs"  or  "na.or.complete"

         if (is.null(y)) {
             .Call(C_cov, Rank(na.omit(x)), NULL, na.method,
                   method == "kendall")
         } else {
             nas <- attr(na.omit(cbind(x,y)), "na.action")
             dropNA <- function(x, nas) {
                 if(length(nas)) {
                     if (is.matrix(x)) x[-nas, , drop = FALSE] else x[-nas]
                 } else x
             }
             .Call(C_cov, Rank(dropNA(x, nas)), Rank(dropNA(y, nas)),
                   na.method, method == "kendall")
         }
     } else if (na.method != 3L) { ## 1 or 4: "all.obs"  or  "everything"
 	x <- Rank(x)
 	if(!is.null(y)) y <- Rank(y)
 	.Call(C_cov, x, y, na.method, method == "kendall")
     }
     else ##  "pairwise.complete.obs"
 	stop("cannot handle 'pairwise.complete.obs'")
 }

 var <- function(x, y = NULL, na.rm = FALSE, use) {
     if(missing(use))
 	use <- if(na.rm) "na.or.complete" else "everything"
     na.method <-
 	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
 		      "everything", "na.or.complete"))
     if(is.na(na.method)) stop("invalid 'use' argument")
     if (is.data.frame(x)) x <- as.matrix(x) else stopifnot(is.atomic(x))
     if (is.data.frame(y)) y <- as.matrix(y) else stopifnot(is.atomic(y))
     .Call(C_cov, x, y, na.method, FALSE)
 }

 cov2cor <- function(V)
 {
     ## Purpose: Covariance matrix |--> Correlation matrix -- efficiently
     ## ----------------------------------------------------------------------
     ## Arguments: V: a covariance matrix (i.e. symmetric and positive definite)
     ## ----------------------------------------------------------------------
     ## Author: Martin Maechler, Date: 12 Jun 2003, 11L:50
     p <- (d <- dim(V))[1L]
     if(!is.numeric(V) || length(d) != 2L || p != d[2L])
 	stop("'V' is not a square numeric matrix")
     Is <- sqrt(1/diag(V)) # diag( 1/sigma_i )
     if(any(!is.finite(Is)))
 	warning("diag(.) had 0 or NA entries; non-finite result is doubtful")
     r <- V # keep dimnames
     r[] <- Is * V * rep(Is, each = p)
     ##	== D %*% V %*% D  where D = diag(Is)
     r[cbind(1L:p,1L:p)] <- 1 # exact in diagonal
     r
 }
	# File src/library/stats/R/cor.R
	# Part of the R package, https://www.R-project.org
	#
	# Copyright (C) 1995-2012 The R Core Team
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 2 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# A copy of the GNU General Public License is available at
	# https://www.R-project.org/Licenses/

	#### cor() , cov() and var() : Based on the same C code

	cor <- function(x, y = NULL, use = "everything",
	method = c("pearson", "kendall", "spearman"))
	{
	na.method <-
	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
	"everything", "na.or.complete"))
	if(is.na(na.method)) stop("invalid 'use' argument")
	method <- match.arg(method)
	if(is.data.frame(y)) y <- as.matrix(y)
	if(is.data.frame(x)) x <- as.matrix(x)
	if(!is.matrix(x) && is.null(y))
	stop("supply both 'x' and 'y' or a matrix-like 'x'")
	## non-atomic x should not be 'numeric', but in case a method messes up
	## allow logicals for back-compatibility (package mice).
	if(!(is.numeric(x) \|\| is.logical(x))) stop("'x' must be numeric")
	stopifnot(is.atomic(x))
	if(!is.null(y)) {
	if(!(is.numeric(y) \|\| is.logical(y))) stop("'y' must be numeric")
	stopifnot(is.atomic(y))
	}

	## Rank transform
	Rank <- function(u) {
	## take care not to drop dims on a 0- or 1-row matrix
	if(length(u) == 0L) u
	else if(is.matrix(u)) {
	if(nrow(u) > 1L) apply(u, 2L, rank, na.last="keep") else row(u)
	} else rank(u, na.last="keep")
	}
	if(method == "pearson")
	.Call(C_cor, x, y, na.method, FALSE)
	else if (na.method %in% c(2L, 5L)) { ## "complete.obs" / "na.or.complete"
	if (is.null(y)) {
	.Call(C_cor, Rank(na.omit(x)), NULL, na.method,
	method == "kendall")
	} else {
	nas <- attr(na.omit(cbind(x,y)), "na.action")
	dropNA <- function(x, nas) {
	if(length(nas)) {
	if (is.matrix(x)) x[-nas, , drop = FALSE] else x[-nas]
	} else x
	}
	.Call(C_cor, Rank(dropNA(x, nas)), Rank(dropNA(y, nas)),
	na.method, method == "kendall")
	}
	} else if (na.method != 3L) {
	## i.e., 1 or 4, i.e. "all.obs" or "everything":
	x <- Rank(x)
	if(!is.null(y)) y <- Rank(y)
	.Call(C_cor, x, y, na.method, method == "kendall")
	}
	else { # rank correlations and "pairwise.complete.obs"; the hard case
	## Based on contribution from Shigenobu Aoki.
	## matrix
	if (is.null(y)) {
	ncy <- ncx <- ncol(x)
	if(ncx == 0) stop("'x' is empty")
	r <- matrix(0, nrow = ncx, ncol = ncy)
	## 2.6.0 assumed the diagonal was 1, but not so for all NAs,
	## nor single non-NA pairs.
	for (i in seq_len(ncx)) {
	for (j in seq_len(i)) {
	x2 <- x[,i]
	y2 <- x[,j]
	ok <- complete.cases(x2, y2)
	x2 <- rank(x2[ok])
	y2 <- rank(y2[ok])
	## we've removed all NAs
	r[i, j] <- if(any(ok)) .Call(C_cor, x2, y2, 1L, method == "kendall") else NA
	}
	}
	r <- r + t(r) - diag(diag(r))
	rownames(r) <- colnames(x)
	colnames(r) <- colnames(x)
	r
	}
	## vector/matrix x vector/matrix
	else {
	if(length(x) == 0L \|\| length(y) == 0L)
	stop("both 'x' and 'y' must be non-empty")
	matrix_result <- is.matrix(x) \|\| is.matrix(y)
	if (!is.matrix(x)) x <- matrix(x, ncol=1L)
	if (!is.matrix(y)) y <- matrix(y, ncol=1L)
	ncx <- ncol(x)
	ncy <- ncol(y)
	r <- matrix(0, nrow = ncx, ncol = ncy)
	for (i in seq_len(ncx)) {
	for (j in seq_len(ncy)) {
	x2 <- x[,i]
	y2 <- y[,j]
	ok <- complete.cases(x2, y2)
	x2 <- rank(x2[ok])
	y2 <- rank(y2[ok])
	r[i, j] <- if(any(ok)) .Call(C_cor, x2, y2, 1L, method == "kendall") else NA
	}
	}
	rownames(r) <- colnames(x)
	colnames(r) <- colnames(y)
	if(matrix_result) r else drop(r)
	}
	}
	}

	cov <- function(x, y = NULL, use = "everything",
	method = c("pearson", "kendall", "spearman"))
	{
	na.method <-
	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
	"everything", "na.or.complete"))
	if(is.na(na.method)) stop("invalid 'use' argument")
	method <- match.arg(method)
	if(is.data.frame(y)) y <- as.matrix(y)
	if(is.data.frame(x)) x <- as.matrix(x)
	if(!is.matrix(x) && is.null(y))
	stop("supply both 'x' and 'y' or a matrix-like 'x'")
	## non-atomic x should not be 'numeric', but in case a method messes up
	stopifnot(is.numeric(x) \|\| is.logical(x), is.atomic(x))
	if(!is.null(y)) stopifnot(is.numeric(y) \|\| is.logical(y), is.atomic(y))

	## Rank transform
	Rank <- function(u) {
	## take care not to drop dims on a 0- or 1-row matrix
	if(length(u) == 0L) u
	else if(is.matrix(u)) {
	if(nrow(u) > 1L) apply(u, 2L, rank, na.last="keep") else row(u)
	} else rank(u, na.last="keep")
	}

	if(method == "pearson")
	.Call(C_cov, x, y, na.method, method == "kendall")
	else if (na.method %in% c(2L, 5L)) { ## "complete.obs" or "na.or.complete"

	if (is.null(y)) {
	.Call(C_cov, Rank(na.omit(x)), NULL, na.method,
	method == "kendall")
	} else {
	nas <- attr(na.omit(cbind(x,y)), "na.action")
	dropNA <- function(x, nas) {
	if(length(nas)) {
	if (is.matrix(x)) x[-nas, , drop = FALSE] else x[-nas]
	} else x
	}
	.Call(C_cov, Rank(dropNA(x, nas)), Rank(dropNA(y, nas)),
	na.method, method == "kendall")
	}
	} else if (na.method != 3L) { ## 1 or 4: "all.obs" or "everything"
	x <- Rank(x)
	if(!is.null(y)) y <- Rank(y)
	.Call(C_cov, x, y, na.method, method == "kendall")
	}
	else ## "pairwise.complete.obs"
	stop("cannot handle 'pairwise.complete.obs'")
	}

	var <- function(x, y = NULL, na.rm = FALSE, use) {
	if(missing(use))
	use <- if(na.rm) "na.or.complete" else "everything"
	na.method <-
	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
	"everything", "na.or.complete"))
	if(is.na(na.method)) stop("invalid 'use' argument")
	if (is.data.frame(x)) x <- as.matrix(x) else stopifnot(is.atomic(x))
	if (is.data.frame(y)) y <- as.matrix(y) else stopifnot(is.atomic(y))
	.Call(C_cov, x, y, na.method, FALSE)
	}

	cov2cor <- function(V)
	{
	## Purpose: Covariance matrix \|--> Correlation matrix -- efficiently
	## ----------------------------------------------------------------------
	## Arguments: V: a covariance matrix (i.e. symmetric and positive definite)
	## ----------------------------------------------------------------------
	## Author: Martin Maechler, Date: 12 Jun 2003, 11L:50
	p <- (d <- dim(V))[1L]
	if(!is.numeric(V) \|\| length(d) != 2L \|\| p != d[2L])
	stop("'V' is not a square numeric matrix")
	Is <- sqrt(1/diag(V)) # diag( 1/sigma_i )
	if(any(!is.finite(Is)))
	warning("diag(.) had 0 or NA entries; non-finite result is doubtful")
	r <- V # keep dimnames
	r[] <- Is * V * rep(Is, each = p)
	## == D %% V %% D where D = diag(Is)
	r[cbind(1L:p,1L:p)] <- 1 # exact in diagonal
	r
	}