blob: c86d93beed383450a69c9e51e03001feb49b67c8 [file] [log] [blame]
# File src/library/stats/R/cor.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2012 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
#### cor() , cov() and var() : Based on the same C code
cor <- function(x, y = NULL, use = "everything",
method = c("pearson", "kendall", "spearman"))
{
na.method <-
pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
"everything", "na.or.complete"))
if(is.na(na.method)) stop("invalid 'use' argument")
method <- match.arg(method)
if(is.data.frame(y)) y <- as.matrix(y)
if(is.data.frame(x)) x <- as.matrix(x)
if(!is.matrix(x) && is.null(y))
stop("supply both 'x' and 'y' or a matrix-like 'x'")
## non-atomic x should not be 'numeric', but in case a method messes up
## allow logicals for back-compatibility (package mice).
if(!(is.numeric(x) || is.logical(x))) stop("'x' must be numeric")
stopifnot(is.atomic(x))
if(!is.null(y)) {
if(!(is.numeric(y) || is.logical(y))) stop("'y' must be numeric")
stopifnot(is.atomic(y))
}
## Rank transform
Rank <- function(u) {
## take care not to drop dims on a 0- or 1-row matrix
if(length(u) == 0L) u
else if(is.matrix(u)) {
if(nrow(u) > 1L) apply(u, 2L, rank, na.last="keep") else row(u)
} else rank(u, na.last="keep")
}
if(method == "pearson")
.Call(C_cor, x, y, na.method, FALSE)
else if (na.method %in% c(2L, 5L)) { ## "complete.obs" / "na.or.complete"
if (is.null(y)) {
.Call(C_cor, Rank(na.omit(x)), NULL, na.method,
method == "kendall")
} else {
nas <- attr(na.omit(cbind(x,y)), "na.action")
dropNA <- function(x, nas) {
if(length(nas)) {
if (is.matrix(x)) x[-nas, , drop = FALSE] else x[-nas]
} else x
}
.Call(C_cor, Rank(dropNA(x, nas)), Rank(dropNA(y, nas)),
na.method, method == "kendall")
}
} else if (na.method != 3L) {
## i.e., 1 or 4, i.e. "all.obs" or "everything":
x <- Rank(x)
if(!is.null(y)) y <- Rank(y)
.Call(C_cor, x, y, na.method, method == "kendall")
}
else { # rank correlations and "pairwise.complete.obs"; the hard case
## Based on contribution from Shigenobu Aoki.
## matrix
if (is.null(y)) {
ncy <- ncx <- ncol(x)
if(ncx == 0) stop("'x' is empty")
r <- matrix(0, nrow = ncx, ncol = ncy)
## 2.6.0 assumed the diagonal was 1, but not so for all NAs,
## nor single non-NA pairs.
for (i in seq_len(ncx)) {
for (j in seq_len(i)) {
x2 <- x[,i]
y2 <- x[,j]
ok <- complete.cases(x2, y2)
x2 <- rank(x2[ok])
y2 <- rank(y2[ok])
## we've removed all NAs
r[i, j] <- if(any(ok)) .Call(C_cor, x2, y2, 1L, method == "kendall") else NA
}
}
r <- r + t(r) - diag(diag(r))
rownames(r) <- colnames(x)
colnames(r) <- colnames(x)
r
}
## vector/matrix x vector/matrix
else {
if(length(x) == 0L || length(y) == 0L)
stop("both 'x' and 'y' must be non-empty")
matrix_result <- is.matrix(x) || is.matrix(y)
if (!is.matrix(x)) x <- matrix(x, ncol=1L)
if (!is.matrix(y)) y <- matrix(y, ncol=1L)
ncx <- ncol(x)
ncy <- ncol(y)
r <- matrix(0, nrow = ncx, ncol = ncy)
for (i in seq_len(ncx)) {
for (j in seq_len(ncy)) {
x2 <- x[,i]
y2 <- y[,j]
ok <- complete.cases(x2, y2)
x2 <- rank(x2[ok])
y2 <- rank(y2[ok])
r[i, j] <- if(any(ok)) .Call(C_cor, x2, y2, 1L, method == "kendall") else NA
}
}
rownames(r) <- colnames(x)
colnames(r) <- colnames(y)
if(matrix_result) r else drop(r)
}
}
}
cov <- function(x, y = NULL, use = "everything",
method = c("pearson", "kendall", "spearman"))
{
na.method <-
pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
"everything", "na.or.complete"))
if(is.na(na.method)) stop("invalid 'use' argument")
method <- match.arg(method)
if(is.data.frame(y)) y <- as.matrix(y)
if(is.data.frame(x)) x <- as.matrix(x)
if(!is.matrix(x) && is.null(y))
stop("supply both 'x' and 'y' or a matrix-like 'x'")
## non-atomic x should not be 'numeric', but in case a method messes up
stopifnot(is.numeric(x) || is.logical(x), is.atomic(x))
if(!is.null(y)) stopifnot(is.numeric(y) || is.logical(y), is.atomic(y))
## Rank transform
Rank <- function(u) {
## take care not to drop dims on a 0- or 1-row matrix
if(length(u) == 0L) u
else if(is.matrix(u)) {
if(nrow(u) > 1L) apply(u, 2L, rank, na.last="keep") else row(u)
} else rank(u, na.last="keep")
}
if(method == "pearson")
.Call(C_cov, x, y, na.method, method == "kendall")
else if (na.method %in% c(2L, 5L)) { ## "complete.obs" or "na.or.complete"
if (is.null(y)) {
.Call(C_cov, Rank(na.omit(x)), NULL, na.method,
method == "kendall")
} else {
nas <- attr(na.omit(cbind(x,y)), "na.action")
dropNA <- function(x, nas) {
if(length(nas)) {
if (is.matrix(x)) x[-nas, , drop = FALSE] else x[-nas]
} else x
}
.Call(C_cov, Rank(dropNA(x, nas)), Rank(dropNA(y, nas)),
na.method, method == "kendall")
}
} else if (na.method != 3L) { ## 1 or 4: "all.obs" or "everything"
x <- Rank(x)
if(!is.null(y)) y <- Rank(y)
.Call(C_cov, x, y, na.method, method == "kendall")
}
else ## "pairwise.complete.obs"
stop("cannot handle 'pairwise.complete.obs'")
}
var <- function(x, y = NULL, na.rm = FALSE, use) {
if(missing(use))
use <- if(na.rm) "na.or.complete" else "everything"
na.method <-
pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
"everything", "na.or.complete"))
if(is.na(na.method)) stop("invalid 'use' argument")
if (is.data.frame(x)) x <- as.matrix(x) else stopifnot(is.atomic(x))
if (is.data.frame(y)) y <- as.matrix(y) else stopifnot(is.atomic(y))
.Call(C_cov, x, y, na.method, FALSE)
}
cov2cor <- function(V)
{
## Purpose: Covariance matrix |--> Correlation matrix -- efficiently
## ----------------------------------------------------------------------
## Arguments: V: a covariance matrix (i.e. symmetric and positive definite)
## ----------------------------------------------------------------------
## Author: Martin Maechler, Date: 12 Jun 2003, 11L:50
p <- (d <- dim(V))[1L]
if(!is.numeric(V) || length(d) != 2L || p != d[2L])
stop("'V' is not a square numeric matrix")
Is <- sqrt(1/diag(V)) # diag( 1/sigma_i )
if(any(!is.finite(Is)))
warning("diag(.) had 0 or NA entries; non-finite result is doubtful")
r <- V # keep dimnames
r[] <- Is * V * rep(Is, each = p)
## == D %*% V %*% D where D = diag(Is)
r[cbind(1L:p,1L:p)] <- 1 # exact in diagonal
r
}