blob: 663548e9c68263b0956fc6684efb07db0d9350ad [file] [log] [blame]
# File src/library/stats/R/prcomp.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2017 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
prcomp <- function (x, ...) UseMethod("prcomp")
prcomp.default <-
function(x, retx = TRUE, center = TRUE, scale. = FALSE, tol = NULL,
rank. = NULL, ...)
{
chkDots(...)
x <- as.matrix(x)
x <- scale(x, center = center, scale = scale.)
cen <- attr(x, "scaled:center")
sc <- attr(x, "scaled:scale")
if(any(sc == 0))
stop("cannot rescale a constant/zero column to unit variance")
n <- nrow(x)
p <- ncol(x)
k <- if(!is.null(rank.)) {
stopifnot(length(rank.) == 1, is.finite(rank.), as.integer(rank.) > 0)
min(as.integer(rank.), n, p)
## Note that La.svd() *still* needs a (n x p) and a (p x p) auxiliary
} else
min(n, p)
s <- svd(x, nu = 0, nv = k)
j <- seq_len(k)
s$d <- s$d / sqrt(max(1, n - 1))
if (!is.null(tol)) {
## we get rank at least one even for a 0 matrix.
rank <- sum(s$d > (s$d[1L]*tol))
if (rank < k) {
j <- seq_len(k <- rank)
s$v <- s$v[,j , drop = FALSE]
}
}
dimnames(s$v) <- list(colnames(x), paste0("PC", j))
r <- list(sdev = s$d, rotation = s$v,
center = if(is.null(cen)) FALSE else cen,
scale = if(is.null(sc)) FALSE else sc)
if (retx) r$x <- x %*% s$v
class(r) <- "prcomp"
r
}
prcomp.formula <- function (formula, data = NULL, subset, na.action, ...)
{
mt <- terms(formula, data = data)
if (attr(mt, "response") > 0L)
stop("response not allowed in formula")
cl <- match.call()
mf <- match.call(expand.dots = FALSE)
mf$... <- NULL
## need stats:: for non-standard evaluation
mf[[1L]] <- quote(stats::model.frame)
mf <- eval.parent(mf)
## this is not a `standard' model-fitting function,
## so no need to consider contrasts or levels
if (.check_vars_numeric(mf))
stop("PCA applies only to numerical variables")
na.act <- attr(mf, "na.action")
mt <- attr(mf, "terms")
attr(mt, "intercept") <- 0L
x <- model.matrix(mt, mf)
res <- prcomp.default(x, ...)
## fix up call to refer to the generic, but leave arg name as `formula'
cl[[1L]] <- as.name("prcomp")
res$call <- cl
if (!is.null(na.act)) {
res$na.action <- na.act
if (!is.null(sc <- res$x))
res$x <- napredict(na.act, sc)
}
res
}
plot.prcomp <- function(x, main = deparse(substitute(x)), ...)
screeplot.default(x, main = main, ...)
print.prcomp <- function(x, print.x = FALSE, ...) {
cat(sprintf("Standard deviations (1, .., p=%d):\n", length(x$sdev)))
print(x$sdev, ...)
d <- dim(x$rotation)
cat(sprintf("\nRotation (n x k) = (%d x %d):\n", d[1], d[2]))
print(x$rotation, ...)
if (print.x && length(x$x)) {
cat("\nRotated variables:\n")
print(x$x, ...)
}
invisible(x)
}
summary.prcomp <- function(object, ...)
{
chkDots(...)
vars <- object$sdev^2
vars <- vars/sum(vars)
importance <- rbind("Standard deviation" = object$sdev,
"Proportion of Variance" = round(vars, 5),
"Cumulative Proportion" = round(cumsum(vars), 5))
k <- ncol(object$rotation)
colnames(importance) <- c(colnames(object$rotation), rep("", length(vars) - k))
object$importance <- importance
class(object) <- "summary.prcomp"
object
}
print.summary.prcomp <-
function(x, digits = max(3L, getOption("digits") - 3L), ...)
{
dr <- dim(x$rotation); k <- dr[2]
p <- length(x$sdev)
if(k < p) {
cat(sprintf("Importance of first k=%d (out of %d) components:\n", k, p))
print(x$importance[, 1:k, drop=FALSE], digits = digits, ...)
} else {
cat("Importance of components:\n")
print(x$importance, digits = digits, ...)
}
invisible(x)
}
predict.prcomp <- function(object, newdata, ...)
{
chkDots(...)
if (missing(newdata)) {
if(!is.null(object$x)) return(object$x)
else stop("no scores are available: refit with 'retx=TRUE'")
}
if(length(dim(newdata)) != 2L)
stop("'newdata' must be a matrix or data frame")
nm <- rownames(object$rotation)
if(!is.null(nm)) {
if(!all(nm %in% colnames(newdata)))
stop("'newdata' does not have named columns matching one or more of the original columns")
newdata <- newdata[, nm, drop = FALSE]
} else {
if(NCOL(newdata) != NROW(object$rotation) )
stop("'newdata' does not have the correct number of columns")
}
## next line does as.matrix
scale(newdata, object$center, object$scale) %*% object$rotation
}
.check_vars_numeric <- function(mf)
{
## we need to test just the columns which are actually used.
mt <- attr(mf, "terms")
mterms <- attr(mt, "factors")
mterms <- rownames(mterms)[apply(mterms, 1L, function(x) any(x > 0L))]
any(sapply(mterms, function(x) is.factor(mf[,x]) || !is.numeric(mf[,x])))
}