| % File src/library/stats/man/princomp.Rd |
| % Part of the R package, https://www.R-project.org |
| % Copyright 1995-2018 R Core Team |
| % Distributed under GPL 2 or later |
| |
| \name{princomp} |
| \alias{princomp} |
| \alias{princomp.formula} |
| \alias{princomp.default} |
| \alias{plot.princomp} |
| \alias{print.princomp} |
| \alias{predict.princomp} |
| \title{Principal Components Analysis} |
| \concept{PCA} |
| \usage{ |
| princomp(x, \dots) |
| |
| \method{princomp}{formula}(formula, data = NULL, subset, na.action, \dots) |
| |
| \method{princomp}{default}(x, cor = FALSE, scores = TRUE, covmat = NULL, |
| subset = rep_len(TRUE, nrow(as.matrix(x))), fix_sign = TRUE, \dots) |
| |
| \method{predict}{princomp}(object, newdata, \dots) |
| } |
| \arguments{ |
| \item{formula}{a formula with no response variable, referring only to |
| numeric variables.} |
| \item{data}{an optional data frame (or similar: see |
| \code{\link{model.frame}}) containing the variables in the |
| formula \code{formula}. By default the variables are taken from |
| \code{environment(formula)}.} |
| \item{subset}{an optional vector used to select rows (observations) of the |
| data matrix \code{x}.} |
| \item{na.action}{a function which indicates what should happen |
| when the data contain \code{NA}s. The default is set by |
| the \code{na.action} setting of \code{\link{options}}, and is |
| \code{\link{na.fail}} if that is unset. The \sQuote{factory-fresh} |
| default is \code{\link{na.omit}}.} |
| \item{x}{a numeric matrix or data frame which provides the data for the |
| principal components analysis.} |
| \item{cor}{a logical value indicating whether the calculation should |
| use the correlation matrix or the covariance matrix. (The |
| correlation matrix can only be used if there are no constant variables.)} |
| \item{scores}{a logical value indicating whether the score on each |
| principal component should be calculated.} |
| \item{covmat}{a covariance matrix, or a covariance list as returned by |
| \code{\link{cov.wt}} (and \code{\link[MASS:cov.rob]{cov.mve}} or |
| \code{\link[MASS:cov.rob]{cov.mcd}} from package \CRANpkg{MASS}). |
| If supplied, this is used rather than the covariance matrix of |
| \code{x}.} |
| \item{fix_sign}{Should the signs of the loadings and scores be chosen |
| so that the first element of each loading is non-negative?} |
| \item{\dots}{arguments passed to or from other methods. If \code{x} is |
| a formula one might specify \code{cor} or \code{scores}.} |
| \item{object}{Object of class inheriting from \code{"princomp"}.} |
| \item{newdata}{An optional data frame or matrix in which to look for |
| variables with which to predict. If omitted, the scores are used. |
| If the original fit used a formula or a data frame or a matrix with |
| column names, \code{newdata} must contain columns with the same |
| names. Otherwise it must contain the same number of columns, to be |
| used in the same order. |
| } |
| } |
| \description{ |
| \code{princomp} performs a principal components analysis on the given |
| numeric data matrix and returns the results as an object of class |
| \code{princomp}. |
| } |
| \value{ |
| \code{princomp} returns a list with class \code{"princomp"} |
| containing the following components: |
| \item{sdev}{the standard deviations of the principal components.} |
| \item{loadings}{the matrix of variable loadings (i.e., a matrix |
| whose columns contain the eigenvectors). This is of class |
| \code{"loadings"}: see \code{\link{loadings}} for its \code{print} |
| method.} |
| \item{center}{the means that were subtracted.} |
| \item{scale}{the scalings applied to each variable.} |
| \item{n.obs}{the number of observations.} |
| \item{scores}{if \code{scores = TRUE}, the scores of the supplied |
| data on the principal components. These are non-null only if |
| \code{x} was supplied and, when \code{covmat} was also supplied, only |
| if \code{covmat} was a covariance list. For the formula method, |
| \code{\link{napredict}()} is applied to handle the treatment of |
| values omitted by the \code{na.action}.} |
| \item{call}{the matched call.} |
| \item{na.action}{if relevant, information on the handling of |
| \code{NA}s by the \code{na.action} of the formula method.} |
| } |
| \details{ |
| \code{princomp} is a generic function with \code{"formula"} and |
| \code{"default"} methods. |
| |
| The calculation is done using \code{\link{eigen}} on the correlation or |
| covariance matrix, as determined by the \code{cor} argument. This is |
| done for compatibility with the S-PLUS result. A preferred method of |
| calculation is to use \code{\link{svd}} on \code{x}, as is done in |
| \code{prcomp}. |
| |
| Note that the default calculation uses divisor \code{N} (the number of |
| observations) for the covariance matrix, rather than the divisor |
| \code{N - 1} used by \code{\link{cov}}. |
| |
| The \code{\link{print}} method for these objects prints the |
| results in a nice format and the \code{\link{plot}} method produces |
| a scree plot (\code{\link{screeplot}}). There is also a |
| \code{\link{biplot}} method. |
| |
| If \code{x} is a formula then the standard NA-handling is applied to |
| the scores (if requested): see \code{\link{napredict}}. |
| |
| \code{princomp} only handles so-called R-mode PCA, that is feature |
| extraction of variables. If a data matrix is supplied (possibly via a |
| formula) it is required that there are at least as many units as |
| variables. For Q-mode PCA use \code{\link{prcomp}}. |
| } |
| \note{ |
| The signs of the columns of the loadings and scores are arbitrary, and |
| so may differ between different programs for PCA, and even between |
| different builds of \R: \code{fix_sign = TRUE} alleviates that. |
| } |
| \references{ |
| Mardia, K. V., J. T. Kent and J. M. Bibby (1979). |
| \emph{Multivariate Analysis}, London: Academic Press. |
| |
| Venables, W. N. and B. D. Ripley (2002). |
| \emph{Modern Applied Statistics with S}, Springer-Verlag. |
| } |
| \seealso{ |
| \code{\link{summary.princomp}}, \code{\link{screeplot}}, |
| \code{\link{biplot.princomp}}, |
| \code{\link{prcomp}}, \code{\link{cor}}, \code{\link{cov}}, |
| \code{\link{eigen}}. |
| } |
| \examples{ |
| require(graphics) |
| |
| ## The variances of the variables in the |
| ## USArrests data vary by orders of magnitude, so scaling is appropriate |
| (pc.cr <- princomp(USArrests)) # inappropriate |
| princomp(USArrests, cor = TRUE) # =^= prcomp(USArrests, scale. = TRUE) |
| ## Similar, but different: |
| ## The standard deviations differ by a factor of sqrt(49/50) |
| |
| summary(pc.cr <- princomp(USArrests, cor = TRUE)) |
| loadings(pc.cr) # note that blank entries are small but not zero |
| ## The signs of the columns of the loadings are arbitrary |
| plot(pc.cr) # shows a screeplot. |
| biplot(pc.cr) |
| |
| ## Formula interface |
| princomp(~ ., data = USArrests, cor = TRUE) |
| |
| ## NA-handling |
| USArrests[1, 2] <- NA |
| pc.cr <- princomp(~ Murder + Assault + UrbanPop, |
| data = USArrests, na.action = na.exclude, cor = TRUE) |
| \donttest{pc.cr$scores[1:5, ]} |
| |
| ## (Simple) Robust PCA: |
| ## Classical: |
| (pc.cl <- princomp(stackloss)) |
| \donttest{## Robust: |
| (pc.rob <- princomp(stackloss, covmat = MASS::cov.rob(stackloss))) |
| }} |
| \keyword{multivariate} |