blob: 68b88975e187ea088b29491041955a96ae5c44a6 [file] [log] [blame]
% File src/library/base/man/split.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2016 R Core Team
% Distributed under GPL 2 or later
\name{split}
\title{Divide into Groups and Reassemble}
\alias{split}
\alias{split.default}
\alias{split.data.frame}
\alias{split<-}
\alias{split<-.default}
\alias{split<-.data.frame}
\alias{unsplit}
\description{
\code{split} divides the data in the vector \code{x} into the groups
defined by \code{f}. The replacement forms replace values
corresponding to such a division. \code{unsplit} reverses the effect of
\code{split}.
}
\usage{
split(x, f, drop = FALSE, \dots)
\method{split}{default}(x, f, drop = FALSE, sep = ".", lex.order = FALSE, \dots)
split(x, f, drop = FALSE, \dots) <- value
unsplit(value, f, drop = FALSE)
}
\arguments{
\item{x}{vector or data frame containing values to be divided into groups.}
\item{f}{a \sQuote{factor} in the sense that \code{\link{as.factor}(f)}
defines the grouping, or a list of such factors in which case their
interaction is used for the grouping.}
\item{drop}{logical indicating if levels that do not occur should be dropped
(if \code{f} is a \code{factor} or a list).}
\item{value}{a list of vectors or data frames compatible with a
splitting of \code{x}. Recycling applies if the lengths do not match.}
\item{sep}{character string, passed to \code{\link{interaction}} in the
case where \code{f} is a \code{\link{list}}.}
\item{lex.order}{logical, passed to \code{\link{interaction}} when
\code{f} is a list.}
\item{\dots}{further potential arguments passed to methods.}
}
\details{
\code{split} and \code{split<-} are generic functions with default and
\code{data.frame} methods. The data frame method can also be used to
split a matrix into a list of matrices, and the replacement form
likewise, provided they are invoked explicitly.
\code{unsplit} works with lists of vectors or data frames (assumed to
have compatible structure, as if created by \code{split}). It puts
elements or rows back in the positions given by \code{f}. In the data
frame case, row names are obtained by unsplitting the row name
vectors from the elements of \code{value}.
\code{f} is recycled as necessary, and if the length of \code{x} is not
a multiple of the length of \code{f}, a warning is printed.
Any missing values in \code{f} are dropped together with the
corresponding values of \code{x}.
The default method calls \code{\link{interaction}} when \code{f} is a
\code{\link{list}}. If the levels of the factors contain \samp{.}
the factors may not be split as expected, unless \code{sep} is set to
a string not present in the factor \code{\link{levels}}.
}
\value{
The value returned from \code{split} is a list of vectors containing
the values for the groups. The components of the list are named by
the levels of \code{f} (after converting to a factor, or if already a
factor and \code{drop = TRUE}, dropping unused levels).
The replacement forms return their right-hand side. \code{unsplit}
returns a vector or data frame for which \code{split(x, f)} equals
\code{value}.
}
\seealso{
\code{\link{cut}} to categorize numeric values.
\code{\link{strsplit}} to split strings.
}
\references{
Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
\emph{The New S Language}.
Wadsworth & Brooks/Cole.
}
\examples{
require(stats); require(graphics)
n <- 10; nn <- 100
g <- factor(round(n * runif(n * nn)))
x <- rnorm(n * nn) + sqrt(as.numeric(g))
xg <- split(x, g)
boxplot(xg, col = "lavender", notch = TRUE, varwidth = TRUE)
sapply(xg, length)
sapply(xg, mean)
### Calculate 'z-scores' by group (standardize to mean zero, variance one)
z <- unsplit(lapply(split(x, g), scale), g)
# or
zz <- x
split(zz, g) <- lapply(split(x, g), scale)
# and check that the within-group std dev is indeed one
tapply(z, g, sd)
tapply(zz, g, sd)
### data frame variation
## Notice that assignment form is not used since a variable is being added
g <- airquality$Month
l <- split(airquality, g)
l <- lapply(l, transform, Oz.Z = scale(Ozone))
aq2 <- unsplit(l, g)
head(aq2)
with(aq2, tapply(Oz.Z, Month, sd, na.rm = TRUE))
### Split a matrix into a list by columns
ma <- cbind(x = 1:10, y = (-4:5)^2)
split(ma, col(ma))
split(1:10, 1:2)
}
\keyword{category}