| % File src/library/base/man/split.Rd |
| % Part of the R package, https://www.R-project.org |
| % Copyright 1995-2016 R Core Team |
| % Distributed under GPL 2 or later |
| |
| \name{split} |
| \title{Divide into Groups and Reassemble} |
| \alias{split} |
| \alias{split.default} |
| \alias{split.data.frame} |
| \alias{split<-} |
| \alias{split<-.default} |
| \alias{split<-.data.frame} |
| \alias{unsplit} |
| \description{ |
| \code{split} divides the data in the vector \code{x} into the groups |
| defined by \code{f}. The replacement forms replace values |
| corresponding to such a division. \code{unsplit} reverses the effect of |
| \code{split}. |
| } |
| \usage{ |
| split(x, f, drop = FALSE, \dots) |
| \method{split}{default}(x, f, drop = FALSE, sep = ".", lex.order = FALSE, \dots) |
| |
| split(x, f, drop = FALSE, \dots) <- value |
| unsplit(value, f, drop = FALSE) |
| } |
| \arguments{ |
| \item{x}{vector or data frame containing values to be divided into groups.} |
| \item{f}{a \sQuote{factor} in the sense that \code{\link{as.factor}(f)} |
| defines the grouping, or a list of such factors in which case their |
| interaction is used for the grouping.} |
| \item{drop}{logical indicating if levels that do not occur should be dropped |
| (if \code{f} is a \code{factor} or a list).} |
| \item{value}{a list of vectors or data frames compatible with a |
| splitting of \code{x}. Recycling applies if the lengths do not match.} |
| \item{sep}{character string, passed to \code{\link{interaction}} in the |
| case where \code{f} is a \code{\link{list}}.} |
| \item{lex.order}{logical, passed to \code{\link{interaction}} when |
| \code{f} is a list.} |
| \item{\dots}{further potential arguments passed to methods.} |
| } |
| \details{ |
| \code{split} and \code{split<-} are generic functions with default and |
| \code{data.frame} methods. The data frame method can also be used to |
| split a matrix into a list of matrices, and the replacement form |
| likewise, provided they are invoked explicitly. |
| |
| \code{unsplit} works with lists of vectors or data frames (assumed to |
| have compatible structure, as if created by \code{split}). It puts |
| elements or rows back in the positions given by \code{f}. In the data |
| frame case, row names are obtained by unsplitting the row name |
| vectors from the elements of \code{value}. |
| |
| \code{f} is recycled as necessary, and if the length of \code{x} is not |
| a multiple of the length of \code{f}, a warning is printed. |
| |
| Any missing values in \code{f} are dropped together with the |
| corresponding values of \code{x}. |
| |
| The default method calls \code{\link{interaction}} when \code{f} is a |
| \code{\link{list}}. If the levels of the factors contain \samp{.} |
| the factors may not be split as expected, unless \code{sep} is set to |
| a string not present in the factor \code{\link{levels}}. |
| } |
| \value{ |
| The value returned from \code{split} is a list of vectors containing |
| the values for the groups. The components of the list are named by |
| the levels of \code{f} (after converting to a factor, or if already a |
| factor and \code{drop = TRUE}, dropping unused levels). |
| |
| The replacement forms return their right-hand side. \code{unsplit} |
| returns a vector or data frame for which \code{split(x, f)} equals |
| \code{value}. |
| |
| } |
| \seealso{ |
| \code{\link{cut}} to categorize numeric values. |
| |
| \code{\link{strsplit}} to split strings. |
| } |
| \references{ |
| Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) |
| \emph{The New S Language}. |
| Wadsworth & Brooks/Cole. |
| } |
| \examples{ |
| require(stats); require(graphics) |
| n <- 10; nn <- 100 |
| g <- factor(round(n * runif(n * nn))) |
| x <- rnorm(n * nn) + sqrt(as.numeric(g)) |
| xg <- split(x, g) |
| boxplot(xg, col = "lavender", notch = TRUE, varwidth = TRUE) |
| sapply(xg, length) |
| sapply(xg, mean) |
| |
| ### Calculate 'z-scores' by group (standardize to mean zero, variance one) |
| z <- unsplit(lapply(split(x, g), scale), g) |
| |
| # or, equivalently, using the replacement form on a copy of x |
| |
| zz <- x |
| split(zz, g) <- lapply(split(x, g), scale) |
| |
| # and check that the within-group std dev is indeed one |
| tapply(z, g, sd) |
| tapply(zz, g, sd) |
| |
| |
| ### Data frame variation: split the rows of airquality by Month |
| |
| ## Notice that the assignment form is not used since a variable is being added |
| |
| g <- airquality$Month |
| l <- split(airquality, g) |
| l <- lapply(l, transform, Oz.Z = scale(Ozone)) |
| aq2 <- unsplit(l, g) |
| head(aq2) |
| with(aq2, tapply(Oz.Z, Month, sd, na.rm = TRUE)) |
| |
| |
| ### Split a matrix into a list by columns (col(ma) holds the column indices) |
| ma <- cbind(x = 1:10, y = (-4:5)^2) |
| split(ma, col(ma)) |
| |
| split(1:10, 1:2) |
| } |
| \keyword{category} |