% blob: 4c5cb1a8b90e82a4530403fb7db712cb4e4cb679 [file] [log] [blame]
% File src/library/base/man/tapply.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2019 R Core Team
% Distributed under GPL 2 or later
\name{tapply}
\alias{tapply}
\title{Apply a Function Over a Ragged Array}
\description{
Apply a function to each cell of a ragged array, that is to each
(non-empty) group of values given by a unique combination of the
levels of certain factors.
}
\usage{
tapply(X, INDEX, FUN = NULL, \dots, default = NA, simplify = TRUE)
}
\arguments{
\item{X}{an \R object for which a \code{\link{split}} method
exists. Typically vector-like, allowing subsetting with
\code{\link{[}}.}
\item{INDEX}{a \code{\link{list}} of one or more \code{\link{factor}}s,
each of the same length as \code{X}. The elements are coerced to
factors by \code{\link{as.factor}}.}
\item{FUN}{a function (or name of a function) to be applied, or \code{NULL}.
In the case of functions like \code{+}, \code{\%*\%}, etc.,
the function name must be backquoted or quoted. If \code{FUN} is
\code{NULL}, \code{tapply} returns a vector which can be used to subscript
the multi-way array \code{tapply} normally produces.}
\item{\dots}{optional arguments to \code{FUN}: see the Note section.}
\item{default}{(only in the case of simplification to an array) the
value with which the array is initialized as
\code{\link{array}(default, dim = ..)}. Before \R 3.4.0, this
was hard coded to \code{\link{array}()}'s default \code{NA}. If it
is \code{NA} (the default), the missing value of the answer type,
e.g. \code{\link{NA_real_}}, is chosen (\code{\link{as.raw}(0)} for
\code{"raw"}). In a numerical case, it may be set, e.g., to
\code{FUN(integer(0))}, e.g., in the case of \code{FUN = sum} to
\code{0} or \code{0L}.}
\item{simplify}{logical; if \code{FALSE}, \code{tapply} always returns
an array of mode \code{"list"}; in other words, a \code{\link{list}}
with a \code{\link{dim}} attribute. If \code{TRUE} (the default), then if
\code{FUN} always returns a scalar, \code{tapply} returns an array
with the mode of the scalar.}
}
\details{
If \code{FUN} is not \code{NULL}, it is passed to
\code{\link{match.fun}}, and hence it can be a function or a symbol or
character string naming a function.
}
\value{
When \code{FUN} is present, \code{tapply} calls \code{FUN} for each
cell that has any data in it. If \code{FUN} returns a single atomic
value for each such cell (e.g., functions \code{mean} or \code{var})
and when \code{simplify} is \code{TRUE}, \code{tapply} returns a
multi-way \link{array} containing the values, and \code{default}
(\code{NA} unless specified otherwise) for the empty cells. The array
has the same number of dimensions as
\code{INDEX} has components; the number of levels in a dimension is
the number of levels (\code{nlevels()}) in the corresponding component
of \code{INDEX}. Note that if the return value has a class (e.g., an
object of class \code{"\link{Date}"}) the class is discarded.
\code{simplify = TRUE} always returns an array, possibly 1-dimensional.
If \code{FUN} does not return a single atomic value, \code{tapply}
returns an array of mode \code{\link{list}} whose components are the
values of the individual calls to \code{FUN}, i.e., the result is a
list with a \code{\link{dim}} attribute.
When there is an array answer, its \code{\link{dimnames}} are named by
the names of \code{INDEX} and are based on the levels of the grouping
factors (possibly after coercion).
For a list result, the elements corresponding to empty cells are
\code{NULL}.
}
\note{
Optional arguments to \code{FUN} supplied by the \code{\dots} argument
are not divided into cells. It is therefore inappropriate for
\code{FUN} to expect additional arguments with the same length as
\code{X}.
}
\references{
Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
\emph{The New S Language}.
Wadsworth & Brooks/Cole.
}
\seealso{
the convenience functions \code{\link{by}} and
\code{\link{aggregate}} (using \code{tapply});
\code{\link{apply}},
\code{\link{lapply}} with its versions
\code{\link{sapply}} and \code{\link{mapply}}.
}
\examples{
## Examples for tapply(): group-wise summaries over one or more factors.
require(stats)
## five random draws from Binomial(size = 32, prob = 0.4), used as a factor
groups <- as.factor(rbinom(32, n = 5, prob = 0.4))
tapply(groups, groups, length) #- is almost the same as
table(groups)
## contingency table from data.frame : array with named dimnames
tapply(warpbreaks$breaks, warpbreaks[,-1], sum)
tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum)
## factor with levels 1:5 of which only 1:3 occur, so cells 4 and 5 are empty
n <- 17; fac <- factor(rep_len(1:3, n), levels = 1:5)
table(fac)
tapply(1:n, fac, sum)
tapply(1:n, fac, sum, default = 0) # maybe more desirable
tapply(1:n, fac, sum, simplify = FALSE)
tapply(1:n, fac, range)
tapply(1:n, fac, quantile)
tapply(1:n, fac, length) ## NA's
tapply(1:n, fac, length, default = 0) # == table(fac)
\dontshow{stopifnot(all.equal(
unname(unclass(table(fac))),
unname( tapply(1:n, fac, length, default = 0))))}
## example of ... argument: find quarterly means
tapply(presidents, cycle(presidents), mean, na.rm = TRUE)
## INDEX given as a list of two grouping vectors
ind <- list(c(1, 2, 2), c("A", "A", "B"))
table(ind)
tapply(1:3, ind) #-> the split vector
tapply(1:3, ind, sum)
## Some assertions (not held by all patch proposals):
nq <- names(quantile(1:5))
stopifnot(
identical(tapply(1:3, ind), c(1L, 2L, 4L)),
identical(tapply(1:3, ind, sum),
matrix(c(1L, 2L, NA, 3L), 2, dimnames = list(c("1", "2"), c("A", "B")))),
identical(tapply(1:n, fac, quantile)[-1],
array(list(`2` = structure(c(2, 5.75, 9.5, 13.25, 17), .Names = nq),
`3` = structure(c(3, 6, 9, 12, 15), .Names = nq),
`4` = NULL, `5` = NULL), dim=4, dimnames=list(as.character(2:5)))))
}
\keyword{iteration}
\keyword{category}