% src/library/base/man/tapply.Rd - R - Git at Google

 % File src/library/base/man/tapply.Rd
 % Part of the R package, https://www.R-project.org
 % Copyright 1995-2019 R Core Team
 % Distributed under GPL 2 or later

 \name{tapply}
 \alias{tapply}
 \title{Apply a Function Over a Ragged Array}
 \description{
   Apply a function to each cell of a ragged array, that is to each
   (non-empty) group of values given by a unique combination of the
   levels of certain factors.
 }
 \usage{
 tapply(X, INDEX, FUN = NULL, \dots, default = NA, simplify = TRUE)
 }
 \arguments{
   \item{X}{an \R object for which a \code{\link{split}} method
     exists.  Typically vector-like, allowing subsetting with
     \code{\link{[}}.}
   \item{INDEX}{a \code{\link{list}} of one or more \code{\link{factor}}s,
     each of same length as \code{X}.  The elements are coerced to
     factors by \code{\link{as.factor}}.}
   \item{FUN}{a function (or name of a function) to be applied, or \code{NULL}.
     In the case of functions like \code{+}, \code{\%*\%}, etc.,
     the function name must be backquoted or quoted.  If \code{FUN} is
     \code{NULL}, \code{tapply} returns a vector which can be used to subscript
     the multi-way array \code{tapply} normally produces.}
   \item{\dots}{optional arguments to \code{FUN}: see the Note section.}
   \item{default}{(only in the case of simplification to an array) the
     value with which the array is initialized as
     \code{\link{array}(default, dim = ..)}.  Before \R 3.4.0, this
     was hard coded to \code{\link{array}()}'s default \code{NA}.  If it
     is \code{NA} (the default), the missing value of the answer type,
     e.g. \code{\link{NA_real_}}, is chosen (\code{\link{as.raw}(0)} for
     \code{"raw"}).  In a numerical case, it may be set, e.g., to
     \code{FUN(integer(0))}, i.e., in the case of \code{FUN = sum}, to
     \code{0} or \code{0L}.}
   \item{simplify}{logical; if \code{FALSE}, \code{tapply} always returns
     an array of mode \code{"list"}; in other words, a \code{\link{list}}
     with a \code{\link{dim}} attribute.  If \code{TRUE} (the default), then if
     \code{FUN} always returns a scalar, \code{tapply} returns an array
     with the mode of the scalar.}
 }

 \details{
   If \code{FUN} is not \code{NULL}, it is passed to
   \code{\link{match.fun}}, and hence it can be a function or a symbol or
   character string naming a function.
 }

 \value{
   When \code{FUN} is present, \code{tapply} calls \code{FUN} for each
   cell that has any data in it.  If \code{FUN} returns a single atomic
   value for each such cell (e.g., functions \code{mean} or \code{var})
   and when \code{simplify} is \code{TRUE}, \code{tapply} returns a
   multi-way \link{array} containing the values, and \code{NA} for the
   empty cells.  The array has the same number of dimensions as
   \code{INDEX} has components; the number of levels in a dimension is
   the number of levels (\code{nlevels()}) in the corresponding component
   of \code{INDEX}.  Note that if the return value has a class (e.g., an
   object of class \code{"\link{Date}"}) the class is discarded.

   \code{simplify = TRUE} always returns an array, possibly 1-dimensional.

   If \code{FUN} does not return a single atomic value, \code{tapply}
   returns an array of mode \code{\link{list}} whose components are the
   values of the individual calls to \code{FUN}, i.e., the result is a
   list with a \code{\link{dim}} attribute.

   When there is an array answer, its \code{\link{dimnames}} are named by
   the names of \code{INDEX} and are based on the levels of the grouping
   factors (possibly after coercion).

   For a list result, the elements corresponding to empty cells are
   \code{NULL}.
 }
 \note{
   Optional arguments to \code{FUN} supplied by the \code{...} argument
   are not divided into cells.  It is therefore inappropriate for
   \code{FUN} to expect additional arguments with the same length as
   \code{X}.
 }
 \references{
   Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
   \emph{The New S Language}.
   Wadsworth & Brooks/Cole.
 }
 \seealso{
   the convenience functions \code{\link{by}} and
   \code{\link{aggregate}} (using \code{tapply});
   \code{\link{apply}},
   \code{\link{lapply}} with its versions
   \code{\link{sapply}} and \code{\link{mapply}}.
 }
 \examples{
 require(stats)
 groups <- as.factor(rbinom(32, n = 5, prob = 0.4))
 tapply(groups, groups, length) #- is almost the same as
 table(groups)

 ## contingency table from data.frame : array with named dimnames
 tapply(warpbreaks$breaks, warpbreaks[,-1], sum)
 tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum)

 n <- 17; fac <- factor(rep_len(1:3, n), levels = 1:5)
 table(fac)
 tapply(1:n, fac, sum)
 tapply(1:n, fac, sum, default = 0) # maybe more desirable
 tapply(1:n, fac, sum, simplify = FALSE)
 tapply(1:n, fac, range)
 tapply(1:n, fac, quantile)
 tapply(1:n, fac, length) ## NA's
 tapply(1:n, fac, length, default = 0) # == table(fac)
 \dontshow{stopifnot(all.equal(
   unname(unclass(table(fac))),
   unname(        tapply(1:n, fac, length, default = 0))))}
 ## example of ... argument: find quarterly means
 tapply(presidents, cycle(presidents), mean, na.rm = TRUE)

 ind <- list(c(1, 2, 2), c("A", "A", "B"))
 table(ind)
 tapply(1:3, ind) #-> the split vector
 tapply(1:3, ind, sum)

 ## Some assertions (not held by all patch proposals):
 nq <- names(quantile(1:5))
 stopifnot(
   identical(tapply(1:3, ind), c(1L, 2L, 4L)),
   identical(tapply(1:3, ind, sum),
             matrix(c(1L, 2L, NA, 3L), 2, dimnames = list(c("1", "2"), c("A", "B")))),
   identical(tapply(1:n, fac, quantile)[-1],
             array(list(`2` = structure(c(2, 5.75, 9.5, 13.25, 17), .Names = nq),
                  `3` = structure(c(3, 6, 9, 12, 15), .Names = nq),
                  `4` = NULL, `5` = NULL), dim=4, dimnames=list(as.character(2:5)))))
 }
 \keyword{iteration}
 \keyword{category}
	% File src/library/base/man/tapply.Rd
	% Part of the R package, https://www.R-project.org
	% Copyright 1995-2019 R Core Team
	% Distributed under GPL 2 or later

	\name{tapply}
	\alias{tapply}
	\title{Apply a Function Over a Ragged Array}
	\description{
	Apply a function to each cell of a ragged array, that is to each
	(non-empty) group of values given by a unique combination of the
	levels of certain factors.
	}
	\usage{
	tapply(X, INDEX, FUN = NULL, \dots, default = NA, simplify = TRUE)
	}
	\arguments{
	\item{X}{an \R object for which a \code{\link{split}} method
	exists. Typically vector-like, allowing subsetting with
	\code{\link{[}}.}
	\item{INDEX}{a \code{\link{list}} of one or more \code{\link{factor}}s,
	each of same length as \code{X}. The elements are coerced to
	factors by \code{\link{as.factor}}.}
	\item{FUN}{a function (or name of a function) to be applied, or \code{NULL}.
	In the case of functions like \code{+}, \code{\%*\%}, etc.,
	the function name must be backquoted or quoted. If \code{FUN} is
	\code{NULL}, \code{tapply} returns a vector which can be used to subscript
	the multi-way array \code{tapply} normally produces.}
	\item{\dots}{optional arguments to \code{FUN}: see the Note section.}
	\item{default}{(only in the case of simplification to an array) the
	value with which the array is initialized as
	\code{\link{array}(default, dim = ..)}. Before \R 3.4.0, this
	was hard coded to \code{\link{array}()}'s default \code{NA}. If it
	is \code{NA} (the default), the missing value of the answer type,
	e.g. \code{\link{NA_real_}}, is chosen (\code{\link{as.raw}(0)} for
	\code{"raw"}). In a numerical case, it may be set, e.g., to
	\code{FUN(integer(0))}, i.e., in the case of \code{FUN = sum}, to
	\code{0} or \code{0L}.}
	\item{simplify}{logical; if \code{FALSE}, \code{tapply} always returns
	an array of mode \code{"list"}; in other words, a \code{\link{list}}
	with a \code{\link{dim}} attribute. If \code{TRUE} (the default), then if
	\code{FUN} always returns a scalar, \code{tapply} returns an array
	with the mode of the scalar.}
	}

	\details{
	If \code{FUN} is not \code{NULL}, it is passed to
	\code{\link{match.fun}}, and hence it can be a function or a symbol or
	character string naming a function.
	}

	\value{
	When \code{FUN} is present, \code{tapply} calls \code{FUN} for each
	cell that has any data in it. If \code{FUN} returns a single atomic
	value for each such cell (e.g., functions \code{mean} or \code{var})
	and when \code{simplify} is \code{TRUE}, \code{tapply} returns a
	multi-way \link{array} containing the values, and \code{NA} for the
	empty cells. The array has the same number of dimensions as
	\code{INDEX} has components; the number of levels in a dimension is
	the number of levels (\code{nlevels()}) in the corresponding component
	of \code{INDEX}. Note that if the return value has a class (e.g., an
	object of class \code{"\link{Date}"}) the class is discarded.

	\code{simplify = TRUE} always returns an array, possibly 1-dimensional.

	If \code{FUN} does not return a single atomic value, \code{tapply}
	returns an array of mode \code{\link{list}} whose components are the
	values of the individual calls to \code{FUN}, i.e., the result is a
	list with a \code{\link{dim}} attribute.

	When there is an array answer, its \code{\link{dimnames}} are named by
	the names of \code{INDEX} and are based on the levels of the grouping
	factors (possibly after coercion).

	For a list result, the elements corresponding to empty cells are
	\code{NULL}.
	}
	\note{
	Optional arguments to \code{FUN} supplied by the \code{...} argument
	are not divided into cells. It is therefore inappropriate for
	\code{FUN} to expect additional arguments with the same length as
	\code{X}.
	}
	\references{
	Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
	\emph{The New S Language}.
	Wadsworth & Brooks/Cole.
	}
	\seealso{
	the convenience functions \code{\link{by}} and
	\code{\link{aggregate}} (using \code{tapply});
	\code{\link{apply}},
	\code{\link{lapply}} with its versions
	\code{\link{sapply}} and \code{\link{mapply}}.
	}
	\examples{
	require(stats)
	groups <- as.factor(rbinom(32, n = 5, prob = 0.4))
	tapply(groups, groups, length) #- is almost the same as
	table(groups)

	## contingency table from data.frame : array with named dimnames
	tapply(warpbreaks$breaks, warpbreaks[,-1], sum)
	tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum)

	n <- 17; fac <- factor(rep_len(1:3, n), levels = 1:5)
	table(fac)
	tapply(1:n, fac, sum)
	tapply(1:n, fac, sum, default = 0) # maybe more desirable
	tapply(1:n, fac, sum, simplify = FALSE)
	tapply(1:n, fac, range)
	tapply(1:n, fac, quantile)
	tapply(1:n, fac, length) ## NA's
	tapply(1:n, fac, length, default = 0) # == table(fac)
	\dontshow{stopifnot(all.equal(
	unname(unclass(table(fac))),
	unname( tapply(1:n, fac, length, default = 0))))}
	## example of ... argument: find quarterly means
	tapply(presidents, cycle(presidents), mean, na.rm = TRUE)

	ind <- list(c(1, 2, 2), c("A", "A", "B"))
	table(ind)
	tapply(1:3, ind) #-> the split vector
	tapply(1:3, ind, sum)

	## Some assertions (not held by all patch proposals):
	nq <- names(quantile(1:5))
	stopifnot(
	identical(tapply(1:3, ind), c(1L, 2L, 4L)),
	identical(tapply(1:3, ind, sum),
	matrix(c(1L, 2L, NA, 3L), 2, dimnames = list(c("1", "2"), c("A", "B")))),
	identical(tapply(1:n, fac, quantile)[-1],
	array(list(`2` = structure(c(2, 5.75, 9.5, 13.25, 17), .Names = nq),
	`3` = structure(c(3, 6, 9, 12, 15), .Names = nq),
	`4` = NULL, `5` = NULL), dim=4, dimnames=list(as.character(2:5)))))
	}
	\keyword{iteration}
	\keyword{category}