|  | % File src/library/stats/man/xtabs.Rd | 
|  | % Part of the R package, https://www.R-project.org | 
|  | % Copyright 1995-2018 R Core Team | 
|  | % Distributed under GPL 2 or later | 
|  |  | 
|  | \name{xtabs} | 
|  | \alias{xtabs} | 
|  | \alias{print.xtabs} | 
|  | \title{Cross Tabulation} | 
|  | \description{ | 
|  | Create a contingency table (optionally a sparse matrix) from | 
|  | cross-classifying factors, usually contained in a data frame, | 
|  | using a formula interface. | 
|  | } | 
|  | \usage{ | 
|  | xtabs(formula = ~., data = parent.frame(), subset, sparse = FALSE, | 
|  | na.action, addNA = FALSE, exclude = if(!addNA) c(NA, NaN), | 
|  | drop.unused.levels = FALSE) | 
|  |  | 
|  | \method{print}{xtabs}(x, na.print = "", \dots) | 
|  | } | 
|  | \arguments{ | 
|  | \item{formula}{a \link{formula} object with the cross-classifying variables | 
|  | (separated by \code{+}) on the right hand side (or an object which | 
|  | can be coerced to a formula).  Interactions are not allowed.  On the | 
|  | left hand side, one may optionally give a vector or a matrix of | 
|  | counts; in the latter case, the columns are interpreted as | 
|  | corresponding to the levels of a variable.  This is useful if the | 
|  | data have already been tabulated, see the examples below.} | 
|  | \item{data}{an optional matrix or data frame (or similar: see | 
|  | \code{\link{model.frame}}) containing the variables in the | 
|  | formula \code{formula}.  By default the variables are taken from | 
|  | \code{environment(formula)}.} | 
|  | \item{subset}{an optional vector specifying a subset of observations | 
|  | to be used.} | 
|  | \item{sparse}{logical specifying if the result should be a | 
|  | \emph{sparse} matrix, i.e., inheriting from | 
|  | \code{\link[Matrix:sparseMatrix-class]{sparseMatrix}}.%\linkS4class{sparseMatrix} | 
|  | Only works for two factors (since there | 
|  | are no higher-order sparse array classes yet). | 
|  | } | 
|  | \item{na.action}{a function which indicates what should happen when | 
|  | the data contain \code{\link{NA}}s.  If unspecified, and | 
|  | \code{addNA} is true, this is set to \code{\link{na.pass}}.  When it | 
|  | is \code{na.pass} and \code{formula} has a left hand side (with | 
|  | counts), \code{\link{sum}(*, na.rm = TRUE)} is used instead of | 
|  | \code{sum(*)} for the counts.} | 
|  | \item{addNA}{logical indicating if \code{NA}s should get a separate | 
|  | level and be counted, using \code{\link{addNA}(*, ifany=TRUE)} and | 
|  | setting the default for \code{na.action}.} | 
|  | \item{exclude}{a vector of values to be excluded when forming the | 
|  | set of levels of the classifying factors.} | 
|  | \item{drop.unused.levels}{a logical indicating whether to drop unused | 
|  | levels in the classifying factors.  If this is \code{FALSE} and | 
|  | there are unused levels, the table will contain zero marginals, and | 
|  | a subsequent chi-squared test for independence of the factors will | 
|  | not work.} | 
|  |  | 
|  | \item{x}{an object of class \code{"xtabs"}.} | 
|  | \item{na.print}{character string (or \code{NULL}) indicating how | 
|  | \code{\link{NA}}s are printed.  The default (\code{""}) does not show | 
|  | \code{NA}s clearly, and \code{na.print = "NA"} may be advisable | 
|  | instead.} | 
|  | \item{\dots}{further arguments passed to or from other methods.} | 
|  | } | 
|  | \details{ | 
|  | There is a \code{summary} method for contingency table objects created | 
|  | by \code{table} or \code{xtabs(*, sparse = FALSE)}, which gives basic | 
|  | information and performs a chi-squared test for independence of | 
|  | factors (note that the function \code{\link{chisq.test}} currently | 
|  | only handles 2-d tables). | 
|  |  | 
|  | If a left hand side is given in \code{formula}, its entries are simply | 
|  | summed over the cells corresponding to the right hand side; this also | 
|  | works if the lhs does not give counts. | 
|  |  | 
|  | For variables in \code{formula} which are factors, \code{exclude} | 
|  | must be specified explicitly; the default exclusions will not be used. | 
|  |  | 
|  | In \R versions before 3.4.0, e.g., when \code{na.action = na.pass}, | 
|  | sometimes zeroes (\code{0}) were returned instead of \code{NA}s. | 
|  | } | 
|  | \value{ | 
|  | By default, when \code{sparse = FALSE}, | 
|  | a contingency table in array representation of S3 class \code{c("xtabs", | 
|  | "table")}, with a \code{"call"} attribute storing the matched call. | 
|  |  | 
|  | When \code{sparse = TRUE}, a sparse numeric matrix, specifically an | 
|  | object of S4 class %\linkS4class{dgTMatrix} | 
|  | \code{\link[Matrix:dgTMatrix-class]{dgTMatrix}} from package | 
|  | \CRANpkg{Matrix}. | 
|  | } | 
|  | \seealso{ | 
|  | \code{\link{table}} for traditional cross-tabulation, and | 
|  | \code{\link{as.data.frame.table}} which is the inverse operation of | 
|  | \code{xtabs} (see the \code{DF} example below). | 
|  |  | 
|  | \code{\link[Matrix:sparseMatrix-class]{sparseMatrix}} on sparse | 
|  | matrices in package \CRANpkg{Matrix}. | 
|  | } | 
|  | \examples{ | 
|  | ## 'esoph' has the frequencies of cases and controls for all levels of | 
|  | ## the variables 'agegp', 'alcgp', and 'tobgp'. | 
|  | xtabs(cbind(ncases, ncontrols) ~ ., data = esoph) | 
|  | ## Output is not really helpful ... flat tables are better: | 
|  | ftable(xtabs(cbind(ncases, ncontrols) ~ ., data = esoph)) | 
|  | ## In particular if we have fewer factors ... | 
|  | ftable(xtabs(cbind(ncases, ncontrols) ~ agegp, data = esoph)) | 
|  |  | 
|  | ## This is already a contingency table in array form. | 
|  | DF <- as.data.frame(UCBAdmissions) | 
|  | ## Now 'DF' is a data frame with a grid of the factors and the counts | 
|  | ## in variable 'Freq'. | 
|  | DF | 
|  | ## Nice for taking margins ... | 
|  | xtabs(Freq ~ Gender + Admit, DF) | 
|  | ## And for testing independence ... | 
|  | summary(xtabs(Freq ~ ., DF)) | 
|  |  | 
|  | ## with NA's | 
|  | DN <- DF; DN[cbind(6:9, c(1:2,4,1))] <- NA; DN | 
|  | tools::assertError(# 'na.fail' should fail : | 
|  | xtabs(Freq ~ Gender + Admit, DN, na.action=na.fail)) | 
|  | xtabs(Freq ~ Gender + Admit, DN) | 
|  | xtabs(Freq ~ Gender + Admit, DN, na.action = na.pass) | 
|  | ## The Female:Rejected combination has NA 'Freq' (and NA prints 'invisibly' as "") | 
|  | xtabs(Freq ~ Gender + Admit, DN, addNA = TRUE) # ==> count NAs | 
|  |  | 
|  | ## Create a nice display for the warp break data. | 
|  | warpbreaks$replicate <- rep_len(1:9, 54) | 
|  | ftable(xtabs(breaks ~ wool + tension + replicate, data = warpbreaks)) | 
|  |  | 
|  | ### ---- Sparse Examples ---- | 
|  |  | 
|  | \donttest{if(require("Matrix")) withAutoprint({ | 
|  | ## similar to "nlme"s  'ergoStool' : | 
|  | d.ergo <- data.frame(Type = paste0("T", rep(1:4, 9*4)), | 
|  | Subj = gl(9, 4, 36*4)) | 
|  | xtabs(~ Type + Subj, data = d.ergo) # 4 replicates each | 
|  | set.seed(15) # a subset of cases: | 
|  | xtabs(~ Type + Subj, data = d.ergo[sample(36, 10), ], sparse = TRUE) | 
|  |  | 
|  | ## Hypothetical two-level setup: | 
|  | inner <- factor(sample(letters[1:25], 100, replace = TRUE)) | 
|  | inout <- factor(sample(LETTERS[1:5], 25, replace = TRUE)) | 
|  | fr <- data.frame(inner = inner, outer = inout[as.integer(inner)]) | 
|  | xtabs(~ inner + outer, fr, sparse = TRUE) | 
|  | })}% only if Matrix is available | 
|  | } | 
|  | \keyword{category} |