blob: c618430619bd0ae700b9bcc08d14b40a6ff58363 [file] [log] [blame]
# File src/library/base/R/table.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2017 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
table <- function (..., exclude = if (useNA=="no") c(NA, NaN),
useNA = c("no", "ifany", "always"),
dnn = list.names(...), deparse.level = 1)
{
list.names <- function(...) {
l <- as.list(substitute(list(...)))[-1L]
nm <- names(l)
fixup <- if (is.null(nm)) seq_along(l) else nm == ""
dep <- vapply(l[fixup], function(x)
switch(deparse.level + 1,
"", ## 0
if (is.symbol(x)) as.character(x) else "", ## 1
deparse(x, nlines=1)[1L] ## 2
),
"")
if (is.null(nm))
dep
else {
nm[fixup] <- dep
nm
}
}
miss.use <- missing(useNA)
miss.exc <- missing(exclude)
## useNA <- if (!miss.exc && is.null(exclude)) "always" (2.8.0 <= R <= 3.3.1)
useNA <- if (miss.use && !miss.exc &&
!match(NA, exclude, nomatch=0L)) "ifany"
else match.arg(useNA)
doNA <- useNA != "no"
if(!miss.use && !miss.exc && doNA && match(NA, exclude, nomatch=0L))
warning("'exclude' containing NA and 'useNA' != \"no\"' are a bit contradicting")
args <- list(...)
if (!length(args))
stop("nothing to tabulate")
if (length(args) == 1L && is.list(args[[1L]])) { ## e.g. a data.frame
args <- args[[1L]]
if (length(dnn) != length(args))
dnn <- if (!is.null(argn <- names(args))) argn
else paste(dnn[1L], seq_along(args), sep = ".")
}
# 0L, 1L, etc: keep 'bin' and 'pd' integer - as long as tabulate() requires it
bin <- 0L
lens <- NULL
dims <- integer()
pd <- 1L
dn <- NULL
for (a in args) {
if (is.null(lens)) lens <- length(a)
else if (length(a) != lens)
stop("all arguments must have the same length")
fact.a <- is.factor(a)
## The logic here is tricky in order to be sensible if
## both 'exclude' and 'useNA' are set.
##
if(doNA) aNA <- anyNA(a) # *before* the following
if(!fact.a) { ## factor(*, exclude=*) may generate NA levels where there were none!
a0 <- a
## A non-null setting of 'exclude' sets the
## excluded levels to missing, which is different
## from the <NA> factor level, but these
## excluded levels must NOT EVER be tabulated.
a <- # NB: this excludes first, unlike the is.factor() case
factor(a, exclude = exclude)
}
## if(doNA)
## a <- addNA(a, ifany = (useNA == "ifany"))
## Instead, do the addNA() manually and remember *if* we did :
add.na <- doNA
if(add.na) {
ifany <- (useNA == "ifany") # FALSE when "always"
anNAc <- anyNA(a) # sometimes, but not always == aNA above
add.na <- if (!ifany || anNAc) {
ll <- levels(a)
if(add.ll <- !anyNA(ll)) {
ll <- c(ll, NA)
## FIXME? can we call a <- factor(a, ...)
## only here,and be done?
TRUE
}
else if (!ifany && !anNAc)
FALSE
else
TRUE
}
else
FALSE
} # else remains FALSE
if(add.na) ## complete the "manual" addNA():
a <- factor(a, levels = ll, exclude = NULL)
else
ll <- levels(a)
a <- as.integer(a)
if (fact.a && !miss.exc) { ## remove excluded levels
ll <- ll[keep <- which(match(ll, exclude, nomatch=0L) == 0L)]
a <- match(a, keep)
} else if(!fact.a && add.na) {
## remove NA level if it was added only for excluded in factor(a, exclude=.)
## set those a[] to NA which correspond to excluded values,
## but not those which correspond to NA-levels:
## if(doNA) they must be counted, possibly as 0, e.g.,
## for table(1:3, exclude = 1) #-> useNA = "ifany"
## or table(1:3, exclude = 1, useNA = "always")
if(ifany && !aNA && add.ll) { # rm the NA-level again (why did we add it?)
ll <- ll[!is.na(ll)]
is.na(a) <- match(a0, c(exclude,NA), nomatch=0L) > 0L
} else { # e.g. !ifany : useNA == "always"
is.na(a) <- match(a0, exclude, nomatch=0L) > 0L
}
}
nl <- length(ll)
dims <- c(dims, nl)
if (prod(dims) > .Machine$integer.max)
stop("attempt to make a table with >= 2^31 elements")
dn <- c(dn, list(ll))
## requiring all(unique(a) == 1:nl) :
bin <- bin + pd * (a - 1L)
pd <- pd * nl
}
names(dn) <- dnn
bin <- bin[!is.na(bin)]
if (length(bin)) bin <- bin + 1L # otherwise, that makes bin NA
y <- array(tabulate(bin, pd), dims, dimnames = dn)
class(y) <- "table"
y
}
## NB: NA in dimnames should be printed.
print.table <-
function (x, digits = getOption("digits"), quote = FALSE, na.print = "",
zero.print = "0",
## Numbers get right-justified by format(), irrespective of 'justify';
## need to keep column headers aligned:
right = is.numeric(x) || is.complex(x),
justify = "none", ...)
{
## tables with empty extents have no contents and are hard to
## output in a readable way, so just say something descriptive and
## return.
d <- dim(x)
if (any(d == 0)) {
cat ("< table of extent", paste(d, collapse=" x "), ">\n")
return ( invisible(x) )
}
xx <- format(unclass(x), digits = digits, justify = justify)
## na.print handled here
if(any(ina <- is.na(x)))
xx[ina] <- na.print
if(zero.print != "0" && any(i0 <- !ina & x == 0))
## MM thinks this should be an option for many more print methods...
xx[i0] <- zero.print ## keep it simple; was sub(..., xx[i0])
print(xx, quote = quote, right = right, ...)
invisible(x)
}
summary.table <- function(object, ...)
{
if(!inherits(object, "table"))
stop(gettextf("'object' must inherit from class %s",
dQuote("table")),
domain = NA)
n.cases <- sum(object)
n.vars <- length(dim(object))
y <- list(n.vars = n.vars,
n.cases = n.cases)
if(n.vars > 1) {
m <- vector("list", length = n.vars)
relFreqs <- object / n.cases
for(k in 1L:n.vars)
m[[k]] <- apply(relFreqs, k, sum)
expected <- apply(do.call("expand.grid", m), 1L, prod) * n.cases
statistic <- sum((c(object) - expected)^2 / expected)
lm <- lengths(m)
parameter <- prod(lm) - 1L - sum(lm - 1L)
y <- c(y, list(statistic = statistic,
parameter = parameter,
approx.ok = all(expected >= 5),
p.value = stats::pchisq(statistic, parameter, lower.tail=FALSE),
call = attr(object, "call")))
}
class(y) <- "summary.table"
y
}
print.summary.table <-
function(x, digits = max(1L, getOption("digits") - 3L), ...)
{
if(!inherits(x, "summary.table"))
stop(gettextf("'x' must inherit from class %s",
dQuote("summary.table")),
domain = NA)
if(!is.null(x$call)) {
cat("Call: "); print(x$call)
}
cat("Number of cases in table:", x$n.cases, "\n")
cat("Number of factors:", x$n.vars, "\n")
if(x$n.vars > 1) {
cat("Test for independence of all factors:\n")
ch <- x$statistic
cat("\tChisq = ", format(round(ch, max(0, digits - log10(ch)))),
", df = ", x$parameter,
", p-value = ", format.pval(x$p.value, digits, eps = 0),
"\n", sep = "")
if(!x$approx.ok)
cat("\tChi-squared approximation may be incorrect\n")
}
invisible(x)
}
as.data.frame.table <-
function(x, row.names = NULL, ..., responseName = "Freq",
stringsAsFactors = TRUE, sep="", base = list(LETTERS))
{
ex <- quote(data.frame(do.call("expand.grid",
c(dimnames(provideDimnames(x, sep=sep, base=base)),
KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = stringsAsFactors)),
Freq = c(x),
row.names = row.names))
names(ex)[3L] <- responseName
eval(ex)
}
is.table <- function(x) inherits(x, "table")
as.table <- function(x, ...) UseMethod("as.table")
as.table.default <- function(x, ...)
{
if(is.table(x)) return(x)
else if(is.array(x) || is.numeric(x)) {
x <- as.array(x)
structure(class = c("table", oldClass(x)), provideDimnames(x))
} else stop("cannot coerce to a table")
}
prop.table <- function(x, margin = NULL)
{
if(length(margin))
sweep(x, margin, margin.table(x, margin), "/", check.margin=FALSE)
else
x / sum(x)
}
margin.table <- function(x, margin = NULL)
{
if(!is.array(x)) stop("'x' is not an array")
if (length(margin)) {
z <- apply(x, margin, sum)
dim(z) <- dim(x)[margin]
dimnames(z) <- dimnames(x)[margin]
}
else return(sum(x))
class(z) <- oldClass(x) # avoid adding "matrix"
z
}
`[.table` <-
function(x, i, j, ..., drop = TRUE)
{
ret <- NextMethod()
ldr <- length(dim(ret))
if((ldr > 1L) || (ldr == length(dim(x))))
class(ret) <- "table"
ret
}