blob: 16457884fbcc51677fb45099f2ae0ec10f3bf275 [file] [log] [blame]
# File src/library/base/R/factor.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2018 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
factor <- function(x = character(), levels, labels = levels,
exclude = NA, ordered = is.ordered(x), nmax = NA)
{
if(is.null(x)) x <- character()
nx <- names(x)
if (missing(levels)) {
y <- unique(x, nmax = nmax)
ind <- order(y)
levels <- unique(as.character(y)[ind])
}
force(ordered) # check if original x is an ordered factor
if(!is.character(x))
x <- as.character(x)
## levels could be a long vector, but match will not handle that.
levels <- levels[is.na(match(levels, exclude))]
f <- match(x, levels)
if(!is.null(nx))
names(f) <- nx
if(missing(labels)) { ## default: labels := levels
levels(f) <- as.character(levels)
} else { ## labels specified explicitly
nlab <- length(labels)
if(nlab == length(levels)) { ## NB: duplicated labels should work
## a version of f <- `levels<-.factor`(f, as.character(labels))
## ... but not dropping NA :
nlevs <- unique(xlevs <- as.character(labels))
at <- attributes(f)
at$levels <- nlevs
f <- match(xlevs, nlevs)[f]
attributes(f) <- at
}
else if(nlab == 1L)
levels(f) <- paste0(labels, seq_along(levels))
else ## nlab is neither 1 nor length(levels)
stop(gettextf("invalid 'labels'; length %d should be 1 or %d",
nlab, length(levels)),
domain = NA)
}
class(f) <- c(if(ordered) "ordered", "factor")
f
}
## Also used for methods::validObject(<factor>) :
.valid.factor <- function(object) {
levs <- levels(object)
if (!is.character(levs))
return("factor levels must be \"character\"")
if (d <- anyDuplicated(levs))
return(sprintf("duplicated level [%d] in factor", d))
## 'else' ok :
TRUE
}
is.factor <- function(x) inherits(x, "factor")
as.factor <- function(x) {
if (is.factor(x)) x
else if (!is.object(x) && is.integer(x)) {
## optimization for calls from tapply via split.default
levels <- sort.int(unique.default(x)) # avoid array methods
f <- match(x, levels)
levels(f) <- as.character(levels)
if(!is.null(nx <- names(x))) names(f) <- nx
class(f) <- "factor"
f
} else factor(x)
}
levels <- function(x) UseMethod("levels")
levels.default <- function(x) attr(x, "levels")
nlevels <- function(x) length(levels(x))
`levels<-.factor` <- function(x, value)
{
xlevs <- levels(x)
if (is.list(value)) {
nlevs <- rep.int(names(value), lapply(value, length))
value <- unlist(value)
m <- match(value, xlevs, nomatch = 0L)
xlevs[m] <- nlevs[m > 0L]
} else {
if (length(xlevs) > length(value))
stop("number of levels differs")
nlevs <- xlevs <- as.character(value)
nlevs <- nlevs[!is.na(nlevs)]
}
## take care here not to drop attributes, including class.
## factor(xlevs[x], levels = unique(nlevs))
nlevs <- unique(nlevs)
at <- attributes(x)
at$levels <- nlevs
y <- match(xlevs, nlevs)[x]
attributes(y) <- at
y
}
droplevels <- function(x, ...) UseMethod("droplevels")
## default 'exclude' matches `[.factor` (drop=TRUE)
droplevels.factor <- function(x, exclude = if(anyNA(levels(x))) NULL else NA, ...)
factor(x, exclude = exclude)
droplevels.data.frame <- function(x, except = NULL, exclude, ...)
{
ix <- vapply(x, is.factor, NA)
if (!is.null(except)) ix[except] <- FALSE
x[ix] <- if(missing(exclude))
lapply(x[ix], droplevels)
else lapply(x[ix], droplevels, exclude=exclude)
x
}
as.vector.factor <- function(x, mode="any")
{
if(mode=="list") as.list(x)
else if(mode== "any" || mode== "character" || mode== "logical")
as.vector(levels(x)[x], mode)
else
as.vector(unclass(x), mode)
}
as.character.factor <- function(x,...) .Internal(asCharacterFactor(x))
as.logical.factor <- function(x,...) as.logical(levels(x))[x]
as.list.factor <- function(x,...)
{
res <- vector("list", length(x))
for(i in seq_along(x)) res[[i]] <- x[i]
res
}
## for `factor' *and* `ordered' :
print.factor <- function (x, quote = FALSE, max.levels = NULL,
width = getOption("width"), ...)
{
ord <- is.ordered(x)
if (length(x) == 0L)
cat(if(ord)"ordered" else "factor", "(0)\n", sep = "")
else {
xx <- character(length(x))
xx[] <- as.character(x)
keepAttrs <- setdiff(names(attributes(x)), c("levels", "class"))
attributes(xx)[keepAttrs] <- attributes(x)[keepAttrs]
print(xx, quote = quote, ...)
}
maxl <- if(is.null(max.levels)) TRUE else max.levels
if (maxl) {
n <- length(lev <- encodeString(levels(x), quote=ifelse(quote, '"', '')))
colsep <- if(ord) " < " else " "
T0 <- "Levels: "
if(is.logical(maxl))
maxl <- { ## smart default
width <- width - (nchar(T0, "w") + 3L + 1L + 3L)
# 3='...', 3=#lev, 1=extra
lenl <- cumsum(nchar(lev, "w") + nchar(colsep, "w"))
if(n <= 1L || lenl[n] <= width) n
else max(1L, which.max(lenl > width) - 1L)
}
drop <- n > maxl
cat(if(drop) paste(format(n), ""), T0,
paste(if(drop)c(lev[1L:max(1,maxl-1)],"...",if(maxl > 1) lev[n])
else lev, collapse = colsep),
"\n", sep = "")
}
if(!isTRUE(val <- .valid.factor(x)))
warning(val) # stop() in the future
invisible(x)
}
Math.factor <- function(x, ...)
stop(gettextf("%s not meaningful for factors", sQuote(.Generic)))
## The next two have an .ordered method:
Summary.factor <- function(..., na.rm)
stop(gettextf("%s not meaningful for factors", sQuote(.Generic)))
Ops.factor <- function(e1, e2)
{
ok <- switch(.Generic, "=="=, "!="=TRUE, FALSE)
if(!ok) {
warning(gettextf("%s not meaningful for factors", sQuote(.Generic)))
return(rep.int(NA, max(length(e1), if(!missing(e2)) length(e2))))
}
## Need this for NA *levels* as opposed to missing
noNA.levels <- function(f) {
r <- levels(f)
if(any(ina <- is.na(r))) {
n <- " NA "
while(n %in% r) n <- paste(n, ".")
r[ina] <- n
}
r
}
if (nzchar(.Method[1L])) { # e1 *is* a factor
## fastpath for factor w/ no NA levels vs scalar character
if(!anyNA(levels(e1)) && is.character(e2) && length(e2) == 1L) {
if(.Generic == "==") {
## if e1[i] OR e2 is NA then (leq[e1])[i] is NA
## as desired
leq <- (levels(e1) == e2)
return(leq[e1])
} else { ## != case
leq <- (levels(e1) != e2)
return(leq[e1])
}
}
l1 <- noNA.levels(e1)
e1 <- l1[e1]
}
if (nzchar(.Method[2L])) { # e2 *is* a factor
## fastpath for factor w/ no NA levels vs scalar character
if(!anyNA(levels(e2)) && is.character(e1) && length(e1) == 1L){
if(.Generic == "==") {
leq <- (levels(e2) == e1)
return(leq[e2])
} else { ## != case
leq <- (levels(e2) != e1)
return(leq[e2])
}
}
l2 <- noNA.levels(e2)
e2 <- l2[e2]
}
if (all(nzchar(.Method)) &&
(length(l1) != length(l2) || !all(sort.int(l2) == sort.int(l1))))
stop("level sets of factors are different")
value <- NextMethod(.Generic)
nas <- is.na(e1) | is.na(e2)
value[nas] <- NA
value
}
## NB for next four:
## a factor has levels before class in attribute list (PR#6799)
`[.factor` <- function(x, ..., drop = FALSE)
{
y <- NextMethod("[")
attr(y,"contrasts") <- attr(x,"contrasts")
attr(y,"levels") <- attr(x,"levels")
class(y) <- oldClass(x)
if (drop)
factor(y, exclude = if(anyNA(levels(x))) NULL else NA ) else y
}
`[<-.factor` <- function(x, ..., value)
{
lx <- levels(x)
cx <- oldClass(x)
if (is.factor(value)) value <- levels(value)[value]
m <- match(value, lx)
if (any(is.na(m) & !is.na(value)))
warning("invalid factor level, NA generated")
class(x) <- NULL
x[...] <- m
attr(x,"levels") <- lx
class(x) <- cx
x
}
`[[.factor` <- function(x, ...)
{
y <- NextMethod("[[")
attr(y,"contrasts") <- attr(x,"contrasts")
attr(y,"levels") <- attr(x,"levels")
class(y) <- oldClass(x)
y
}
## added for 2.12.0
`[[<-.factor` <- function(x, ..., value)
{
lx <- levels(x)
cx <- oldClass(x)
if (is.factor(value)) value <- levels(value)[value]
m <- match(value, lx)
if (any(is.na(m) & !is.na(value)))
warning("invalid factor level, NA generated")
class(x) <- NULL
x[[...]] <- m
attr(x,"levels") <- lx
class(x) <- cx
x
}
## ordered factors ...
ordered <- function(x, ...) factor(x, ..., ordered=TRUE)
is.ordered <- function(x) inherits(x, "ordered")
as.ordered <- function(x) if(is.ordered(x)) x else ordered(x)
Ops.ordered <- function (e1, e2)
{
ok <- switch(.Generic,
"<" = , ">" = , "<=" = , ">=" = ,"=="=, "!=" =TRUE,
FALSE)
if(!ok) {
warning(sprintf("'%s' is not meaningful for ordered factors",
.Generic))
return(rep.int(NA, max(length(e1), if(!missing(e2)) length(e2))))
}
if (.Generic %in% c("==", "!="))
return(NextMethod(.Generic)) ##not S-PLUS compatible, but saner
nas <- is.na(e1) | is.na(e2)
ord1 <- FALSE
ord2 <- FALSE
if (nzchar(.Method[1L])) {
l1 <- levels(e1)
ord1 <- TRUE
}
if (nzchar(.Method[2L])) {
l2 <- levels(e2)
ord2 <- TRUE
}
if (all(nzchar(.Method)) &&
(length(l1) != length(l2) || !all(l2 == l1)))
stop("level sets of factors are different")
if (ord1 && ord2) {
e1 <- as.integer(e1) # was codes, but same thing for ordered factor.
e2 <- as.integer(e2)
}
else if (!ord1) {
e1 <- match(e1, l2)
e2 <- as.integer(e2)
}
else if (!ord2) {
e2 <- match(e2, l1)
e1 <- as.integer(e1)
}
value <- get(.Generic, mode = "function")(e1, e2)
value[nas] <- NA
value
}
Summary.ordered <- function(..., na.rm)
{
ok <- switch(.Generic, max = , min = , range = TRUE,
FALSE)
if (!ok)
stop(gettextf("'%s' not defined for ordered factors", .Generic),
domain = NA)
args <- list(...)
levl <- lapply(args, levels)
levset <- levl[[1]]
if (!all(vapply(args, is.ordered, NA)) ||
!all(vapply(levl, identical, NA, levset)))
stop(gettextf("'%s' is only meaningful for ordered factors if all arguments have the same level sets",
.Generic))
codes <- lapply(args, as.integer)
ind <- do.call(.Generic, c(codes, na.rm = na.rm))
ordered(levset[ind], levels = levset)
}
`is.na<-.factor` <- function(x, value)
{
lx <- levels(x)
cx <- oldClass(x)
class(x) <- NULL
x[value] <- NA
structure(x, levels = lx, class = cx)
}
`length<-.factor` <- function(x, value)
{
cl <- class(x)
levs <- levels(x)
x <- NextMethod()
structure(x, levels=levs, class=cl)
}
addNA <- function(x, ifany=FALSE)
{
if (!is.factor(x)) x <- factor(x)
if (ifany && !anyNA(x)) return(x)
ll <- levels(x)
if (!anyNA(ll)) ll <- c(ll, NA)
else if (!ifany && !anyNA(x)) return(x)
factor(x, levels=ll, exclude=NULL)
}