blob: 32011e8c61e82f2ebb8ef9979f228798143fccf2 [file] [log] [blame]
# File src/library/stats/R/reshape.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2013 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
reshape <-
function(data, varying = NULL, v.names = NULL, timevar = "time",
idvar = "id",
ids = 1L:NROW(data), times = seq_along(varying[[1L]]),
drop = NULL, direction, new.row.names = NULL,
sep = ".",
split = if (sep == "") {
list(regexp = "[A-Za-z][0-9]", include = TRUE)
} else {list(regexp = sep, include = FALSE, fixed = TRUE)})
{
if (!is.character(sep) || length(sep) != 1L)
stop("'sep' must be a character string")
ix2names <- function(ix)
if (is.character(ix)) ix else names(data)[ix]
guess <- function(nms,re = split$regexp,drop = !split$include,
fixed=if(is.null(split$fixed)) FALSE else split$fixed)
{
if (drop)
nn <- do.call("rbind",strsplit(nms, re, fixed = fixed))
else
nn <- cbind(substr(nms, 1L, regexpr(re,nms)),
substr(nms, regexpr(re,nms) + 1L, 10000L))
if (ncol(nn) != 2L)
stop("failed to guess time-varying variables from their names")
vn <- unique(nn[,1])
v.names <- split(nms, factor(nn[, 1L], levels = vn))
times <- unique(nn[, 2L])
attr(v.names,"v.names") <- vn
tt <- tryCatch(as.numeric(times), warning = function(w) times)
attr(v.names,"times") <- tt
v.names
}
reshapeLong <-
function(data, varying, v.names = NULL, timevar, idvar,
ids = 1L:NROW(data), times,
drop = NULL,new.row.names = NULL)
{
ll <- unlist(lapply(varying,length))
if (any(ll != ll[1L]))
stop("'varying' arguments must be the same length")
if (ll[1L] != length(times))
stop("'lengths(varying)' must all match 'length(times)'")
if (!is.null(drop)) {
if (is.character(drop))
drop <- names(data) %in% drop
data <- data[, if (is.logical(drop)) !drop else -drop, drop = FALSE]
}
## store information for back-transformation.
undoInfo <- list(varying = varying, v.names = v.names,
idvar = idvar, timevar = timevar)
## multiple id variables
if (length(idvar) > 1L) {
ids <- interaction(data[, idvar], drop=TRUE)
} else if (idvar %in% names(data)) {
ids <- data[, idvar]
}
d <- data
all.varying <- unlist(varying)
d <- d[,!(names(data) %in% all.varying), drop = FALSE]
if (is.null(v.names))
v.names <- vapply(varying, `[`, 1L, FUN.VALUE=character(1L))
rval <- do.call(rbind, lapply(seq_along(times), function(i) {
d[, timevar] <- times[i]
varying.i <- vapply(varying, `[`, i, FUN.VALUE=character(1L))
d[, v.names] <- data[, varying.i]
if (is.null(new.row.names))
row.names(d) <- paste(ids, times[i], sep = ".")
else
row.names(d) <- new.row.names[(i-1L)*NROW(d) + 1L:NROW(d)]
d
}))
if (length(idvar) == 1L && !(idvar %in% names(data))) {
rval[, idvar] <- ids
}
attr(rval,"reshapeLong") <- undoInfo
return(rval)
} ## re..Long()
reshapeWide <- function(data,timevar,idvar,varying = NULL,v.names = NULL,
drop = NULL,new.row.names = NULL)
{
if (!is.null(drop)) {
if (is.character(drop)) drop <- names(data) %in% drop
data <- data[, if (is.logical(drop)) !drop else -drop, drop = FALSE]
}
undoInfo <- list(v.names = v.names, timevar = timevar,idvar = idvar)
orig.idvar <- idvar
if (length(idvar) > 1L) {
repeat({
tempidname <- basename(tempfile("tempID"))
if (!(tempidname %in% names(data))) break
})
data[, tempidname] <- interaction(data[, idvar], drop=TRUE)
idvar <- tempidname
drop.idvar <- TRUE
} else drop.idvar <- FALSE
## times <- sort(unique(data[,timevar]))
## varying and times must have the same order
times <- unique(data[, timevar])
if (anyNA(times))
warning("there are records with missing times, which will be dropped.")
undoInfo$times <- times
if (is.null(v.names))
v.names <- names(data)[!(names(data) %in% c(timevar, idvar, orig.idvar))]
if (is.null(varying)) varying <- outer(v.names, times, paste, sep = sep)
if (is.list(varying)) varying <- do.call("rbind", varying)
undoInfo$varying <- varying
keep <- !(names(data) %in% c(timevar, v.names, idvar, orig.idvar))
if(any(keep)) {
rval <- data[keep]
tmp <- data[, idvar]
really.constant <-
unlist(lapply(rval,
function(a) all(tapply(a, as.vector(tmp),
function(b) length(unique(b)) == 1L))))
if (!all(really.constant))
warning(gettextf("some constant variables (%s) are really varying",
paste(names(rval)[!really.constant],collapse = ",")), domain = NA)
}
rval <- data[!duplicated(data[, idvar]),
!(names(data) %in% c(timevar, v.names)), drop = FALSE]
for(i in seq_along(times)) {
thistime <- data[data[, timevar] %in% times[i], ]
tab <- table(thistime[, idvar])
if (any(tab > 1L))
warning(sprintf("multiple rows match for %s=%s: first taken",
timevar, times[i]), domain = NA)
rval[, varying[, i]] <-
thistime[match(rval[, idvar], thistime[, idvar]), v.names]
}
if (!is.null(new.row.names)) row.names(rval) <- new.row.names
## temporary id variable to be dropped.
if (drop.idvar) rval[, idvar] <- NULL
## information for back-transformation
attr(rval,"reshapeWide") <- undoInfo
rval
} ## re..Wide()
## Begin reshape()
if (missing(direction)) {
undo <- c("wide", "long")[c("reshapeLong", "reshapeWide")
%in% names(attributes(data))]
if (length(undo) == 1L) direction <- undo
}
direction <- match.arg(direction, c("wide", "long"))
switch(direction,
"wide" =
{
back <- attr(data,"reshapeLong")
if (missing(timevar) && missing(idvar) && !is.null(back)) {
reshapeWide(data, idvar = back$idvar, timevar = back$timevar,
varying = back$varying, v.names = back$v.names,
new.row.names = new.row.names)
} else {
reshapeWide(data, idvar = idvar, timevar = timevar,
varying = varying, v.names = v.names, drop = drop,
new.row.names = new.row.names)
}
},
"long" =
{
if (missing(varying)) {
back <- attr(data,"reshapeWide")
if (is.null(back))
stop("no 'reshapeWide' attribute, must specify 'varying'")
varying <- back$varying
idvar <- back$idvar
timevar <- back$timevar
v.names <- back$v.names
times <- back$times
}
if (is.matrix(varying)) {
## <FIXME split.matrix>
varying <- split(c(varying), row(varying))
}
if (is.null(varying))
stop("'varying' must be nonempty list or vector")
if(is.atomic(varying)) {
varying <- ix2names(varying) # normalize
if (missing(v.names))
varying <- guess(varying)
else {
if (length(varying) %% length(v.names))
stop("length of 'v.names' does not evenly divide length of 'varying'")
ntimes <- length(varying) %/% length(v.names)
if (missing(times))
times <- seq_len(ntimes)
else if (length(times) != ntimes)
stop("length of 'varying' must be the product of length of 'v.names' and length of 'times'")
varying <- split(varying, rep(v.names, ntimes))
attr(varying, "v.names") <- v.names
attr(varying, "times") <- times
}
}
else varying <- lapply(varying, ix2names)
## This must happen after guess()
if (missing(v.names) && !is.null(attr(varying,"v.names"))) {
v.names <- attr(varying, "v.names")
times <- attr(varying, "times")
}
reshapeLong(data, idvar = idvar, timevar = timevar,
varying = varying, v.names = v.names, drop = drop,
times = times, ids = ids, new.row.names = new.row.names)
})
}