| # File src/library/stats/R/aggregate.R |
| # Part of the R package, https://www.R-project.org |
| # |
| # Copyright (C) 1995-2018 The R Core Team |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 2 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # A copy of the GNU General Public License is available at |
| # https://www.R-project.org/Licenses/ |
| |
| aggregate <- |
| function(x, ...) |
| UseMethod("aggregate") |
| |
| aggregate.default <- |
| function(x, ...) |
| { |
| if(is.ts(x)) |
| aggregate.ts(as.ts(x), ...) |
| else |
| aggregate.data.frame(as.data.frame(x), ...) |
| } |
| |
| aggregate.data.frame <- |
| function(x, by, FUN, ..., simplify = TRUE, drop = TRUE) |
| { |
| if(!is.data.frame(x)) x <- as.data.frame(x) |
| ## Do this here to avoid masking by non-function (could happen) |
| FUN <- match.fun(FUN) |
| if(NROW(x) == 0L) stop("no rows to aggregate") |
| if(NCOL(x) == 0L) { |
| ## fake it |
| x <- data.frame(x = rep(1, NROW(x))) |
| return(aggregate.data.frame(x, by, function(x) 0L)[seq_along(by)]) |
| } |
| if(!is.list(by)) |
| stop("'by' must be a list") |
| if(is.null(names(by)) && length(by)) |
| names(by) <- paste0("Group.", seq_along(by)) |
| else { |
| nam <- names(by) |
| ind <- which(!nzchar(nam)) |
| names(by)[ind] <- paste0("Group.", ind) |
| } |
| |
| if(any(lengths(by) != NROW(x))) |
| stop("arguments must have same length") |
| |
| y <- as.data.frame(by, stringsAsFactors = FALSE) |
| keep <- complete.cases(by) |
| y <- y[keep, , drop = FALSE] |
| x <- x[keep, , drop = FALSE] |
| nrx <- NROW(x) |
| |
| ## Generate a group identifier vector with integers and dots. |
| ident <- function(x) { |
| y <- as.factor(x) |
| l <- length(levels(y)) |
| s <- as.character(seq_len(l)) |
| n <- nchar(s) |
| levels(y) <- paste0(strrep("0", n[l] - n), s) |
| y # levels used for drop = FALSE |
| } |
| grp <- lapply(y, ident) |
| multi.y <- !drop && ncol(y) |
| if(multi.y) { |
| lev <- lapply(grp, levels) |
| y <- as.list(y) |
| for (i in seq_along(y)) { |
| z <- y[[i]][match(lev[[i]], grp[[i]])] |
| if(is.factor(z) && any(keep <- is.na(z))) |
| z[keep] <- levels(z)[keep] |
| y[[i]] <- z |
| } |
| eGrid <- function(L) |
| expand.grid(L, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE) |
| y <- eGrid(y) |
| } |
| grp <- if(ncol(y)) { |
| names(grp) <- NULL |
| do.call(paste, c(rev(grp), list(sep = "."))) |
| } else |
| integer(nrx) |
| if(multi.y) { |
| lev <- as.list(eGrid(lev)) |
| names(lev) <- NULL |
| lev <- do.call(paste, c(rev(lev), list(sep = "."))) |
| } else |
| y <- y[match(sort(unique(grp)), grp, 0L), , drop = FALSE] |
| z <- lapply(x, |
| function(e) { |
| ## In case of a common length > 1, sapply() gives |
| ## the transpose of what we need ... |
| ans <- lapply(X = unname(split(e, grp)), FUN = FUN, ...) |
| if(simplify && |
| length(len <- unique(lengths(ans))) == 1L) { |
| ## this used to lose classes |
| if(len == 1L) { |
| cl <- lapply(ans, oldClass) |
| cl1 <- cl[[1L]] |
| ans <- unlist(ans, recursive = FALSE, use.names = FALSE) |
| if (!is.null(cl1) && |
| all(vapply(cl, identical, NA, y = cl1))) |
| class(ans) <- cl1 |
| } else if(len > 1L) |
| ans <- matrix(unlist(ans, recursive = FALSE, use.names = FALSE), |
| ncol = len, |
| byrow = TRUE, |
| dimnames = |
| if(!is.null(nms <- names(ans[[1L]]))) |
| list(NULL, nms) ## else NULL |
| ) |
| } |
| ans |
| }) |
| len <- length(y) |
| if(multi.y) { |
| keep <- match(lev, sort(unique(grp))) |
| for(i in seq_along(z)) |
| y[[len + i]] <- if(is.matrix(z[[i]])) |
| z[[i]][keep, , drop = FALSE] |
| else z[[i]][keep] |
| } else |
| for(i in seq_along(z)) |
| y[[len + i]] <- z[[i]] |
| names(y) <- c(names(by), names(x)) |
| row.names(y) <- NULL |
| y |
| } |
| |
| aggregate.formula <- |
| function(formula, data, FUN, ..., subset, na.action = na.omit) |
| { |
| if(missing(formula) || !inherits(formula, "formula")) |
| stop("'formula' missing or incorrect") |
| if(length(formula) != 3L) |
| stop("'formula' must have both left and right hand sides") |
| |
| m <- match.call(expand.dots = FALSE) |
| if(is.matrix(eval(m$data, parent.frame()))) |
| m$data <- as.data.frame(data) |
| m$... <- m$FUN <- NULL |
| ## need stats:: for non-standard evaluation |
| m[[1L]] <- quote(stats::model.frame) |
| |
| if (formula[[2L]] == ".") { |
| ## LHS is a dot, expand it ... |
| ##rhs <- unlist(strsplit(deparse(formula[[3L]]), " *[:+] *")) |
| ## <NOTE> |
| ## Note that this will not do quite the right thing in case the |
| ## RHS contains transformed variables, such that |
| ## setdiff(rhs, names(data)) |
| ## is non-empty ... |
| ##lhs <- sprintf("cbind(%s)", |
| ## paste(setdiff(names(data), rhs), collapse = ",")) |
| ## formula[[2L]] <- parse(text = lhs)[[1L]] |
| ## </NOTE> |
| |
| ## New logic May 2012 --pd |
| |
| ## Dot expansion: |
| ## lhs ends up as quote(cbind(v1, v2, ....)) using all variables in |
| ## data, except those that are used on the RHS. |
| |
| ## This version uses terms() to get the rhs variables, which means |
| ## that it will NOT remove a variable from the expansion if a |
| ## transformation of it is on the RHS of the formula. |
| |
| rhs <- as.list(attr(terms(formula[-2L]),"variables")[-1]) |
| lhs <- as.call(c(quote(cbind), |
| setdiff(lapply(names(data), as.name), |
| rhs) |
| ) |
| ) |
| formula[[2L]] <- lhs |
| m[[2L]] <- formula |
| } |
| mf <- eval(m, parent.frame()) |
| |
| if(is.matrix(mf[[1L]])) { |
| ## LHS is a cbind() combo, convert to data frame and fix names. |
| ## Commented out May 2012 (seems to work without it) -- pd |
| ##lhs <- setNames(as.data.frame(mf[[1L]]), |
| ## as.character(m[[2L]][[2L]])[-1L]) |
| lhs <- as.data.frame(mf[[1L]]) |
| aggregate.data.frame(lhs, mf[-1L], FUN = FUN, ...) |
| } |
| else |
| aggregate.data.frame(mf[1L], mf[-1L], FUN = FUN, ...) |
| } |
| |
| aggregate.ts <- |
| function(x, nfrequency = 1, FUN = sum, ndeltat = 1, |
| ts.eps = getOption("ts.eps"), ...) |
| { |
| x <- as.ts(x) |
| ofrequency <- tsp(x)[3L] |
| ## do this here to avoid masking by non-function (could happen) |
| FUN <- match.fun(FUN) |
| ## Set up the new frequency, and make sure it is an integer. |
| if(missing(nfrequency)) |
| nfrequency <- 1 / ndeltat |
| if((nfrequency > 1) && |
| (abs(nfrequency - round(nfrequency)) < ts.eps)) |
| nfrequency <- round(nfrequency) |
| |
| if(nfrequency == ofrequency) |
| return(x) |
| ratio <- ofrequency /nfrequency |
| if(abs(ratio - round(ratio)) > ts.eps) |
| stop(gettextf("cannot change frequency from %g to %g", |
| ofrequency, nfrequency), domain = NA) |
| ## The desired result is obtained by applying FUN to blocks of |
| ## length ofrequency/nfrequency, for each of the variables in x. |
| ## We first get the new start and end right, and then break x into |
| ## such blocks by reshaping it into an array and setting dim. |
| ## avoid e.g. 1.0 %/% 0.2 |
| ## https://stat.ethz.ch/pipermail/r-devel/2010-April/057225.html |
| len <- trunc((ofrequency / nfrequency) + ts.eps) |
| mat <- is.matrix(x) |
| if(mat) cn <- colnames(x) |
| ## nstart <- ceiling(tsp(x)[1L] * nfrequency) / nfrequency |
| ## x <- as.matrix(window(x, start = nstart)) |
| nstart <- tsp(x)[1L] |
| ## Can't use nstart <- start(x) as this causes problems if |
| ## you get a vector of length 2. |
| x <- as.matrix(x) |
| nend <- floor(nrow(x) / len) * len |
| x <- apply(array(c(x[1 : nend, ]), |
| dim = c(len, nend / len, ncol(x))), |
| MARGIN = c(2L, 3L), FUN = FUN, ...) |
| if(!mat) x <- as.vector(x) |
| else colnames(x) <- cn |
| ts(x, start = nstart, frequency = nfrequency) |
| } |
| |