| # File src/library/stats/R/lsfit.R |
| # Part of the R package, https://www.R-project.org |
| # |
| # Copyright (C) 1995-2019 The R Core Team |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 2 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # A copy of the GNU General Public License is available at |
| # https://www.R-project.org/Licenses/ |
| |
| lsfit <- function(x, y, wt = NULL, intercept = TRUE, tolerance = 1e-07, |
| yname = NULL) |
| { |
| ## find names of x variables (design matrix) |
| |
| x <- as.matrix(x) |
| y <- as.matrix(y) |
| xnames <- colnames(x) |
| if( is.null(xnames) ) { |
| if(ncol(x) == 1L) xnames <- "X" |
| else xnames <- paste0("X", 1L:ncol(x)) |
| } |
| if( intercept ) { |
| x <- cbind(1, x) |
| xnames <- c("Intercept", xnames) |
| } |
| |
| ## find names of y variables (responses) |
| |
| if(is.null(yname) && ncol(y) > 1) yname <- paste0("Y", 1L:ncol(y)) |
| |
| ## remove missing values |
| |
| good <- complete.cases(x, y, wt) |
| dimy <- dim(as.matrix(y)) |
| if( any(!good) ) { |
| warning(sprintf(ngettext(sum(!good), |
| "%d missing value deleted", |
| "%d missing values deleted"), |
| sum(!good)), domain = NA) |
| x <- as.matrix(x)[good, , drop=FALSE] |
| y <- as.matrix(y)[good, , drop=FALSE] |
| wt <- wt[good] |
| } |
| |
| ## check for compatible lengths |
| |
| nrx <- NROW(x) |
| ncx <- NCOL(x) |
| nry <- NROW(y) |
| ncy <- NCOL(y) |
| nwts <- length(wt) |
| if(nry != nrx) |
| stop(sprintf(paste0(ngettext(nrx, |
| "'X' matrix has %d case (row)", |
| "'X' matrix has %d cases (rows)"), |
| ", ", |
| ngettext(nry, |
| "'Y' has %d case (row)", |
| "'Y' has %d cases (rows)")), |
| nrx, nry), |
| domain = NA) |
| if(nry < ncx) |
| stop(sprintf(paste0(ngettext(nry, |
| "only %d case", |
| "only %d cases"), |
| ", ", |
| ngettext(ncx, |
| "but %d variable", |
| "but %d variables")), |
| nry, ncx), |
| domain = NA) |
| ## check weights if necessary |
| if( !is.null(wt) ) { |
| if(any(wt < 0)) stop("negative weights not allowed") |
| if(nwts != nry) |
| stop(gettextf("number of weights = %d should equal %d (number of responses)", nwts, nry), domain = NA) |
| wtmult <- sqrt(wt) |
| if(any(wt == 0)) { |
| xzero <- as.matrix(x)[wt == 0, ] |
| yzero <- as.matrix(y)[wt == 0, ] |
| } |
| x <- x*wtmult |
| y <- y*wtmult |
| invmult <- 1/ifelse(wt == 0, 1, wtmult) |
| } |
| |
| # Here y is a matrix, so z$residuals and z$effects will be |
| z <- .Call(C_Cdqrls, x, y, tolerance, FALSE) |
| |
| resids <- array(NA, dim = dimy) |
| dim(z$residuals) <- c(nry, ncy) |
| if(!is.null(wt)) { |
| if(any(wt == 0)) { |
| if(ncx == 1L) fitted.zeros <- xzero * z$coefficients |
| else fitted.zeros <- xzero %*% z$coefficients |
| z$residuals[wt == 0, ] <- yzero - fitted.zeros |
| } |
| z$residuals <- z$residuals*invmult |
| } |
| resids[good, ] <- z$residuals |
| if(dimy[2L] == 1 && is.null(yname)) { |
| resids <- drop(resids) |
| names(z$coefficients) <- xnames |
| } else { |
| colnames(resids) <- yname |
| colnames(z$effects) <- yname |
| dim(z$coefficients) <- c(ncx, ncy) |
| dimnames(z$coefficients) <- list(xnames, yname) |
| } |
| z$qr <- as.matrix(z$qr) |
| colnames(z$qr) <- xnames |
| output <- list(coefficients = z$coefficients, residuals = resids) |
| |
| ## if X matrix was collinear, then the columns may have been |
| ## pivoted hence xnames may need to be corrected |
| |
| if( z$rank != ncx ) { |
| xnames <- xnames[z$pivot] |
| dimnames(z$qr) <- list(NULL, xnames) |
| warning("'X' matrix was collinear") |
| } |
| |
| ## return weights if necessary |
| |
| if (!is.null(wt) ) { |
| weights <- rep.int(NA, dimy[1L]) |
| weights[good] <- wt |
| output <- c(output, list(wt=weights)) |
| } |
| |
| ## return rest of output |
| |
| ## Neither qt nor tol are documented to be there. |
| rqr <- list(qt = drop(z$effects), qr = z$qr, qraux = z$qraux, rank = z$rank, |
| pivot = z$pivot, tol = z$tol) |
| oldClass(rqr) <- "qr" |
| output <- c(output, list(intercept = intercept, qr = rqr)) |
| return(output) |
| } |
| |
| ls.diag <- function(ls.out) |
| { |
| resids <- as.matrix(ls.out$residuals) |
| d0 <- dim(resids) |
| xnames <- colnames(ls.out$qr$qr) |
| yname <- colnames(resids) |
| |
| ## remove any missing values |
| |
| good <- complete.cases(resids, ls.out$wt) |
| if( any(!good) ) { |
| warning("missing observations deleted") |
| resids <- resids[good, , drop = FALSE] |
| } |
| |
| ## adjust residuals if needed |
| |
| if( !is.null(ls.out$wt) ) { |
| if( any(ls.out$wt[good] == 0) ) |
| warning("observations with 0 weight not used in calculating standard deviation") |
| resids <- resids * sqrt(ls.out$wt[good]) |
| } |
| |
| ## initialize |
| |
| p <- ls.out$qr$rank |
| n <- nrow(resids) |
| hatdiag <- rep.int(NA, n) |
| stats <- array(NA, dim = d0) |
| colnames(stats) <- yname |
| stdres <- studres <- dfits <- Cooks <- stats |
| |
| ## calculate hat matrix diagonals |
| |
| q <- qr.qy(ls.out$qr, rbind(diag(p), matrix(0, nrow=n-p, ncol=p))) |
| hatdiag[good] <- rowSums(as.matrix(q^2)) |
| |
| ## calculate diagnostics |
| |
| stddev <- sqrt(colSums(as.matrix(resids^2))/(n - p)) |
| stddevmat <- matrix(stddev, nrow=sum(good), ncol=ncol(resids), byrow=TRUE) |
| stdres[good, ] <- resids/(sqrt(1-hatdiag[good]) * stddevmat) |
| studres[good, ] <- (stdres[good, ]*stddevmat) / |
| sqrt(((n-p)*stddevmat^2 - resids^2/(1-hatdiag[good]))/(n-p-1)) |
| dfits[good, ] <- sqrt(hatdiag[good]/(1-hatdiag[good])) * studres[good, ] |
| Cooks[good, ] <- ((stdres[good, ]^2 * hatdiag[good])/p)/(1-hatdiag[good]) |
| if(ncol(resids)==1 && is.null(yname)) { |
| stdres <- as.vector(stdres) |
| Cooks <- as.vector(Cooks) |
| studres <- as.vector(studres) |
| dfits <- as.vector(dfits) |
| } |
| |
| ## calculate unscaled covariance matrix |
| |
| qr <- as.matrix(ls.out$qr$qr[1L:p, 1L:p]) |
| qr[row(qr)>col(qr)] <- 0 |
| covmat.unscaled <- tcrossprod(solve(qr)) |
| dimnames(covmat.unscaled) <- list(xnames, xnames) |
| |
| ## calculate scaled covariance matrix |
| |
| covmat.scaled <- sum(stddev^2) * covmat.unscaled |
| |
| ## calculate correlation matrix |
| |
| cormat <- covmat.scaled / |
| sqrt(outer(diag(covmat.scaled), diag(covmat.scaled))) |
| |
| ## calculate standard error |
| |
| stderr <- outer(sqrt(diag(covmat.unscaled)), stddev) |
| dimnames(stderr) <- list(xnames, yname) |
| |
| return(list(std.dev=stddev, hat=hatdiag, std.res=stdres, |
| stud.res=studres, cooks=Cooks, dfits=dfits, |
| correlation=cormat, std.err=stderr, |
| cov.scaled=covmat.scaled, cov.unscaled=covmat.unscaled)) |
| } |
| |
| ls.print <- function(ls.out, digits = 4L, print.it = TRUE) |
| { |
| ## calculate residuals to be used |
| |
| resids <- as.matrix(ls.out$residuals) |
| if( !is.null(ls.out$wt) ) { |
| if(any(ls.out$wt == 0)) |
| warning("observations with 0 weights not used") |
| resids <- resids * sqrt(ls.out$wt) |
| } |
| n <- apply(resids, 2L, length) - colSums(is.na(resids)) |
| lsqr <- ls.out$qr |
| p <- lsqr$rank |
| |
| ## calculate total sum sq and df |
| |
| if(ls.out$intercept) { |
| if(is.matrix(lsqr$qt)) |
| totss <- colSums(lsqr$qt[-1L, ]^2) |
| else totss <- sum(lsqr$qt[-1L]^2) |
| degfree <- p - 1 |
| } else { |
| totss <- colSums(as.matrix(lsqr$qt^2)) |
| degfree <- p |
| } |
| |
| ## calculate residual sum sq and regression sum sq |
| |
| resss <- colSums(resids^2, na.rm=TRUE) |
| resse <- sqrt(resss/(n-p)) |
| regss <- totss - resss |
| rsquared <- regss/totss |
| fstat <- (regss/degfree)/(resss/(n-p)) |
| pvalue <- pf(fstat, degfree, (n-p), lower.tail = FALSE) |
| |
| ## construct summary |
| |
| Ynames <- colnames(resids) |
| summary <- cbind(format(round(resse, digits)), |
| format(round(rsquared, digits)), |
| format(round(fstat, digits)), |
| format(degfree), |
| format(n-p), |
| format(round(pvalue, digits))) |
| dimnames(summary) <- list(Ynames, |
| c("Mean Sum Sq", "R Squared", |
| "F-value", "Df 1", "Df 2", "Pr(>F)")) |
| mat <- as.matrix(lsqr$qr[1L:p, 1L:p]) |
| mat[row(mat)>col(mat)] <- 0 |
| uVar <- diag(tcrossprod(solve(mat))) |
| |
| ## construct coef table |
| |
| m.y <- ncol(resids) |
| coef.table <- as.list(1L:m.y) |
| coef <- if(m.y==1) matrix(ls.out$coefficients, ncol=1L) |
| else ls.out$coefficients |
| for(i in 1L:m.y) { |
| se <- sqrt((resss[i]/(n[i]-p)) * uVar) |
| coef.table[[i]] <- |
| cbind(coef[, i], se, coef[, i]/se, |
| 2*pt(abs(coef[, i]/se), n[i]-p, lower.tail = FALSE), |
| deparse.level=0L) |
| dimnames(coef.table[[i]]) <- |
| list(colnames(lsqr$qr), |
| c("Estimate", "Std.Err", "t-value", "Pr(>|t|)")) |
| |
| ##-- print results -- |
| |
| if(print.it) { |
| if(m.y>1) |
| cat("Response:", Ynames[i], "\n\n") |
| cat(paste0("Residual Standard Error=", |
| format(round(resse[i], digits)), "\nR-Square=", |
| format(round(rsquared[i], digits)), "\nF-statistic (df=", |
| format(degfree), ", ", format(n[i]-p), ")=", |
| format(round(fstat[i], digits)), "\np-value=", |
| format(round(pvalue[i], digits)), "\n\n")) |
| print(round(coef.table[[i]], digits)) |
| cat("\n\n") |
| } |
| } |
| names(coef.table) <- Ynames |
| |
| invisible(list(summary = summary, coef.table = coef.table)) |
| } |