blob: 46318bff5838093bffecbfa7d13fb9f556a17691 [file] [log] [blame]
# File src/library/stats/R/lsfit.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2019 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
lsfit <- function(x, y, wt = NULL, intercept = TRUE, tolerance = 1e-07,
yname = NULL)
{
## find names of x variables (design matrix)
x <- as.matrix(x)
y <- as.matrix(y)
xnames <- colnames(x)
if( is.null(xnames) ) {
if(ncol(x) == 1L) xnames <- "X"
else xnames <- paste0("X", 1L:ncol(x))
}
if( intercept ) {
x <- cbind(1, x)
xnames <- c("Intercept", xnames)
}
## find names of y variables (responses)
if(is.null(yname) && ncol(y) > 1) yname <- paste0("Y", 1L:ncol(y))
## remove missing values
good <- complete.cases(x, y, wt)
dimy <- dim(as.matrix(y))
if( any(!good) ) {
warning(sprintf(ngettext(sum(!good),
"%d missing value deleted",
"%d missing values deleted"),
sum(!good)), domain = NA)
x <- as.matrix(x)[good, , drop=FALSE]
y <- as.matrix(y)[good, , drop=FALSE]
wt <- wt[good]
}
## check for compatible lengths
nrx <- NROW(x)
ncx <- NCOL(x)
nry <- NROW(y)
ncy <- NCOL(y)
nwts <- length(wt)
if(nry != nrx)
stop(sprintf(paste0(ngettext(nrx,
"'X' matrix has %d case (row)",
"'X' matrix has %d cases (rows)"),
", ",
ngettext(nry,
"'Y' has %d case (row)",
"'Y' has %d cases (rows)")),
nrx, nry),
domain = NA)
if(nry < ncx)
stop(sprintf(paste0(ngettext(nry,
"only %d case",
"only %d cases"),
", ",
ngettext(ncx,
"but %d variable",
"but %d variables")),
nry, ncx),
domain = NA)
## check weights if necessary
if( !is.null(wt) ) {
if(any(wt < 0)) stop("negative weights not allowed")
if(nwts != nry)
stop(gettextf("number of weights = %d should equal %d (number of responses)", nwts, nry), domain = NA)
wtmult <- sqrt(wt)
if(any(wt == 0)) {
xzero <- as.matrix(x)[wt == 0, ]
yzero <- as.matrix(y)[wt == 0, ]
}
x <- x*wtmult
y <- y*wtmult
invmult <- 1/ifelse(wt == 0, 1, wtmult)
}
# Here y is a matrix, so z$residuals and z$effects will be
z <- .Call(C_Cdqrls, x, y, tolerance, FALSE)
resids <- array(NA, dim = dimy)
dim(z$residuals) <- c(nry, ncy)
if(!is.null(wt)) {
if(any(wt == 0)) {
if(ncx == 1L) fitted.zeros <- xzero * z$coefficients
else fitted.zeros <- xzero %*% z$coefficients
z$residuals[wt == 0, ] <- yzero - fitted.zeros
}
z$residuals <- z$residuals*invmult
}
resids[good, ] <- z$residuals
if(dimy[2L] == 1 && is.null(yname)) {
resids <- drop(resids)
names(z$coefficients) <- xnames
} else {
colnames(resids) <- yname
colnames(z$effects) <- yname
dim(z$coefficients) <- c(ncx, ncy)
dimnames(z$coefficients) <- list(xnames, yname)
}
z$qr <- as.matrix(z$qr)
colnames(z$qr) <- xnames
output <- list(coefficients = z$coefficients, residuals = resids)
## if X matrix was collinear, then the columns may have been
## pivoted hence xnames may need to be corrected
if( z$rank != ncx ) {
xnames <- xnames[z$pivot]
dimnames(z$qr) <- list(NULL, xnames)
warning("'X' matrix was collinear")
}
## return weights if necessary
if (!is.null(wt) ) {
weights <- rep.int(NA, dimy[1L])
weights[good] <- wt
output <- c(output, list(wt=weights))
}
## return rest of output
## Neither qt nor tol are documented to be there.
rqr <- list(qt = drop(z$effects), qr = z$qr, qraux = z$qraux, rank = z$rank,
pivot = z$pivot, tol = z$tol)
oldClass(rqr) <- "qr"
output <- c(output, list(intercept = intercept, qr = rqr))
return(output)
}
ls.diag <- function(ls.out)
{
resids <- as.matrix(ls.out$residuals)
d0 <- dim(resids)
xnames <- colnames(ls.out$qr$qr)
yname <- colnames(resids)
## remove any missing values
good <- complete.cases(resids, ls.out$wt)
if( any(!good) ) {
warning("missing observations deleted")
resids <- resids[good, , drop = FALSE]
}
## adjust residuals if needed
if( !is.null(ls.out$wt) ) {
if( any(ls.out$wt[good] == 0) )
warning("observations with 0 weight not used in calculating standard deviation")
resids <- resids * sqrt(ls.out$wt[good])
}
## initialize
p <- ls.out$qr$rank
n <- nrow(resids)
hatdiag <- rep.int(NA, n)
stats <- array(NA, dim = d0)
colnames(stats) <- yname
stdres <- studres <- dfits <- Cooks <- stats
## calculate hat matrix diagonals
q <- qr.qy(ls.out$qr, rbind(diag(p), matrix(0, nrow=n-p, ncol=p)))
hatdiag[good] <- rowSums(as.matrix(q^2))
## calculate diagnostics
stddev <- sqrt(colSums(as.matrix(resids^2))/(n - p))
stddevmat <- matrix(stddev, nrow=sum(good), ncol=ncol(resids), byrow=TRUE)
stdres[good, ] <- resids/(sqrt(1-hatdiag[good]) * stddevmat)
studres[good, ] <- (stdres[good, ]*stddevmat) /
sqrt(((n-p)*stddevmat^2 - resids^2/(1-hatdiag[good]))/(n-p-1))
dfits[good, ] <- sqrt(hatdiag[good]/(1-hatdiag[good])) * studres[good, ]
Cooks[good, ] <- ((stdres[good, ]^2 * hatdiag[good])/p)/(1-hatdiag[good])
if(ncol(resids)==1 && is.null(yname)) {
stdres <- as.vector(stdres)
Cooks <- as.vector(Cooks)
studres <- as.vector(studres)
dfits <- as.vector(dfits)
}
## calculate unscaled covariance matrix
qr <- as.matrix(ls.out$qr$qr[1L:p, 1L:p])
qr[row(qr)>col(qr)] <- 0
covmat.unscaled <- tcrossprod(solve(qr))
dimnames(covmat.unscaled) <- list(xnames, xnames)
## calculate scaled covariance matrix
covmat.scaled <- sum(stddev^2) * covmat.unscaled
## calculate correlation matrix
cormat <- covmat.scaled /
sqrt(outer(diag(covmat.scaled), diag(covmat.scaled)))
## calculate standard error
stderr <- outer(sqrt(diag(covmat.unscaled)), stddev)
dimnames(stderr) <- list(xnames, yname)
return(list(std.dev=stddev, hat=hatdiag, std.res=stdres,
stud.res=studres, cooks=Cooks, dfits=dfits,
correlation=cormat, std.err=stderr,
cov.scaled=covmat.scaled, cov.unscaled=covmat.unscaled))
}
ls.print <- function(ls.out, digits = 4L, print.it = TRUE)
{
## calculate residuals to be used
resids <- as.matrix(ls.out$residuals)
if( !is.null(ls.out$wt) ) {
if(any(ls.out$wt == 0))
warning("observations with 0 weights not used")
resids <- resids * sqrt(ls.out$wt)
}
n <- apply(resids, 2L, length) - colSums(is.na(resids))
lsqr <- ls.out$qr
p <- lsqr$rank
## calculate total sum sq and df
if(ls.out$intercept) {
if(is.matrix(lsqr$qt))
totss <- colSums(lsqr$qt[-1L, ]^2)
else totss <- sum(lsqr$qt[-1L]^2)
degfree <- p - 1
} else {
totss <- colSums(as.matrix(lsqr$qt^2))
degfree <- p
}
## calculate residual sum sq and regression sum sq
resss <- colSums(resids^2, na.rm=TRUE)
resse <- sqrt(resss/(n-p))
regss <- totss - resss
rsquared <- regss/totss
fstat <- (regss/degfree)/(resss/(n-p))
pvalue <- pf(fstat, degfree, (n-p), lower.tail = FALSE)
## construct summary
Ynames <- colnames(resids)
summary <- cbind(format(round(resse, digits)),
format(round(rsquared, digits)),
format(round(fstat, digits)),
format(degfree),
format(n-p),
format(round(pvalue, digits)))
dimnames(summary) <- list(Ynames,
c("Mean Sum Sq", "R Squared",
"F-value", "Df 1", "Df 2", "Pr(>F)"))
mat <- as.matrix(lsqr$qr[1L:p, 1L:p])
mat[row(mat)>col(mat)] <- 0
uVar <- diag(tcrossprod(solve(mat)))
## construct coef table
m.y <- ncol(resids)
coef.table <- as.list(1L:m.y)
coef <- if(m.y==1) matrix(ls.out$coefficients, ncol=1L)
else ls.out$coefficients
for(i in 1L:m.y) {
se <- sqrt((resss[i]/(n[i]-p)) * uVar)
coef.table[[i]] <-
cbind(coef[, i], se, coef[, i]/se,
2*pt(abs(coef[, i]/se), n[i]-p, lower.tail = FALSE),
deparse.level=0L)
dimnames(coef.table[[i]]) <-
list(colnames(lsqr$qr),
c("Estimate", "Std.Err", "t-value", "Pr(>|t|)"))
##-- print results --
if(print.it) {
if(m.y>1)
cat("Response:", Ynames[i], "\n\n")
cat(paste0("Residual Standard Error=",
format(round(resse[i], digits)), "\nR-Square=",
format(round(rsquared[i], digits)), "\nF-statistic (df=",
format(degfree), ", ", format(n[i]-p), ")=",
format(round(fstat[i], digits)), "\np-value=",
format(round(pvalue[i], digits)), "\n\n"))
print(round(coef.table[[i]], digits))
cat("\n\n")
}
}
names(coef.table) <- Ynames
invisible(list(summary = summary, coef.table = coef.table))
}