| % File src/library/stats/man/family.Rd |
| % Part of the R package, https://www.R-project.org |
| % Copyright 1995-2018 R Core Team |
| % Distributed under GPL 2 or later |
| |
| \name{family} |
| \alias{family} |
| \alias{binomial} |
| \alias{gaussian} |
| \alias{Gamma} |
| \alias{inverse.gaussian} |
| \alias{poisson} |
| \alias{quasi} |
| \alias{quasibinomial} |
| \alias{quasipoisson} |
| %\alias{print.family} |
| |
| \title{Family Objects for Models} |
| \usage{ |
| family(object, \dots) |
| |
| binomial(link = "logit") |
| gaussian(link = "identity") |
| Gamma(link = "inverse") |
| inverse.gaussian(link = "1/mu^2") |
| poisson(link = "log") |
| quasi(link = "identity", variance = "constant") |
| quasibinomial(link = "logit") |
| quasipoisson(link = "log") |
| } |
| \arguments{ |
| \item{link}{a specification for the model link function. This can be |
| a name/expression, a literal character string, a length-one character |
| vector, or an object of class |
| \code{"\link[=make.link]{link-glm}"} (such as generated by |
| \code{\link{make.link}}) provided it is not specified |
| \emph{via} one of the standard names given next. |
| |
| The \code{gaussian} family accepts the links (as names) |
| \code{identity}, \code{log} and \code{inverse}; |
| the \code{binomial} family the links \code{logit}, |
| \code{probit}, \code{cauchit} (corresponding to logistic, |
| normal and Cauchy CDFs respectively), \code{log} and |
| \code{cloglog} (complementary log-log); |
| the \code{Gamma} family the links \code{inverse}, \code{identity} |
| and \code{log}; |
| the \code{poisson} family the links \code{log}, \code{identity}, |
| and \code{sqrt}; and the \code{inverse.gaussian} family the links |
| \code{1/mu^2}, \code{inverse}, \code{identity} |
| and \code{log}. |
| |
| The \code{quasi} family accepts the links \code{logit}, \code{probit}, |
| \code{cloglog}, \code{identity}, \code{inverse}, |
| \code{log}, \code{1/mu^2} and \code{sqrt}, and |
| the function \code{\link{power}} can be used to create a |
| power link function. |
| } |
| \item{variance}{for all families other than \code{quasi}, the variance |
| function is determined by the family. The \code{quasi} family will |
| accept the literal character string (or unquoted as a name/expression) |
| specifications \code{"constant"}, \code{"mu(1-mu)"}, \code{"mu"}, |
| \code{"mu^2"} and \code{"mu^3"}, a length-one character vector |
| taking one of those values, or a list containing components |
| \code{varfun}, \code{validmu}, \code{dev.resids}, \code{initialize} |
| and \code{name}. |
| } |
| \item{object}{the function \code{family} accesses the \code{family} |
| objects which are stored within objects created by modelling |
| functions (e.g., \code{glm}).} |
| \item{\dots}{further arguments passed to methods.} |
| } |
| \description{ |
| Family objects provide a convenient way to specify the details of the |
| models used by functions such as \code{\link{glm}}. See the |
| documentation for \code{\link{glm}} for the details on how such model |
| fitting takes place. |
| } |
| \details{ |
| \code{family} is a generic function with methods for classes |
| \code{"glm"} and \code{"lm"} (the latter returning \code{gaussian()}). |
| |
| |
| For the \code{binomial} and \code{quasibinomial} families the response |
| can be specified in one of three ways: |
| \enumerate{ |
| \item As a factor: \sQuote{success} is interpreted as the factor not |
| having the first level (and hence usually as having the second level). |
| \item As a numerical vector with values between \code{0} and |
| \code{1}, interpreted as the proportion of successful cases (with the |
| total number of cases given by the \code{weights}). |
| \item As a two-column integer matrix: the first column gives the |
| number of successes and the second the number of failures. |
| } |
| |
| The \code{quasibinomial} and \code{quasipoisson} families differ from |
| the \code{binomial} and \code{poisson} families only in that the |
| dispersion parameter is not fixed at one, so they can model |
| over-dispersion. For the binomial case see McCullagh and Nelder |
| (1989, pp.\sspace{}124--8). Although they show that there is (under some |
| restrictions) a model with |
| variance proportional to mean as in the quasi-binomial model, note |
| that \code{glm} does not compute maximum-likelihood estimates in that |
| model. The behaviour of S is closer to the quasi- variants. |
| } |
| \note{ |
| The \code{link} and \code{variance} arguments have rather awkward |
| semantics for back-compatibility. The recommended way is to supply |
| them as quoted character strings, but they can also be supplied |
| unquoted (as names or expressions). Additionally, they can be |
| supplied as a length-one character vector giving the name of one of |
| the options, or as a list (for \code{link}, of class |
| \code{"link-glm"}). The restrictions apply only to links given as |
| names: when given as a character string all the links known to |
| \code{\link{make.link}} are accepted. |
| |
| This is potentially ambiguous: supplying \code{link = logit} could mean |
| the unquoted name of a link or the value of object \code{logit}. It |
| is interpreted if possible as the name of an allowed link, then |
| as an object. (You can force the interpretation to always be the value of |
| an object via \code{logit[1]}.) |
| } |
| \value{ |
| An object of class \code{"family"} (which has a concise print method). |
| This is a list with elements |
| \item{family}{character: the family name.} |
| \item{link}{character: the link name.} |
| \item{linkfun}{function: the link.} |
| \item{linkinv}{function: the inverse of the link function.} |
| \item{variance}{function: the variance as a function of the mean.} |
| \item{dev.resids}{function giving the deviance for each observation |
| as a function of \code{(y, mu, wt)}, used by the |
| \code{\link[=residuals.glm]{residuals}} method when computing |
| deviance residuals.} |
| \item{aic}{function giving the AIC value if appropriate (but \code{NA} |
| for the quasi- families). More precisely, this function |
| returns \eqn{-2\ell + 2 s}{-2 ll + 2 s}, where \eqn{\ell}{ll} is the |
| log-likelihood and \eqn{s} is the number of estimated scale |
| parameters. Note that the penalty term for the location parameters |
| (typically the \dQuote{regression coefficients}) is added elsewhere, |
| e.g., in \code{\link{glm.fit}()}, or \code{\link{AIC}()}, see the |
| AIC example in \code{\link{glm}}. |
| See \code{\link{logLik}} for the assumptions made about the |
| dispersion parameter.} |
| \item{mu.eta}{function: derivative of the inverse-link function |
| with respect to the linear predictor. If the inverse-link |
| function is \eqn{\mu = g^{-1}(\eta)}{mu = ginv(eta)} where |
| \eqn{\eta}{eta} is the value of the linear predictor, then this |
| function returns |
| \eqn{d(g^{-1})/d\eta = d\mu/d\eta}{d(ginv(eta))/d(eta) = d(mu)/d(eta)}.} |
| \item{initialize}{expression. This needs to set up whatever data |
| objects are needed for the family as well as \code{n} (needed for |
| AIC in the binomial family) and \code{mustart} (see \code{\link{glm}}).} |
| \item{validmu}{logical function. Returns \code{TRUE} if a mean |
| vector \code{mu} is within the domain of \code{variance}.} |
| \item{valideta}{logical function. Returns \code{TRUE} if a linear |
| predictor \code{eta} is within the domain of \code{linkinv}.} |
| \item{simulate}{(optional) function \code{simulate(object, nsim)} to be |
| called by the \code{"lm"} method of \code{\link{simulate}}. It will |
| normally return a matrix with \code{nsim} columns and one row for |
| each fitted value, but it can also return a list of length |
| \code{nsim}. Clearly this will be missing for \sQuote{quasi-} families.} |
| } |
| \references{ |
| McCullagh, P. and Nelder, J. A. (1989) |
| \emph{Generalized Linear Models.} |
| London: Chapman and Hall. |
| |
| Dobson, A. J. (1983) |
| \emph{An Introduction to Statistical Modelling.} |
| London: Chapman and Hall. |
| |
| Cox, D. R. and Snell, E. J. (1981) |
| \emph{Applied Statistics; Principles and Examples.} |
| London: Chapman and Hall. |
| |
| Hastie, T. J. and Pregibon, D. (1992) |
| \emph{Generalized linear models.} |
| Chapter 6 of \emph{Statistical Models in S} |
| eds J. M. Chambers and T. J. Hastie, Wadsworth & Brooks/Cole. |
| } |
| \author{ |
| The design was inspired by S functions of the same names described |
| in Hastie & Pregibon (1992) (except \code{quasibinomial} and |
| \code{quasipoisson}). |
| } |
| \seealso{ |
| \code{\link{glm}}, \code{\link{power}}, \code{\link{make.link}}. |
| |
| For binomial \emph{coefficients}, \code{\link{choose}}; |
| the binomial and negative binomial \emph{distributions}, |
| \code{\link{Binomial}}, and \code{\link{NegBinomial}}. |
| } |
| \examples{ |
| require(utils) # provides str(), used below to inspect the family objects |
| |
| nf <- gaussian() # Normal (Gaussian) family with its default link |
| nf |
| str(nf) |
| |
| gf <- Gamma() |
| gf |
| str(gf) |
| gf$linkinv |
| gf$variance(-3:4) # the Gamma variance function: equals (-3:4)^2 |
| |
| ## Binomial with default 'logit' link: check some properties visually: |
| bi <- binomial() |
| et <- seq(-10,10, by=1/8) |
| plot(et, bi$mu.eta(et), type="l") |
| ## overlay finite-difference slopes of linkinv() at midpoints: mu.eta() is its derivative |
| lines((et[-1]+et[-length(et)])/2, col=adjustcolor("red", 1/4), |
| diff(bi$linkinv(et))/diff(et), type="l", lwd=4) |
| ## and for the logit link that derivative is the logistic density: |
| lines(et, dlogis(et), lwd=3, col=adjustcolor("blue", 1/4)) |
| stopifnot(exprs = { |
| all.equal(bi$ mu.eta(et), dlogis(et)) |
| all.equal(bi$linkinv(et), plogis(et) -> m) |
| all.equal(bi$linkfun(m ), qlogis(m)) # the logit link function is qlogis(.) |
| }) |
| |
| ## Data from example(glm): Poisson fit to the counts |
| d.AD <- data.frame(treatment = gl(3,3), |
| outcome = gl(3,1,9), |
| counts = c(18,17,15, 20,10,20, 25,13,12)) |
| glm.D93 <- glm(counts ~ outcome + treatment, d.AD, family = poisson()) |
| ## Quasipoisson fit of the same model: compare with above / example(glm) : |
| glm.qD93 <- glm(counts ~ outcome + treatment, d.AD, family = quasipoisson()) |
| \donttest{ |
| glm.qD93 |
| anova (glm.qD93, test = "F") |
| summary(glm.qD93) |
| ## for Poisson results (same as from 'glm.D93' !) fix the dispersion at 1: |
| anova (glm.qD93, dispersion = 1, test = "Chisq") |
| summary(glm.qD93, dispersion = 1) |
| } |
| |
| |
| ## Example of user-specified link, a logit model for p^days |
| ## See Shaffer, T. 2004. Auk 121(2): 526-540. |
| logexp <- function(days = 1) |
| { |
|     ## Construct a link object of class "link-glm" whose link function is |
|     ## qlogis(mu^(1/days)); every closure below reads 'days' from this call. |
|     to_eta   <- function(mu) qlogis(mu^(1/days)) |
|     from_eta <- function(eta) plogis(eta)^days |
|     ## chain rule for d/d(eta) of plogis(eta)^days, reusing the derivative |
|     ## of the inverse logit that the binomial family provides |
|     slope    <- function(eta) { |
|         p <- plogis(eta) |
|         days * p^(days-1) * binomial()$mu.eta(eta) |
|     } |
|     always_valid <- function(eta) TRUE |
|     link_obj <- list(linkfun = to_eta, linkinv = from_eta, |
|                      mu.eta = slope, valideta = always_valid, |
|                      name = paste0("logexp(", days, ")")) |
|     class(link_obj) <- "link-glm" |
|     link_obj |
| } |
| (bil3 <- binomial(logexp(3))) |
| \dontshow{stopifnot(length(bil3$mu.eta(as.double(0:5))) == 6)} |
| ## in practice logexp() would be used with a vector of 'days', in |
| ## which case use an offset of 0 in the corresponding formula |
| ## to get the null deviance right. |
| |
| ## Binomial with identity link: often not a good idea, as it is both |
| ## computationally and conceptually difficult: |
| binomial(link = "identity") ## is exactly the same as |
| binomial(link = make.link("identity")) |
| |
| |
| |
| ## Fits using the quasi family with explicit variance and link: |
| x <- rnorm(100) |
| y <- rpois(100, exp(1+x)) |
| glm(y ~ x, family = quasi(variance = "mu", link = "log")) |
| # which is the same as |
| glm(y ~ x, family = poisson) |
| glm(y ~ x, family = quasi(variance = "mu^2", link = "log")) |
| \dontrun{glm(y ~ x, family = quasi(variance = "mu^3", link = "log")) # fails} |
| y <- rbinom(100, 1, plogis(x)) |
| # the next fit needs starting values, supplied via 'start' |
| glm(y ~ x, family = quasi(variance = "mu(1-mu)", link = "logit"), start = c(0,1)) |
| } |
| \keyword{models} |