src/library/stats/man/family.Rd - R - Git at Google

 % File src/library/stats/man/family.Rd
 % Part of the R package, https://www.R-project.org
 % Copyright 1995-2018 R Core Team
 % Distributed under GPL 2 or later

 \name{family}
 \alias{family}
 \alias{binomial}
 \alias{gaussian}
 \alias{Gamma}
 \alias{inverse.gaussian}
 \alias{poisson}
 \alias{quasi}
 \alias{quasibinomial}
 \alias{quasipoisson}
 %\alias{print.family}

 \title{Family Objects for Models}
 \usage{
 family(object, \dots)

 binomial(link = "logit")
 gaussian(link = "identity")
 Gamma(link = "inverse")
 inverse.gaussian(link = "1/mu^2")
 poisson(link = "log")
 quasi(link = "identity", variance = "constant")
 quasibinomial(link = "logit")
 quasipoisson(link = "log")
 }
 \arguments{
   \item{link}{a specification for the model link function.  This can be
     a name/expression, a literal character string, a length-one character
     vector, or an object of class
     \code{"\link[=make.link]{link-glm}"} (such as generated by
     \code{\link{make.link}}) provided it is not specified
     \emph{via} one of the standard names given next.

     The \code{gaussian} family accepts the links (as names)
     \code{identity}, \code{log} and \code{inverse};
     the \code{binomial} family the links \code{logit},
     \code{probit}, \code{cauchit} (corresponding to logistic,
     normal and Cauchy CDFs respectively), \code{log} and
     \code{cloglog} (complementary log-log);
     the \code{Gamma} family the links \code{inverse}, \code{identity}
      and \code{log};
     the \code{poisson} family the links \code{log}, \code{identity},
     and \code{sqrt}; and the \code{inverse.gaussian} family the links
     \code{1/mu^2}, \code{inverse}, \code{identity}
     and \code{log}.

     The \code{quasi} family accepts the links \code{logit}, \code{probit},
     \code{cloglog},  \code{identity}, \code{inverse},
     \code{log}, \code{1/mu^2} and \code{sqrt}, and
     the function \code{\link{power}} can be used to create a
     power link function.
   }
   \item{variance}{for all families other than \code{quasi}, the variance
     function is determined by the family.  The \code{quasi} family will
     accept the literal character string (or unquoted as a name/expression)
     specifications \code{"constant"}, \code{"mu(1-mu)"}, \code{"mu"},
     \code{"mu^2"} and \code{"mu^3"}, a length-one character vector
     taking one of those values, or a list containing components
     \code{varfun}, \code{validmu}, \code{dev.resids}, \code{initialize}
     and \code{name}.
   }
   \item{object}{the function \code{family} accesses the \code{family}
     objects which are stored within objects created by modelling
     functions (e.g., \code{glm}).}
   \item{\dots}{further arguments passed to methods.}
 }
 \description{
   Family objects provide a convenient way to specify the details of the
   models used by functions such as \code{\link{glm}}.  See the
   documentation for \code{\link{glm}} for the details on how such model
   fitting takes place.
 }
 \details{
   \code{family} is a generic function with methods for classes
   \code{"glm"} and \code{"lm"} (the latter returning \code{gaussian()}).


   For the \code{binomial} and \code{quasibinomial} families the response
   can be specified in one of three ways:
   \enumerate{
     \item As a factor: \sQuote{success} is interpreted as the factor not
     having the first level (and hence usually as having the second level).
     \item As a numerical vector with values  between \code{0} and
     \code{1}, interpreted as the proportion of successful cases (with the
     total number of cases given by the \code{weights}).
     \item As a two-column integer matrix: the first column gives the
     number of successes and the second the number of failures.
   }

   The \code{quasibinomial} and \code{quasipoisson} families differ from
   the \code{binomial} and \code{poisson} families only in that the
   dispersion parameter is not fixed at one, so they can model
   over-dispersion.  For the binomial case see McCullagh and Nelder
   (1989, pp.\sspace{}124--8).  Although they show that there is (under some
   restrictions) a model with
   variance proportional to mean as in the quasi-binomial model, note
   that \code{glm} does not compute maximum-likelihood estimates in that
   model.  The behaviour of S is closer to the quasi- variants.
 }
 \note{
   The \code{link} and \code{variance} arguments have rather awkward
   semantics for back-compatibility.  The recommended way is to supply
   them as quoted character strings, but they can also be supplied
   unquoted (as names or expressions).  Additionally, they can be
   supplied as a length-one character vector giving the name of one of
   the options, or as a list (for \code{link}, of class
   \code{"link-glm"}).  The restrictions apply only to links given as
   names: when given as a character string all the links known to
   \code{\link{make.link}} are accepted.

   This is potentially ambiguous: supplying \code{link = logit} could mean
   the unquoted name of a link or the value of object \code{logit}.  It
   is interpreted if possible as the name of an allowed link, then
   as an object.  (You can force the interpretation to always be the value of
   an object via \code{logit[1]}.)
 }
 \value{
   An object of class \code{"family"} (which has a concise print method).
   This is a list with elements
   \item{family}{character: the family name.}
   \item{link}{character: the link name.}
   \item{linkfun}{function: the link.}
   \item{linkinv}{function: the inverse of the link function.}
   \item{variance}{function: the variance as a function of the mean.}
   \item{dev.resids}{function giving the deviance for each observation
     as a function of \code{(y, mu, wt)}, used by the
     \code{\link[=residuals.glm]{residuals}} method when computing
     deviance residuals.}
     \item{aic}{function giving the AIC value if appropriate (but \code{NA}
     for the quasi- families).  More precisely, this function
     returns \eqn{-2\ell + 2 s}{-2 ll + 2 s}, where \eqn{\ell}{ll} is the
     log-likelihood and \eqn{s} is the number of estimated scale
     parameters.  Note that the penalty term for the location parameters
     (typically the \dQuote{regression coefficients}) is added elsewhere,
     e.g., in \code{\link{glm.fit}()} or \code{\link{AIC}()}; see the
     AIC example in \code{\link{glm}}.
     See \code{\link{logLik}} for the assumptions made about the
     dispersion parameter.}
   \item{mu.eta}{function: derivative of the inverse-link function
     with respect to the linear predictor.  If the inverse-link
     function is \eqn{\mu = g^{-1}(\eta)}{mu = ginv(eta)} where
     \eqn{\eta}{eta} is the value of the linear predictor, then this
     function returns
     \eqn{d(g^{-1})/d\eta = d\mu/d\eta}{d(ginv(eta))/d(eta) = d(mu)/d(eta)}.}
   \item{initialize}{expression.  This needs to set up whatever data
     objects are needed for the family as well as \code{n} (needed for
     AIC in the binomial family) and \code{mustart} (see \code{\link{glm}}).}
   \item{validmu}{logical function.  Returns \code{TRUE} if a mean
     vector \code{mu} is within the domain of \code{variance}.}
   \item{valideta}{logical function.   Returns \code{TRUE} if a linear
     predictor \code{eta} is within the domain of \code{linkinv}.}
   \item{simulate}{(optional) function \code{simulate(object, nsim)} to be
     called by the \code{"lm"} method of \code{\link{simulate}}.  It will
     normally return a matrix with \code{nsim} columns and one row for
     each fitted value, but it can also return a list of length
     \code{nsim}. Clearly this will be missing for \sQuote{quasi-} families.}
 }
 \references{
   McCullagh, P. and Nelder, J. A. (1989)
   \emph{Generalized Linear Models.}
   London: Chapman and Hall.

   Dobson, A. J. (1983)
   \emph{An Introduction to Statistical Modelling.}
   London: Chapman and Hall.

   Cox, D. R. and Snell, E. J. (1981)
   \emph{Applied Statistics; Principles and Examples.}
   London: Chapman and Hall.

   Hastie, T. J. and Pregibon, D. (1992)
   \emph{Generalized linear models.}
   Chapter 6 of \emph{Statistical Models in S}
   eds J. M. Chambers and T. J. Hastie, Wadsworth & Brooks/Cole.
 }
 \author{
   The design was inspired by S functions of the same names described
   in Hastie & Pregibon (1992) (except \code{quasibinomial} and
   \code{quasipoisson}).
 }
 \seealso{
   \code{\link{glm}}, \code{\link{power}}, \code{\link{make.link}}.

   For binomial \emph{coefficients}, \code{\link{choose}};
   the binomial and negative binomial \emph{distributions},
   \code{\link{Binomial}}, and \code{\link{NegBinomial}}.
 }
 \examples{
 require(utils) # provides str()

 ## Gaussian family with its default identity link
 nf <- gaussian()
 print(nf)
 str(nf)

 ## Gamma family with its default inverse link
 gf <- Gamma()
 print(gf)
 str(gf)
 print(gf$linkinv)
 gf$variance(-3:4) # the variance function squares its argument: (.)^2

 ## Binomial family with the default 'logit' link: some visual sanity checks.
 bi <- binomial()
 et <- seq(-10, 10, by = 1/8)
 plot(et, bi$mu.eta(et), type = "l")
 ## mu.eta() is the derivative of linkinv(): overlay the finite-difference
 ## slopes of linkinv(), drawn at the midpoints of consecutive 'et' values
 lines((et[-1] + et[-length(et)]) / 2,
       diff(bi$linkinv(et)) / diff(et),
       type = "l", lwd = 4, col = adjustcolor("red", 1/4))
 ## ... which for this link is the logistic density:
 lines(et, dlogis(et), lwd = 3, col = adjustcolor("blue", 1/4))
 ## numerical versions of the same checks
 m <- plogis(et)
 stopifnot(exprs = {
   all.equal(bi$mu.eta(et), dlogis(et))
   all.equal(bi$linkinv(et), m)
   all.equal(bi$linkfun(m), qlogis(m))    #  logit(.) == qlogis(.) !
 })

 ## Data from example(glm) :
 d.AD <- data.frame(treatment = gl(3,3),
                    outcome   = gl(3,1,9),
                    counts    = c(18,17,15, 20,10,20, 25,13,12))
 ## Poisson fit: the dispersion parameter is fixed at one
 glm.D93 <- glm(counts ~ outcome + treatment, d.AD, family = poisson())
 ## Quasipoisson: compare with above / example(glm) :
 ## same model formula and data, but the dispersion is not fixed at one
 glm.qD93 <- glm(counts ~ outcome + treatment, d.AD, family = quasipoisson())
 \donttest{
 ## print the quasipoisson fit, then its analysis of deviance and summary
 glm.qD93
 anova  (glm.qD93, test = "F")
 summary(glm.qD93)
 ## for Poisson results (same as from 'glm.D93' !) use
 anova  (glm.qD93, dispersion = 1, test = "Chisq")
 summary(glm.qD93, dispersion = 1)
 }


 ## User-specified link: a logit model for p^days, i.e.
 ##   eta = logit(mu^(1/days))   and   mu = plogis(eta)^days.
 ## See Shaffer, T.  2004. Auk 121(2): 526-540.
 logexp <- function(days = 1)
 {
     ## label stored as the 'name' component of the link object
     label <- paste0("logexp(", days, ")")
     lnk <- list(
         linkfun  = function(mu) qlogis(mu^(1/days)),
         linkinv  = function(eta) plogis(eta)^days,
         ## chain rule: d mu/d eta = days * p^(days-1) * dp/d eta, where
         ## dp/d eta is the mu.eta of the default (logit) binomial link
         mu.eta   = function(eta) {
             p <- plogis(eta)
             days * p^(days-1) * binomial()$mu.eta(eta)
         },
         ## every value of the linear predictor is accepted
         valideta = function(eta) TRUE,
         name     = label)
     class(lnk) <- "link-glm"
     lnk
 }
 ## build a binomial family around the custom link with days = 3 and print it
 (bil3 <- binomial(logexp(3)))
 \dontshow{stopifnot(length(bil3$mu.eta(as.double(0:5))) == 6)}
 ## in practice this would be used with a vector of 'days', in
 ## which case use an offset of 0 in the corresponding formula
 ## to get the null deviance right.

 ## Binomial with identity link: often not a good idea, as both
 ## computationally and conceptually difficult:
 ## the link can be given as a character string ...
 binomial(link = "identity")  ## is exactly the same as
 ## ... or as the "link-glm" object returned by make.link()
 binomial(link = make.link("identity"))


 ## tests of quasi
 ## simulated data: Poisson counts whose mean is exp(1+x)
 x <- rnorm(100)
 y <- rpois(100, exp(1+x))
 ## quasi family with variance function "mu" and log link
 glm(y ~ x, family = quasi(variance = "mu", link = "log"))
 # which is the same as
 glm(y ~ x, family = poisson)
 ## other variance functions accepted by quasi()
 glm(y ~ x, family = quasi(variance = "mu^2", link = "log"))
 \dontrun{glm(y ~ x, family = quasi(variance = "mu^3", link = "log")) # fails}
 ## binary response with success probability plogis(x)
 y <- rbinom(100, 1, plogis(x))
 # need to set a starting value for the next fit
 glm(y ~ x, family = quasi(variance = "mu(1-mu)", link = "logit"), start = c(0,1))
 }
 \keyword{models}
	% File src/library/stats/man/family.Rd
	% Part of the R package, https://www.R-project.org
	% Copyright 1995-2018 R Core Team
	% Distributed under GPL 2 or later

	\name{family}
	\alias{family}
	\alias{binomial}
	\alias{gaussian}
	\alias{Gamma}
	\alias{inverse.gaussian}
	\alias{poisson}
	\alias{quasi}
	\alias{quasibinomial}
	\alias{quasipoisson}
	%\alias{print.family}

	\title{Family Objects for Models}
	\usage{
	family(object, \dots)

	binomial(link = "logit")
	gaussian(link = "identity")
	Gamma(link = "inverse")
	inverse.gaussian(link = "1/mu^2")
	poisson(link = "log")
	quasi(link = "identity", variance = "constant")
	quasibinomial(link = "logit")
	quasipoisson(link = "log")
	}
	\arguments{
	\item{link}{a specification for the model link function. This can be
	a name/expression, a literal character string, a length-one character
	vector, or an object of class
	\code{"\link[=make.link]{link-glm}"} (such as generated by
	\code{\link{make.link}}) provided it is not specified
	\emph{via} one of the standard names given next.

	The \code{gaussian} family accepts the links (as names)
	\code{identity}, \code{log} and \code{inverse};
	the \code{binomial} family the links \code{logit},
	\code{probit}, \code{cauchit} (corresponding to logistic,
	normal and Cauchy CDFs respectively), \code{log} and
	\code{cloglog} (complementary log-log);
	the \code{Gamma} family the links \code{inverse}, \code{identity}
	and \code{log};
	the \code{poisson} family the links \code{log}, \code{identity},
	and \code{sqrt}; and the \code{inverse.gaussian} family the links
	\code{1/mu^2}, \code{inverse}, \code{identity}
	and \code{log}.

	The \code{quasi} family accepts the links \code{logit}, \code{probit},
	\code{cloglog}, \code{identity}, \code{inverse},
	\code{log}, \code{1/mu^2} and \code{sqrt}, and
	the function \code{\link{power}} can be used to create a
	power link function.
	}
	\item{variance}{for all families other than \code{quasi}, the variance
	function is determined by the family. The \code{quasi} family will
	accept the literal character string (or unquoted as a name/expression)
	specifications \code{"constant"}, \code{"mu(1-mu)"}, \code{"mu"},
	\code{"mu^2"} and \code{"mu^3"}, a length-one character vector
	taking one of those values, or a list containing components
	\code{varfun}, \code{validmu}, \code{dev.resids}, \code{initialize}
	and \code{name}.
	}
	\item{object}{the function \code{family} accesses the \code{family}
	objects which are stored within objects created by modelling
	functions (e.g., \code{glm}).}
	\item{\dots}{further arguments passed to methods.}
	}
	\description{
	Family objects provide a convenient way to specify the details of the
	models used by functions such as \code{\link{glm}}. See the
	documentation for \code{\link{glm}} for the details on how such model
	fitting takes place.
	}
	\details{
	\code{family} is a generic function with methods for classes
	\code{"glm"} and \code{"lm"} (the latter returning \code{gaussian()}).


	For the \code{binomial} and \code{quasibinomial} families the response
	can be specified in one of three ways:
	\enumerate{
	\item As a factor: \sQuote{success} is interpreted as the factor not
	having the first level (and hence usually as having the second level).
	\item As a numerical vector with values between \code{0} and
	\code{1}, interpreted as the proportion of successful cases (with the
	total number of cases given by the \code{weights}).
	\item As a two-column integer matrix: the first column gives the
	number of successes and the second the number of failures.
	}

	The \code{quasibinomial} and \code{quasipoisson} families differ from
	the \code{binomial} and \code{poisson} families only in that the
	dispersion parameter is not fixed at one, so they can model
	over-dispersion. For the binomial case see McCullagh and Nelder
	(1989, pp.\sspace{}124--8). Although they show that there is (under some
	restrictions) a model with
	variance proportional to mean as in the quasi-binomial model, note
	that \code{glm} does not compute maximum-likelihood estimates in that
	model. The behaviour of S is closer to the quasi- variants.
	}
	\note{
	The \code{link} and \code{variance} arguments have rather awkward
	semantics for back-compatibility. The recommended way is to supply
	them as quoted character strings, but they can also be supplied
	unquoted (as names or expressions). Additionally, they can be
	supplied as a length-one character vector giving the name of one of
	the options, or as a list (for \code{link}, of class
	\code{"link-glm"}). The restrictions apply only to links given as
	names: when given as a character string all the links known to
	\code{\link{make.link}} are accepted.

	This is potentially ambiguous: supplying \code{link = logit} could mean
	the unquoted name of a link or the value of object \code{logit}. It
	is interpreted if possible as the name of an allowed link, then
	as an object. (You can force the interpretation to always be the value of
	an object via \code{logit[1]}.)
	}
	\value{
	An object of class \code{"family"} (which has a concise print method).
	This is a list with elements
	\item{family}{character: the family name.}
	\item{link}{character: the link name.}
	\item{linkfun}{function: the link.}
	\item{linkinv}{function: the inverse of the link function.}
	\item{variance}{function: the variance as a function of the mean.}
	\item{dev.resids}{function giving the deviance for each observation
	as a function of \code{(y, mu, wt)}, used by the
	\code{\link[=residuals.glm]{residuals}} method when computing
	deviance residuals.}
	\item{aic}{function giving the AIC value if appropriate (but \code{NA}
	for the quasi- families). More precisely, this function
	returns \eqn{-2\ell + 2 s}{-2 ll + 2 s}, where \eqn{\ell}{ll} is the
	log-likelihood and \eqn{s} is the number of estimated scale
	parameters. Note that the penalty term for the location parameters
	(typically the \dQuote{regression coefficients}) is added elsewhere,
	e.g., in \code{\link{glm.fit}()} or \code{\link{AIC}()}; see the
	AIC example in \code{\link{glm}}.
	See \code{\link{logLik}} for the assumptions made about the
	dispersion parameter.}
	\item{mu.eta}{function: derivative of the inverse-link function
	with respect to the linear predictor. If the inverse-link
	function is \eqn{\mu = g^{-1}(\eta)}{mu = ginv(eta)} where
	\eqn{\eta}{eta} is the value of the linear predictor, then this
	function returns
	\eqn{d(g^{-1})/d\eta = d\mu/d\eta}{d(ginv(eta))/d(eta) = d(mu)/d(eta)}.}
	\item{initialize}{expression. This needs to set up whatever data
	objects are needed for the family as well as \code{n} (needed for
	AIC in the binomial family) and \code{mustart} (see \code{\link{glm}}).}
	\item{validmu}{logical function. Returns \code{TRUE} if a mean
	vector \code{mu} is within the domain of \code{variance}.}
	\item{valideta}{logical function. Returns \code{TRUE} if a linear
	predictor \code{eta} is within the domain of \code{linkinv}.}
	\item{simulate}{(optional) function \code{simulate(object, nsim)} to be
	called by the \code{"lm"} method of \code{\link{simulate}}. It will
	normally return a matrix with \code{nsim} columns and one row for
	each fitted value, but it can also return a list of length
	\code{nsim}. Clearly this will be missing for \sQuote{quasi-} families.}
	}
	\references{
	McCullagh, P. and Nelder, J. A. (1989)
	\emph{Generalized Linear Models.}
	London: Chapman and Hall.

	Dobson, A. J. (1983)
	\emph{An Introduction to Statistical Modelling.}
	London: Chapman and Hall.

	Cox, D. R. and Snell, E. J. (1981)
	\emph{Applied Statistics; Principles and Examples.}
	London: Chapman and Hall.

	Hastie, T. J. and Pregibon, D. (1992)
	\emph{Generalized linear models.}
	Chapter 6 of \emph{Statistical Models in S}
	eds J. M. Chambers and T. J. Hastie, Wadsworth & Brooks/Cole.
	}
	\author{
	The design was inspired by S functions of the same names described
	in Hastie & Pregibon (1992) (except \code{quasibinomial} and
	\code{quasipoisson}).
	}
	\seealso{
	\code{\link{glm}}, \code{\link{power}}, \code{\link{make.link}}.

	For binomial \emph{coefficients}, \code{\link{choose}};
	the binomial and negative binomial \emph{distributions},
	\code{\link{Binomial}}, and \code{\link{NegBinomial}}.
	}
	\examples{
	require(utils) # provides str()

	## Gaussian family with its default identity link
	nf <- gaussian()
	print(nf)
	str(nf)

	## Gamma family with its default inverse link
	gf <- Gamma()
	print(gf)
	str(gf)
	print(gf$linkinv)
	gf$variance(-3:4) # the variance function squares its argument: (.)^2

	## Binomial family with the default 'logit' link: some visual sanity checks.
	bi <- binomial()
	et <- seq(-10, 10, by = 1/8)
	plot(et, bi$mu.eta(et), type = "l")
	## mu.eta() is the derivative of linkinv(): overlay the finite-difference
	## slopes of linkinv(), drawn at the midpoints of consecutive 'et' values
	lines((et[-1] + et[-length(et)]) / 2,
	      diff(bi$linkinv(et)) / diff(et),
	      type = "l", lwd = 4, col = adjustcolor("red", 1/4))
	## ... which for this link is the logistic density:
	lines(et, dlogis(et), lwd = 3, col = adjustcolor("blue", 1/4))
	## numerical versions of the same checks
	m <- plogis(et)
	stopifnot(exprs = {
	  all.equal(bi$mu.eta(et), dlogis(et))
	  all.equal(bi$linkinv(et), m)
	  all.equal(bi$linkfun(m), qlogis(m)) # logit(.) == qlogis(.) !
	})

	## Data from example(glm) :
	d.AD <- data.frame(treatment = gl(3,3),
	outcome = gl(3,1,9),
	counts = c(18,17,15, 20,10,20, 25,13,12))
	## Poisson fit: the dispersion parameter is fixed at one
	glm.D93 <- glm(counts ~ outcome + treatment, d.AD, family = poisson())
	## Quasipoisson: compare with above / example(glm) :
	## same model formula and data, but the dispersion is not fixed at one
	glm.qD93 <- glm(counts ~ outcome + treatment, d.AD, family = quasipoisson())
	\donttest{
	## print the quasipoisson fit, then its analysis of deviance and summary
	glm.qD93
	anova (glm.qD93, test = "F")
	summary(glm.qD93)
	## for Poisson results (same as from 'glm.D93' !) use
	anova (glm.qD93, dispersion = 1, test = "Chisq")
	summary(glm.qD93, dispersion = 1)
	}


	## User-specified link: a logit model for p^days, i.e.
	##   eta = logit(mu^(1/days))   and   mu = plogis(eta)^days.
	## See Shaffer, T. 2004. Auk 121(2): 526-540.
	logexp <- function(days = 1)
	{
	    ## label stored as the 'name' component of the link object
	    label <- paste0("logexp(", days, ")")
	    lnk <- list(
	        linkfun  = function(mu) qlogis(mu^(1/days)),
	        linkinv  = function(eta) plogis(eta)^days,
	        ## chain rule: d mu/d eta = days * p^(days-1) * dp/d eta, where
	        ## dp/d eta is the mu.eta of the default (logit) binomial link
	        mu.eta   = function(eta) {
	            p <- plogis(eta)
	            days * p^(days-1) * binomial()$mu.eta(eta)
	        },
	        ## every value of the linear predictor is accepted
	        valideta = function(eta) TRUE,
	        name     = label)
	    class(lnk) <- "link-glm"
	    lnk
	}
	## build a binomial family around the custom link with days = 3 and print it
	(bil3 <- binomial(logexp(3)))
	\dontshow{stopifnot(length(bil3$mu.eta(as.double(0:5))) == 6)}
	## in practice this would be used with a vector of 'days', in
	## which case use an offset of 0 in the corresponding formula
	## to get the null deviance right.

	## Binomial with identity link: often not a good idea, as both
	## computationally and conceptually difficult:
	## the link can be given as a character string ...
	binomial(link = "identity") ## is exactly the same as
	## ... or as the "link-glm" object returned by make.link()
	binomial(link = make.link("identity"))



	## tests of quasi
	## simulated data: Poisson counts whose mean is exp(1+x)
	x <- rnorm(100)
	y <- rpois(100, exp(1+x))
	## quasi family with variance function "mu" and log link
	glm(y ~ x, family = quasi(variance = "mu", link = "log"))
	# which is the same as
	glm(y ~ x, family = poisson)
	## other variance functions accepted by quasi()
	glm(y ~ x, family = quasi(variance = "mu^2", link = "log"))
	\dontrun{glm(y ~ x, family = quasi(variance = "mu^3", link = "log")) # fails}
	## binary response with success probability plogis(x)
	y <- rbinom(100, 1, plogis(x))
	# need to set a starting value for the next fit
	glm(y ~ x, family = quasi(variance = "mu(1-mu)", link = "logit"), start = c(0,1))
	}
	\keyword{models}