% blob: a1acb29830246350fbfb49fe1ab4fd80d48c4f54 [file] [log] [blame]
% File src/library/stats/man/Smirnov.Rd
% Part of the R package, https://www.R-project.org
% Copyright 2022 R Core Team
% Distributed under GPL 2 or later
\name{Smirnov}
\alias{Smirnov}
\alias{psmirnov}
\alias{qsmirnov}
\alias{rsmirnov}
\title{Distribution of the Smirnov Statistic}
\description{
Distribution function, quantile function and random generation for the
distribution of the Smirnov statistic.}
\usage{
psmirnov(q, sizes, z = NULL, two.sided = TRUE,
exact = TRUE, simulate = FALSE, B = 2000,
lower.tail = TRUE, log.p = FALSE)
qsmirnov(p, sizes, z = NULL, two.sided = TRUE,
exact = TRUE, simulate = FALSE, B = 2000)
rsmirnov(n, sizes, z = NULL, two.sided = TRUE)
}
\arguments{
\item{q}{a numeric vector of quantiles.}
\item{p}{a numeric vector of probabilities.}
\item{sizes}{an integer vector of length two giving the sample sizes.}
\item{z}{a numeric vector of the pooled data values in both samples
when the exact conditional distribution of the Smirnov statistic
given the data shall be computed.}
\item{two.sided}{a logical indicating whether absolute (\code{TRUE}) or
raw differences of frequencies define the test statistic.}
\item{exact}{\code{NULL} or a logical indicating whether the exact
(conditional on the pooled data values in \code{z}) distribution
or the asymptotic distribution should be used.}
\item{simulate}{a logical indicating whether to compute the
distribution function by Monte Carlo simulation.}
\item{B}{an integer specifying the number of replicates used in the
Monte Carlo simulation.}
\item{lower.tail}{a logical, if \code{TRUE} (default), probabilities are
\eqn{P[D < q]}, otherwise, \eqn{P[D \ge q]}.}
\item{log.p}{a logical, if \code{TRUE}, probabilities are given
as log-probabilities (default \code{FALSE}).}
\item{n}{an integer giving the number of observations.}
}
\value{
\code{psmirnov} gives the distribution function,
\code{qsmirnov} gives the quantile function, and
\code{rsmirnov} generates random deviates.
}
\details{
For samples \eqn{x} and \eqn{y} with respective sizes \eqn{n_x} and
\eqn{n_y} and empirical cumulative distribution functions
\eqn{F_{x,n_x}} and \eqn{F_{y,n_y}}, the Smirnov statistic is
\deqn{D = \sup_c | F_{x,n_x}(c) - F_{y,n_y}(c) |}
in the two-sided case and
\deqn{D = \sup_c ( F_{x,n_x}(c) - F_{y,n_y}(c) )}
otherwise.
These statistics are used in the Smirnov test of the null that \eqn{x}
and \eqn{y} were drawn from the same distribution, see
\code{\link{ks.test}}.
If the underlying common distribution function \eqn{F} is continuous,
the distribution of the test statistics does not depend on \eqn{F},
and has a simple asymptotic approximation. For arbitrary \eqn{F}, one
can compute the conditional distribution given the pooled data values
\eqn{z} of \eqn{x} and \eqn{y}, either exactly (feasible provided that
the product \eqn{n_x n_y} of the sample sizes is ``small enough'') or
approximately by Monte Carlo simulation. If the pooled data values \eqn{z}
are not specified, a pooled sample without ties is assumed.
}
\seealso{
\code{\link{ks.test}} for references on the algorithms used for
computing exact distributions.
}