# File src/library/stats/R/ansari.test.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2015 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
ansari.test <- function(x, ...) UseMethod("ansari.test")
ansari.test.default <-
function(x, y, alternative = c("two.sided", "less", "greater"),
exact = NULL, conf.int = FALSE, conf.level = 0.95, ...)
{
alternative <- match.arg(alternative)
if(conf.int) {
if(!((length(conf.level) == 1L)
&& is.finite(conf.level)
&& (conf.level > 0)
&& (conf.level < 1)))
stop("'conf.level' must be a single number between 0 and 1")
}
DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
x <- x[complete.cases(x)]
y <- y[complete.cases(y)]
m <- as.integer(length(x))
if(is.na(m) || m < 1L)
stop("not enough 'x' observations")
n <- as.integer(length(y))
if(is.na(n) || n < 1L)
stop("not enough 'y' observations")
N <- m + n
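## Ansari-Bradley statistic: rank the combined sample and sum the
## scores min(rank, N - rank + 1) over the x observations.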
r <- rank(c(x, y))
STATISTIC <- sum(pmin(r, N - r + 1)[seq_along(x)])
TIES <- (length(r) != length(unique(r)))
if(is.null(exact))
exact <- ((m < 50L) && (n < 50L))
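## With both samples small and no ties, use the exact null
## distribution of AB (computed in C via C_pAnsari).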
if(exact && !TIES) {
pansari <- function(q, m, n) .Call(C_pAnsari, q, m, n)
PVAL <-
switch(alternative,
two.sided = {
if (STATISTIC > ((m + 1)^2 %/% 4
+ ((m * n) %/% 2) / 2))
p <- 1 - pansari(STATISTIC - 1, m, n)
else
p <- pansari(STATISTIC, m, n)
min(2 * p, 1)
},
less = 1 - pansari(STATISTIC - 1, m, n),
greater = pansari(STATISTIC, m, n))
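## Exact confidence interval for the ratio of scales: invert the
## test over the candidate ratios sigma = x[i]/y[j].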
if (conf.int) {
qansari <- function(p, m, n) .Call(C_qAnsari, p, m, n)
alpha <- 1 - conf.level
x <- sort(x)
y <- sort(y)
ab <- function(sig) {
rab <- rank(c(x/sig, y))
sum(pmin(rab, N - rab + 1)[seq_along(x)])
}
ratio <- outer(x, y, "/")
aratio <- ratio[ratio >= 0]
sigma <- sort(aratio)
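## cci() maps the exact alpha/2 and 1 - alpha/2 quantiles of AB onto
## the step function absigma to obtain the confidence limits.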
cci <- function(alpha) {
u <- absigma - qansari(alpha/2, m, n)
l <- absigma - qansari(1 - alpha/2, m, n)
## Check if the statistic exceeds both quantiles first.
uci <- NULL
lci <- NULL
if(length(u[u >= 0]) == 0L || length(l[l > 0]) == 0L) {
warning("samples differ in location: cannot compute confidence set, returning NA")
return(c(NA, NA))
}
if (is.null(uci)) {
u[u < 0] <- NA
uci <- min(sigma[which(u == min(u, na.rm = TRUE))])
}
if (is.null(lci)) {
l[l <= 0] <- NA
lci <- max(sigma[which(l == min(l, na.rm = TRUE))])
}
## The statistic need not be monotone in sigma: check for this
## and interchange the quantiles if necessary.
if (uci > lci) {
l <- absigma - qansari(alpha/2, m, n)
u <- absigma - qansari(1 - alpha/2, m, n)
u[u < 0] <- NA
uci <- min(sigma[which(u == min(u, na.rm = TRUE))])
l[l <= 0] <- NA
lci <- max(sigma[which(l == min(l, na.rm = TRUE))])
}
c(uci, lci)
}
cint <- if(length(sigma) < 1L) {
warning("cannot compute confidence set, returning NA")
c(NA, NA)
}
else {
## Compute the statistic at every candidate ratio directly:
## computing only the step points is not faster.
absigma <-
sapply(sigma + c(diff(sigma)/2,
sigma[length(sigma)]*1.01), ab)
switch(alternative, two.sided = cci(alpha),
greater = c(cci(alpha*2)[1L], Inf),
less = c(0, cci(alpha*2)[2L]))
}
attr(cint, "conf.level") <- conf.level
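## Point estimate: the scale ratio at which AB crosses its null
## median (the 0.5 quantile), taken as the midpoint of the
## crossing interval.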
u <- absigma - qansari(0.5, m, n)
sgr <- sigma[u <= 0]
if (length(sgr) == 0L) sgr <- NA
else sgr <- max(sgr)
sle <- sigma[u > 0]
if (length(sle) == 0L) sle <- NA
else sle <- min(sle)
ESTIMATE <- mean(c(sle, sgr))
}
}
else {
EVEN <- ((N %% 2L) == 0L)
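## Normal approximation: normalize() centres AB at its exact null
## mean and scales by its null standard deviation; with ties the
## variance is based on the multiplicities of the tied scores
## (obtained via rle).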
normalize <- function(s, r, TIES, m=length(x), n=length(y)) {
z <- if(EVEN)
s - m * (N + 2)/4
else
s - m * (N + 1)^2/(4 * N)
if (!TIES) {
SIGMA <- if(EVEN)
sqrt((m * n * (N + 2) * (N - 2))/(48 * (N - 1)))
else
sqrt((m * n * (N + 1) * (3 + N^2))/(48 * N^2))
}
else {
r <- rle(sort(pmin(r, N - r + 1)))
SIGMA <- if(EVEN)
sqrt(m * n
* (16 * sum(r$lengths * r$values^2) - N * (N + 2)^2)
/ (16 * N * (N - 1)))
else
sqrt(m * n
* (16 * N * sum(r$lengths * r$values^2) - (N + 1)^4)
/ (16 * N^2 * (N - 1)))
}
z / SIGMA
}
p <- pnorm(normalize(STATISTIC, r, TIES))
PVAL <- switch(alternative,
two.sided = 2 * min(p, 1 - p),
less = 1 - p,
greater = p)
if(conf.int && !exact) {
alpha <- 1 - conf.level
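## ab2() gives the standardized statistic at scale ratio sig minus
## the normal quantile zq; its roots in sig (found with uniroot
## below) are the asymptotic confidence limits.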
ab2 <- function(sig, zq) {
r <- rank(c(x / sig, y))
s <- sum(pmin(r, N - r + 1)[seq_along(x)])
TIES <- (length(r) != length(unique(r)))
normalize(s, r, TIES, length(x), length(y)) - zq
}
## Use uniroot here.
## Compute the range of sigma first.
srangepos <- NULL
srangeneg <- NULL
if (length(x[x > 0]) && length(y[y > 0]))
srangepos <-
c(min(x[x>0], na.rm=TRUE)/max(y[y>0], na.rm=TRUE),
max(x[x>0], na.rm=TRUE)/min(y[y>0], na.rm=TRUE))
if (length(x[x <= 0]) && length(y[y < 0]))
srangeneg <-
c(min(x[x<=0], na.rm=TRUE)/max(y[y<0], na.rm=TRUE),
max(x[x<=0], na.rm=TRUE)/min(y[y<0], na.rm=TRUE))
if (any(is.infinite(c(srangepos, srangeneg)))) {
warning("cannot compute asymptotic confidence set or estimator")
conf.int <- FALSE
} else {
ccia <- function(alpha) {
## Check if the statistic exceeds both quantiles
## first.
statu <- ab2(srange[1L], zq=qnorm(alpha/2))
statl <- ab2(srange[2L], zq=qnorm(alpha/2, lower.tail=FALSE))
if (statu > 0 || statl < 0) {
warning("samples differ in location: cannot compute confidence set, returning NA")
return(c(NA, NA))
}
u <- uniroot(ab2, srange, tol=1e-4,
zq=qnorm(alpha/2))$root
l <- uniroot(ab2, srange, tol=1e-4,
zq=qnorm(alpha/2, lower.tail=FALSE))$root
## The statistic need not be monotone in sigma, so simply
## sort the two roots.
sort(c(u, l))
}
srange <- range(c(srangepos, srangeneg), na.rm=FALSE)
cint <- switch(alternative,
two.sided = ccia(alpha),
greater = c(ccia(alpha*2)[1L], Inf),
less = c(0, ccia(alpha*2)[2L]) )
attr(cint, "conf.level") <- conf.level
## Check if the statistic exceeds both quantiles first.
statu <- ab2(srange[1L], zq=0)
statl <- ab2(srange[2L], zq=0)
if (statu > 0 || statl < 0) {
ESTIMATE <- NA
warning("cannot compute estimate, returning NA")
} else
ESTIMATE <- uniroot(ab2, srange, tol=1e-4, zq=0)$root
}
}
if(exact && TIES) {
warning("cannot compute exact p-value with ties")
if(conf.int)
warning("cannot compute exact confidence intervals with ties")
}
}
names(STATISTIC) <- "AB"
RVAL <- list(statistic = STATISTIC,
p.value = PVAL,
null.value = c("ratio of scales" = 1),
alternative = alternative,
method = "Ansari-Bradley test",
data.name = DNAME)
if(conf.int)
RVAL <- c(RVAL,
list(conf.int = cint,
estimate = c("ratio of scales" = ESTIMATE)))
class(RVAL) <- "htest"
return(RVAL)
}
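## Illustrative calls (examples only, assuming two numeric samples or a
## data frame with a two-level grouping factor):
##   ansari.test(rnorm(20), rnorm(20, sd = 2), conf.int = TRUE)
##   ansari.test(extra ~ group, data = sleep)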
ansari.test.formula <-
function(formula, data, subset, na.action, ...)
{
if(missing(formula)
|| (length(formula) != 3L)
|| (length(attr(terms(formula[-2L]), "term.labels")) != 1L))
stop("'formula' missing or incorrect")
m <- match.call(expand.dots = FALSE)
if(is.matrix(eval(m$data, parent.frame())))
m$data <- as.data.frame(data)
## need stats:: for non-standard evaluation
m[[1L]] <- quote(stats::model.frame)
m$... <- NULL
mf <- eval(m, parent.frame())
DNAME <- paste(names(mf), collapse = " by ")
names(mf) <- NULL
response <- attr(attr(mf, "terms"), "response")
g <- factor(mf[[-response]])
if(nlevels(g) != 2L)
stop("grouping factor must have exactly 2 levels")
DATA <- setNames(split(mf[[response]], g), c("x", "y"))
y <- do.call("ansari.test", c(DATA, list(...)))
y$data.name <- DNAME
y
}