# File src/library/stats/R/ansari.test.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2015 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
ansari.test <- function(x, ...) UseMethod("ansari.test")
ansari.test.default <-
function(x, y, alternative = c("two.sided", "less", "greater"),
exact = NULL, conf.int = FALSE, conf.level = 0.95, ...)
{
alternative <- match.arg(alternative)
if(conf.int) {
if(!((length(conf.level) == 1L)
&& is.finite(conf.level)
&& (conf.level > 0)
&& (conf.level < 1)))
stop("'conf.level' must be a single number between 0 and 1")
}
DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
x <- x[complete.cases(x)]
y <- y[complete.cases(y)]
m <- as.integer(length(x))
if(is.na(m) || m < 1L)
stop("not enough 'x' observations")
n <- as.integer(length(y))
if(is.na(n) || n < 1L)
stop("not enough 'y' observations")
N <- m + n
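## Ansari-Bradley statistic: rank the combined sample and sum the
## scores min(rank, N - rank + 1) over the x observations.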
r <- rank(c(x, y))
STATISTIC <- sum(pmin(r, N - r + 1)[seq_along(x)])
TIES <- (length(r) != length(unique(r)))
if(is.null(exact))
exact <- ((m < 50L) && (n < 50L))
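## With both samples small and no ties, use the exact null
## distribution of AB (computed in C via C_pAnsari).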
if(exact && !TIES) {
pansari <- function(q, m, n) .Call(C_pAnsari, q, m, n)
PVAL <-
switch(alternative,
two.sided = {
if (STATISTIC > ((m + 1)^2 %/% 4
+ ((m * n) %/% 2) / 2))
p <- 1 - pansari(STATISTIC - 1, m, n)
else
p <- pansari(STATISTIC, m, n)
min(2 * p, 1)
},
less = 1 - pansari(STATISTIC - 1, m, n),
greater = pansari(STATISTIC, m, n))
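## Exact confidence interval for the ratio of scales: invert the
## test over the candidate ratios sigma = x[i]/y[j].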
if (conf.int) {
qansari <- function(p, m, n) .Call(C_qAnsari, p, m, n)
alpha <- 1 - conf.level
x <- sort(x)
y <- sort(y)
ab <- function(sig) {
rab <- rank(c(x/sig, y))
sum(pmin(rab, N - rab + 1)[seq_along(x)])
}
ratio <- outer(x, y, "/")
aratio <- ratio[ratio >= 0]
sigma <- sort(aratio)
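## cci() maps the exact alpha/2 and 1 - alpha/2 quantiles of AB onto
## the step function absigma to obtain the confidence limits.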
cci <- function(alpha) {
u <- absigma - qansari(alpha/2, m, n)
l <- absigma - qansari(1 - alpha/2, m, n)
## Check if the statistic exceeds both quantiles first.
uci <- NULL
lci <- NULL
if(length(u[u >= 0]) == 0L || length(l[l > 0]) == 0L) {
warning("samples differ in location: cannot compute confidence set, returning NA")
return(c(NA, NA))
}
if (is.null(uci)) {
u[u < 0] <- NA
uci <- min(sigma[which(u == min(u, na.rm = TRUE))])
}
if (is.null(lci)) {
l[l <= 0] <- NA
lci <- max(sigma[which(l == min(l, na.rm = TRUE))])
}
## The statistic need not be monotone in sigma: check for this
## and interchange the quantiles if necessary.
if (uci > lci) {
l <- absigma - qansari(alpha/2, m, n)
u <- absigma - qansari(1 - alpha/2, m, n)
u[u < 0] <- NA
uci <- min(sigma[which(u == min(u, na.rm = TRUE))])
l[l <= 0] <- NA
lci <- max(sigma[which(l == min(l, na.rm = TRUE))])
}
c(uci, lci)
}
cint <- if(length(sigma) < 1L) {
warning("cannot compute confidence set, returning NA")
c(NA, NA)
}
else {
## Compute the statistic at every candidate ratio directly:
## computing only the step points is not faster.
absigma <-
sapply(sigma + c(diff(sigma)/2,
sigma[length(sigma)]*1.01), ab)
switch(alternative, two.sided = cci(alpha),
greater = c(cci(alpha*2)[1L], Inf),
less = c(0, cci(alpha*2)[2L]))
}
attr(cint, "conf.level") <- conf.level
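## Point estimate: the scale ratio at which AB crosses its null
## median (the 0.5 quantile), taken as the midpoint of the
## crossing interval.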
u <- absigma - qansari(0.5, m, n)
sgr <- sigma[u <= 0]
if (length(sgr) == 0L) sgr <- NA
else sgr <- max(sgr)
sle <- sigma[u > 0]
if (length(sle) == 0L) sle <- NA
else sle <- min(sle)
ESTIMATE <- mean(c(sle, sgr))
}
}
else {
EVEN <- ((N %% 2L) == 0L)
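## Normal approximation: normalize() centres AB at its exact null
## mean and scales by its null standard deviation; with ties the
## variance is based on the multiplicities of the tied scores
## (obtained via rle).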
normalize <- function(s, r, TIES, m=length(x), n=length(y)) {
z <- if(EVEN)
s - m * (N + 2)/4
else
s - m * (N + 1)^2/(4 * N)
if (!TIES) {
SIGMA <- if(EVEN)
sqrt((m * n * (N + 2) * (N - 2))/(48 * (N - 1)))
else
sqrt((m * n * (N + 1) * (3 + N^2))/(48 * N^2))
}
else {
r <- rle(sort(pmin(r, N - r + 1)))
SIGMA <- if(EVEN)
sqrt(m * n
* (16 * sum(r$lengths * r$values^2) - N * (N + 2)^2)
/ (16 * N * (N - 1)))
else
sqrt(m * n
* (16 * N * sum(r$lengths * r$values^2) - (N + 1)^4)
/ (16 * N^2 * (N - 1)))
}
z / SIGMA
}
p <- pnorm(normalize(STATISTIC, r, TIES))
PVAL <- switch(alternative,
two.sided = 2 * min(p, 1 - p),
less = 1 - p,
greater = p)
if(conf.int && !exact) {
alpha <- 1 - conf.level
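## ab2() gives the standardized statistic at scale ratio sig minus
## the normal quantile zq; its roots in sig (found with uniroot
## below) are the asymptotic confidence limits.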
ab2 <- function(sig, zq) {
r <- rank(c(x / sig, y))
s <- sum(pmin(r, N - r + 1)[seq_along(x)])
TIES <- (length(r) != length(unique(r)))
normalize(s, r, TIES, length(x), length(y)) - zq
}
## Use uniroot here.
## Compute the range of sigma first.
srangepos <- NULL
srangeneg <- NULL
if (length(x[x > 0]) && length(y[y > 0]))
srangepos <-
c(min(x[x>0], na.rm=TRUE)/max(y[y>0], na.rm=TRUE),
max(x[x>0], na.rm=TRUE)/min(y[y>0], na.rm=TRUE))
if (length(x[x <= 0]) && length(y[y < 0]))
srangeneg <-
c(min(x[x<=0], na.rm=TRUE)/max(y[y<0], na.rm=TRUE),
max(x[x<=0], na.rm=TRUE)/min(y[y<0], na.rm=TRUE))
if (any(is.infinite(c(srangepos, srangeneg)))) {
warning("cannot compute asymptotic confidence set or estimator")
conf.int <- FALSE
} else {
ccia <- function(alpha) {
## Check if the statistic exceeds both quantiles
## first.
statu <- ab2(srange[1L], zq=qnorm(alpha/2))
statl <- ab2(srange[2L], zq=qnorm(alpha/2, lower.tail=FALSE))
if (statu > 0 || statl < 0) {
warning("samples differ in location: cannot compute confidence set, returning NA")
return(c(NA, NA))
}
u <- uniroot(ab2, srange, tol=1e-4,
zq=qnorm(alpha/2))$root
l <- uniroot(ab2, srange, tol=1e-4,
zq=qnorm(alpha/2, lower.tail=FALSE))$root
## The statistic need not be monotone in sigma, so simply
## sort the two roots.
sort(c(u, l))
}
srange <- range(c(srangepos, srangeneg), na.rm=FALSE)
cint <- switch(alternative,
two.sided = ccia(alpha),
greater = c(ccia(alpha*2)[1L], Inf),
less = c(0, ccia(alpha*2)[2L]) )
attr(cint, "conf.level") <- conf.level
## Check if the statistic exceeds both quantiles first.
statu <- ab2(srange[1L], zq=0)
statl <- ab2(srange[2L], zq=0)
if (statu > 0 || statl < 0) {
ESTIMATE <- NA
warning("cannot compute estimate, returning NA")
} else
ESTIMATE <- uniroot(ab2, srange, tol=1e-4, zq=0)$root
}
}
if(exact && TIES) {
warning("cannot compute exact p-value with ties")
if(conf.int)
warning("cannot compute exact confidence intervals with ties")
}
}
names(STATISTIC) <- "AB"
RVAL <- list(statistic = STATISTIC,
p.value = PVAL,
null.value = c("ratio of scales" = 1),
alternative = alternative,
method = "Ansari-Bradley test",
data.name = DNAME)
if(conf.int)
RVAL <- c(RVAL,
list(conf.int = cint,
estimate = c("ratio of scales" = ESTIMATE)))
class(RVAL) <- "htest"
return(RVAL)
}
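## Illustrative calls (examples only, assuming two numeric samples or a
## data frame with a two-level grouping factor):
##   ansari.test(rnorm(20), rnorm(20, sd = 2), conf.int = TRUE)
##   ansari.test(extra ~ group, data = sleep)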
ansari.test.formula <-
function(formula, data, subset, na.action, ...)
{
if(missing(formula)
|| (length(formula) != 3L)
|| (length(attr(terms(formula[-2L]), "term.labels")) != 1L))
stop("'formula' missing or incorrect")
m <- match.call(expand.dots = FALSE)
if(is.matrix(eval(m$data, parent.frame())))
m$data <- as.data.frame(data)
## need stats:: for non-standard evaluation
m[[1L]] <- quote(stats::model.frame)
m$... <- NULL
mf <- eval(m, parent.frame())
DNAME <- paste(names(mf), collapse = " by ")
names(mf) <- NULL
response <- attr(attr(mf, "terms"), "response")
g <- factor(mf[[-response]])
if(nlevels(g) != 2L)
stop("grouping factor must have exactly 2 levels")
DATA <- setNames(split(mf[[response]], g), c("x", "y"))
y <- do.call("ansari.test", c(DATA, list(...)))
y$data.name <- DNAME
y
}