blob: 24c5b8f8588507843bda5bd530876cbe3b6135b2 [file] [log] [blame]
# File src/library/stats/R/bandwidths.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1994-2001 W. N. Venables and B. D. Ripley
# Copyright (C) 2001-2017 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
#==== bandwidth selection rules ====
bw.nrd0 <- function (x)
{
if(length(x) < 2L) stop("need at least 2 data points")
hi <- sd(x)
if(!(lo <- min(hi, IQR(x)/1.34)))# qnorm(.75) - qnorm(.25) = 1.34898
(lo <- hi) || (lo <- abs(x[1L])) || (lo <- 1.)
0.9 * lo * length(x)^(-0.2)
}
bw.nrd <- function (x)
{
if(length(x) < 2L) stop("need at least 2 data points")
r <- quantile(x, c(0.25, 0.75))
h <- (r[2L] - r[1L])/1.34
1.06 * min(sqrt(var(x)), h) * length(x)^(-1/5)
}
## switch-over at n > nb/2 found by empirical timing.
bw_pair_cnts <- function(x, nb, binned)
{
if(binned) {
r <- range(x)
d <- diff(r) * 1.01/nb
## Emulate exactly how the C code does its binning.
xx <- trunc(abs(x)/d) *sign(x)
xx <- xx - min(xx) + 1
xxx <- tabulate(xx, nb)
list(d, .Call(C_bw_den_binned, xxx))
} else .Call(C_bw_den, nb, x)
}
bw.SJ <- function(x, nb = 1000L, lower = 0.1*hmax, upper = hmax,
method = c("ste", "dpi"), tol = 0.1*lower)
{
if((n <- length(x)) < 2L) stop("need at least 2 data points")
n <- as.integer(n)
if (is.na(n)) stop("invalid length(x)")
if(!is.numeric(x)) stop("invalid 'x'")
nb <- as.integer(nb)
if (is.na(nb) || nb <= 0L) stop("invalid 'nb'")
storage.mode(x) <- "double"
method <- match.arg(method)
SDh <- function(h) .Call(C_bw_phi4, n, d, cnt, h)
TDh <- function(h) .Call(C_bw_phi6, n, d, cnt, h)
Z <- bw_pair_cnts(x, nb, n > nb/2)
d <- Z[[1L]]; cnt <- Z[[2L]]
scale <- min(sd(x), IQR(x)/1.349)
a <- 1.24 * scale * n^(-1/7)
b <- 1.23 * scale * n^(-1/9)
c1 <- 1/(2*sqrt(pi)*n)
TD <- -TDh(b)
if(!is.finite(TD) || TD <= 0)
stop("sample is too sparse to find TD", domain = NA)
if(method == "dpi")
res <- (c1/SDh((2.394/(n * TD))^(1/7)))^(1/5)
else {
if(bnd.Miss <- missing(lower) || missing(upper)) {
## only used for lower & upper defaults :
hmax <- 1.144 * scale * n^(-1/5)
}
alph2 <- 1.357*(SDh(a)/TD)^(1/7)
if(!is.finite(alph2))
stop("sample is too sparse to find alph2", domain = NA)
itry <- 1L
fSD <- function(h) ( c1 / SDh(alph2 * h^(5/7)) )^(1/5) - h
while (fSD(lower) * fSD(upper) > 0) {
if(itry > 99L || !bnd.Miss) # 1.2 ^ 99 = 69'014'979 .. enough
stop("no solution in the specified range of bandwidths")
if(itry %% 2) upper <- upper * 1.2 else lower <- lower / 1.2
if(getOption("verbose"))
message(gettextf("increasing bw.SJ() search interval (%d) to [%.4g,%.4g]",
itry, lower, upper), domain = NA)
itry <- itry + 1L
}
res <- uniroot(fSD, c(lower, upper), tol = tol)$root
}
res
}
bw.ucv <- function(x, nb = 1000L, lower = 0.1*hmax, upper = hmax,
tol = 0.1*lower)
{
if((n <- length(x)) < 2L) stop("need at least 2 data points")
n <- as.integer(n)
if (is.na(n)) stop("invalid length(x)")
if(!is.numeric(x)) stop("invalid 'x'")
nb <- as.integer(nb)
if (is.na(nb) || nb <= 0L) stop("invalid 'nb'")
storage.mode(x) <- "double"
hmax <- 1.144 * sqrt(var(x)) * n^(-1/5)
Z <- bw_pair_cnts(x, nb, n > nb/2)
d <- Z[[1L]]; cnt <- Z[[2L]]
fucv <- function(h) .Call(C_bw_ucv, n, d, cnt, h)
h <- optimize(fucv, c(lower, upper), tol = tol)$minimum
if(h < lower+tol | h > upper-tol)
warning("minimum occurred at one end of the range")
h
}
bw.bcv <- function(x, nb = 1000L, lower = 0.1*hmax, upper = hmax,
tol = 0.1*lower)
{
if((n <- length(x)) < 2L) stop("need at least 2 data points")
n <- as.integer(n)
if (is.na(n)) stop("invalid length(x)")
if(!is.numeric(x)) stop("invalid 'x'")
nb <- as.integer(nb)
if (is.na(nb) || nb <= 0L) stop("invalid 'nb'")
storage.mode(x) <- "double"
hmax <- 1.144 * sqrt(var(x)) * n^(-1/5)
Z <- bw_pair_cnts(x, nb, n > nb/2)
d <- Z[[1L]]; cnt <- Z[[2L]]
fbcv <- function(h) .Call(C_bw_bcv, n, d, cnt, h)
h <- optimize(fbcv, c(lower, upper), tol = tol)$minimum
if(h < lower+tol | h > upper-tol)
warning("minimum occurred at one end of the range")
h
}