blob: 22bceeeb60e14e37e723ba9231a89d3f0124eb6d [file] [log] [blame]
/*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 1997--2018 The R Core Team
* Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
* Copyright (C) 2003--2018 The R Foundation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, a copy is available at
* https://www.R-project.org/Licenses/
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <Defn.h>
#include <R_ext/Itermacros.h>
#include <R_ext/Random.h>
#include <R_ext/RS.h> /* for Calloc() */
#include <Rmath.h> /* for rxxx functions */
#include <errno.h>
/* Code down to do_random3 (inclusive) can be removed once the byte
compiler knows how to optimize to .External rather than .Internal */
#include <Internal.h>
static void NORET invalid(SEXP call)
{
error(_("invalid arguments"));
}
static Rboolean
random1(double (*f) (double), double *a, R_xlen_t na, double *x, R_xlen_t n)
{
Rboolean naflag = FALSE;
double ai;
R_xlen_t i, ia;
errno = 0;
MOD_ITERATE1(n, na, i, ia, {
ai = a[ia];
x[i] = f(ai);
if (ISNAN(x[i])) naflag = TRUE;
});
return(naflag);
}
#define RAND1(num,name) \
case num: \
naflag = random1(name, REAL(a), na, REAL(x), n); \
break
/* "do_random1" - random sampling from 1 parameter families. */
/* See switch below for distributions. */
SEXP attribute_hidden do_random1(SEXP call, SEXP op, SEXP args, SEXP rho)
{
SEXP x, a;
R_xlen_t i, n, na;
checkArity(op, args);
if (!isVector(CAR(args)) || !isNumeric(CADR(args)))
invalid(call);
if (XLENGTH(CAR(args)) == 1) {
#ifdef LONG_VECTOR_SUPPORT
double dn = asReal(CAR(args));
if (ISNAN(dn) || dn < 0 || dn > R_XLEN_T_MAX)
invalid(call);
n = (R_xlen_t) dn;
#else
n = asInteger(CAR(args));
if (n == NA_INTEGER || n < 0)
invalid(call);
#endif
}
else n = XLENGTH(CAR(args));
PROTECT(x = allocVector(REALSXP, n));
if (n == 0) {
UNPROTECT(1);
return(x);
}
na = XLENGTH(CADR(args));
if (na < 1) {
for (i = 0; i < n; i++)
REAL(x)[i] = NA_REAL;
warning(_("NAs produced"));
}
else {
Rboolean naflag = FALSE;
PROTECT(a = coerceVector(CADR(args), REALSXP));
GetRNGstate();
switch (PRIMVAL(op)) {
RAND1(0, rchisq);
RAND1(1, rexp);
RAND1(2, rgeom);
RAND1(3, rpois);
RAND1(4, rt);
RAND1(5, rsignrank);
default:
error("internal error in do_random1");
}
if (naflag)
warning(_("NAs produced"));
PutRNGstate();
UNPROTECT(1);
}
UNPROTECT(1);
return x;
}
static Rboolean random2(double (*f) (double, double),
double *a, R_xlen_t na, double *b, R_xlen_t nb,
double *x, R_xlen_t n)
{
double ai, bi;
R_xlen_t i, ia, ib;
Rboolean naflag = FALSE;
errno = 0;
MOD_ITERATE2(n, na, nb, i, ia, ib, {
ai = a[ia];
bi = b[ib];
x[i] = f(ai, bi);
if (ISNAN(x[i])) naflag = TRUE;
});
return(naflag);
}
#define RAND2(num,name) \
case num: \
naflag = random2(name, REAL(a), na, REAL(b), nb, REAL(x), n); \
break
/* "do_random2" - random sampling from 2 parameter families. */
/* See switch below for distributions. */
SEXP attribute_hidden do_random2(SEXP call, SEXP op, SEXP args, SEXP rho)
{
SEXP x, a, b;
R_xlen_t i, n, na, nb;
checkArity(op, args);
if (!isVector(CAR(args)) ||
!isNumeric(CADR(args)) ||
!isNumeric(CADDR(args)))
invalid(call);
if (XLENGTH(CAR(args)) == 1) {
#ifdef LONG_VECTOR_SUPPORT
double dn = asReal(CAR(args));
if (ISNAN(dn) || dn < 0 || dn > R_XLEN_T_MAX)
invalid(call);
n = (R_xlen_t) dn;
#else
n = asInteger(CAR(args));
if (n == NA_INTEGER || n < 0)
invalid(call);
#endif
}
else n = XLENGTH(CAR(args));
PROTECT(x = allocVector(REALSXP, n));
if (n == 0) {
UNPROTECT(1);
return(x);
}
na = XLENGTH(CADR(args));
nb = XLENGTH(CADDR(args));
if (na < 1 || nb < 1) {
for (i = 0; i < n; i++)
REAL(x)[i] = NA_REAL;
warning(_("NAs produced"));
}
else {
Rboolean naflag = FALSE;
PROTECT(a = coerceVector(CADR(args), REALSXP));
PROTECT(b = coerceVector(CADDR(args), REALSXP));
GetRNGstate();
switch (PRIMVAL(op)) {
RAND2(0, rbeta);
RAND2(1, rbinom);
RAND2(2, rcauchy);
RAND2(3, rf);
RAND2(4, rgamma);
RAND2(5, rlnorm);
RAND2(6, rlogis);
RAND2(7, rnbinom);
RAND2(8, rnorm);
RAND2(9, runif);
RAND2(10, rweibull);
RAND2(11, rwilcox);
RAND2(12, rnchisq);
RAND2(13, rnbinom_mu);
default:
error("internal error in do_random2");
}
if (naflag)
warning(_("NAs produced"));
PutRNGstate();
UNPROTECT(2);
}
UNPROTECT(1);
return x;
}
static Rboolean
random3(double (*f) (double, double, double), double *a,
R_xlen_t na, double *b, R_xlen_t nb, double *c, R_xlen_t nc,
double *x, R_xlen_t n)
{
double ai, bi, ci;
R_xlen_t i, ia, ib, ic;
Rboolean naflag = FALSE;
errno = 0;
MOD_ITERATE3(n, na, nb, nc, i, ia, ib, ic, {
ai = a[ia];
bi = b[ib];
ci = c[ic];
x[i] = f(ai, bi, ci);
if (ISNAN(x[i])) naflag = TRUE;
});
return(naflag);
}
#define RAND3(num,name) \
case num: \
naflag = random3(name, REAL(a), na, REAL(b), nb, REAL(c), nc, REAL(x), n); \
break
/* "do_random3" - random sampling from 3 parameter families. */
/* See switch below for distributions. */
SEXP attribute_hidden do_random3(SEXP call, SEXP op, SEXP args, SEXP rho)
{
SEXP x, a, b, c;
R_xlen_t i, n, na, nb, nc;
checkArity(op, args);
if (!isVector(CAR(args))) invalid(call);
if (LENGTH(CAR(args)) == 1) {
#ifdef LONG_VECTOR_SUPPORT
double dn = asReal(CAR(args));
if (ISNAN(dn) || dn < 0 || dn > R_XLEN_T_MAX)
invalid(call);
n = (R_xlen_t) dn;
#else
n = asInteger(CAR(args));
if (n == NA_INTEGER || n < 0)
invalid(call);
#endif
}
else n = XLENGTH(CAR(args));
PROTECT(x = allocVector(REALSXP, n));
if (n == 0) {
UNPROTECT(1);
return(x);
}
args = CDR(args); a = CAR(args);
args = CDR(args); b = CAR(args);
args = CDR(args); c = CAR(args);
if (!isNumeric(a) || !isNumeric(b) || !isNumeric(c))
invalid(call);
na = XLENGTH(a);
nb = XLENGTH(b);
nc = XLENGTH(c);
if (na < 1 || nb < 1 || nc < 1) {
for (i = 0; i < n; i++)
REAL(x)[i] = NA_REAL;
warning(_("NAs produced"));
}
else {
Rboolean naflag = FALSE;
PROTECT(a = coerceVector(a, REALSXP));
PROTECT(b = coerceVector(b, REALSXP));
PROTECT(c = coerceVector(c, REALSXP));
GetRNGstate();
switch (PRIMVAL(op)) {
RAND3(0, rhyper);
default:
error("internal error in do_random3");
}
if (naflag)
warning(_("NAs produced"));
PutRNGstate();
UNPROTECT(3);
}
UNPROTECT(1);
return x;
}
/*
* Unequal Probability Sampling.
*
* Modelled after Fortran code provided by:
* E. S. Venkatraman <venkat@biosta.mskcc.org>
* but with significant modifications in the
* "with replacement" case.
*/
/* Unequal probability sampling; with-replacement case */
static void ProbSampleReplace(int n, double *p, int *perm, int nans, int *ans)
{
double rU;
int i, j;
int nm1 = n - 1;
/* record element identities */
for (i = 0; i < n; i++)
perm[i] = i + 1;
/* sort the probabilities into descending order */
revsort(p, perm, n);
/* compute cumulative probabilities */
for (i = 1 ; i < n; i++)
p[i] += p[i - 1];
/* compute the sample */
for (i = 0; i < nans; i++) {
rU = unif_rand();
for (j = 0; j < nm1; j++) {
if (rU <= p[j])
break;
}
ans[i] = perm[j];
}
}
/* A version using Walker's alias method, based on Alg 3.13B in
Ripley (1987).
*/
#define SMALL 10000
static void
walker_ProbSampleReplace(int n, double *p, int *a, int nans, int *ans)
{
double *q, rU;
int i, j, k;
int *HL, *H, *L;
/* Create the alias tables.
The idea is that for HL[0] ... L-1 label the entries with q < 1
and L ... H[n-1] label those >= 1.
By rounding error we could have q[i] < 1. or > 1. for all entries.
*/
if(n <= SMALL) {
R_CheckStack2(n *(sizeof(int) + sizeof(double)));
/* might do this repeatedly, so speed matters */
HL = (int *) alloca(n * sizeof(int));
q = (double *) alloca(n * sizeof(double));
} else {
/* Slow enough anyway not to risk overflow */
HL = Calloc(n, int);
q = Calloc(n, double);
}
H = HL - 1; L = HL + n;
for (i = 0; i < n; i++) {
q[i] = p[i] * n;
if (q[i] < 1.) *++H = i; else *--L = i;
}
if (H >= HL && L < HL + n) { /* So some q[i] are >= 1 and some < 1 */
for (k = 0; k < n - 1; k++) {
i = HL[k];
j = *L;
a[i] = j;
q[j] += q[i] - 1;
if (q[j] < 1.) L++;
if(L >= HL + n) break; /* now all are >= 1 */
}
}
for (i = 0; i < n; i++) q[i] += i;
/* generate sample */
Sampletype Sample_kind = R_sample_kind();
for (i = 0; i < nans; i++) {
if (Sample_kind == ROUNDING) {
rU = unif_rand() * n;
k = (int) rU;
}
else {
k = (int) R_unif_index(n);
rU = k + unif_rand();
}
ans[i] = (rU < q[k]) ? k+1 : a[k]+1;
}
if(n > SMALL) {
Free(HL);
Free(q);
}
}
/* Unequal probability sampling; without-replacement case */
static void ProbSampleNoReplace(int n, double *p, int *perm,
int nans, int *ans)
{
double rT, mass, totalmass;
int i, j, k, n1;
/* Record element identities */
for (i = 0; i < n; i++)
perm[i] = i + 1;
/* Sort probabilities into descending order */
/* Order element identities in parallel */
revsort(p, perm, n);
/* Compute the sample */
totalmass = 1;
for (i = 0, n1 = n-1; i < nans; i++, n1--) {
rT = totalmass * unif_rand();
mass = 0;
for (j = 0; j < n1; j++) {
mass += p[j];
if (rT <= mass)
break;
}
ans[i] = perm[j];
totalmass -= p[j];
for(k = j; k < n1; k++) {
p[k] = p[k + 1];
perm[k] = perm[k + 1];
}
}
}
static void FixupProb(double *p, int n, int require_k, Rboolean replace)
{
double sum = 0.0;
int npos = 0;
for (int i = 0; i < n; i++) {
if (!R_FINITE(p[i]))
error(_("NA in probability vector"));
if (p[i] < 0.0)
error(_("negative probability"));
if (p[i] > 0.0) {
npos++;
sum += p[i];
}
}
if (npos == 0 || (!replace && require_k > npos))
error(_("too few positive probabilities"));
for (int i = 0; i < n; i++) p[i] /= sum;
}
/* do_sample - probability sampling with/without replacement.
.Internal(sample(n, size, replace, prob))
*/
SEXP attribute_hidden do_sample(SEXP call, SEXP op, SEXP args, SEXP rho)
{
SEXP x, y, sn, sk, prob, sreplace;
checkArity(op, args);
sn = CAR(args); args = CDR(args);
sk = CAR(args); args = CDR(args); /* size */
sreplace = CAR(args); args = CDR(args);
if(length(sreplace) != 1)
error(_("invalid '%s' argument"), "replace");
int replace = asLogical(sreplace);
prob = CAR(args);
if (replace == NA_LOGICAL)
error(_("invalid '%s' argument"), "replace");
GetRNGstate();
if (!isNull(prob)) {
int n = asInteger(sn), k = asInteger(sk);
if (n == NA_INTEGER || n < 0 || (k > 0 && n == 0))
error(_("invalid first argument"));
if (k == NA_INTEGER || k < 0)
error(_("invalid '%s' argument"), "size");
if (!replace && k > n)
error(_("cannot take a sample larger than the population when 'replace = FALSE'"));
PROTECT(y = allocVector(INTSXP, k));
prob = coerceVector(prob, REALSXP);
if (MAYBE_REFERENCED(prob)) prob = duplicate(prob);
PROTECT(prob);
double *p = REAL(prob);
if (length(prob) != n)
error(_("incorrect number of probabilities"));
FixupProb(p, n, k, (Rboolean) replace);
PROTECT(x = allocVector(INTSXP, n));
if (replace) {
int i, nc = 0;
for (i = 0; i < n; i++) if(n * p[i] > 0.1) nc++;
if (nc > 200)
walker_ProbSampleReplace(n, p, INTEGER(x), k, INTEGER(y));
else
ProbSampleReplace(n, p, INTEGER(x), k, INTEGER(y));
} else
ProbSampleNoReplace(n, p, INTEGER(x), k, INTEGER(y));
UNPROTECT(2);
}
else { // uniform sampling
double dn = asReal(sn);
R_xlen_t k = asVecSize(sk);
if (!R_FINITE(dn) || dn < 0 || dn > 4.5e15 || (k > 0 && dn == 0))
error(_("invalid first argument"));
if (k < 0) error(_("invalid '%s' argument"), "size");
if (!replace && k > dn)
error(_("cannot take a sample larger than the population when 'replace = FALSE'"));
if (dn > INT_MAX || k > INT_MAX) {
PROTECT(y = allocVector(REALSXP, k));
if (replace) {
double *ry = REAL(y);
for (R_xlen_t i = 0; i < k; i++) ry[i] = R_unif_index(dn) + 1;
} else {
#ifdef LONG_VECTOR_SUPPORT
R_xlen_t n = (R_xlen_t) dn;
double *x = (double *)R_alloc(n, sizeof(double));
double *ry = REAL(y);
for (R_xlen_t i = 0; i < n; i++) x[i] = (double) i;
for (R_xlen_t i = 0; i < k; i++) {
R_xlen_t j = (R_xlen_t) R_unif_index(n);
ry[i] = x[j] + 1;
x[j] = x[--n];
}
#else
error(_("n >= 2^31, replace = FALSE is only supported on 64-bit platforms"));
#endif
}
} else {
int n = (int) dn;
PROTECT(y = allocVector(INTSXP, k));
int *iy = INTEGER(y);
/* avoid allocation for a single sample */
if (replace || k < 2) {
for (int i = 0; i < k; i++) iy[i] = (int)(R_unif_index(n) + 1);
} else {
int *x = (int *)R_alloc(n, sizeof(int));
for (int i = 0; i < n; i++) x[i] = i;
for (int i = 0; i < k; i++) {
int j = (int)(R_unif_index(n));
iy[i] = x[j] + 1;
x[j] = x[--n];
}
}
}
}
PutRNGstate();
UNPROTECT(1);
return y;
}