blob: 1d784dbc67150acad97df49d4e40cdec88d51706 [file] [log] [blame]
/*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 1997--2018 The R Core Team
* Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, a copy is available at
* https://www.R-project.org/Licenses/
*
* EXPORTS:
*
* OneIndex() -- used for "[[<-" in ./subassign.c
* get1index() -- used for "[[" in ./subassign.c & subset.c
* vectorIndex() -- used for "[[" and "[[<-" with a vector arg
* mat2indsub() -- for "mat[i]" " " "
* makeSubscript() -- for "[" and "[<-" in ./subset.c and ./subassign.c,
* and "[[<-" with a scalar in ./subassign.c
* arraySubscript() -- for "[i,j,..." and "[<-..." in ./subset.c, ./subassign.c
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <Defn.h>
#include <R_ext/Itermacros.h>
/* interval at which to check interrupts, a guess (~subsecond on current hw) */
#define NINTERRUPT 10000000
/* We might get a call with R_NilValue from subassignment code */
#define ECALL(call, yy) if(call == R_NilValue) error(yy); else errorcall(call, yy);
#define ECALL3(call, yy, A) if(call == R_NilValue) error(yy, A); else errorcall(call, yy, A);
/* This allows for the unusual case where x is of length 2,
and x[[-m]] selects one element for m = 1, 2.
So 'len' is only used if it is 2 and i is negative.
*/
static R_INLINE int integerOneIndex(int i, R_xlen_t len, SEXP call)
{
int indx = -1;
if (i > 0) /* a regular 1-based index from R */
indx = i - 1;
else if (i == 0 || len < 2) {
ECALL3(call, _("attempt to select less than one element in %s"), "integerOneIndex");
} else if (len == 2 && i > -3)
indx = 2 + i;
else {
ECALL3(call, _("attempt to select more than one element in %s"), "integerOneIndex");
}
return indx;
}
/* Utility used (only in) do_subassign2_dflt(), i.e. "[[<-" in ./subassign.c : */
R_xlen_t attribute_hidden
OneIndex(SEXP x, SEXP s, R_xlen_t nx, int partial, SEXP *newname,
int pos, SEXP call)
{
SEXP names;
R_xlen_t i, indx;
const void *vmax;
if (pos < 0 && length(s) > 1) {
ECALL3(call, _("attempt to select more than one element in %s"), "OneIndex");
}
if (pos < 0 && length(s) < 1) {
ECALL3(call, _("attempt to select less than one element in %s"), "OneIndex");
}
if(pos < 0) pos = 0;
indx = -1;
*newname = R_NilValue;
switch(TYPEOF(s)) {
case LGLSXP:
case INTSXP:
indx = integerOneIndex(INTEGER_ELT(s, pos), nx, call);
break;
case REALSXP:
indx = integerOneIndex((int)REAL_ELT(s, pos), nx, call);
break;
case STRSXP:
vmax = vmaxget();
names = getAttrib(x, R_NamesSymbol);
if (names != R_NilValue) {
PROTECT(names);
/* Try for exact match */
for (i = 0; i < nx; i++) {
const char *tmp = translateChar(STRING_ELT(names, i));
if (!tmp[0]) continue;
if (streql(tmp, translateChar(STRING_ELT(s, pos)))) {
indx = i;
break;
}
}
// Try for partial match -- not ever used in current R (partial is 0)
if (partial && indx < 0) {
size_t l = strlen(translateChar(STRING_ELT(s, pos)));
for(i = 0; i < nx; i++) {
const char *tmp = translateChar(STRING_ELT(names, i));
if (!tmp[0]) continue;
if(!strncmp(tmp, translateChar(STRING_ELT(s, pos)), l)) {
if(indx == -1 )
indx = i;
else
indx = -2;
}
}
}
UNPROTECT(1); /* names */
}
if (indx == -1)
indx = nx;
*newname = STRING_ELT(s, pos);
vmaxset(vmax);
break;
case SYMSXP:
vmax = vmaxget();
names = getAttrib(x, R_NamesSymbol);
if (names != R_NilValue) {
PROTECT(names);
for (i = 0; i < nx; i++)
if (streql(translateChar(STRING_ELT(names, i)),
translateChar(PRINTNAME(s)))) {
indx = i;
break;
}
UNPROTECT(1); /* names */
}
if (indx == -1)
indx = nx;
*newname = PRINTNAME(s);
vmaxset(vmax);
break;
default:
ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s)));
}
return indx;
}
/* used here and in subset.c and subassign.c */
R_xlen_t attribute_hidden
get1index(SEXP s, SEXP names, R_xlen_t len, int pok, int pos, SEXP call)
{
/* Get a single index for the [[ and [[<- operators.
Checks that only one index is being selected.
Returns -1 for no match.
s is the subscript
len is the length of the object or dimension, with names its (dim)names.
pos is len-1 or -1 for [[, -1 for [[<-
-1 means use the only element of length-1 s.
pok : is "partial ok" ?
if pok is -1, warn if partial matching occurs, but allow.
*/
int warn_pok = 0;
const char *ss, *cur_name;
R_xlen_t indx;
const void *vmax;
if (pok == -1) {
pok = 1;
warn_pok = 1;
}
if (pos < 0 && length(s) != 1) {
if (length(s) > 1) {
ECALL3(call, _("attempt to select more than one element in %s"), "get1index");
} else {
ECALL3(call, _("attempt to select less than one element in %s"), "get1index");
}
} else
if(pos >= length(s)) {
ECALL(call, _("internal error in use of recursive indexing"));
}
if(pos < 0) pos = 0;
indx = -1;
switch (TYPEOF(s)) {
case LGLSXP:
case INTSXP:
{
int i = INTEGER_ELT(s, pos);
if (i != NA_INTEGER)
indx = integerOneIndex(i, len, call);
break;
}
case REALSXP:
{
double dblind = REAL_ELT(s, pos);
if(!ISNAN(dblind)) {
/* see comment above integerOneIndex */
if (dblind > 0) indx = (R_xlen_t)(dblind - 1);
else if (dblind == 0 || len < 2) {
ECALL3(call, _("attempt to select less than one element in %s"), "get1index <real>");
} else if (len == 2 && dblind > -3)
indx = (R_xlen_t)(2 + dblind);
else {
ECALL3(call, _("attempt to select more than one element in %s"), "get1index <real>");
}
}
break;
}
case STRSXP:
/* NA matches nothing */
if(STRING_ELT(s, pos) == NA_STRING) break;
/* "" matches nothing: see names.Rd */
if(!CHAR(STRING_ELT(s, pos))[0]) break;
/* Try for exact match */
vmax = vmaxget();
ss = translateChar(STRING_ELT(s, pos));
for (R_xlen_t i = 0; i < xlength(names); i++)
if (STRING_ELT(names, i) != NA_STRING) {
if (streql(translateChar(STRING_ELT(names, i)), ss)) {
indx = i;
break;
}
}
/* Try for partial match */
if (pok && indx < 0) {
size_t len = strlen(ss);
for(R_xlen_t i = 0; i < xlength(names); i++) {
if (STRING_ELT(names, i) != NA_STRING) {
cur_name = translateChar(STRING_ELT(names, i));
if(!strncmp(cur_name, ss, len)) {
if(indx == -1) {/* first one */
indx = i;
if (warn_pok) {
if (call == R_NilValue)
warning(_("partial match of '%s' to '%s'"),
ss, cur_name);
else
warningcall(call,
_("partial match of '%s' to '%s'"),
ss, cur_name);
}
}
else {
indx = -2;/* more than one partial match */
if (warn_pok) /* already given context */
warningcall(R_NilValue,
_("further partial match of '%s' to '%s'"),
ss, cur_name);
break;
}
}
}
}
}
vmaxset(vmax);
break;
case SYMSXP:
vmax = vmaxget();
for (R_xlen_t i = 0; i < xlength(names); i++)
if (STRING_ELT(names, i) != NA_STRING &&
streql(translateChar(STRING_ELT(names, i)),
CHAR(PRINTNAME(s)))) {
indx = i;
vmaxset(vmax);
break;
}
break;
default:
ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s)));
}
return indx;
}
/* This is used for [[ and [[<- with a vector of indices of length > 1 .
x is a list or pairlist, and it is indexed recusively from
level start to level stop-1. ( 0...len-1 or 0..len-2 then len-1).
For [[<- it needs to duplicate if substructure might be shared.
*/
SEXP attribute_hidden
vectorIndex(SEXP x, SEXP thesub, int start, int stop, int pok, SEXP call,
Rboolean dup)
{
int i;
R_xlen_t offset;
SEXP cx;
/* sanity check */
if (dup && MAYBE_SHARED(x))
error("should only be called in an assignment context.");
for(i = start; i < stop; i++) {
if(!isVectorList(x) && !isPairList(x)) {
if (i)
errorcall(call, _("recursive indexing failed at level %d\n"), i+1);
else
errorcall(call, _("attempt to select more than one element in %s"), "vectorIndex");
}
PROTECT(x);
SEXP names = PROTECT(getAttrib(x, R_NamesSymbol));
offset = get1index(thesub, names,
xlength(x), pok, i, call);
UNPROTECT(2); /* x, names */
if(offset < 0 || offset >= xlength(x))
errorcall(call, _("no such index at level %d\n"), i+1);
if(isPairList(x)) {
#ifdef LONG_VECTOR_SUPPORT
if (offset > R_SHORT_LEN_MAX)
error("invalid subscript for pairlist");
#endif
cx = nthcdr(x, (int) offset);
RAISE_NAMED(CAR(x), NAMED(x));
x = CAR(cx);
if (dup && MAYBE_SHARED(x)) {
PROTECT(cx);
x = shallow_duplicate(x);
SETCAR(cx, x);
UNPROTECT(1); /* cx */
}
} else {
cx = x;
x = VECTOR_ELT(x, offset);
RAISE_NAMED(x, NAMED(cx));
if (dup && MAYBE_SHARED(x)) {
PROTECT(cx);
x = shallow_duplicate(x);
SET_VECTOR_ELT(cx, offset, x);
UNPROTECT(1); /* cx */
}
}
}
return x;
}
/* Special Matrix Subscripting: Handles the case x[i] where
x is an n-way array and i is a matrix with n columns.
This code returns a vector containing the subscripts
to be extracted when x is regarded as unravelled.
Negative indices are not allowed.
A zero/NA anywhere in a row will cause a zero/NA in the same
position in the result.
*/
SEXP attribute_hidden mat2indsub(SEXP dims, SEXP s, SEXP call)
{
int nrs = nrows(s);
R_xlen_t NR = nrs;
SEXP rvec;
int ndim = LENGTH(dims);
const int *pdims = INTEGER_RO(dims);
if (ncols(s) != ndim) {
ECALL(call, _("incorrect number of columns in matrix subscript"));
}
#ifdef LONG_VECTOR_SUPPORT
/* Check if it is a long vector we need to index */
R_xlen_t len = 1;
for (int j = 0; j < ndim; j++) len *= pdims[j];
if(len > R_SHORT_LEN_MAX) {
PROTECT(rvec = allocVector(REALSXP, nrs));
double *rv = REAL(rvec);
for (int i = 0; i < nrs; i++) rv[i] = 1.; // 1-based.
if (TYPEOF(s) == REALSXP) {
for (int i = 0; i < nrs; i++) {
R_xlen_t tdim = 1;
const double *ps = REAL_RO(s);
for (int j = 0; j < ndim; j++) {
double k = ps[i + j * NR];
if(ISNAN(k)) {rv[i] = NA_REAL; break;}
if(k < 0) {
ECALL(call, _("negative values are not allowed in a matrix subscript"));
}
if(k == 0.) {rv[i] = 0.; break;}
if (k > pdims[j]) {
ECALL(call, _("subscript out of bounds"));
}
rv[i] += (k - 1.) * tdim;
tdim *= pdims[j];
}
}
} else {
s = coerceVector(s, INTSXP);
const int *ps = INTEGER_RO(s);
for (int i = 0; i < nrs; i++) {
R_xlen_t tdim = 1;
for (int j = 0; j < ndim; j++) {
int k = ps[i + j * NR];
if(k == NA_INTEGER) {rv[i] = NA_REAL; break;}
if(k < 0) {
ECALL(call, _("negative values are not allowed in a matrix subscript"));
}
if(k == 0) {rv[i] = 0.; break;}
if (k > pdims[j]) {
ECALL(call, _("subscript out of bounds"));
}
rv[i] += (double) ((k - 1) * tdim);
tdim *= pdims[j];
}
}
}
} else
#endif
{
PROTECT(rvec = allocVector(INTSXP, nrs));
int *iv = INTEGER(rvec);
for (int i = 0; i < nrs; i++) iv[i] = 1; // 1-based.
s = coerceVector(s, INTSXP);
int *ps = INTEGER(s);
for (int i = 0; i < nrs; i++) {
int tdim = 1;
for (int j = 0; j < ndim; j++) {
int k = ps[i + j * NR];
if(k == NA_INTEGER) {iv[i] = NA_INTEGER; break;}
if(k < 0) {
ECALL(call, _("negative values are not allowed in a matrix subscript"));
}
if(k == 0) {iv[i] = 0; break;}
if (k > pdims[j]) {
ECALL(call, _("subscript out of bounds"));
}
iv[i] += (k - 1) * tdim;
tdim *= pdims[j];
}
}
}
UNPROTECT(1);
return rvec;
}
/*
Special Matrix Subscripting: For the case x[i] where x is an n-way
array and i is a character matrix with n columns, this code converts i
to an integer matrix by matching against the dimnames of x. NA values
in any row of i propagate to the result. Unmatched entries result in
a subscript out of bounds error. */
SEXP attribute_hidden strmat2intmat(SEXP s, SEXP dnamelist, SEXP call)
{
/* XXX: assumes all args are protected */
int nr = nrows(s), i, j, v;
R_xlen_t idx, NR = nr;
SEXP dnames, snames, si, sicol, s_elt;
PROTECT(snames = allocVector(STRSXP, nr));
PROTECT(si = allocVector(INTSXP, xlength(s)));
dimgets(si, getAttrib(s, R_DimSymbol));
int *psi = INTEGER(si);
for (i = 0; i < length(dnamelist); i++) {
dnames = VECTOR_ELT(dnamelist, i);
for (j = 0; j < nr; j++)
SET_STRING_ELT(snames, j, STRING_ELT(s, j + (i * NR)));
PROTECT(sicol = match(dnames, snames, 0));
for (j = 0; j < nr; j++) {
v = INTEGER_ELT(sicol, j);
idx = j + (i * NR);
s_elt = STRING_ELT(s, idx);
if (s_elt == NA_STRING) v = NA_INTEGER;
if (!CHAR(s_elt)[0]) v = 0; /* disallow "" match */
if (v == 0) errorcall(call, _("subscript out of bounds"));
psi[idx] = v;
}
UNPROTECT(1);
}
UNPROTECT(2);
return si;
}
static SEXP nullSubscript(R_xlen_t n)
{
SEXP indx;
#ifdef LONG_VECTOR_SUPPORT
if (n > R_SHORT_LEN_MAX) {
indx = allocVector(REALSXP, n);
double *pindx = REAL(indx);
for (R_xlen_t i = 0; i < n; i++)
pindx[i] = (double)(i + 1);
} else
#endif
{
indx = allocVector(INTSXP, n);
int *pindx = INTEGER(indx);
for (int i = 0; i < n; i++)
pindx[i] = i + 1;
}
return indx;
}
static SEXP
logicalSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call)
{
R_xlen_t count, i, nmax, i1, i2;
int canstretch;
SEXP indx;
canstretch = *stretch > 0;
if (!canstretch && ns > nx) {
ECALL(call, _("(subscript) logical subscript too long"));
}
nmax = (ns > nx) ? ns : nx;
*stretch = (ns > nx) ? ns : 0;
if (ns == 0) return(allocVector(INTSXP, 0));
const int *ps = LOGICAL_RO(s); /* Calling LOCICAL_RO here may force a
large allocation, but no larger than
the one made by R_alloc below. This
could use rewriting to better handle
a sparse logical index. */
#ifdef LONG_VECTOR_SUPPORT
if (nmax > R_SHORT_LEN_MAX) {
if (ns == nmax) { /* no recycling - use fast single-index code */
const void *vmax = vmaxget();
double *buf = (double *) R_alloc(nmax, sizeof(double));
count = 0;
R_ITERATE_CHECK(NINTERRUPT, nmax, i,
if (ps[i]) {
if (ps[i] == NA_LOGICAL)
buf[count++] = NA_REAL;
else
buf[count++] = (double)(i + 1);
});
PROTECT(indx = allocVector(REALSXP, count));
memcpy(REAL(indx), buf, sizeof(double) * count);
vmaxset(vmax);
UNPROTECT(1);
return indx;
}
count = 0;
/* we only need to scan s once even if we recycle,
just remember the total count as well as
the count for the last incomplete chunk (if any) */
i1 = (ns < nmax) ? (nmax % ns) : 0;
if (i1 > 0) { /* last recycling chunk is incomplete -
we have to get the truncated count as well */
R_xlen_t rem = 0;
for (i = 0; i < ns; i++) {
if (i == i1) rem = count;
if (ps[i]) count++;
}
count = count * (nmax / ns) + rem;
} else { /* nested recycling, total is sufficient */
for (i = 0; i < ns; i++)
if (ps[i]) count++;
count *= nmax / ns;
}
PROTECT(indx = allocVector(REALSXP, count));
double *pindx = REAL(indx);
count = 0;
MOD_ITERATE_CHECK(NINTERRUPT, nmax, ns, nmax, i, i1, i2,
if (ps[i1]) {
if (ps[i1] == NA_LOGICAL)
pindx[count++] = NA_REAL;
else
pindx[count++] = (double)(i + 1);
});
UNPROTECT(1);
return indx;
}
#endif
// else --- the same code for non-long vectors --------------------------
if (ns == nmax) { /* no recycling - use fast single-index code */
const void *vmax = vmaxget();
int *buf = (int *) R_alloc(nmax, sizeof(int));
count = 0;
R_ITERATE_CHECK(NINTERRUPT, nmax, i,
if (ps[i]) {
if (ps[i] == NA_LOGICAL)
buf[count++] = NA_INTEGER;
else
buf[count++] = (int)(i + 1);
});
PROTECT(indx = allocVector(INTSXP, count));
memcpy(INTEGER(indx), buf, sizeof(int) * count);
vmaxset(vmax);
UNPROTECT(1);
return indx;
}
count = 0;
/* we only need to scan s once even if we recycle,
just remember the total count as well as
the count for the last incomplete chunk (if any) */
i1 = (ns < nmax) ? (nmax % ns) : 0;
if (i1 > 0) { /* last recycling chunk is incomplete -
we have to get the truncated count as well */
R_xlen_t rem = 0;
for (i = 0; i < ns; i++) {
if (i == i1) rem = count;
if (ps[i]) count++;
}
count = count * (nmax / ns) + rem;
} else { /* nested recycling, total is sufficient */
for (i = 0; i < ns; i++)
if (ps[i]) count++;
count *= nmax / ns;
}
PROTECT(indx = allocVector(INTSXP, count));
int *pindx = INTEGER(indx);
count = 0;
MOD_ITERATE_CHECK(NINTERRUPT, nmax, ns, nmax, i, i1, i2,
if (ps[i1]) {
if (ps[i1] == NA_LOGICAL)
pindx[count++] = NA_INTEGER;
else
pindx[count++] = (int)(i + 1);
});
UNPROTECT(1);
return indx;
}
static SEXP negativeSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, SEXP call)
{
SEXP indx;
R_xlen_t stretch = 0;
R_xlen_t i;
PROTECT(indx = allocVector(LGLSXP, nx));
int *pindx = LOGICAL(indx);
for (i = 0; i < nx; i++)
pindx[i] = 1;
const int *ps = INTEGER_RO(s);
for (i = 0; i < ns; i++) {
int ix = ps[i];
if (ix != 0 && ix != NA_INTEGER && -ix <= nx)
pindx[-ix - 1] = 0;
}
s = logicalSubscript(indx, nx, nx, &stretch, call);
UNPROTECT(1);
return s;
}
static SEXP positiveSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx)
{
SEXP indx;
R_xlen_t i, zct = 0;
const int *ps = INTEGER_RO(s);
for (i = 0; i < ns; i++) if (ps[i] == 0) zct++;
if (zct) {
indx = allocVector(INTSXP, (ns - zct));
int *pindx = INTEGER(indx);
for (i = 0, zct = 0; i < ns; i++)
if (ps[i] != 0)
pindx[zct++] = ps[i];
return indx;
} else return s;
}
static SEXP
integerSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call)
{
R_xlen_t i;
int ii, neg, max, canstretch;
Rboolean isna = FALSE;
canstretch = *stretch > 0;
*stretch = 0;
neg = FALSE;
max = 0;
const int *ps = INTEGER_RO(s);
for (i = 0; i < ns; i++) {
ii = ps[i];
if (ii < 0) {
if (ii == NA_INTEGER)
isna = TRUE;
else
neg = TRUE;
}
else if (ii > max)
max = ii;
}
if (max > nx) {
if(canstretch) *stretch = max;
else {
ECALL(call, _("subscript out of bounds"));
}
}
if (neg) {
if (max == 0 && !isna) return negativeSubscript(s, ns, nx, call);
else {
ECALL(call, _("only 0's may be mixed with negative subscripts"));
}
}
else return positiveSubscript(s, ns, nx);
return R_NilValue;
}
static SEXP
realSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call)
{
R_xlen_t i;
int canstretch;
double ii, min, max;
Rboolean isna = FALSE;
canstretch = *stretch > 0;
*stretch = 0;
min = 0;
max = 0;
const double *ps = REAL_RO(s);
for (i = 0; i < ns; i++) {
ii = ps[i];
if (R_FINITE(ii)) {
if (ii < min) min = ii;
if (ii > max) max = ii;
} else isna = TRUE;
}
if (max > nx) {
#ifndef LONG_VECTOR_SUPPORT
if (max > INT_MAX) {
ECALL(call, _("subscript too large for 32-bit R"));
}
#endif
if(canstretch) *stretch = (R_xlen_t) max;
else {
ECALL(call, _("subscript out of bounds"));
}
}
if (min < 0) {
if (max == 0 && !isna) {
SEXP indx;
R_xlen_t stretch = 0;
double dx;
R_xlen_t i, ix;
PROTECT(indx = allocVector(LGLSXP, nx));
int *pindx = LOGICAL(indx);
for (i = 0; i < nx; i++) pindx[i] = 1;
for (i = 0; i < ns; i++) {
dx = ps[i];
if (R_FINITE(dx) && dx != 0 && -dx <= nx) {
ix = (R_xlen_t)(-dx - 1);
pindx[ix] = 0;
}
}
s = logicalSubscript(indx, nx, nx, &stretch, call);
UNPROTECT(1);
return s;
} else {
ECALL(call, _("only 0's may be mixed with negative subscripts"));
}
} else {
/* Only return a REALSXP index if we need to */
SEXP indx;
R_xlen_t i, cnt = 0;
Rboolean int_ok = TRUE;
/* NB, indices will be truncated eventually,
so need to do that to take '0' into account */
for (i = 0; i < ns; i++) {
double ds = ps[i];
#ifdef OLDCODE_LONG_VECTOR
if (!R_FINITE(ds)) {
if (ds > INT_MAX) int_ok = FALSE;
cnt++;
} else if ((R_xlen_t) ds != 0) cnt++;
#else
if (R_FINITE(ds) && ds > INT_MAX) int_ok = FALSE;
if (!R_FINITE(ds) || (R_xlen_t) ds != 0) cnt++;
#endif
}
if (int_ok) {
indx = allocVector(INTSXP, cnt);
int *pindx = INTEGER(indx);
for (i = 0, cnt = 0; i < ns; i++) {
double ds = ps[i];
int ia;
if (!R_FINITE(ds)) ia = NA_INTEGER;
else ia = (int) ds;
if (ia != 0) pindx[cnt++] = ia;
}
} else {
indx = allocVector(REALSXP, cnt);
double *pindx = REAL(indx);
for (i = 0, cnt = 0; i < ns; i++) {
double ds = ps[i];
if (!R_FINITE(ds) || (R_xlen_t) ds != 0) pindx[cnt++] = ds;
}
}
return indx;
}
return R_NilValue;
}
/* This uses a couple of horrible hacks in conjunction with
* VectorAssign (in subassign.c). If subscripting is used for
* assignment, it is possible to extend a vector by supplying new
* names, and we want to give the extended vector those names, so they
* are returned as the use.names attribute. Also, unset elements of the vector
* of new names (places where a match was found) are indicated by
* setting the element of the newnames vector to NULL.
*/
/* The original code (pre 2.0.0) used a ns x nx loop that was too
* slow. So now we hash. Hashing is expensive on memory (up to 32nx
* bytes) so it is only worth doing if ns * nx is large. If nx is
* large, then it will be too slow unless ns is very small.
*/
static SEXP
stringSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, SEXP names,
R_xlen_t *stretch, SEXP call)
{
SEXP indx, indexnames = R_NilValue;
R_xlen_t i, j, nnames, extra, sub;
int canstretch = *stretch > 0;
/* product may overflow, so check factors as well. */
Rboolean usehashing = ( ((ns > 1000 && nx) || (nx > 1000 && ns)) || (ns * nx > 15*nx + ns) );
PROTECT(s);
PROTECT(names);
nnames = nx;
extra = nnames;
/* Process each of the subscripts. First we compare with the names
* on the vector and then (if there is no match) with each of the
* previous subscripts, since (if assigning) we may have already
* added an element of that name. (If we are not assigning, any
* nonmatch will have given an error.)
*/
if(usehashing) {
/* must be internal, so names contains a character vector */
/* NB: this does not behave in the same way with respect to ""
and NA names: they will match */
PROTECT(indx = match(names, s, 0)); /**** guaranteed to be fresh???*/
/* second pass to correct this */
int *pindx = INTEGER(indx);
for (i = 0; i < ns; i++)
if(STRING_ELT(s, i) == NA_STRING || !CHAR(STRING_ELT(s, i))[0])
pindx[i] = 0;
} else {
PROTECT(indx = allocVector(INTSXP, ns));
int *pindx = INTEGER(indx);
for (i = 0; i < ns; i++) {
sub = 0;
if (names != R_NilValue) {
for (j = 0; j < nnames; j++) {
SEXP names_j = STRING_ELT(names, j);
if (NonNullStringMatch(STRING_ELT(s, i), names_j)) {
sub = j + 1;
break;
}
}
}
pindx[i] = (int) sub;
}
}
int *pindx = INTEGER(indx);
SEXP sindx = NULL;
for (i = 0; i < ns; i++) {
sub = pindx[i];
if (sub == 0) {
if (sindx == NULL) {
sindx = PROTECT(match(s, s, 0));
indexnames = PROTECT(allocVector(VECSXP, ns));
for (int z = 0; z < ns; z++)
SET_VECTOR_ELT(indexnames, z, R_NilValue);
}
int j = INTEGER(sindx)[i] - 1;
if(STRING_ELT(s, i) != NA_STRING && CHAR(STRING_ELT(s, i))[0]) {
sub = pindx[j];
SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, j));
}
}
if (sub == 0) {
if (!canstretch) {
ECALL(call, _("subscript out of bounds"));
}
extra += 1;
sub = extra;
SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, i));
}
pindx[i] = (int) sub;
}
/* We return the new names as the names attribute of the returned
subscript vector. */
if (extra != nnames)
setAttrib(indx, R_UseNamesSymbol, indexnames);
if (sindx != NULL) {
UNPROTECT(2);
}
if (canstretch)
*stretch = extra;
UNPROTECT(3);
return indx;
}
/* Array Subscripts.
dim is the dimension (0 to k-1)
s is the subscript list,
dims is the dimensions of x
dng is a function (usually getAttrib) that obtains the dimnames
x is the array to be subscripted.
*/
attribute_hidden SEXP
int_arraySubscript(int dim, SEXP s, SEXP dims, SEXP x, SEXP call)
{
int nd, ns;
R_xlen_t stretch = 0;
SEXP dnames, tmp;
ns = length(s);
nd = INTEGER_ELT(dims, dim);
switch (TYPEOF(s)) {
case NILSXP:
return allocVector(INTSXP, 0);
case LGLSXP:
return logicalSubscript(s, ns, nd, &stretch, call);
case INTSXP:
return integerSubscript(s, ns, nd, &stretch, call);
case REALSXP:
/* We don't yet allow subscripts > R_SHORT_LEN_MAX */
PROTECT(tmp = coerceVector(s, INTSXP));
tmp = integerSubscript(tmp, ns, nd, &stretch, call);
UNPROTECT(1);
return tmp;
case STRSXP:
dnames = getAttrib(x, R_DimNamesSymbol);
if (dnames == R_NilValue) {
ECALL(call, _("no 'dimnames' attribute for array"));
}
dnames = VECTOR_ELT(dnames, dim);
return stringSubscript(s, ns, nd, dnames, &stretch, call);
case SYMSXP:
if (s == R_MissingArg)
return nullSubscript(nd);
default:
ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s)));
}
return R_NilValue;
}
/* This is used by packages arules, cba, proxy and seriation. */
typedef SEXP AttrGetter(SEXP x, SEXP data);
typedef SEXP (*StringEltGetter)(SEXP x, int i);
SEXP
arraySubscript(int dim, SEXP s, SEXP dims, AttrGetter dng,
StringEltGetter strg, SEXP x)
{
return int_arraySubscript(dim, s, dims, x, R_NilValue);
}
/* Subscript creation. The first thing we do is check to see */
/* if there are any user supplied NULL's, these result in */
/* returning a vector of length 0. */
/* if stretch is zero on entry then the vector x cannot be
"stretched",
otherwise, stretch returns the new required length for x
*/
SEXP attribute_hidden
makeSubscript(SEXP x, SEXP s, R_xlen_t *stretch, SEXP call)
{
if (! (isVector(x) || isList(x) || isLanguage(x))) {
ECALL(call, _("subscripting on non-vector"));
}
R_xlen_t nx = xlength(x);
/* special case for simple indices -- does not duplicate */
if (IS_SCALAR(s, INTSXP)) {
int i = SCALAR_IVAL(s);
if (0 < i && i <= nx) {
*stretch = 0;
return s;
}
}
else if (IS_SCALAR(s, REALSXP)) {
double di = SCALAR_DVAL(s);
if (1 <= di && di <= nx) {
*stretch = 0;
/* We could only return a REALSXP if the value is too
large for an INTSXP, but, as the calling code can
handle REALSXP indices, returning the REALSXP
avoids an allocation. */
return s;
}
}
R_xlen_t ns = xlength(s);
SEXP ans = R_NilValue;
switch (TYPEOF(s)) {
case NILSXP:
*stretch = 0;
ans = allocVector(INTSXP, 0);
break;
case LGLSXP:
ans = logicalSubscript(s, ns, nx, stretch, call);
break;
case INTSXP:
ans = integerSubscript(s, ns, nx, stretch, call);
break;
case REALSXP:
ans = realSubscript(s, ns, nx, stretch, call);
break;
case STRSXP:
{
SEXP names = PROTECT(getAttrib(x, R_NamesSymbol));
/* *stretch = 0; */
ans = stringSubscript(s, ns, nx, names, stretch, call);
UNPROTECT(1); /* names */
break;
}
case SYMSXP:
*stretch = 0;
if (s == R_MissingArg) {
ans = nullSubscript(nx);
break;
}
default:
ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s)));
}
return ans;
}