| /* |
| * R : A Computer Language for Statistical Data Analysis |
| * Copyright (C) 1997--2021 The R Core Team |
| * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, a copy is available at |
| * https://www.R-project.org/Licenses/ |
| */ |
| |
| /* This is currently restricted to vectors of length < 2^30 */ |
| |
| #ifdef HAVE_CONFIG_H |
| #include <config.h> |
| #endif |
| |
| #define R_USE_SIGNALS 1 |
| #include <Defn.h> |
| #include <Internal.h> |
| #include <R_ext/Altrep.h> |
| #include <R_ext/Itermacros.h> |
| |
| /* inline version of function R_NaN_is_R_NA defined in arithmetic.c */ |
| /* may not be needed if LTO is enabled */ |
| #define R_NaN_is_R_NA R_NaN_is_R_NA_inline |
| static R_INLINE int R_NaN_is_R_NA_inline(double x) |
| { |
| #ifdef WORDS_BIGENDIAN |
| static const int lw = 1; |
| #else /* !WORDS_BIGENDIAN */ |
| static const int lw = 0; |
| #endif /* WORDS_BIGENDIAN */ |
| union { |
| double value; |
| unsigned int word[2]; |
| } y; |
| y.value = x; |
| return y.word[lw] == 1954; |
| } |
| |
| #define NIL -1 |
| #define ARGUSED(x) LEVELS(x) |
| #define SET_ARGUSED(x,v) SETLEVELS(x,v) |
| |
| /* interval at which to check interrupts */ |
| #define NINTERRUPT 1000000 |
| |
| typedef size_t hlen; |
| |
| /* Hash function and equality test for keys */ |
| typedef struct _HashData HashData; |
| |
| struct _HashData { |
| int K; |
| hlen M; |
| R_xlen_t nmax; |
| #ifdef LONG_VECTOR_SUPPORT |
| Rboolean isLong; |
| #endif |
| hlen (*hash)(SEXP, R_xlen_t, HashData *); |
| int (*equal)(SEXP, R_xlen_t, SEXP, R_xlen_t); |
| SEXP HashTable; |
| |
| int nomatch; |
| Rboolean useUTF8; |
| Rboolean useCache; |
| Rboolean useCloEnv; |
| Rboolean extptrAsRef; |
| Rboolean inHashtab; |
| }; |
| |
| #define HTDATA_INT(d) (INTEGER0((d)->HashTable)) |
| #define HTDATA_DBL(d) (REAL0((d)->HashTable)) |
| |
| |
| /* |
| Integer keys are hashed via a random number generator |
| based on Knuth's recommendations. The high order K bits |
| are used as the hash code. |
| |
| NB: lots of this code relies on M being a power of two and |
| on silent integer overflow mod 2^32. |
| |
| <FIXME> Integer keys are wasteful for logical and raw vectors, but |
| the tables are small in that case. It would be much easier to |
| implement long vectors, though. |
| */ |
| |
| /* Currently the hash table is implemented as a (signed) integer |
| array. So there are two 31-bit restrictions, the length of the |
| array and the values. The values are initially NIL (-1). O-based |
| indices are inserted by isDuplicated, and invalidated by setting |
| to NA_INTEGER. |
| */ |
| |
| static hlen scatter(unsigned int key, HashData *d) |
| { |
| return 3141592653U * key >> (32 - d->K); |
| } |
| |
| static hlen lhash(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| int xi = LOGICAL_ELT(x, indx); |
| if (xi == NA_LOGICAL) return 2U; |
| return (hlen) xi; |
| } |
| |
| static R_INLINE hlen ihash(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| int xi = INTEGER_ELT(x, indx); |
| if (xi == NA_INTEGER) return 0; |
| return scatter((unsigned int) xi, d); |
| } |
| |
| /* We use unions here because Solaris gcc -O2 has trouble with |
| casting + incrementing pointers. We use tests here, but R currently |
| assumes int is 4 bytes and double is 8 bytes. |
| */ |
| union foo { |
| double d; |
| unsigned int u[2]; |
| }; |
| |
| static R_INLINE hlen rhash(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| /* There is a problem with signed 0s under IEC60559 */ |
| double xi = REAL_ELT(x, indx); |
| double tmp = (xi == 0.0) ? 0.0 : xi; |
| /* need to use both 32-byte chunks or endianness is an issue */ |
| /* we want all NaNs except NA equal, and all NAs equal */ |
| if (R_IsNA(tmp)) tmp = NA_REAL; |
| else if (R_IsNaN(tmp)) tmp = R_NaN; |
| #if 2*SIZEOF_INT == SIZEOF_DOUBLE |
| { |
| union foo tmpu; |
| tmpu.d = tmp; |
| return scatter(tmpu.u[0] + tmpu.u[1], d); |
| } |
| #else |
| return scatter(*((unsigned int *) (&tmp)), d); |
| #endif |
| } |
| |
| static Rcomplex unify_complex_na(Rcomplex z) { |
| Rcomplex ans; |
| ans.r = (z.r == 0.0) ? 0.0 : z.r; |
| ans.i = (z.i == 0.0) ? 0.0 : z.i; |
| if (R_IsNA(ans.r) || R_IsNA(ans.i)) |
| ans.r = ans.i = NA_REAL; |
| else if (R_IsNaN(ans.r) || R_IsNaN(ans.i)) |
| ans.r = ans.i = R_NaN; |
| return ans; |
| } |
| |
| static hlen chash(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| Rcomplex tmp = unify_complex_na(COMPLEX_ELT(x, indx)); |
| |
| #if 2*SIZEOF_INT == SIZEOF_DOUBLE |
| { |
| unsigned int u; |
| union foo tmpu; |
| tmpu.d = tmp.r; |
| u = tmpu.u[0] ^ tmpu.u[1]; |
| tmpu.d = tmp.i; |
| u ^= tmpu.u[0] ^ tmpu.u[1]; |
| return scatter(u, d); |
| } |
| #else |
| return scatter((*((unsigned int *)(&tmp.r)) ^ |
| (*((unsigned int *)(&tmp.i)))), d); |
| #endif |
| } |
| |
| /* Pointer hashing as used here isn't entirely portable (we do it in |
| several other places, sometimes in slightly different ways) but it |
| could be made so by computing a unique value based on the |
| allocation page and position in the page. |
| |
| Pointer hashes will not be valid if serialized and unserialized in |
| another process. |
| |
| Hash values are int, For 64bit pointers, we do (upper ^ lower) */ |
| static R_INLINE unsigned int PTRHASH(void *x) |
| { |
| intptr_t z = (intptr_t) x; |
| unsigned int z1 = (unsigned int)(z & 0xffffffff), z2 = 0; |
| #if SIZEOF_LONG == 8 |
| z2 = (unsigned int)(z/0x100000000L); |
| #endif |
| return z1 ^ z2; |
| } |
| |
| /* Hash CHARSXP by address. */ |
| static R_INLINE hlen cshash(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| return scatter(PTRHASH(STRING_ELT(x, indx)), d); |
| } |
| |
| static R_INLINE hlen shash(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| unsigned int k; |
| const char *p; |
| if (d->inHashtab) { |
| SEXP xi = STRING_ELT(x, indx); |
| int noTrans = (IS_BYTES(xi) || IS_ASCII(xi)) ? TRUE : FALSE; |
| if (d->useCache && noTrans) |
| return scatter(PTRHASH(xi), d); |
| const void *vmax = vmaxget(); |
| p = noTrans ? CHAR(xi) : translateCharUTF8(xi); |
| k = 0; |
| while (*p++) |
| /* multiplier was 8 but 11 isn't a power of 2 */ |
| k = 11 * k + (unsigned int) *p; |
| vmaxset(vmax); /* discard any memory used by translateChar */ |
| return scatter(k, d); |
| } |
| if(!d->useUTF8 && d->useCache) return cshash(x, indx, d); |
| const void *vmax = vmaxget(); |
| /* Not having d->useCache really should not happen anymore. */ |
| p = translateCharUTF8(STRING_ELT(x, indx)); |
| k = 0; |
| while (*p++) |
| k = 11 * k + (unsigned int) *p; /* was 8 but 11 isn't a power of 2 */ |
| vmaxset(vmax); /* discard any memory used by translateChar */ |
| return scatter(k, d); |
| } |
| |
| static int lequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j) |
| { |
| if (i < 0 || j < 0) return 0; |
| return (LOGICAL_ELT(x, i) == LOGICAL_ELT(y, j)); |
| } |
| |
| |
| static R_INLINE int iequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j) |
| { |
| if (i < 0 || j < 0) return 0; |
| return (INTEGER_ELT(x, i) == INTEGER_ELT(y, j)); |
| } |
| |
| /* BDR 2002-1-17 We don't want NA and other NaNs to be equal */ |
| static R_INLINE int requal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j) |
| { |
| if (i < 0 || j < 0) return 0; |
| double xi = REAL_ELT(x, i); |
| double yj = REAL_ELT(y, j); |
| if (!ISNAN(xi) && !ISNAN(yj)) |
| return (xi == yj); |
| else if (R_IsNA(xi) && R_IsNA(yj)) return 1; |
| else if (R_IsNaN(xi) && R_IsNaN(yj)) return 1; |
| else return 0; |
| } |
| |
| /* This is differentiating {NA,1}, {NA,0}, {NA, NaN}, {NA, NA}, |
| * but R's print() and format() render all as "NA" */ |
| static int cplx_eq(Rcomplex x, Rcomplex y) |
| { |
| if (!ISNAN(x.r) && !ISNAN(x.i) && !ISNAN(y.r) && !ISNAN(y.i)) |
| return x.r == y.r && x.i == y.i; |
| else if (R_IsNA(x.r) || R_IsNA(x.i)) // x is NA |
| return (R_IsNA(y.r) || R_IsNA(y.i)) ? 1 : 0; |
| else if (R_IsNA(y.r) || R_IsNA(y.i)) // y is NA but x is not |
| return 0; |
| // else : none is NA but there's at least one NaN; hence ISNAN(.) == R_IsNaN(.) |
| return |
| (((ISNAN(x.r) && ISNAN(y.r)) || (!ISNAN(x.r) && !ISNAN(y.r) && x.r == y.r)) && // Re |
| ((ISNAN(x.i) && ISNAN(y.i)) || (!ISNAN(x.i) && !ISNAN(y.i) && x.i == y.i)) // Im |
| ) ? 1 : 0; |
| } |
| |
| static int cequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j) |
| { |
| if (i < 0 || j < 0) return 0; |
| return cplx_eq(COMPLEX_ELT(x, i), COMPLEX_ELT(y, j)); |
| } |
| |
| static R_INLINE int sequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j) |
| { |
| if (i < 0 || j < 0) return 0; |
| SEXP xi = STRING_ELT(x, i); |
| SEXP yj = STRING_ELT(y, j); |
| /* Two strings which have the same address must be the same, |
| so avoid looking at the contents */ |
| if (xi == yj) return 1; |
| /* Then if either is NA the other cannot be */ |
| /* Once all CHARSXPs are cached, Seql will handle this */ |
| if (xi == NA_STRING || yj == NA_STRING) |
| return 0; |
| /* another pre-test to avoid the call to Seql */ |
| if (IS_CACHED(xi) && IS_CACHED(yj) && ENC_KNOWN(xi) == ENC_KNOWN(yj)) |
| return 0; |
| return Seql(xi, yj); |
| } |
| |
| static hlen rawhash(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| return (hlen) RAW_ELT(x, indx); |
| } |
| |
| static int rawequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j) |
| { |
| if (i < 0 || j < 0) return 0; |
| return (RAW_ELT(x, i) == RAW_ELT(y, j)); |
| } |
| |
| static hlen vhash_one(SEXP _this, HashData *d); |
| static hlen vhash(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| SEXP _this = VECTOR_ELT(x, indx); |
| return vhash_one(_this, d); |
| } |
| |
| static hlen vhash_one(SEXP _this, HashData *d) |
| { |
| int i; |
| unsigned int key; |
| |
| /* Handle environments by pointer hashing. Previously, |
| environments were hashed based only on length, which is not |
| very effective and could be expensive to compute. */ |
| if (TYPEOF(_this) == ENVSXP) |
| return scatter(PTRHASH(_this), d); |
| |
| key = OBJECT(_this) + 2*TYPEOF(_this) + 100U*(unsigned int) length(_this); |
| /* maybe we should also look at attributes, but that slows us down */ |
| switch (TYPEOF(_this)) { |
| case LGLSXP: |
| /* This is not too clever: pack into 32-bits and then scatter? */ |
| for(i = 0; i < LENGTH(_this); i++) { |
| key ^= lhash(_this, i, d); |
| key *= 97; |
| } |
| break; |
| case INTSXP: |
| for(i = 0; i < LENGTH(_this); i++) { |
| key ^= ihash(_this, i, d); |
| key *= 97; |
| } |
| break; |
| case REALSXP: |
| for(i = 0; i < LENGTH(_this); i++) { |
| key ^= rhash(_this, i, d); |
| key *= 97; |
| } |
| break; |
| case CPLXSXP: |
| for(i = 0; i < LENGTH(_this); i++) { |
| key ^= chash(_this, i, d); |
| key *= 97; |
| } |
| break; |
| case STRSXP: |
| for(i = 0; i < LENGTH(_this); i++) { |
| key ^= shash(_this, i, d); |
| key *= 97; |
| } |
| break; |
| case RAWSXP: |
| for(i = 0; i < LENGTH(_this); i++) { |
| key ^= scatter((unsigned int)rawhash(_this, i, d), d); |
| key *= 97; |
| } |
| break; |
| case EXPRSXP: |
| case VECSXP: |
| R_CheckStack(); |
| for(i = 0; i < LENGTH(_this); i++) { |
| key ^= vhash(_this, i, d); |
| key *= 97; |
| } |
| break; |
| case LANGSXP: |
| case LISTSXP: |
| R_CheckStack(); |
| /* all attributes are ignored */ |
| /* might be good to consider envirnments on formulas */ |
| for (SEXP next = _this; next != R_NilValue; next = CDR(next)) { |
| key ^= vhash_one(CAR(next), d); |
| key *= 97; |
| } |
| break; |
| case CLOSXP: |
| /* all attributes are ignored */ |
| key ^= vhash_one(BODY_EXPR(_this), d); |
| key *= 97; |
| if (d->useCloEnv) { |
| key ^= vhash_one(CLOENV(_this), d); |
| key *= 97; |
| } |
| break; |
| case SYMSXP: |
| key *= PTRHASH(_this); |
| key *= 97; |
| break; |
| case CHARSXP: |
| if(!d->useUTF8 && d->useCache) key *= PTRHASH(_this); |
| /**** otherwise, do nothing for now */ |
| /* this should only happen in C-leve hash tables */ |
| /* eventually this should do what shash does */ |
| key *= 97; |
| break; |
| case EXTPTRSXP: |
| if (d->extptrAsRef) |
| key ^= PTRHASH(_this); |
| else |
| /* identical() considers only the EXTPTR_PTR values ... */ |
| key ^= PTRHASH(EXTPTR_PTR(_this)); |
| key *= 97; |
| break; |
| default: |
| break; |
| } |
| return scatter(key, d); |
| } |
| |
| static int vequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j) |
| { |
| if (i < 0 || j < 0) return 0; |
| return R_compute_identical(VECTOR_ELT(x, i), VECTOR_ELT(y, j), 0); |
| } |
| |
| /* |
| Choose M to be the smallest power of 2 |
| not less than 2*n and set K = log2(M). |
| Need K >= 1 and hence M >= 2, and 2^M < 2^31-1, hence n <= 2^29. |
| |
| Dec 2004: modified from 4*n to 2*n, since in the worst case we have |
| a 50% full table, and that is still rather efficient -- see |
| R. Sedgewick (1998) Algorithms in C++ 3rd edition p.606. |
| */ |
| static void MKsetup(R_xlen_t n, HashData *d, R_xlen_t nmax) |
| { |
| #ifdef LONG_VECTOR_SUPPORT |
| /* M = 2^32 is safe, hence n <= 2^31 -1 */ |
| if(n < 0) /* protect against overflow to -ve */ |
| error(_("length %d is too large for hashing"), n); |
| #else |
| if(n < 0 || n >= 1073741824) /* protect against overflow to -ve */ |
| error(_("length %d is too large for hashing"), n); |
| #endif |
| |
| if (nmax != NA_INTEGER && nmax != 1) n = nmax; |
| size_t n2 = 2U * (size_t) n; |
| d->M = 2; |
| d->K = 1; |
| while (d->M < n2) { |
| d->M *= 2; |
| d->K++; |
| } |
| d->nmax = n; |
| } |
| |
| #define IMAX 4294967296L |
| static void HashTableSetup(SEXP x, HashData *d, R_xlen_t nmax) |
| { |
| d->useUTF8 = FALSE; |
| d->useCache = TRUE; |
| switch (TYPEOF(x)) { |
| case LGLSXP: |
| d->hash = lhash; |
| d->equal = lequal; |
| d->nmax = d->M = 4; |
| d->K = 2; /* unused */ |
| break; |
| case INTSXP: |
| { |
| d->hash = ihash; |
| d->equal = iequal; |
| #ifdef LONG_VECTOR_SUPPORT |
| R_xlen_t nn = XLENGTH(x); |
| if (nn > IMAX) nn = IMAX; |
| MKsetup(nn, d, nmax); |
| #else |
| MKsetup(LENGTH(x), d, nmax); |
| #endif |
| } |
| break; |
| case REALSXP: |
| d->hash = rhash; |
| d->equal = requal; |
| MKsetup(XLENGTH(x), d, nmax); |
| break; |
| case CPLXSXP: |
| d->hash = chash; |
| d->equal = cequal; |
| MKsetup(XLENGTH(x), d, nmax); |
| break; |
| case STRSXP: |
| d->hash = shash; |
| d->equal = sequal; |
| MKsetup(XLENGTH(x), d, nmax); |
| break; |
| case RAWSXP: |
| d->hash = rawhash; |
| d->equal = rawequal; |
| d->nmax = d->M = 256; |
| d->K = 8; /* unused */ |
| break; |
| case VECSXP: |
| d->hash = vhash; |
| d->equal = vequal; |
| MKsetup(XLENGTH(x), d, nmax); |
| break; |
| default: |
| UNIMPLEMENTED_TYPE("HashTableSetup", x); |
| } |
| #ifdef LONG_VECTOR_SUPPORT |
| d->isLong = IS_LONG_VEC(x); |
| if (d->isLong) { |
| d->HashTable = allocVector(REALSXP, (R_xlen_t) d->M); |
| for (R_xlen_t i = 0; i < d->M; i++) HTDATA_DBL(d)[i] = NIL; |
| } else |
| #endif |
| { |
| d->HashTable = allocVector(INTSXP, (R_xlen_t) d->M); |
| for (R_xlen_t i = 0; i < d->M; i++) HTDATA_INT(d)[i] = NIL; |
| } |
| } |
| |
| /* Open address hashing */ |
| /* Collision resolution is by linear probing */ |
| /* The table is guaranteed large so this is sufficient */ |
| |
| static int isDuplicated(SEXP x, R_xlen_t indx, HashData *d) |
| { |
| #ifdef LONG_VECTOR_SUPPORT |
| if (d->isLong) { |
| double *h = HTDATA_DBL(d); |
| hlen i = d->hash(x, indx, d); |
| while (h[i] != NIL) { |
| if (d->equal(x, (R_xlen_t) h[i], x, indx)) |
| return h[i] >= 0 ? 1 : 0; |
| i = (i + 1) % d->M; |
| } |
| if (d->nmax-- < 0) error("hash table is full"); |
| h[i] = (double) indx; |
| } else |
| #endif |
| { |
| int *h = HTDATA_INT(d); |
| hlen i = d->hash(x, indx, d); |
| while (h[i] != NIL) { |
| if (d->equal(x, h[i], x, indx)) |
| return h[i] >= 0 ? 1 : 0; |
| i = (i + 1) % d->M; |
| } |
| if (d->nmax-- < 0) error("hash table is full"); |
| h[i] = (int) indx; |
| } |
| return 0; |
| } |
| |
| static Rboolean duplicatedInit(SEXP x, HashData *d) |
| { |
| Rboolean stop = FALSE; |
| |
| if(TYPEOF(x) == STRSXP) { |
| R_xlen_t i, n = XLENGTH(x); |
| for(i = 0; i < n; i++) { |
| if(IS_BYTES(STRING_ELT(x, i))) { |
| d->useUTF8 = FALSE; |
| stop = TRUE; |
| break; |
| } |
| if(ENC_KNOWN(STRING_ELT(x, i))) { |
| d->useUTF8 = TRUE; |
| } |
| /* uncached strings are currently not properly supported */ |
| if(!IS_CACHED(STRING_ELT(x, i))) { |
| d->useCache = FALSE; |
| stop = TRUE; |
| break; |
| } |
| } |
| } else if (TYPEOF(x) == VECSXP || TYPEOF(x) == EXPRSXP) { |
| R_xlen_t i, n = XLENGTH(x); |
| for(i = 0; i < n; i++) |
| if (duplicatedInit(VECTOR_ELT(x, i), d)) { |
| stop = TRUE; |
| break; |
| } |
| } else if (TYPEOF(x) == LANGSXP || TYPEOF(x) == LISTSXP) { |
| for(SEXP head = x; head != R_NilValue; head = CDR(head)) |
| if (duplicatedInit(CAR(head), d)) { |
| stop = TRUE; |
| break; |
| } |
| |
| } else if (TYPEOF(x) == CLOSXP) { |
| if (duplicatedInit(BODY_EXPR(x), d)) |
| stop = TRUE; |
| } |
| return stop; |
| } |
| |
| static void removeEntry(SEXP table, SEXP x, R_xlen_t indx, HashData *d) |
| { |
| #ifdef LONG_VECTOR_SUPPORT |
| if (d->isLong) { |
| double *h = HTDATA_DBL(d); |
| hlen i = d->hash(x, indx, d); |
| while (h[i] >= 0) { |
| if (d->equal(table, (R_xlen_t) h[i], x, indx)) { |
| h[i] = NA_INTEGER; /* < 0, only index values are inserted */ |
| return; |
| } |
| i = (i + 1) % d->M; |
| } |
| } else |
| #endif |
| { |
| int *h = HTDATA_INT(d); |
| hlen i = d->hash(x, indx, d); |
| while (h[i] >= 0) { |
| if (d->equal(table, h[i], x, indx)) { |
| h[i] = NA_INTEGER; /* < 0, only index values are inserted */ |
| return; |
| } |
| i = (i + 1) % d->M; |
| } |
| } |
| } |
| |
| #define DUPLICATED_INIT \ |
| HashData data = { 0 }; \ |
| HashTableSetup(x, &data, nmax); \ |
| data.useUTF8 = FALSE; data.useCache = TRUE; \ |
| duplicatedInit(x, &data); |
| |
| /* used in scan() */ |
| SEXP duplicated(SEXP x, Rboolean from_last) |
| { |
| SEXP ans; |
| int *v, nmax = NA_INTEGER; |
| |
| if (!isVector(x)) error(_("'duplicated' applies only to vectors")); |
| R_xlen_t i, n = XLENGTH(x); |
| DUPLICATED_INIT; |
| |
| PROTECT(data.HashTable); |
| PROTECT(ans = allocVector(LGLSXP, n)); |
| |
| v = LOGICAL(ans); |
| |
| if(from_last) |
| for (i = n-1; i >= 0; i--) { |
| // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); |
| v[i] = isDuplicated(x, i, &data); |
| } |
| else |
| for (i = 0; i < n; i++) { |
| // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); |
| v[i] = isDuplicated(x, i, &data); |
| } |
| |
| UNPROTECT(2); |
| return ans; |
| } |
| |
| R_xlen_t sorted_real_count_NANs(SEXP x) { |
| R_xlen_t n = XLENGTH(x); |
| if(n == 0) |
| return 0; |
| if(n == 1) |
| return ISNAN(REAL_ELT(x, 0)); |
| |
| int sorted = REAL_IS_SORTED(x); |
| if(!KNOWN_SORTED(sorted)) /* this should never happen! */ |
| error("sorted_real_count_NANs got unsorted vector: this should not happen"); |
| |
| double rtmp; |
| R_xlen_t ret, nanpos, lowedge, highedge; |
| int nas1st = KNOWN_NA_1ST(sorted); |
| rtmp = nas1st ? REAL_ELT(x, 0) : REAL_ELT(x, n - 1); |
| if(!ISNAN(rtmp)) |
| return 0; |
| if((nas1st && ISNAN(REAL_ELT(x, n - 1))) || |
| (!nas1st && ISNAN(REAL_ELT(x, 0)))) |
| return n; |
| |
| nanpos = n / 2; |
| lowedge = 0; |
| highedge = n - 1; |
| |
| #define FIND_NAN_EDGE(yes, no, final) do { \ |
| while(highedge > lowedge + 1) { \ |
| rtmp = REAL_ELT(x, nanpos); \ |
| if(ISNAN(rtmp)) \ |
| yes; \ |
| else \ |
| no; \ |
| nanpos = (highedge + lowedge) / 2; \ |
| } \ |
| ret = final; \ |
| } while(0) |
| |
| if(nas1st) { |
| FIND_NAN_EDGE(lowedge = nanpos, highedge = nanpos, lowedge + 1); |
| } else { |
| FIND_NAN_EDGE(highedge = nanpos, lowedge = nanpos, n - highedge); |
| } |
| return ret; |
| } |
| |
| #undef FIND_NAN_EDGE |
| |
| #define DUP_DO_ONE(x, y, ind) do { \ |
| if(x == y) \ |
| v[ind] = TRUE; \ |
| else \ |
| v[ind] = FALSE; \ |
| } while(0) |
| |
| static SEXP sorted_Duplicated(SEXP x, Rboolean from_last, int nmax) |
| { |
| // n guaranteed >= 2 from calling function (Duplicated) |
| R_xlen_t n = XLENGTH(x), numnas, na_left = -1, startpos; |
| SEXP ans = PROTECT(allocVector(LGLSXP, n)); |
| int *v = LOGICAL(ans), itmp, sorted; |
| double rtmp; |
| Rboolean seen_na = FALSE, seen_nan = FALSE, |
| nas1st= TRUE; |
| |
| #define SORTED_DUP_NONNANS(start, niter, tmpvar, eetype, vvtype) do { \ |
| if(from_last) { \ |
| v[start + niter] = FALSE; \ |
| tmpvar = vvtype##_ELT(x, start + niter); \ |
| ITERATE_BY_REGION_PARTIAL_REV(x, xptr, idx, nb, eetype, \ |
| vvtype, start, niter, { \ |
| DUP_DO_ONE(xptr[nb - 1], \ |
| tmpvar, \ |
| idx + nb - 1); \ |
| for(R_xlen_t k = nb - 2; k >= 0; k--) { \ |
| DUP_DO_ONE(xptr[k + 1], \ |
| xptr[k], \ |
| idx + k); \ |
| } \ |
| tmpvar = xptr[0]; \ |
| }); \ |
| } else { /* !from_last */ \ |
| v[start] = FALSE; \ |
| tmpvar = vvtype##_ELT(x, start); \ |
| ITERATE_BY_REGION_PARTIAL(x, xptr, idx, nb, eetype, \ |
| vvtype, start + 1, niter, { \ |
| DUP_DO_ONE(xptr[0], tmpvar, \ |
| idx); \ |
| for(R_xlen_t k = 1; k < nb; k++) { \ |
| DUP_DO_ONE(xptr[k], \ |
| xptr[k - 1], \ |
| idx + k); \ |
| } \ |
| tmpvar = xptr[nb - 1]; \ |
| }); \ |
| } \ |
| } while(0) |
| |
| #define SORTED_DUP_NANS(itype, istart, icond, iter) do { \ |
| ITERATE_BY_REGION_##itype(x, xptr, idx, nb, \ |
| double, REAL, na_left, numnas, { \ |
| for(R_xlen_t i = istart; icond; iter) { \ |
| if(R_NaN_is_R_NA(xptr[i])) { \ |
| v[idx + i] = seen_na; \ |
| seen_na = TRUE; \ |
| } else { \ |
| v[idx + i] = seen_nan; \ |
| seen_nan = TRUE; \ |
| } \ |
| } \ |
| }); \ |
| } while(0) |
| |
| switch(TYPEOF(x)) { |
| case INTSXP: |
| sorted = INTEGER_IS_SORTED(x); |
| if(!KNOWN_SORTED(sorted)) |
| error("sorted_Duplicated got unsorted vector: this should not happen"); |
| SORTED_DUP_NONNANS(0, n - 1, itmp, int, INTEGER); |
| break; |
| case REALSXP: |
| sorted = REAL_IS_SORTED(x); |
| if(!KNOWN_SORTED(sorted)) |
| error("sorted_Duplicated got unsorted vector: this should not happen"); |
| numnas = sorted_real_count_NANs(x); |
| nas1st = KNOWN_NA_1ST(REAL_IS_SORTED(x)); |
| |
| if(numnas > 0) { |
| na_left = nas1st ? 0 : n - numnas; |
| if(from_last) { |
| SORTED_DUP_NANS(PARTIAL_REV, nb - 1, i >= 0, i--); |
| } else { // !from_last |
| SORTED_DUP_NANS(PARTIAL, 0, i < nb, i++); |
| } // from_last |
| } // numnas > 0 |
| |
| if(numnas < n) { |
| startpos = nas1st ? numnas : 0; |
| SORTED_DUP_NONNANS(startpos, n - numnas - 1, rtmp, double, REAL); |
| } |
| break; |
| default: |
| error("sorted_Duplicated got unsupported type %d: this should not happen", TYPEOF(x)); |
| } |
| UNPROTECT(1); //ans |
| return ans; |
| } |
| |
| #undef SORTED_DUP_NONNANS |
| #undef SORTED_DUP_NANS |
| #undef DUP_DO_ONE |
| |
| /* to add sorted fastpass support for new SEXP types modify sorted_Duplicated |
| and sorted_any_Duplicated then add them here */ |
| #define DUP_KNOWN_SORTED(x) \ |
| ((TYPEOF(x) == INTSXP && KNOWN_SORTED(INTEGER_IS_SORTED(x))) || \ |
| (TYPEOF(x) == REALSXP && KNOWN_SORTED(REAL_IS_SORTED(x)))) |
| |
| static SEXP Duplicated(SEXP x, Rboolean from_last, int nmax) |
| { |
| SEXP ans; |
| int *v; |
| |
| if (!isVector(x)) error(_("'duplicated' applies only to vectors")); |
| R_xlen_t i, n = XLENGTH(x); |
| if(n == 0) |
| return allocVector(LGLSXP, 0); |
| else if (n == 1) |
| return ScalarLogical(FALSE); |
| |
| if(DUP_KNOWN_SORTED(x)) { |
| return sorted_Duplicated(x, from_last, nmax); |
| } |
| DUPLICATED_INIT; |
| |
| PROTECT(data.HashTable); |
| PROTECT(ans = allocVector(LGLSXP, n)); |
| |
| v = LOGICAL(ans); |
| |
| if(from_last) |
| for (i = n-1; i >= 0; i--) { |
| // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); |
| v[i] = isDuplicated(x, i, &data); |
| } |
| else |
| for (i = 0; i < n; i++) { |
| // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); |
| v[i] = isDuplicated(x, i, &data); |
| } |
| |
| UNPROTECT(2); |
| return ans; |
| } |
| |
| R_xlen_t sorted_any_duplicated(SEXP x, Rboolean from_last) { |
| int itmp, sorted; |
| double rtmp; |
| Rboolean seen_na = FALSE, seen_nan = FALSE, na1st = FALSE; |
| |
| #define SORTED_ANYDUP_NONNANS_FROM_LAST(start, count, tmpvar, eetype, vvtype) do { \ |
| tmpvar = vvtype##_ELT(x, start + count - 1); \ |
| ITERATE_BY_REGION_PARTIAL_REV(x, xptr, idx, nb, eetype, vvtype, \ |
| start, count - 1, { \ |
| if(xptr[nb - 1] == tmpvar) { \ |
| return idx + nb; \ |
| } \ |
| for(R_xlen_t k = nb - 2; k >= 0; k--) { \ |
| if(xptr[k + 1] == xptr[k]) { \ |
| return idx + k + 1; \ |
| } \ |
| } \ |
| tmpvar = xptr[0]; \ |
| }); \ |
| } while(0) |
| |
| #define SORTED_ANYDUP_NONNANS_FROM_FIRST(start, count, tmpvar, eetype, vvtype) do { \ |
| tmpvar = vvtype##_ELT(x, start); \ |
| ITERATE_BY_REGION_PARTIAL(x, xptr, idx, nb, eetype, \ |
| vvtype, start + 1, count - 1, { \ |
| if(xptr[0] == tmpvar) { \ |
| return idx + 1; \ |
| } \ |
| for(R_xlen_t k = 1; k < nb; k++) { \ |
| if(xptr[k] == xptr[k - 1]) { \ |
| return idx + k + 1; \ |
| } \ |
| } \ |
| tmpvar = xptr[nb - 1]; \ |
| }); \ |
| } while(0) |
| |
| #define SORTED_ANYDUP_NANS(start, count, itype, istart, icond, iter) do { \ |
| ITERATE_BY_REGION_##itype(x, xptr, idx, nb, double, REAL, \ |
| start, count, { \ |
| for(R_xlen_t i = istart; icond; iter) { \ |
| if(R_NaN_is_R_NA(xptr[i])) { \ |
| if(seen_na) { \ |
| return idx + i + 1; \ |
| } else { \ |
| seen_na = TRUE; \ |
| } \ |
| } else { \ |
| if(seen_nan) { \ |
| return idx + i + 1; \ |
| } else { \ |
| seen_nan = TRUE; \ |
| } \ |
| } \ |
| } \ |
| }); \ |
| } while(0) |
| |
| switch(TYPEOF(x)) { |
| case INTSXP: |
| sorted = INTEGER_IS_SORTED(x); |
| if(!KNOWN_SORTED(sorted)) |
| error("sorted_any_duplicated got unsorted vector: this should not happen"); |
| if(from_last) { |
| SORTED_ANYDUP_NONNANS_FROM_LAST(0, XLENGTH(x), itmp, int, INTEGER); |
| } else { |
| SORTED_ANYDUP_NONNANS_FROM_FIRST(0, XLENGTH(x), itmp, int, INTEGER); |
| } |
| break; |
| case REALSXP: |
| sorted = REAL_IS_SORTED(x); |
| if(!KNOWN_SORTED(sorted)) |
| error("sorted_any_duplicated got unsorted vector: this should not happen"); |
| R_xlen_t numnas = sorted_real_count_NANs(x), napivot; |
| napivot = XLENGTH(x) - numnas; |
| na1st = KNOWN_NA_1ST(sorted); |
| |
| if(from_last) { |
| if(na1st) { |
| SORTED_ANYDUP_NONNANS_FROM_LAST(numnas, napivot, rtmp, double, |
| REAL); |
| SORTED_ANYDUP_NANS(0, numnas, PARTIAL_REV, nb - 1, i >=0, i--); |
| } else { |
| SORTED_ANYDUP_NANS(napivot, numnas, PARTIAL_REV, nb - 1, i >= 0, |
| i--); |
| SORTED_ANYDUP_NONNANS_FROM_LAST(0, napivot, rtmp, double, REAL); |
| } |
| } else { // !from_last |
| if(na1st) { |
| SORTED_ANYDUP_NANS(0, numnas, PARTIAL, 0, i < nb, i++); |
| SORTED_ANYDUP_NONNANS_FROM_FIRST(numnas, napivot, rtmp, double, |
| REAL); |
| } else { |
| SORTED_ANYDUP_NONNANS_FROM_FIRST(0, napivot, rtmp, double, |
| REAL); |
| SORTED_ANYDUP_NANS(napivot, numnas, PARTIAL, 0, i < nb, i++); |
| } |
| } //from_last |
| break; |
| default: |
| error("sorted_Duplicated got unsupported type %d: this should not happen", TYPEOF(x)); |
| } |
| return 0; |
| } |
| |
| #undef SORTED_ANYDUP_NONNANS_FROM_LAST |
| #undef SORTED_ANYDUP_NONNANS_FRO_FIRST |
| #undef SORTED_ANYDUP_NANS |
| |
| /* simpler version of the above : return 1-based index of first, or 0 : */ |
| R_xlen_t any_duplicated(SEXP x, Rboolean from_last) |
| { |
| R_xlen_t result = 0; |
| int nmax = NA_INTEGER; |
| |
| if (!isVector(x)) error(_("'duplicated' applies only to vectors")); |
| R_xlen_t i, n = XLENGTH(x); |
| |
| if(DUP_KNOWN_SORTED(x)) { |
| return sorted_any_duplicated(x, from_last); |
| } |
| |
| DUPLICATED_INIT; |
| PROTECT(data.HashTable); |
| |
| if(from_last) { |
| for (i = n-1; i >= 0; i--) { |
| if(isDuplicated(x, i, &data)) { result = ++i; break; } |
| } |
| } else { |
| for (i = 0; i < n; i++) { |
| if(isDuplicated(x, i, &data)) { result = ++i; break; } |
| } |
| } |
| UNPROTECT(1); |
| return result; |
| } |
| |
| #undef DUP_KNOWN_SORTED |
| |
| static SEXP duplicated3(SEXP x, SEXP incomp, Rboolean from_last, int nmax) |
| { |
| SEXP ans; |
| int *v, j, m; |
| |
| if (!isVector(x)) error(_("'duplicated' applies only to vectors")); |
| R_xlen_t i, n = XLENGTH(x); |
| DUPLICATED_INIT; |
| |
| PROTECT(data.HashTable); |
| PROTECT(ans = allocVector(LGLSXP, n)); |
| |
| v = LOGICAL(ans); |
| |
| if(from_last) |
| for (i = n-1; i >= 0; i--) { |
| // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); |
| v[i] = isDuplicated(x, i, &data); |
| } |
| else |
| for (i = 0; i < n; i++) { |
| // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); |
| v[i] = isDuplicated(x, i, &data); |
| } |
| |
| if(length(incomp)) { |
| PROTECT(incomp = coerceVector(incomp, TYPEOF(x))); |
| m = length(incomp); |
| for (i = 0; i < n; i++) |
| if(v[i]) { |
| for(j = 0; j < m; j++) |
| if(data.equal(x, i, incomp, j)) {v[i] = 0; break;} |
| } |
| UNPROTECT(1); |
| } |
| UNPROTECT(2); |
| return ans; |
| } |
| |
| /* return (1-based) index of first duplication, or 0 : */ |
| R_xlen_t any_duplicated3(SEXP x, SEXP incomp, Rboolean from_last) |
| { |
| int j, m = length(incomp), nmax = NA_INTEGER; |
| |
| if (!isVector(x)) error(_("'duplicated' applies only to vectors")); |
| R_xlen_t i, n = XLENGTH(x); |
| DUPLICATED_INIT; |
| PROTECT(data.HashTable); |
| |
| if(!m) error(_("any_duplicated3(., <0-length incomp>)")); |
| |
| PROTECT(incomp = coerceVector(incomp, TYPEOF(x))); |
| m = length(incomp); |
| |
| if(from_last) |
| for (i = n-1; i >= 0; i--) { |
| #define IS_DUPLICATED_CHECK \ |
| if(isDuplicated(x, i, &data)) { \ |
| Rboolean isDup = TRUE; \ |
| for(j = 0; j < m; j++) \ |
| if(data.equal(x, i, incomp, j)) { \ |
| isDup = FALSE; break; \ |
| } \ |
| if(isDup) { \ |
| UNPROTECT(2); \ |
| return ++i; \ |
| } \ |
| /* else continue */ \ |
| } |
| IS_DUPLICATED_CHECK; |
| } |
| else { |
| for (i = 0; i < n; i++) { |
| IS_DUPLICATED_CHECK; |
| } |
| } |
| |
| UNPROTECT(2); |
| return 0; |
| } |
| |
| #undef IS_DUPLICATED_CHECK |
| #undef DUPLICATED_INIT |
| |
| |
| /* .Internal( duplicated(x, incomparables, fromLast, nmax)) [op=0] |
| .Internal( unique(x, incomparables, fromLast, nmax)) [op=1] |
| .Internal(anyDuplicated(x, incomparables, fromLast )) [op=2] |
| */ |
| SEXP attribute_hidden do_duplicated(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| SEXP x, dup, ans; |
| R_xlen_t i, k, n; |
| |
| checkArity(op, args); |
| x = CAR(args); |
| SEXP incomp = CADR(args); |
| if (length(CADDR(args)) < 1) |
| error(_("'fromLast' must be length 1")); |
| int fromLast = asLogical(CADDR(args)); |
| if (fromLast == NA_LOGICAL) |
| error(_("'fromLast' must be TRUE or FALSE")); |
| Rboolean fL = (Rboolean) fromLast; |
| |
| /* handle zero length vectors, and NULL */ |
| if ((n = xlength(x)) == 0) |
| return(PRIMVAL(op) <= 1 |
| ? allocVector(PRIMVAL(op) != 1 ? LGLSXP : TYPEOF(x), 0) |
| : ScalarInteger(0)); |
| |
| if (!isVector(x)) { |
| error(_("%s() applies only to vectors"), |
| (PRIMVAL(op) == 0 ? "duplicated" : |
| (PRIMVAL(op) == 1 ? "unique" : /* 2 */ "anyDuplicated"))); |
| } |
| int nmax = NA_INTEGER; |
| if (PRIMVAL(op) <= 1) { |
| nmax = asInteger(CADDDR(args)); |
| if (nmax != NA_INTEGER && nmax <= 0) |
| error(_("'nmax' must be positive")); |
| } |
| |
| if(length(incomp) && /* S has FALSE to mean empty */ |
| !(isLogical(incomp) && length(incomp) == 1 && |
| LOGICAL_ELT(incomp, 0) == 0)) { |
| if(PRIMVAL(op) == 2) { |
| /* return R's 1-based index :*/ |
| R_xlen_t ind = any_duplicated3(x, incomp, fL); |
| if(ind > INT_MAX) return ScalarReal((double) ind); |
| else return ScalarInteger((int)ind); |
| } else |
| dup = duplicated3(x, incomp, fL, nmax); |
| } |
| else { |
| if(PRIMVAL(op) == 2) { |
| R_xlen_t ind = any_duplicated(x, fL); |
| if(ind > INT_MAX) return ScalarReal((double) ind); |
| else return ScalarInteger((int)ind); |
| } else |
| dup = Duplicated(x, fL, nmax); |
| } |
| if (PRIMVAL(op) == 0) /* "duplicated()" */ |
| return dup; |
| /* ELSE |
| use the results of "duplicated" to get "unique" */ |
| |
| /* count unique entries */ |
| k = 0; |
| PROTECT(dup); |
| ITERATE_BY_REGION(dup, duptr, idx, nb, int, LOGICAL, { |
| for(R_xlen_t j=0; j < nb; j++) |
| if(duptr[j] == 0) k++; |
| }); |
| |
| PROTECT(ans = allocVector(TYPEOF(x), k)); |
| |
| k = 0; |
| switch (TYPEOF(x)) { |
| case LGLSXP: |
| for (i = 0; i < n; i++) |
| if (LOGICAL_ELT(dup, i) == 0) |
| LOGICAL0(ans)[k++] = LOGICAL_ELT(x, i); |
| break; |
| case INTSXP: |
| ITERATE_BY_REGION(dup, duptr, idx, nb, int, LOGICAL, { |
| for(R_xlen_t j = 0; j < nb; j++) { |
| if(duptr[j] == 0) |
| INTEGER0(ans)[k++] = INTEGER_ELT(x, idx + j); |
| } |
| }); |
| break; |
| case REALSXP: |
| ITERATE_BY_REGION(dup, duptr, idx, nb, int, LOGICAL, { |
| for(R_xlen_t j = 0; j < nb; j++) { |
| if(duptr[j] == 0) |
| REAL0(ans)[k++] = REAL_ELT(x, idx + j); |
| } |
| }); |
| break; |
| case CPLXSXP: |
| for (i = 0; i < n; i++) |
| if (LOGICAL_ELT(dup, i) == 0) |
| COMPLEX0(ans)[k++] = COMPLEX_ELT(x, i); |
| break; |
| case STRSXP: |
| for (i = 0; i < n; i++) |
| if (LOGICAL_ELT(dup, i) == 0) |
| SET_STRING_ELT(ans, k++, STRING_ELT(x, i)); |
| break; |
| case VECSXP: |
| for (i = 0; i < n; i++) |
| if (LOGICAL_ELT(dup, i) == 0) |
| SET_VECTOR_ELT(ans, k++, VECTOR_ELT(x, i)); |
| break; |
| case RAWSXP: |
| for (i = 0; i < n; i++) |
| if (LOGICAL_ELT(dup, i) == 0) |
| RAW0(ans)[k++] = RAW_ELT(x, i); |
| break; |
| default: |
| UNIMPLEMENTED_TYPE("duplicated", x); |
| } |
| UNPROTECT(2); |
| return ans; |
| } |
| |
| /* Build a hash table, ignoring information on duplication */ |
| static void DoHashing(SEXP table, HashData *d) |
| { |
| R_xlen_t i, n = XLENGTH(table); |
| for (i = 0; i < n; i++) { |
| // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); |
| (void) isDuplicated(table, i, d); |
| } |
| } |
| |
| /* invalidate entries: normally few */ |
| static void UndoHashing(SEXP x, SEXP table, HashData *d) |
| { |
| for (R_xlen_t i = 0; i < XLENGTH(x); i++) removeEntry(table, x, i, d); |
| } |
| |
| #define DEFLOOKUP(NAME, HASHFUN, EQLFUN) \ |
| static R_INLINE int \ |
| NAME(SEXP table, SEXP x, R_xlen_t indx, HashData *d) \ |
| { \ |
| int *h = HTDATA_INT(d); \ |
| hlen i = HASHFUN(x, indx, d); \ |
| while (h[i] != NIL) { \ |
| if (EQLFUN(table, h[i], x, indx)) \ |
| return h[i] >= 0 ? h[i] + 1 : d->nomatch; \ |
| i = (i + 1) % d->M; \ |
| } \ |
| return d->nomatch; \ |
| } |
| |
| /* definitions to help the C compiler to inline of most important cases */ |
| DEFLOOKUP(iLookup, ihash, iequal) |
| DEFLOOKUP(rLookup, rhash, requal) |
| DEFLOOKUP(sLookup, shash, sequal) |
| |
| /* definition for the general case */ |
| DEFLOOKUP(Lookup, d->hash, d->equal) |
| |
| /* Now do the table lookup */ |
| static SEXP HashLookup(SEXP table, SEXP x, HashData *d) |
| { |
| SEXP ans; |
| R_xlen_t i, n; |
| |
| n = XLENGTH(x); |
| PROTECT(ans = allocVector(INTSXP, n)); |
| int *pa = INTEGER0(ans); |
| |
| switch (TYPEOF(x)) { |
| case INTSXP: |
| for (i = 0; i < n; i++) |
| pa[i] = iLookup(table, x, i, d); |
| break; |
| case REALSXP: |
| for (i = 0; i < n; i++) |
| pa[i] = rLookup(table, x, i, d); |
| break; |
| case STRSXP: |
| for (i = 0; i < n; i++) |
| pa[i] = sLookup(table, x, i, d); |
| break; |
| default: |
| for (i = 0; i < n; i++) |
| pa[i] = Lookup(table, x, i, d); |
| } |
| |
| UNPROTECT(1); |
| return ans; |
| } |
| |
| static SEXP match_transform(SEXP s, SEXP env) |
| { |
| if(OBJECT(s)) { |
| if(inherits(s, "factor")) return asCharacterFactor(s); |
| /* |
| else if(inherits(s, "POSIXlt")) { // and maybe more classes in the future: |
| // Call R's (generic) as.character(s): |
| SEXP call, r; |
| PROTECT(call = lang2(R_AsCharacterSymbol, s)); |
| r = eval(call, env); |
| UNPROTECT(1); |
| return r; |
| } |
| */ |
| else { |
| SEXP call, r; |
| PROTECT(call = lang2(install("mtfrm"), s)); |
| r = eval(call, env); |
| UNPROTECT(1); |
| return r; |
| } |
| } |
| /* else */ |
| return duplicate(s); |
| } |
| |
| /* assumes that x does not have any element in bytes encoding */ |
| static SEXP asUTF8(SEXP x) |
| { |
| R_xlen_t nx = xlength(x); |
| SEXP ux = NULL; |
| for(R_xlen_t i = 0; i < nx; i++) { |
| SEXP xi = STRING_ELT(x, i); |
| if ((xi != NA_STRING) && !IS_ASCII(xi) && !IS_UTF8(xi)) { |
| if (!ux) { |
| ux = PROTECT(allocVector(STRSXP, nx)); |
| for(R_xlen_t j = 0; j < i; j++) |
| SET_STRING_ELT(ux, j, STRING_ELT(x, j)); |
| } |
| SET_STRING_ELT(ux, i, mkCharCE(translateCharUTF8(xi), CE_UTF8)); |
| } else if (ux) |
| SET_STRING_ELT(ux, i, xi); |
| } |
| if (ux) { |
| UNPROTECT(1); |
| return ux; |
| } else |
| return x; |
| } |
| |
| // workhorse of R's match() and hence also " ix %in% itable " |
| SEXP match5(SEXP itable, SEXP ix, int nmatch, SEXP incomp, SEXP env) |
| { |
| R_xlen_t n = xlength(ix); |
| /* handle zero length arguments */ |
| if (n == 0) return allocVector(INTSXP, 0); |
| |
| SEXP ans; |
| if (length(itable) == 0) { |
| ans = allocVector(INTSXP, n); |
| int *pa = INTEGER0(ans); |
| for (R_xlen_t i = 0; i < n; i++) pa[i] = nmatch; |
| return ans; |
| } |
| |
| int nprot = 0; |
| SEXP x = PROTECT(match_transform(ix, env)); nprot++; |
| SEXP table = PROTECT(match_transform(itable, env)); nprot++; |
| /* or should we use PROTECT_WITH_INDEX and REPROTECT below ? */ |
| |
| SEXPTYPE type; |
| /* Coerce to a common type; type == NILSXP is ok here. |
| * Note that above we coerce factors and "POSIXlt", only to character. |
| * Hence, coerce to character or to `higher' type |
| * (given that we have "Vector" or NULL) */ |
| if(TYPEOF(x) >= STRSXP || TYPEOF(table) >= STRSXP) type = STRSXP; |
| else type = TYPEOF(x) < TYPEOF(table) ? TYPEOF(table) : TYPEOF(x); |
| PROTECT(x = coerceVector(x, type)); nprot++; |
| PROTECT(table = coerceVector(table, type)); nprot++; |
| |
| // special case scalar x -- for speed only : |
| if(XLENGTH(x) == 1 && !incomp) { |
| int val = nmatch; |
| int ntable = LENGTH(table); |
| switch (type) { |
| case STRSXP: { |
| SEXP x_val = STRING_ELT(x,0); |
| for (int i=0; i < ntable; i++) if (Seql(STRING_ELT(table,i), x_val)) { |
| val = i + 1; break; |
| } |
| break; } |
| case LGLSXP: |
| case INTSXP: { |
| int x_val = INTEGER_ELT(x, 0), |
| *table_p = INTEGER(table); |
| for (int i=0; i < ntable; i++) if (table_p[i] == x_val) { |
| val = i + 1; break; |
| } |
| break; } |
| case REALSXP: { |
| double xv = REAL_ELT(x, 0); |
| // pblm with signed 0s under IEC60559 |
| double x_val = (xv == 0.) ? 0. : xv; |
| double *table_p = REAL(table); |
| /* we want all NaNs except NA equal, and all NAs equal */ |
| if (R_IsNA(x_val)) { |
| for (int i=0; i < ntable; i++) if (R_IsNA(table_p[i])) { |
| val = i + 1; break; |
| } |
| } |
| else if (R_IsNaN(x_val)) { |
| for (int i=0; i < ntable; i++) if (R_IsNaN(table_p[i])) { |
| val = i + 1; break; |
| } |
| } |
| else { |
| for (int i=0; i < ntable; i++) if (table_p[i] == x_val) { |
| val = i + 1; break; |
| } |
| } |
| break; } |
| case CPLXSXP: { |
| Rcomplex x_val = COMPLEX_ELT(x, 0), |
| *table_p = COMPLEX(table); |
| for (int i=0; i < ntable; i++) |
| if (cplx_eq(table_p[i], x_val)) { |
| val = i + 1; break; |
| } |
| break; } |
| case RAWSXP: { |
| Rbyte x_val = RAW_ELT(x, 0), |
| *table_p = RAW(table); |
| for (int i=0; i < ntable; i++) if (table_p[i] == x_val) { |
| val = i + 1; break; |
| } |
| break; } |
| } |
| PROTECT(ans = ScalarInteger(val)); nprot++; |
| } |
| else { // regular case |
| HashData data = { 0 }; |
| if (incomp) { PROTECT(incomp = coerceVector(incomp, type)); nprot++; } |
| data.nomatch = nmatch; |
| HashTableSetup(table, &data, NA_INTEGER); |
| PROTECT(data.HashTable); nprot++; |
| if(type == STRSXP) { |
| Rboolean useBytes = FALSE; |
| Rboolean useUTF8 = FALSE; |
| Rboolean useCache = TRUE; |
| for(R_xlen_t i = 0; i < xlength(x); i++) { |
| SEXP s = STRING_ELT(x, i); |
| if(IS_BYTES(s)) { |
| useBytes = TRUE; |
| useUTF8 = FALSE; |
| break; |
| } |
| if(ENC_KNOWN(s)) { |
| useUTF8 = TRUE; |
| } |
| if(!IS_CACHED(s)) { |
| useCache = FALSE; |
| break; |
| } |
| } |
| if(!useBytes || useCache) { |
| for(int i = 0; i < LENGTH(table); i++) { |
| SEXP s = STRING_ELT(table, i); |
| if(IS_BYTES(s)) { |
| useBytes = TRUE; |
| useUTF8 = FALSE; |
| break; |
| } |
| if(ENC_KNOWN(s)) { |
| useUTF8 = TRUE; |
| } |
| if(!IS_CACHED(s)) { |
| useCache = FALSE; |
| break; |
| } |
| } |
| } |
| if(useUTF8) { |
| x = PROTECT(asUTF8(x)); nprot++; |
| table = PROTECT(asUTF8(table)); nprot++; |
| } |
| data.useUTF8 = useUTF8; |
| data.useCache = useCache; |
| } |
| DoHashing(table, &data); |
| if (incomp) UndoHashing(incomp, table, &data); |
| ans = HashLookup(table, x, &data); |
| } |
| UNPROTECT(nprot); |
| return ans; |
| } // end{ match5 } |
| |
| SEXP matchE(SEXP itable, SEXP ix, int nmatch, SEXP env) |
| { |
| return match5(itable, ix, nmatch, NULL, env); |
| } |
| |
| /* used from other code, not here: */ |
| SEXP match(SEXP itable, SEXP ix, int nmatch) |
| { |
| return match5(itable, ix, nmatch, NULL, R_BaseEnv); |
| } |
| |
| |
| // .Internal(match(x, table, nomatch, incomparables)) : |
| SEXP attribute_hidden do_match(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| checkArity(op, args); |
| |
| if ((!isVector(CAR(args)) && !isNull(CAR(args))) |
| || (!isVector(CADR(args)) && !isNull(CADR(args)))) |
| error(_("'match' requires vector arguments")); |
| |
| int nomatch = asInteger(CADDR(args)); |
| SEXP incomp = CADDDR(args); |
| |
| if (isNull(incomp) || /* S has FALSE to mean empty */ |
| (length(incomp) == 1 && isLogical(incomp) && |
| LOGICAL_ELT(incomp, 0) == 0)) |
| return match5(CADR(args), CAR(args), nomatch, NULL, env); |
| else |
| return match5(CADR(args), CAR(args), nomatch, incomp, env); |
| } |
| |
| /* pmatch and charmatch return integer positions, so cannot be used |
| for long vector tables */ |
| |
| /* Partial Matching of Strings */ |
| /* Fully S Compatible version. */ |
| |
| /* Hmm, this was not all S compatible! The desired behaviour is: |
| * First do exact matches, and mark elements as used as they are matched |
| * unless dup_ok is true. |
| * Then do partial matching, from left to right, using up the table |
| * unless dup_ok is true. Multiple partial matches are ignored. |
| * Empty strings are unmatched BDR 2000/2/16 |
| */ |
| |
| SEXP attribute_hidden do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| SEXP ans, input, target; |
| int mtch, n_target, mtch_count, dups_ok, no_match; |
| size_t temp; |
| int *used = NULL, *ians; |
| const char **in, **tar; |
| Rboolean no_dups; |
| Rboolean useBytes = FALSE, useUTF8 = FALSE; |
| |
| checkArity(op, args); |
| input = CAR(args); |
| R_xlen_t n_input = XLENGTH(input); |
| target = CADR(args); |
| n_target = LENGTH(target); // not allowed to be long |
| no_match = asInteger(CADDR(args)); |
| dups_ok = asLogical(CADDDR(args)); |
| if (dups_ok == NA_LOGICAL) |
| error(_("invalid '%s' argument"), "duplicates.ok"); |
| no_dups = !dups_ok; |
| |
| if (!isString(input) || !isString(target)) |
| error(_("argument is not of mode character")); |
| |
| if(no_dups) { |
| used = (int *) R_alloc((size_t) n_target, sizeof(int)); |
| for (int j = 0; j < n_target; j++) used[j] = 0; |
| } |
| |
| for(R_xlen_t i = 0; i < n_input; i++) { |
| if(IS_BYTES(STRING_ELT(input, i))) { |
| useBytes = TRUE; |
| useUTF8 = FALSE; |
| break; |
| } else if(ENC_KNOWN(STRING_ELT(input, i))) { |
| useUTF8 = TRUE; |
| } |
| } |
| if(!useBytes) { |
| for(R_xlen_t i = 0; i < n_target; i++) { |
| if(IS_BYTES(STRING_ELT(target, i))) { |
| useBytes = TRUE; |
| useUTF8 = FALSE; |
| break; |
| } else if(ENC_KNOWN(STRING_ELT(target, i))) { |
| useUTF8 = TRUE; |
| } |
| } |
| } |
| |
| in = (const char **) R_alloc((size_t) n_input, sizeof(char *)); |
| tar = (const char **) R_alloc((size_t) n_target, sizeof(char *)); |
| PROTECT(ans = allocVector(INTSXP, n_input)); |
| ians = INTEGER0(ans); |
| if(useBytes) { |
| for(R_xlen_t i = 0; i < n_input; i++) { |
| in[i] = CHAR(STRING_ELT(input, i)); |
| ians[i] = 0; |
| } |
| for(int j = 0; j < n_target; j++) |
| tar[j] = CHAR(STRING_ELT(target, j)); |
| } |
| else if(useUTF8) { |
| for(R_xlen_t i = 0; i < n_input; i++) { |
| in[i] = translateCharUTF8(STRING_ELT(input, i)); |
| ians[i] = 0; |
| } |
| for(int j = 0; j < n_target; j++) |
| tar[j] = translateCharUTF8(STRING_ELT(target, j)); |
| } else { |
| for(R_xlen_t i = 0; i < n_input; i++) { |
| in[i] = translateChar(STRING_ELT(input, i)); |
| ians[i] = 0; |
| } |
| for(int j = 0; j < n_target; j++) |
| tar[j] = translateChar(STRING_ELT(target, j)); |
| } |
| /* First pass, exact matching */ |
| R_xlen_t nexact = 0; |
| /* Compromise when hashing used changed in 3.2.0 (PR#15697) */ |
| if (n_input <= 100 || n_target <= 100) { |
| for (R_xlen_t i = 0; i < n_input; i++) { |
| const char *ss = in[i]; |
| if (strlen(ss) == 0) continue; |
| for (int j = 0; j < n_target; j++) { |
| if (no_dups && used[j]) continue; |
| if (strcmp(ss, tar[j]) == 0) { |
| ians[i] = j + 1; |
| if (no_dups) used[j] = 1; |
| nexact++; |
| break; |
| } |
| } |
| } |
| } else { |
| HashData data = { 0 }; |
| HashTableSetup(target, &data, NA_INTEGER); |
| data.useUTF8 = useUTF8; |
| data.nomatch = 0; |
| DoHashing(target, &data); |
| for (R_xlen_t i = 0; i < n_input; i++) { |
| if (strlen(in[i]) == 0) /* don't look up "" */ |
| continue; |
| int j = Lookup(target, input, i, &data); |
| if ((j == 0) || (no_dups && used[j - 1])) continue; |
| if (no_dups) used[j - 1] = 1; |
| ians[i] = j; |
| nexact++; |
| } |
| } |
| |
| if(nexact < n_input) { |
| /* Second pass, partial matching */ |
| for (R_xlen_t i = 0; i < n_input; i++) { |
| const char *ss; |
| if (ians[i]) continue; |
| ss = in[i]; |
| temp = strlen(ss); |
| if (temp == 0) continue; |
| mtch = 0; |
| mtch_count = 0; |
| for (int j = 0; j < n_target; j++) { |
| if (no_dups && used[j]) continue; |
| if (strncmp(ss, tar[j], temp) == 0) { |
| mtch = j + 1; |
| mtch_count++; |
| } |
| } |
| if (mtch > 0 && mtch_count == 1) { |
| if(no_dups) used[mtch - 1] = 1; |
| ians[i] = mtch; |
| } |
| } |
| /* Third pass, set no matches */ |
| for (R_xlen_t i = 0; i < n_input; i++) |
| if(ians[i] == 0) ians[i] = no_match; |
| |
| } |
| UNPROTECT(1); |
| return ans; |
| } |
| |
| |
| /* Partial Matching of Strings */ |
| /* Based on Therneau's charmatch. */ |
| |
| SEXP attribute_hidden do_charmatch(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| SEXP ans, input, target; |
| const char *ss, *st; |
| Rboolean useBytes = FALSE, useUTF8 = FALSE; |
| |
| checkArity(op, args); |
| |
| input = CAR(args); |
| R_xlen_t n_input = LENGTH(input); |
| target = CADR(args); |
| int n_target = LENGTH(target); |
| |
| if (!isString(input) || !isString(target)) |
| error(_("argument is not of mode character")); |
| int no_match = asInteger(CADDR(args)); |
| |
| for(R_xlen_t i = 0; i < n_input; i++) { |
| if(IS_BYTES(STRING_ELT(input, i))) { |
| useBytes = TRUE; |
| useUTF8 = FALSE; |
| break; |
| } else if(ENC_KNOWN(STRING_ELT(input, i))) { |
| useUTF8 = TRUE; |
| } |
| } |
| if(!useBytes) { |
| for(int i = 0; i < n_target; i++) { |
| if(IS_BYTES(STRING_ELT(target, i))) { |
| useBytes = TRUE; |
| useUTF8 = FALSE; |
| break; |
| } else if(ENC_KNOWN(STRING_ELT(target, i))) { |
| useUTF8 = TRUE; |
| } |
| } |
| } |
| |
| PROTECT(ans = allocVector(INTSXP, n_input)); |
| int *ians = INTEGER0(ans); |
| |
| const void *vmax = vmaxget(); // prudence: .Internal does this too. |
| for(R_xlen_t i = 0; i < n_input; i++) { |
| if(useBytes) |
| ss = CHAR(STRING_ELT(input, i)); |
| else if(useUTF8) |
| ss = translateCharUTF8(STRING_ELT(input, i)); |
| else |
| ss = translateChar(STRING_ELT(input, i)); |
| size_t temp = strlen(ss); |
| int imatch = NA_INTEGER; |
| Rboolean perfect = FALSE; |
| /* we could reset vmax here too: worth it? */ |
| for(int j = 0; j < n_target; j++) { |
| if(useBytes) |
| st = CHAR(STRING_ELT(target, j)); |
| else if(useUTF8) |
| st = translateCharUTF8(STRING_ELT(target, j)); |
| else |
| st = translateChar(STRING_ELT(target, j)); |
| int k = strncmp(ss, st, temp); |
| if (k == 0) { |
| if (strlen(st) == temp) { |
| if (perfect) |
| imatch = 0; |
| else { |
| perfect = TRUE; |
| imatch = j + 1; |
| } |
| } |
| else if (!perfect) { |
| if (imatch == NA_INTEGER) |
| imatch = j + 1; |
| else |
| imatch = 0; |
| } |
| } |
| } |
| ians[i] = (imatch == NA_INTEGER) ? no_match : imatch; |
| vmaxset(vmax); |
| } |
| UNPROTECT(1); |
| return ans; |
| } |
| |
| |
| /* Functions for matching the supplied arguments to the */ |
| /* formal arguments of functions. The returned value */ |
| /* is a list with all components named. */ |
| |
| static SEXP StripUnmatched(SEXP s) |
| { |
| if (s == R_NilValue) return s; |
| |
| if (CAR(s) == R_MissingArg && !ARGUSED(s) ) { |
| return StripUnmatched(CDR(s)); |
| } |
| else if (CAR(s) == R_DotsSymbol ) { |
| return StripUnmatched(CDR(s)); |
| } |
| else { |
| SETCDR(s, StripUnmatched(CDR(s))); |
| return s; |
| } |
| } |
| |
| static SEXP ExpandDots(SEXP s, int expdots) |
| { |
| SEXP r; |
| if (s == R_NilValue) |
| return s; |
| if (TYPEOF(CAR(s)) == DOTSXP ) { |
| SET_TYPEOF(CAR(s), LISTSXP); /* a safe mutation */ |
| if (expdots) { |
| r = CAR(s); |
| while (CDR(r) != R_NilValue ) { |
| SET_ARGUSED(r, 1); |
| r = CDR(r); |
| } |
| SET_ARGUSED(r, 1); |
| SETCDR(r, ExpandDots(CDR(s), expdots)); |
| return CAR(s); |
| } |
| } |
| else |
| SET_ARGUSED(s, 0); |
| SETCDR(s, ExpandDots(CDR(s), expdots)); |
| return s; |
| } |
| static SEXP subDots(SEXP rho) |
| { |
| SEXP rval, dots, a, b, t; |
| int len,i; |
| |
| dots = findVar(R_DotsSymbol, rho); |
| |
| if (dots == R_UnboundValue) |
| error(_("... used in a situation where it does not exist")); |
| |
| if (dots == R_MissingArg) |
| return dots; |
| |
| if (!isPairList(dots)) |
| error(_("... is not a pairlist")); |
| |
| len = length(dots); |
| PROTECT(dots); |
| PROTECT(rval=allocList(len)); |
| for(a = dots, b = rval, i = 1; i <= len; a = CDR(a), b = CDR(b), i++) { |
| SET_TAG(b, TAG(a)); |
| t = CAR(a); |
| while (TYPEOF(t) == PROMSXP) |
| t = PREXPR(t); |
| if( isSymbol(t) || isLanguage(t) ) |
| SETCAR(b, installDDVAL(i)); |
| else |
| SETCAR(b, t); |
| } |
| UNPROTECT(2); |
| return rval; |
| } |
| |
| |
| SEXP attribute_hidden do_matchcall(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| SEXP formals, actuals, rlist; |
| SEXP funcall, f, b, rval, sysp, t1, t2, tail; |
| // RCNTXT *cptr; |
| int expdots; |
| |
| checkArity(op,args); |
| |
| funcall = CADR(args); |
| |
| if (TYPEOF(funcall) == EXPRSXP) |
| funcall = VECTOR_ELT(funcall, 0); |
| |
| if (TYPEOF(funcall) != LANGSXP) |
| error(_("invalid '%s' argument"), "call"); |
| |
| b = CAR(args); |
| if (TYPEOF(b) != CLOSXP) |
| error(_("invalid '%s' argument"), "definition"); |
| |
| sysp = CAR(CDDDR(args)); |
| if (!isEnvironment(sysp)) |
| error(_("'envir' must be an environment")); |
| |
| /* Do we expand ... ? */ |
| |
| expdots = asLogical(CAR(CDDR(args))); |
| if (expdots == NA_LOGICAL) |
| error(_("invalid '%s' argument"), "expand.dots"); |
| |
| /* Get the formals and match the actual args */ |
| |
| formals = FORMALS(b); |
| PROTECT(actuals = shallow_duplicate(CDR(funcall))); |
| |
| /* If there is a ... symbol then expand it out in the sysp env |
| We need to take some care since the ... might be in the middle |
| of the actuals */ |
| |
| t2 = R_MissingArg; |
| for (t1=actuals ; t1!=R_NilValue ; t1 = CDR(t1) ) { |
| if (CAR(t1) == R_DotsSymbol) { |
| t2 = subDots(sysp); |
| break; |
| } |
| } |
| /* now to splice t2 into the correct spot in actuals */ |
| if (t2 != R_MissingArg ) { /* so we did something above */ |
| if( CAR(actuals) == R_DotsSymbol ) { |
| UNPROTECT(1); |
| actuals = listAppend(t2, CDR(actuals)); |
| PROTECT(actuals); |
| } |
| else { |
| for(t1=actuals; t1!=R_NilValue; t1=CDR(t1)) { |
| if( CADR(t1) == R_DotsSymbol ) { |
| tail = CDDR(t1); |
| SETCDR(t1, t2); |
| listAppend(actuals,tail); |
| break; |
| } |
| } |
| } |
| } else { /* get rid of it */ |
| if( CAR(actuals) == R_DotsSymbol ) { |
| UNPROTECT(1); |
| actuals = CDR(actuals); |
| PROTECT(actuals); |
| } |
| else { |
| for(t1=actuals; t1!=R_NilValue; t1=CDR(t1)) { |
| if( CADR(t1) == R_DotsSymbol ) { |
| tail = CDDR(t1); |
| SETCDR(t1, tail); |
| break; |
| } |
| } |
| } |
| } |
| rlist = matchArgs_RC(formals, actuals, call); |
| |
| /* Attach the argument names as tags */ |
| |
| for (f = formals, b = rlist; b != R_NilValue; b = CDR(b), f = CDR(f)) { |
| SET_TAG(b, TAG(f)); |
| } |
| |
| |
| /* Handle the dots */ |
| |
| PROTECT(rlist = ExpandDots(rlist, expdots)); |
| |
| /* Eliminate any unmatched formals and any that match R_DotSymbol */ |
| /* This needs to be after ExpandDots as the DOTSXP might match ... */ |
| |
| rlist = StripUnmatched(rlist); |
| |
| PROTECT(rval = allocSExp(LANGSXP)); |
| SETCAR(rval, lazy_duplicate(CAR(funcall))); |
| SETCDR(rval, rlist); |
| UNPROTECT(3); |
| return rval; |
| } |
| |
| |
| #include <R_ext/RS.h> /* for Memzero */ |
| |
| #ifdef _AIX /*some people just have to be different: is this still needed? */ |
| # include <memory.h> |
| #endif |
| |
| |
| static SEXP |
| rowsum(SEXP x, SEXP g, SEXP uniqueg, SEXP snarm, SEXP rn) |
| { |
| SEXP matches,ans; |
| int n, p, ng, narm; |
| R_xlen_t offset = 0, offsetg = 0; |
| HashData data = { 0 }; |
| data.nomatch = 0; |
| |
| n = LENGTH(g); |
| ng = length(uniqueg); |
| narm = asLogical(snarm); |
| if(narm == NA_LOGICAL) error("'na.rm' must be TRUE or FALSE"); |
| if(isMatrix(x)) p = ncols(x); else p = 1; |
| |
| HashTableSetup(uniqueg, &data, NA_INTEGER); |
| PROTECT(data.HashTable); |
| DoHashing(uniqueg, &data); |
| PROTECT(matches = HashLookup(uniqueg, g, &data)); |
| int *pmatches = INTEGER(matches); |
| |
| PROTECT(ans = allocMatrix(TYPEOF(x), ng, p)); |
| |
| switch(TYPEOF(x)){ |
| case REALSXP: |
| Memzero(REAL0(ans), ng*p); |
| for(int i = 0; i < p; i++) { |
| double *pa = REAL0(ans); |
| for(int j = 0; j < n; j++) { |
| double xjpo = REAL_ELT(x, j + offset); |
| if(!narm || !ISNAN(xjpo)) |
| pa[pmatches[j] - 1 + offsetg] += xjpo; |
| } |
| offset += n; |
| offsetg += ng; |
| } |
| break; |
| case INTSXP: |
| Memzero(INTEGER0(ans), ng*p); |
| for(int i = 0; i < p; i++) { |
| int *pa = INTEGER0(ans); |
| for(int j = 0; j < n; j++) { |
| int xjpo = INTEGER_ELT(x, j + offset); |
| if (xjpo == NA_INTEGER) { |
| if(!narm) |
| pa[pmatches[j] - 1 + offsetg] = NA_INTEGER; |
| } else if (pa[pmatches[j] - 1 + offsetg] != NA_INTEGER) { |
| /* check for integer overflows */ |
| int itmp = pa[pmatches[j] - 1 + offsetg]; |
| double dtmp = itmp; |
| dtmp += xjpo; |
| if (dtmp < INT_MIN || dtmp > INT_MAX) itmp = NA_INTEGER; |
| else itmp += xjpo; |
| pa[pmatches[j] - 1 + offsetg] = itmp; |
| } |
| } |
| offset += n; |
| offsetg += ng; |
| } |
| break; |
| default: |
| error("non-numeric matrix in rowsum(): this should not happen"); |
| } |
| if (TYPEOF(rn) != STRSXP) error("row names are not character"); |
| SEXP dn = allocVector(VECSXP, 2), dn2, dn3; |
| setAttrib(ans, R_DimNamesSymbol, dn); |
| SET_VECTOR_ELT(dn, 0, rn); |
| dn2 = getAttrib(x, R_DimNamesSymbol); |
| if(length(dn2) >= 2 && |
| !isNull(dn3 = VECTOR_ELT(dn2, 1))) SET_VECTOR_ELT(dn, 1, dn3); |
| |
| UNPROTECT(3); /* HashTable, matches, ans */ |
| return ans; |
| } |
| |
| static SEXP |
| rowsum_df(SEXP x, SEXP g, SEXP uniqueg, SEXP snarm, SEXP rn) |
| { |
| SEXP matches,ans,col,xcol; |
| int p, narm; |
| HashData data = { 0 }; |
| data.nomatch = 0; |
| |
| R_xlen_t n = XLENGTH(g); |
| p = LENGTH(x); |
| R_xlen_t ng = XLENGTH(uniqueg); |
| narm = asLogical(snarm); |
| if(narm == NA_LOGICAL) error("'na.rm' must be TRUE or FALSE"); |
| |
| HashTableSetup(uniqueg, &data, NA_INTEGER); |
| PROTECT(data.HashTable); |
| DoHashing(uniqueg, &data); |
| PROTECT(matches = HashLookup(uniqueg, g, &data)); |
| int *pmatches = INTEGER(matches); |
| |
| PROTECT(ans = allocVector(VECSXP, p)); |
| |
| for(int i = 0; i < p; i++) { |
| xcol = VECTOR_ELT(x,i); |
| if (!isNumeric(xcol)) |
| error(_("non-numeric data frame in rowsum")); |
| switch(TYPEOF(xcol)){ |
| case REALSXP: |
| PROTECT(col = allocVector(REALSXP,ng)); |
| Memzero(REAL0(col), ng); |
| for(R_xlen_t j = 0; j < n; j++) { |
| double xj = REAL_ELT(xcol, j); |
| if(!narm || !ISNAN(xj)) |
| REAL0(col)[pmatches[j] - 1] += xj; |
| } |
| SET_VECTOR_ELT(ans,i,col); |
| UNPROTECT(1); |
| break; |
| case INTSXP: |
| PROTECT(col = allocVector(INTSXP, ng)); |
| Memzero(INTEGER0(col), ng); |
| for(R_xlen_t j = 0; j < n; j++) { |
| int xj = INTEGER_ELT(xcol, j); |
| if (xj == NA_INTEGER) { |
| if(!narm) |
| INTEGER0(col)[pmatches[j] - 1] = NA_INTEGER; |
| } else if (INTEGER0(col)[pmatches[j] - 1] != NA_INTEGER) { |
| int itmp = INTEGER0(col)[pmatches[j] - 1]; |
| double dtmp = itmp; |
| dtmp += xj; |
| if (dtmp < INT_MIN || dtmp > INT_MAX) itmp = NA_INTEGER; |
| else itmp += xj; |
| INTEGER0(col)[pmatches[j] - 1] = itmp; |
| } |
| } |
| SET_VECTOR_ELT(ans, i, col); |
| UNPROTECT(1); |
| break; |
| |
| default: |
| error(_("this cannot happen")); |
| } |
| } |
| namesgets(ans, getAttrib(x, R_NamesSymbol)); |
| if (TYPEOF(rn) != STRSXP) error("row names are not character"); |
| setAttrib(ans, R_RowNamesSymbol, rn); |
| classgets(ans, mkString("data.frame")); |
| |
| UNPROTECT(3); /* HashTable, matches, ans */ |
| return ans; |
| } |
| |
| SEXP attribute_hidden do_rowsum(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| checkArity(op, args); |
| if(PRIMVAL(op) == 1) |
| return rowsum_df(CAR(args), CADR(args), CADDR(args), CADDDR(args), |
| CAD4R(args)); |
| else |
| return rowsum(CAR(args), CADR(args), CADDR(args), CADDDR(args), |
| CAD4R(args)); |
| } |
| |
| |
| /* returns 1-based duplicate no */ |
| static int isDuplicated2(SEXP x, int indx, HashData *d) |
| { |
| int *h = HTDATA_INT(d); |
| hlen i = d->hash(x, indx, d); |
| while (h[i] != NIL) { |
| if (d->equal(x, h[i], x, indx)) |
| return h[i] + 1; |
| i = (i + 1) % d->M; |
| } |
| h[i] = indx; |
| return 0; |
| } |
| |
| static SEXP duplicated2(SEXP x, HashData *d) |
| { |
| SEXP ans; |
| int i, n; |
| |
| n = LENGTH(x); |
| HashTableSetup(x, d, NA_INTEGER); |
| PROTECT(d->HashTable); |
| PROTECT(ans = allocVector(INTSXP, n)); |
| |
| int *h = HTDATA_INT(d); |
| int *v = INTEGER0(ans); |
| for (i = 0; i < d->M; i++) h[i] = NIL; |
| for (i = 0; i < n; i++) { |
| // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); |
| v[i] = isDuplicated2(x, i, d); |
| } |
| UNPROTECT(2); |
| return ans; |
| } |
| |
| SEXP attribute_hidden do_makeunique(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| SEXP names, sep, ans, dup, newx; |
| int i, cnt, *cnts, dp; |
| int n, len, maxlen = 0; |
| HashData data = { 0 }; |
| const char *csep, *ss; |
| const void *vmax; |
| |
| checkArity(op, args); |
| names = CAR(args); |
| if(!isString(names)) |
| error(_("'names' must be a character vector")); |
| n = LENGTH(names); |
| sep = CADR(args); |
| if(!isString(sep) || LENGTH(sep) != 1) |
| error(_("'%s' must be a character string"), "sep"); |
| csep = translateChar(STRING_ELT(sep, 0)); |
| PROTECT(ans = allocVector(STRSXP, n)); |
| vmax = vmaxget(); |
| for(i = 0; i < n; i++) { |
| SET_STRING_ELT(ans, i, STRING_ELT(names, i)); |
| len = (int) strlen(translateChar(STRING_ELT(names, i))); |
| if(len > maxlen) maxlen = len; |
| vmaxset(vmax); |
| } |
| if(n > 1) { |
| /* +2 for terminator and rounding error */ |
| char buf[maxlen + (int) strlen(csep) |
| + (int) (log((double)n)/log(10.0)) + 2]; |
| if(n < 10000) { |
| R_CheckStack2((size_t)n * sizeof(int)); |
| cnts = (int *) alloca(((size_t) n) * sizeof(int)); |
| } else { |
| /* This is going to be slow so use expensive allocation |
| that will be recovered if interrupted. */ |
| cnts = (int *) R_alloc((size_t) n, sizeof(int)); |
| } |
| for(i = 0; i < n; i++) cnts[i] = 1; |
| data.nomatch = 0; |
| PROTECT(newx = allocVector(STRSXP, 1)); |
| PROTECT(dup = duplicated2(names, &data)); |
| PROTECT(data.HashTable); |
| vmax = vmaxget(); |
| for(i = 1; i < n; i++) { /* first cannot be a duplicate */ |
| dp = INTEGER_ELT(dup, i); /* 1-based number of first occurrence */ |
| if(dp == 0) continue; |
| ss = translateChar(STRING_ELT(names, i)); |
| /* Try appending 1,2,3, ..., n-1 until it is not already in use */ |
| for(cnt = cnts[dp - 1]; cnt < n; cnt++) { |
| sprintf(buf, "%s%s%d", ss, csep, cnt); |
| SET_STRING_ELT(newx, 0, mkChar(buf)); |
| if(Lookup(ans, newx, 0, &data) == data.nomatch) break; |
| } |
| SET_STRING_ELT(ans, i, STRING_ELT(newx, 0)); |
| /* insert it */ (void) isDuplicated(ans, i, &data); |
| cnts[dp - 1] = cnt+1; /* cache the first unused cnt value */ |
| vmaxset(vmax); |
| } |
| UNPROTECT(3); |
| } |
| UNPROTECT(1); |
| return ans; |
| } |
| |
| /* Use hashing to improve object.size. Here we want equal CHARSXPs, |
| not equal contents. */ |
| |
| static int csequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j) |
| { |
| return STRING_ELT(x, i) == STRING_ELT(y, j); |
| } |
| |
| static void HashTableSetup1(SEXP x, HashData *d) |
| { |
| d->hash = cshash; |
| d->equal = csequal; |
| #ifdef LONG_VECTOR_SUPPORT |
| d->isLong = FALSE; |
| #endif |
| MKsetup(XLENGTH(x), d, NA_INTEGER); |
| d->HashTable = allocVector(INTSXP, (R_xlen_t) d->M); |
| for (R_xlen_t i = 0; i < d->M; i++) HTDATA_INT(d)[i] = NIL; |
| } |
| |
| /* used in utils */ |
| SEXP Rf_csduplicated(SEXP x) |
| { |
| if(TYPEOF(x) != STRSXP) |
| error("C function 'csduplicated' not called on a STRSXP"); |
| R_xlen_t n = XLENGTH(x); |
| HashData data = { 0 }; |
| HashTableSetup1(x, &data); |
| PROTECT(data.HashTable); |
| SEXP ans = PROTECT(allocVector(LGLSXP, n)); |
| int *v = LOGICAL(ans); |
| |
| for (R_xlen_t i = 0; i < n; i++) v[i] = isDuplicated(x, i, &data); |
| |
| UNPROTECT(2); |
| return ans; |
| } |
| |
| #include <R_ext/Random.h> |
| |
| // sample.int(.) --> .Internal(sample2(n, size)) : |
| SEXP attribute_hidden do_sample2(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| checkArity(op, args); |
| SEXP ans; |
| double dn = asReal(CAR(args)); |
| SEXP sk = CADR(args); |
| if (length(sk) != 1) |
| error(_("invalid '%s' argument"), "size"); |
| int k = asInteger(sk); |
| if (length(CAR(args)) != 1 || !R_FINITE(dn) || dn < 0 |
| || dn > 4.5e15 || (k > 0 && dn == 0)) |
| error(_("invalid first argument")); |
| if (k < 0) error(_("invalid '%s' argument"), "size"); // includes NA |
| if (k > dn/2) error("This algorithm is for size <= n/2"); |
| HashData data = { 0 }; |
| GetRNGstate(); |
| if (dn > INT_MAX) { |
| ans = PROTECT(allocVector(REALSXP, k)); |
| double *ry = REAL0(ans); |
| HashTableSetup(ans, &data, NA_INTEGER); |
| PROTECT(data.HashTable); |
| for (int i = 0; i < k; i++) |
| for(int j = 0; j < 100; j++) { // average < 2 |
| ry[i] = R_unif_index(dn) + 1; |
| if(!isDuplicated(ans, i, &data)) break; |
| } |
| } else { |
| ans = PROTECT(allocVector(INTSXP, k)); |
| int *iy = INTEGER0(ans); |
| HashTableSetup(ans, &data, NA_INTEGER); |
| PROTECT(data.HashTable); |
| for (int i = 0; i < k; i++) |
| for(int j = 0; j < 100; j++) { // average < 2 |
| iy[i] = (int)(R_unif_index(dn) + 1); |
| if(!isDuplicated(ans, i, &data)) break; |
| } |
| } |
| PutRNGstate(); |
| UNPROTECT(2); |
| return ans; |
| } |
| |
| /*** |
| *** Experimental Hath Table Implementation |
| *** |
| *** The interface is based loosely on the design of the Common Lisp |
| *** hash table support. Two equality tests are supported: identical() |
| *** and pointer equality. |
| ***/ |
| |
| /** |
| ** C Level Interface |
| **/ |
| |
| /* |
| * Low Level Functions |
| */ |
| |
| static int hash_identical(SEXP x, int K, int useCloEnv) |
| { |
| /* using 31 seems to work reasonably */ |
| if (K == 0 || K > 31) K = 31; |
| |
| HashData d = { .K = K, .useUTF8 = FALSE, .useCache = TRUE }; |
| d.useCloEnv = useCloEnv; |
| d.extptrAsRef = TRUE; |
| d.inHashtab = TRUE; |
| |
| int val = (int) vhash_one(x, &d); |
| if (val == NA_INTEGER) val = 0; |
| if (val < 0) val = -val; |
| return val; |
| } |
| |
| static int hash_address(SEXP x, int K) |
| { |
| if (K == 0 || K > 31) K = 31; |
| |
| HashData d = { .K = K }; |
| |
| int val = (int) scatter(PTRHASH(x), &d); |
| if (val == NA_INTEGER) val = 0; |
| if (val < 0) val = -val; |
| return val; |
| } |
| |
| /* allow for compiling with NAMED */ |
| static R_INLINE SEXP INC_NMD(SEXP x) { |
| INCREMENT_NAMED(x); |
| return x; |
| } |
| |
| #define HT_SEXP(h) (h).cell |
| #define HT_META_SIZE 3 |
| #define HT_META(h) R_ExternalPtrTag(HT_SEXP(h)) |
| |
| #define HT_TABLE(h) R_ExternalPtrProtected(HT_SEXP(h)) |
| #define SET_HT_TABLE(h, table) R_SetExternalPtrProtected(HT_SEXP(h), table) |
| #define HT_TABLE_SIZE(h) LENGTH(HT_TABLE(h)) |
| |
| #define HT_COUNT(h) (INTEGER(HT_META(h))[0]) |
| #define HT_TYPE(h) (INTEGER(HT_META(h))[1]) |
| #define HT_TABLE_K(h) (INTEGER(HT_META(h))[2]) |
| |
| #define HT_IS_VALID(h) (R_ExternalPtrAddr(HT_SEXP(h)) != NULL) |
| #define HT_VALIDATE(h) R_SetExternalPtrAddr(HT_SEXP(h), HT_SEXP(h)) |
| |
| static R_INLINE int HT_HASH(R_hashtab_type h, SEXP key) |
| { |
| switch(HT_TYPE(h)) { |
| case HT_TYPE_IDENTICAL: |
| return hash_identical(key, HT_TABLE_K(h), TRUE); |
| case HT_TYPE_ADDRESS: |
| return hash_address(key, HT_TABLE_K(h)); |
| default: |
| error("bad hash table type"); |
| } |
| } |
| |
| static R_INLINE int HT_EQUAL(R_hashtab_type h, SEXP x, SEXP y) |
| { |
| switch(HT_TYPE(h)) { |
| case HT_TYPE_IDENTICAL: |
| { |
| /* IDENT_USE_CLOENV correspnds to the default in identical(). |
| IDENT_EXTPTR_AS_REF ensures that EXTPTRSXP objects in |
| keys unserialize senibly. */ |
| int flags = IDENT_USE_CLOENV | IDENT_EXTPTR_AS_REF; |
| return R_compute_identical(x, y, flags); |
| } |
| case HT_TYPE_ADDRESS: |
| return x == y; |
| default: |
| error("bad hash table type"); |
| } |
| } |
| |
| static void rehash(R_hashtab_type h, int resize) |
| { |
| /* If the meta-data structure is changed then a saved hash table |
| will be invalid. For now, just check and reject incompatible |
| oned. For future changes would be better to try to |
| rewrite/repair the meta data. */ |
| if (TYPEOF(HT_META(h)) != INTSXP || LENGTH(HT_META(h)) != HT_META_SIZE) |
| error("invalid hash table meta data"); |
| |
| SEXP old_table = PROTECT(HT_TABLE(h)); |
| int old_size = LENGTH(old_table); |
| |
| R_xlen_t new_size = resize ? 2 * old_size : old_size; |
| if (new_size > INT_MAX) |
| error("hash size would exceed limit"); |
| |
| HT_COUNT(h) = 0; |
| HT_VALIDATE(h); |
| |
| SET_HT_TABLE(h, allocVector(VECSXP, new_size)); |
| if (resize) HT_TABLE_K(h)++; |
| |
| for (int i = 0; i < old_size; i++) |
| for (SEXP cell = VECTOR_ELT(old_table, i); |
| cell != R_NilValue; |
| cell = CDR(cell)) |
| R_sethash(h, TAG(cell), CAR(cell)); |
| |
| UNPROTECT(1); /* old_table */ |
| } |
| |
| static SEXP getcell(R_hashtab_type h, SEXP key, int *pidx) |
| { |
| SEXP table = HT_TABLE(h); |
| |
| if (! HT_IS_VALID(h)) |
| rehash(h, FALSE); |
| |
| int idx = HT_HASH(h, key); |
| *pidx = idx; |
| |
| SEXP chain = VECTOR_ELT(table, idx); |
| while (chain != R_NilValue) { |
| if (HT_EQUAL(h, TAG(chain), key)) |
| return chain; |
| chain = CDR(chain); |
| } |
| return R_NilValue; |
| } |
| |
| |
| /* |
| * Higer Level Public Functions |
| */ |
| |
| /* initial size = 2 ^ HT_INIT_K */ |
| #define HT_INIT_K 3 |
| |
| R_hashtab_type R_mkhashtab(int type, int K) |
| { |
| if (K < 3 || K > 30) K = 3; |
| int size = 1; |
| for (int i = 0; i < K; i++) size *= 2; |
| |
| switch(type) { |
| case HT_TYPE_IDENTICAL: |
| case HT_TYPE_ADDRESS: break; |
| default: error("bad hash table type"); |
| } |
| SEXP table = PROTECT(allocVector(VECSXP, size)); |
| SEXP meta = PROTECT(allocVector(INTSXP, HT_META_SIZE)); |
| R_hashtab_type val = { .cell = R_MakeExternalPtr(NULL, meta, table) }; |
| HT_VALIDATE(val); |
| HT_COUNT(val) = 0; |
| HT_TYPE(val) = type; |
| HT_TABLE_K(val) = K; |
| UNPROTECT(2); /* table, meta */ |
| return val; |
| } |
| |
| SEXP R_gethash(R_hashtab_type h, SEXP key, SEXP nomatch) |
| { |
| int idx; |
| PROTECT(HT_SEXP(h)); |
| PROTECT(key); |
| PROTECT(nomatch); |
| SEXP cell = getcell(h, key, &idx); |
| UNPROTECT(3); /* h, key, nomatch */ |
| if (cell == R_NilValue) |
| return nomatch; |
| else |
| return CAR(cell); |
| } |
| |
| SEXP R_sethash(R_hashtab_type h, SEXP key, SEXP value) |
| { |
| int idx; |
| PROTECT(HT_SEXP(h)); |
| PROTECT(key); |
| PROTECT(value); |
| SEXP cell = getcell(h, key, &idx); |
| if (cell == R_NilValue) { |
| int new_count = HT_COUNT(h) + 1; |
| if (new_count > 0.5 * HT_TABLE_SIZE(h)) { |
| /* rehash() will fail if new_count would be too large */ |
| rehash(h, TRUE); |
| /* recalculate the hash bin, cell will still be R_NilValue */ |
| (void) getcell(h, key, &idx); |
| } |
| SEXP table = HT_TABLE(h); |
| SEXP chain = CONS(INC_NMD(value), VECTOR_ELT(table, idx)); |
| SET_TAG(chain, INC_NMD(key)); |
| SET_VECTOR_ELT(table, idx, chain); |
| HT_COUNT(h) = new_count; |
| } |
| else { |
| SETCAR(cell, value); |
| INCREMENT_NAMED(value); |
| } |
| UNPROTECT(3); /* h, key, value */ |
| return value; |
| } |
| |
| int R_remhash(R_hashtab_type h, SEXP key) |
| { |
| int idx; |
| PROTECT(HT_SEXP(h)); |
| PROTECT(key); |
| SEXP cell = getcell(h, key, &idx); |
| UNPROTECT(2); /* h, key */ |
| |
| if (cell == R_NilValue) |
| return FALSE; |
| else { |
| SEXP table = HT_TABLE(h); |
| if (cell == VECTOR_ELT(table, idx)) |
| SET_VECTOR_ELT(table, idx, CDR(cell)); |
| else { |
| SEXP prev = VECTOR_ELT(table, idx); |
| while (CDR(prev) != cell) |
| prev = CDR(prev); |
| SETCDR(prev, CDR(cell)); |
| } |
| HT_COUNT(h)--; |
| SETCAR(cell, R_NilValue); // drop REFCNT on old value |
| SET_TAG(cell, R_NilValue); // drop REFCNT on old key |
| return TRUE; |
| } |
| } |
| |
| int R_numhash(R_hashtab_type h) { return HT_COUNT(h); } |
| int R_typhash(R_hashtab_type h) { return HT_TYPE(h); } |
| |
| static R_INLINE void defvar(SEXP sym, SEXP val, SEXP env) |
| { |
| defineVar(sym, INC_NMD(val), env); |
| } |
| |
| SEXP R_maphash(R_hashtab_type h, SEXP FUN) |
| { |
| PROTECT(HT_SEXP(h)); |
| PROTECT(FUN); |
| SEXP FUN_sym = install("FUN"); |
| SEXP key_sym = install("key"); |
| SEXP val_sym = install("value"); |
| |
| SEXP env = PROTECT(R_NewEnv(R_GlobalEnv, FALSE, 0)); |
| SEXP call = PROTECT(lang3(FUN_sym, key_sym, val_sym)); |
| defvar(FUN_sym, FUN, env); |
| |
| SEXP table = PROTECT(HT_TABLE(h)); // PROTECT in case FUN causes a rehash |
| int size = LENGTH(table); |
| for (int i = 0; i < size; i++) { |
| SEXP cell = VECTOR_ELT(table, i); |
| while (cell != R_NilValue) { |
| SEXP next = PROTECT(CDR(cell)); |
| defvar(key_sym, TAG(cell), env); // key is PROTECTed by env |
| defvar(val_sym, CAR(cell), env); // val is PROTECTed by env |
| eval(call, env); |
| cell = next; |
| UNPROTECT(1); /* next */ |
| } |
| } |
| UNPROTECT(5); /* h, FUN, env, call, table */ |
| return R_NilValue; |
| } |
| |
| void R_maphashC(R_hashtab_type h, void (*FUN)(SEXP, SEXP, void *), void *data) |
| { |
| PROTECT(HT_SEXP(h)); |
| SEXP table = PROTECT(HT_TABLE(h)); // PROTECT in case FUN causes a rehash |
| int size = LENGTH(table); |
| for (int i = 0; i < size; i++) { |
| SEXP cell = VECTOR_ELT(table, i); |
| while (cell != R_NilValue) { |
| SEXP next = PROTECT(CDR(cell)); |
| SEXP key = PROTECT(TAG(cell)); |
| SEXP val = PROTECT(CAR(cell)); |
| FUN(key, val, data); |
| cell = next; |
| UNPROTECT(3); /* next, key, val */ |
| } |
| } |
| UNPROTECT(2); /* h, table */ |
| } |
| |
| void R_clrhash(R_hashtab_type h) |
| { |
| SEXP table = HT_TABLE(h); |
| int size = LENGTH(table); |
| for (int i = 0; i < size; i++) { |
| for (SEXP cell = VECTOR_ELT(table, i); |
| cell != R_NilValue; |
| cell = CDR(cell)) { |
| SETCAR(cell, R_NilValue); // drop REFCNT on old value |
| SET_TAG(cell, R_NilValue); // drop REFCNT on old key |
| } |
| SET_VECTOR_ELT(table, i, R_NilValue); |
| } |
| HT_COUNT(h) = 0; |
| /* could also drop table size back down to HT_INIT_SIZE */ |
| } |
| |
| |
| /** |
| ** R Level Interface Support |
| **/ |
| |
| R_hashtab_type R_asHashtable(SEXP h) |
| { |
| if (TYPEOF(h) != VECSXP || LENGTH(h) != 1 || ! inherits(h, "hashtab")) |
| error("not a proper hash table object"); |
| SEXP p = VECTOR_ELT(h, 0); |
| if (TYPEOF(p) != EXTPTRSXP) |
| error("hash table object is corrupted"); |
| R_hashtab_type val = { .cell = p }; |
| return val; |
| } |
| |
| SEXP R_HashtabSEXP(R_hashtab_type h) |
| { |
| return HT_SEXP(h); |
| } |
| |
| int R_isHashtable(SEXP h) |
| { |
| if (TYPEOF(h) != VECSXP || |
| LENGTH(h) != 1 || |
| ! inherits(h, "hashtab") || |
| TYPEOF(VECTOR_ELT(h, 0)) != EXTPTRSXP) |
| return FALSE; |
| else |
| return TRUE; |
| } |
| |
| /* This allows experimenting with creating hash tables at the R level. */ |
| SEXP attribute_hidden do_vhash(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| SEXP x = CAR(args); |
| SEXP sK = CADR(args); |
| SEXP sUseCloEnv = CADDR(args); |
| |
| int K = sK == R_NilValue ? 31 : asInteger(sK); |
| int useCloEnv = sUseCloEnv == R_NilValue ? TRUE : asLogical(sUseCloEnv); |
| |
| int val = hash_identical(x, K, useCloEnv); |
| return ScalarInteger(val); |
| } |