src/main/unique.c - R - Git at Google

 /*
  *  R : A Computer Language for Statistical Data Analysis
  *  Copyright (C) 1997--2019  The R Core Team
  *  Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, a copy is available at
  *  https://www.R-project.org/Licenses/
  */

 /* This is currently restricted to vectors of length < 2^30 */

 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif

 #define R_USE_SIGNALS 1
 #include <Defn.h>
 #include <Internal.h>

 #define NIL -1
 #define ARGUSED(x) LEVELS(x)
 #define SET_ARGUSED(x,v) SETLEVELS(x,v)

 /* interval at which to check interrupts */
 #define NINTERRUPT 1000000

 typedef size_t hlen;

 /* Hash function and equality test for keys */
 typedef struct _HashData HashData;

 struct _HashData {
     int K;
     hlen M;
     R_xlen_t nmax;
 #ifdef LONG_VECTOR_SUPPORT
     Rboolean isLong;
 #endif
     hlen (*hash)(SEXP, R_xlen_t, HashData *);
     int (*equal)(SEXP, R_xlen_t, SEXP, R_xlen_t);
     SEXP HashTable;

     int nomatch;
     Rboolean useUTF8;
     Rboolean useCache;
 };

 #define HTDATA_INT(d) (INTEGER0((d)->HashTable))
 #define HTDATA_DBL(d) (REAL0((d)->HashTable))


 /*
    Integer keys are hashed via a random number generator
    based on Knuth's recommendations.  The high order K bits
    are used as the hash code.

    NB: lots of this code relies on M being a power of two and
    on silent integer overflow mod 2^32.

    <FIXME> Integer keys are wasteful for logical and raw vectors, but
    the tables are small in that case.  It would be much easier to
    implement long vectors, though.
 */

 /*  Currently the hash table is implemented as a (signed) integer
     array.  So there are two 31-bit restrictions, the length of the
     array and the values.  The values are initially NIL (-1).  O-based
     indices are inserted by isDuplicated, and invalidated by setting
     to NA_INTEGER.
 */

 static hlen scatter(unsigned int key, HashData *d)
 {
     return 3141592653U * key >> (32 - d->K);
 }

 static hlen lhash(SEXP x, R_xlen_t indx, HashData *d)
 {
     int xi = LOGICAL_ELT(x, indx);
     if (xi == NA_LOGICAL) return 2U;
     return (hlen) xi;
 }

 static R_INLINE hlen ihash(SEXP x, R_xlen_t indx, HashData *d)
 {
     int xi = INTEGER_ELT(x, indx);
     if (xi == NA_INTEGER) return 0;
     return scatter((unsigned int) xi, d);
 }

 /* We use unions here because Solaris gcc -O2 has trouble with
    casting + incrementing pointers.  We use tests here, but R currently
    assumes int is 4 bytes and double is 8 bytes.
  */
 union foo {
     double d;
     unsigned int u[2];
 };

 static R_INLINE hlen rhash(SEXP x, R_xlen_t indx, HashData *d)
 {
     /* There is a problem with signed 0s under IEC60559 */
     double xi = REAL_ELT(x, indx);
     double tmp = (xi == 0.0) ? 0.0 : xi;
     /* need to use both 32-byte chunks or endianness is an issue */
     /* we want all NaNs except NA equal, and all NAs equal */
     if (R_IsNA(tmp)) tmp = NA_REAL;
     else if (R_IsNaN(tmp)) tmp = R_NaN;
 #if 2*SIZEOF_INT == SIZEOF_DOUBLE
     {
 	union foo tmpu;
 	tmpu.d = tmp;
 	return scatter(tmpu.u[0] + tmpu.u[1], d);
     }
 #else
     return scatter(*((unsigned int *) (&tmp)), d);
 #endif
 }

 static Rcomplex unify_complex_na(Rcomplex z) {
     Rcomplex ans;
     ans.r = (z.r == 0.0) ? 0.0 : z.r;
     ans.i = (z.i == 0.0) ? 0.0 : z.i;
     if (R_IsNA(ans.r) || R_IsNA(ans.i))
 	ans.r = ans.i = NA_REAL;
     else if (R_IsNaN(ans.r) || R_IsNaN(ans.i))
 	ans.r = ans.i = R_NaN;
     return ans;
 }

 static hlen chash(SEXP x, R_xlen_t indx, HashData *d)
 {
     Rcomplex tmp = unify_complex_na(COMPLEX_ELT(x, indx));

 #if 2*SIZEOF_INT == SIZEOF_DOUBLE
     {
 	unsigned int u;
 	union foo tmpu;
 	tmpu.d = tmp.r;
 	u = tmpu.u[0] ^ tmpu.u[1];
 	tmpu.d = tmp.i;
 	u ^= tmpu.u[0] ^ tmpu.u[1];
 	return scatter(u, d);
     }
 #else
 	return scatter((*((unsigned int *)(&tmp.r)) ^
 			(*((unsigned int *)(&tmp.i)))), d);
 #endif
 }

 /* Hash CHARSXP by address.  Hash values are int, For 64bit pointers,
  * we do (upper ^ lower) */
 static R_INLINE hlen cshash(SEXP x, R_xlen_t indx, HashData *d)
 {
     intptr_t z = (intptr_t) STRING_ELT(x, indx);
     unsigned int z1 = (unsigned int)(z & 0xffffffff), z2 = 0;
 #if SIZEOF_LONG == 8
     z2 = (unsigned int)(z/0x100000000L);
 #endif
     return scatter(z1 ^ z2, d);
 }

 static R_INLINE hlen shash(SEXP x, R_xlen_t indx, HashData *d)
 {
     unsigned int k;
     const char *p;
     if(!d->useUTF8 && d->useCache) return cshash(x, indx, d);
     const void *vmax = vmaxget();
     /* Not having d->useCache really should not happen anymore. */
     p = translateCharUTF8(STRING_ELT(x, indx));
     k = 0;
     while (*p++)
 	k = 11 * k + (unsigned int) *p; /* was 8 but 11 isn't a power of 2 */
     vmaxset(vmax); /* discard any memory used by translateChar */
     return scatter(k, d);
 }

 static int lequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
 {
     if (i < 0 || j < 0) return 0;
     return (LOGICAL_ELT(x, i) == LOGICAL_ELT(y, j));
 }


 static R_INLINE int iequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
 {
     if (i < 0 || j < 0) return 0;
     return (INTEGER_ELT(x, i) == INTEGER_ELT(y, j));
 }

 /* BDR 2002-1-17  We don't want NA and other NaNs to be equal */
 static R_INLINE int requal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
 {
     if (i < 0 || j < 0) return 0;
     double xi = REAL_ELT(x, i);
     double yj = REAL_ELT(y, j);
     if (!ISNAN(xi) && !ISNAN(yj))
 	return (xi == yj);
     else if (R_IsNA(xi) && R_IsNA(yj)) return 1;
     else if (R_IsNaN(xi) && R_IsNaN(yj)) return 1;
     else return 0;
 }

 /* This is differentiating {NA,1}, {NA,0}, {NA, NaN}, {NA, NA},
  * but R's print() and format()  render all as "NA" */
 static int cplx_eq(Rcomplex x, Rcomplex y)
 {
     if (!ISNAN(x.r) && !ISNAN(x.i) && !ISNAN(y.r) && !ISNAN(y.i))
 	return x.r == y.r && x.i == y.i;
     else if (R_IsNA(x.r) || R_IsNA(x.i)) // x is NA
 	return (R_IsNA(y.r) || R_IsNA(y.i)) ? 1 : 0;
     else if (R_IsNA(y.r) || R_IsNA(y.i)) // y is NA but x is not
 	return 0;
     // else : none is NA but there's at least one NaN;  hence  ISNAN(.) == R_IsNaN(.)
     return
 	(((ISNAN(x.r) && ISNAN(y.r)) || (!ISNAN(x.r) && !ISNAN(y.r) && x.r == y.r)) && // Re
 	 ((ISNAN(x.i) && ISNAN(y.i)) || (!ISNAN(x.i) && !ISNAN(y.i) && x.i == y.i))    // Im
 	    ) ? 1 : 0;
 }

 static int cequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
 {
     if (i < 0 || j < 0) return 0;
     return cplx_eq(COMPLEX_ELT(x, i), COMPLEX_ELT(y, j));
 }

 static R_INLINE int sequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
 {
     if (i < 0 || j < 0) return 0;
     SEXP xi = STRING_ELT(x, i);
     SEXP yj = STRING_ELT(y, j);
     /* Two strings which have the same address must be the same,
        so avoid looking at the contents */
     if (xi == yj) return 1;
     /* Then if either is NA the other cannot be */
     /* Once all CHARSXPs are cached, Seql will handle this */
     if (xi == NA_STRING || yj == NA_STRING)
 	return 0;
     /* another pre-test to avoid the call to Seql */
     if (IS_CACHED(xi) && IS_CACHED(yj) && ENC_KNOWN(xi) == ENC_KNOWN(yj))
 	return 0;
     return Seql(xi, yj);
 }

 static hlen rawhash(SEXP x, R_xlen_t indx, HashData *d)
 {
     return (hlen) RAW_ELT(x, indx);
 }

 static int rawequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
 {
     if (i < 0 || j < 0) return 0;
     return (RAW_ELT(x, i) == RAW_ELT(y, j));
 }

 static hlen vhash(SEXP x, R_xlen_t indx, HashData *d)
 {
     int i;
     unsigned int key;
     SEXP _this = VECTOR_ELT(x, indx);

     key = OBJECT(_this) + 2*TYPEOF(_this) + 100U*(unsigned int) length(_this);
     /* maybe we should also look at attributes, but that slows us down */
     switch (TYPEOF(_this)) {
     case LGLSXP:
 	/* This is not too clever: pack into 32-bits and then scatter? */
 	for(i = 0; i < LENGTH(_this); i++) {
 	    key ^= lhash(_this, i, d);
 	    key *= 97;
 	}
 	break;
     case INTSXP:
 	for(i = 0; i < LENGTH(_this); i++) {
 	    key ^= ihash(_this, i, d);
 	    key *= 97;
 	}
 	break;
     case REALSXP:
 	for(i = 0; i < LENGTH(_this); i++) {
 	    key ^= rhash(_this, i, d);
 	    key *= 97;
 	}
 	break;
     case CPLXSXP:
 	for(i = 0; i < LENGTH(_this); i++) {
 	    key ^= chash(_this, i, d);
 	    key *= 97;
 	}
 	break;
     case STRSXP:
 	for(i = 0; i < LENGTH(_this); i++) {
 	    key ^= shash(_this, i, d);
 	    key *= 97;
 	}
 	break;
     case RAWSXP:
 	for(i = 0; i < LENGTH(_this); i++) {
 	    key ^= scatter((unsigned int)rawhash(_this, i, d), d);
 	    key *= 97;
 	}
 	break;
     case VECSXP:
 	for(i = 0; i < LENGTH(_this); i++) {
 	    key ^= vhash(_this, i, d);
 	    key *= 97;
 	}
 	break;
     default:
 	break;
     }
     return scatter(key, d);
 }

 static int vequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
 {
     if (i < 0 || j < 0) return 0;
     return R_compute_identical(VECTOR_ELT(x, i), VECTOR_ELT(y, j), 0);
 }

 /*
   Choose M to be the smallest power of 2
   not less than 2*n and set K = log2(M).
   Need K >= 1 and hence M >= 2, and 2^M < 2^31-1, hence n <= 2^29.

   Dec 2004: modified from 4*n to 2*n, since in the worst case we have
   a 50% full table, and that is still rather efficient -- see
   R. Sedgewick (1998) Algorithms in C++ 3rd edition p.606.
 */
 static void MKsetup(R_xlen_t n, HashData *d, R_xlen_t nmax)
 {
 #ifdef LONG_VECTOR_SUPPORT
     /* M = 2^32 is safe, hence n <= 2^31 -1 */
     if(n < 0) /* protect against overflow to -ve */
 	error(_("length %d is too large for hashing"), n);
 #else
     if(n < 0 || n >= 1073741824) /* protect against overflow to -ve */
 	error(_("length %d is too large for hashing"), n);
 #endif

     if (nmax != NA_INTEGER && nmax != 1) n = nmax;
     size_t n2 = 2U * (size_t) n;
     d->M = 2;
     d->K = 1;
     while (d->M < n2) {
 	d->M *= 2;
 	d->K++;
     }
     d->nmax = n;
 }

 #define IMAX 4294967296L
 static void HashTableSetup(SEXP x, HashData *d, R_xlen_t nmax)
 {
     d->useUTF8 = FALSE;
     d->useCache = TRUE;
     switch (TYPEOF(x)) {
     case LGLSXP:
 	d->hash = lhash;
 	d->equal = lequal;
 	d->nmax = d->M = 4;
 	d->K = 2; /* unused */
 	break;
     case INTSXP:
     {
 	d->hash = ihash;
 	d->equal = iequal;
 #ifdef LONG_VECTOR_SUPPORT
 	R_xlen_t nn = XLENGTH(x);
 	if (nn > IMAX) nn = IMAX;
 	MKsetup(nn, d, nmax);
 #else
 	MKsetup(LENGTH(x), d, nmax);
 #endif
     }
 	break;
     case REALSXP:
 	d->hash = rhash;
 	d->equal = requal;
 	MKsetup(XLENGTH(x), d, nmax);
 	break;
     case CPLXSXP:
 	d->hash = chash;
 	d->equal = cequal;
 	MKsetup(XLENGTH(x), d, nmax);
 	break;
     case STRSXP:
 	d->hash = shash;
 	d->equal = sequal;
 	MKsetup(XLENGTH(x), d, nmax);
 	break;
     case RAWSXP:
 	d->hash = rawhash;
 	d->equal = rawequal;
 	d->nmax = d->M = 256;
 	d->K = 8; /* unused */
 	break;
     case VECSXP:
 	d->hash = vhash;
 	d->equal = vequal;
 	MKsetup(XLENGTH(x), d, nmax);
 	break;
     default:
 	UNIMPLEMENTED_TYPE("HashTableSetup", x);
     }
 #ifdef LONG_VECTOR_SUPPORT
     d->isLong = IS_LONG_VEC(x);
     if (d->isLong) {
 	d->HashTable = allocVector(REALSXP, (R_xlen_t) d->M);
 	for (R_xlen_t i = 0; i < d->M; i++) HTDATA_DBL(d)[i] = NIL;
     } else
 #endif
     {
 	d->HashTable = allocVector(INTSXP, (R_xlen_t) d->M);
 	for (R_xlen_t i = 0; i < d->M; i++) HTDATA_INT(d)[i] = NIL;
     }
 }

 /* Open address hashing */
 /* Collision resolution is by linear probing */
 /* The table is guaranteed large so this is sufficient */

 static int isDuplicated(SEXP x, R_xlen_t indx, HashData *d)
 {
 #ifdef LONG_VECTOR_SUPPORT
     if (d->isLong) {
 	double *h = HTDATA_DBL(d);
 	hlen i = d->hash(x, indx, d);
 	while (h[i] != NIL) {
 	    if (d->equal(x, (R_xlen_t) h[i], x, indx))
 		return h[i] >= 0 ? 1 : 0;
 	    i = (i + 1) % d->M;
 	}
 	if (d->nmax-- < 0) error("hash table is full");
 	h[i] = (double) indx;
     } else
 #endif
     {
 	int *h = HTDATA_INT(d);
 	hlen i = d->hash(x, indx, d);
 	while (h[i] != NIL) {
 	    if (d->equal(x, h[i], x, indx))
 		return h[i] >= 0 ? 1 : 0;
 	    i = (i + 1) % d->M;
 	}
 	if (d->nmax-- < 0) error("hash table is full");
 	h[i] = (int) indx;
     }
     return 0;
 }

 static void removeEntry(SEXP table, SEXP x, R_xlen_t indx, HashData *d)
 {
 #ifdef LONG_VECTOR_SUPPORT
     if (d->isLong) {
 	double *h = HTDATA_DBL(d);
 	hlen i = d->hash(x, indx, d);
 	while (h[i] >= 0) {
 	    if (d->equal(table, (R_xlen_t) h[i], x, indx)) {
 		h[i] = NA_INTEGER;  /* < 0, only index values are inserted */
 		return;
 	    }
 	    i = (i + 1) % d->M;
 	}
     } else
 #endif
     {
 	int *h = HTDATA_INT(d);
 	hlen i = d->hash(x, indx, d);
 	while (h[i] >= 0) {
 	    if (d->equal(table, h[i], x, indx)) {
 		h[i] = NA_INTEGER;  /* < 0, only index values are inserted */
 		return;
 	    }
 	    i = (i + 1) % d->M;
 	}
     }
 }

 #define DUPLICATED_INIT						\
     HashData data;						\
     HashTableSetup(x, &data, nmax);				\
     if(TYPEOF(x) == STRSXP) {					\
 	data.useUTF8 = FALSE; data.useCache = TRUE;		\
 	for(i = 0; i < n; i++) {				\
 	    if(IS_BYTES(STRING_ELT(x, i))) {			\
 		data.useUTF8 = FALSE; break;			\
 	    }							\
 	    if(ENC_KNOWN(STRING_ELT(x, i))) {			\
 		data.useUTF8 = TRUE;				\
 	    }							\
 	    if(!IS_CACHED(STRING_ELT(x, i))) {			\
 		data.useCache = FALSE; break;			\
 	    }							\
 	}							\
     }

 /* used in scan() */
 SEXP duplicated(SEXP x, Rboolean from_last)
 {
     SEXP ans;
     int *v, nmax = NA_INTEGER;

     if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
     R_xlen_t i, n = XLENGTH(x);
     DUPLICATED_INIT;

     PROTECT(data.HashTable);
     PROTECT(ans = allocVector(LGLSXP, n));

     v = LOGICAL(ans);

     if(from_last)
 	for (i = n-1; i >= 0; i--) {
 //	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
 	    v[i] = isDuplicated(x, i, &data);
 	}
     else
 	for (i = 0; i < n; i++) {
 //	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
 	    v[i] = isDuplicated(x, i, &data);
 	}

     UNPROTECT(2);
     return ans;
 }

 static SEXP Duplicated(SEXP x, Rboolean from_last, int nmax)
 {
     SEXP ans;
     int *v;

     if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
     R_xlen_t i, n = XLENGTH(x);
     DUPLICATED_INIT;

     PROTECT(data.HashTable);
     PROTECT(ans = allocVector(LGLSXP, n));

     v = LOGICAL(ans);

     if(from_last)
 	for (i = n-1; i >= 0; i--) {
 //	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
 	    v[i] = isDuplicated(x, i, &data);
 	}
     else
 	for (i = 0; i < n; i++) {
 //	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
 	    v[i] = isDuplicated(x, i, &data);
 	}

     UNPROTECT(2);
     return ans;
 }

 /* simpler version of the above : return 1-based index of first, or 0 : */
 R_xlen_t any_duplicated(SEXP x, Rboolean from_last)
 {
     R_xlen_t result = 0;
     int nmax = NA_INTEGER;

     if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
     R_xlen_t i, n = XLENGTH(x);

     DUPLICATED_INIT;
     PROTECT(data.HashTable);

     if(from_last) {
 	for (i = n-1; i >= 0; i--) {
 	    if(isDuplicated(x, i, &data)) { result = ++i; break; }
 	}
     } else {
 	for (i = 0; i < n; i++) {
 	    if(isDuplicated(x, i, &data)) { result = ++i; break; }
 	}
     }
     UNPROTECT(1);
     return result;
 }

 static SEXP duplicated3(SEXP x, SEXP incomp, Rboolean from_last, int nmax)
 {
     SEXP ans;
     int *v, j, m;

     if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
     R_xlen_t i, n = XLENGTH(x);
     DUPLICATED_INIT;

     PROTECT(data.HashTable);
     PROTECT(ans = allocVector(LGLSXP, n));

     v = LOGICAL(ans);

     if(from_last)
 	for (i = n-1; i >= 0; i--) {
 //	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
 	    v[i] = isDuplicated(x, i, &data);
 	}
     else
 	for (i = 0; i < n; i++) {
 //	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
 	    v[i] = isDuplicated(x, i, &data);
 	}

     if(length(incomp)) {
 	PROTECT(incomp = coerceVector(incomp, TYPEOF(x)));
 	m = length(incomp);
 	for (i = 0; i < n; i++)
 	    if(v[i]) {
 		for(j = 0; j < m; j++)
 		    if(data.equal(x, i, incomp, j)) {v[i] = 0; break;}
 	    }
 	UNPROTECT(1);
     }
     UNPROTECT(2);
     return ans;
 }

 /* return (1-based) index of first duplication, or 0 : */
 R_xlen_t any_duplicated3(SEXP x, SEXP incomp, Rboolean from_last)
 {
     int j, m = length(incomp), nmax = NA_INTEGER;

     if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
     R_xlen_t i, n = XLENGTH(x);
     DUPLICATED_INIT;
     PROTECT(data.HashTable);

     if(!m) error(_("any_duplicated3(., <0-length incomp>)"));

     PROTECT(incomp = coerceVector(incomp, TYPEOF(x)));
     m = length(incomp);

     if(from_last)
 	for (i = n-1; i >= 0; i--) {
 #define IS_DUPLICATED_CHECK				\
 	    if(isDuplicated(x, i, &data)) {		\
 		Rboolean isDup = TRUE;			\
 		for(j = 0; j < m; j++)			\
 		    if(data.equal(x, i, incomp, j)) {	\
 			isDup = FALSE; break;		\
 		    }					\
 		if(isDup) {				\
 		    UNPROTECT(2);			\
 		    return ++i;				\
 		}					\
 		/* else continue */			\
 	    }
 	    IS_DUPLICATED_CHECK;
 	}
     else {
 	for (i = 0; i < n; i++) {
 	    IS_DUPLICATED_CHECK;
 	}
     }

     UNPROTECT(2);
     return 0;
 }

 #undef IS_DUPLICATED_CHECK
 #undef DUPLICATED_INIT


 /* .Internal(duplicated(x))	  [op=0]
   .Internal(unique(x))		  [op=1]
    .Internal(anyDuplicated(x))	  [op=2]
 */
 SEXP attribute_hidden do_duplicated(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     SEXP x, incomp, dup, ans;
     int fromLast, nmax = NA_INTEGER;
     R_xlen_t i, k, n;

     checkArity(op, args);
     x = CAR(args);
     incomp = CADR(args);
     if (length(CADDR(args)) < 1)
 	error(_("'fromLast' must be length 1"));
     fromLast = asLogical(CADDR(args));
     if (fromLast == NA_LOGICAL)
 	error(_("'fromLast' must be TRUE or FALSE"));

     Rboolean fL = (Rboolean) fromLast;

     /* handle zero length vectors, and NULL */
     if ((n = xlength(x)) == 0)
 	return(PRIMVAL(op) <= 1
 	       ? allocVector(PRIMVAL(op) != 1 ? LGLSXP : TYPEOF(x), 0)
 	       : ScalarInteger(0));

     if (!isVector(x)) {
 	error(_("%s() applies only to vectors"),
 	      (PRIMVAL(op) == 0 ? "duplicated" :
 	       (PRIMVAL(op) == 1 ? "unique" : /* 2 */ "anyDuplicated")));
     }
     if (PRIMVAL(op) <= 1) {
 	nmax = asInteger(CADDDR(args));
 	if (nmax != NA_INTEGER && nmax <= 0)
 	    error(_("'nmax' must be positive"));
     }

     if(length(incomp) && /* S has FALSE to mean empty */
        !(isLogical(incomp) && length(incomp) == 1 &&
 	 LOGICAL_ELT(incomp, 0) == 0)) {
 	if(PRIMVAL(op) == 2) {
 	    /* return R's 1-based index :*/
 	    R_xlen_t ind  = any_duplicated3(x, incomp, fL);
 	    if(ind > INT_MAX) return ScalarReal((double) ind);
 	    else return ScalarInteger((int)ind);
 	} else
 	    dup = duplicated3(x, incomp, fL, nmax);
     }
     else {
 	if(PRIMVAL(op) == 2) {
 	    R_xlen_t ind  = any_duplicated(x, fL);
 	    if(ind > INT_MAX) return ScalarReal((double) ind);
 	    else return ScalarInteger((int)ind);
 	} else
 	    dup = Duplicated(x, fL, nmax);
     }
     if (PRIMVAL(op) == 0) /* "duplicated()" */
 	return dup;
     /*	ELSE
 	use the results of "duplicated" to get "unique" */

     /* count unique entries */
     k = 0;
     for (i = 0; i < n; i++)
 	if (LOGICAL_ELT(dup, i) == 0)
 	    k++;

     PROTECT(dup);
     PROTECT(ans = allocVector(TYPEOF(x), k));

     k = 0;
     switch (TYPEOF(x)) {
     case LGLSXP:
 	for (i = 0; i < n; i++)
 	    if (LOGICAL_ELT(dup, i) == 0)
 		LOGICAL0(ans)[k++] = LOGICAL_ELT(x, i);
 	break;
     case INTSXP:
 	for (i = 0; i < n; i++)
 	    if (LOGICAL_ELT(dup, i) == 0)
 		INTEGER0(ans)[k++] = INTEGER_ELT(x, i);
 	break;
     case REALSXP:
 	for (i = 0; i < n; i++)
 	    if (LOGICAL_ELT(dup, i) == 0)
 		REAL0(ans)[k++] = REAL_ELT(x, i);
 	break;
     case CPLXSXP:
 	for (i = 0; i < n; i++)
 	    if (LOGICAL_ELT(dup, i) == 0)
 		COMPLEX0(ans)[k++] = COMPLEX_ELT(x, i);
 	break;
     case STRSXP:
 	for (i = 0; i < n; i++)
 	    if (LOGICAL_ELT(dup, i) == 0)
 		SET_STRING_ELT(ans, k++, STRING_ELT(x, i));
 	break;
     case VECSXP:
 	for (i = 0; i < n; i++)
 	    if (LOGICAL_ELT(dup, i) == 0)
 		SET_VECTOR_ELT(ans, k++, VECTOR_ELT(x, i));
 	break;
     case RAWSXP:
 	for (i = 0; i < n; i++)
 	    if (LOGICAL_ELT(dup, i) == 0)
 		RAW0(ans)[k++] = RAW_ELT(x, i);
 	break;
     default:
 	UNIMPLEMENTED_TYPE("duplicated", x);
     }
     UNPROTECT(2);
     return ans;
 }

 /* Build a hash table, ignoring information on duplication */
 static void DoHashing(SEXP table, HashData *d)
 {
     R_xlen_t i, n = XLENGTH(table);
     for (i = 0; i < n; i++) {
 //	if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
 	(void) isDuplicated(table, i, d);
     }
 }

 /* invalidate entries: normally few */
 static void UndoHashing(SEXP x, SEXP table, HashData *d)
 {
     for (R_xlen_t i = 0; i < XLENGTH(x); i++) removeEntry(table, x, i, d);
 }

 #define DEFLOOKUP(NAME, HASHFUN, EQLFUN)			\
     static R_INLINE int						\
     NAME(SEXP table, SEXP x, R_xlen_t indx, HashData *d)	\
     {								\
 	int *h = HTDATA_INT(d);					\
 	hlen i = HASHFUN(x, indx, d);				\
 	while (h[i] != NIL) {					\
 	    if (EQLFUN(table, h[i], x, indx))			\
 		return h[i] >= 0 ? h[i] + 1 : d->nomatch;	\
 	    i = (i + 1) % d->M;					\
 	}							\
 	return d->nomatch;					\
     }

 /* definitions to help the C compiler to inline of most important cases */
 DEFLOOKUP(iLookup, ihash, iequal)
 DEFLOOKUP(rLookup, rhash, requal)
 DEFLOOKUP(sLookup, shash, sequal)

 /* definition for the general case */
 DEFLOOKUP(Lookup, d->hash, d->equal)

 /* Now do the table lookup */
 static SEXP HashLookup(SEXP table, SEXP x, HashData *d)
 {
     SEXP ans;
     R_xlen_t i, n;

     n = XLENGTH(x);
     PROTECT(ans = allocVector(INTSXP, n));
     int *pa = INTEGER0(ans);

     switch (TYPEOF(x)) {
     case INTSXP:
 	for (i = 0; i < n; i++)
 	    pa[i] = iLookup(table, x, i, d);
 	break;
     case REALSXP:
 	for (i = 0; i < n; i++)
 	    pa[i] = rLookup(table, x, i, d);
 	break;
     case STRSXP:
 	for (i = 0; i < n; i++)
 	    pa[i] = sLookup(table, x, i, d);
 	break;
     default:
 	for (i = 0; i < n; i++)
 	    pa[i] = Lookup(table, x, i, d);
     }

     UNPROTECT(1);
     return ans;
 }

 static SEXP match_transform(SEXP s, SEXP env)
 {
     if(OBJECT(s)) {
 	if(inherits(s, "factor")) return asCharacterFactor(s);
 	else if(inherits(s, "POSIXlt")) { /* and maybe more classes in the future:
 					   * Call R's (generic)	 as.character(s) : */
 	    SEXP call, r;
 	    PROTECT(call = lang2(R_AsCharacterSymbol, s));
 	    r = eval(call, env);
 	    UNPROTECT(1);
 	    return r;
 	}
     }
     /* else */
     return duplicate(s);
 }

 // workhorse of R's match() and hence also  " ix %in% itable "
 SEXP match5(SEXP itable, SEXP ix, int nmatch, SEXP incomp, SEXP env)
 {
     R_xlen_t n = xlength(ix);
     /* handle zero length arguments */
     if (n == 0) return allocVector(INTSXP, 0);

     SEXP ans;
     if (length(itable) == 0) {
 	ans = allocVector(INTSXP, n);
 	int *pa = INTEGER0(ans);
 	for (R_xlen_t i = 0; i < n; i++) pa[i] = nmatch;
 	return ans;
     }

     int nprot = 0;
     SEXP x     = PROTECT(match_transform(ix,     env)); nprot++;
     SEXP table = PROTECT(match_transform(itable, env)); nprot++;
     /* or should we use PROTECT_WITH_INDEX and REPROTECT below ? */

     SEXPTYPE type;
     /* Coerce to a common type; type == NILSXP is ok here.
      * Note that above we coerce factors and "POSIXlt", only to character.
      * Hence, coerce to character or to `higher' type
      * (given that we have "Vector" or NULL) */
     if(TYPEOF(x) >= STRSXP || TYPEOF(table) >= STRSXP) type = STRSXP;
     else type = TYPEOF(x) < TYPEOF(table) ? TYPEOF(table) : TYPEOF(x);
     PROTECT(x	  = coerceVector(x,	type)); nprot++;
     PROTECT(table = coerceVector(table, type)); nprot++;

     // special case scalar x -- for speed only :
     if(XLENGTH(x) == 1 && !incomp) {
       int val = nmatch;
       int ntable = LENGTH(table);
       switch (type) {
       case STRSXP: {
 	  SEXP x_val = STRING_ELT(x,0);
 	  for (int i=0; i < ntable; i++) if (Seql(STRING_ELT(table,i), x_val)) {
 		  val = i + 1; break;
 	      }
 	  break; }
       case LGLSXP:
       case INTSXP: {
 	  int x_val = INTEGER_ELT(x, 0),
 	      *table_p = INTEGER(table);
 	  for (int i=0; i < ntable; i++) if (table_p[i] == x_val) {
 		  val = i + 1; break;
 	      }
 	  break; }
       case REALSXP: {
 	  double xv = REAL_ELT(x, 0);
 	  // pblm with signed 0s under IEC60559
 	  double x_val = (xv == 0.) ? 0. : xv;
 	  double *table_p = REAL(table);
 	  /* we want all NaNs except NA equal, and all NAs equal */
 	  if (R_IsNA(x_val)) {
 	      for (int i=0; i < ntable; i++) if (R_IsNA(table_p[i])) {
 		      val = i + 1; break;
 		  }
 	  }
 	  else if (R_IsNaN(x_val)) {
 	      for (int i=0; i < ntable; i++) if (R_IsNaN(table_p[i])) {
 		      val = i + 1; break;
 		  }
 	  }
 	  else {
 	      for (int i=0; i < ntable; i++) if (table_p[i] == x_val) {
 		      val = i + 1; break;
 	      }
 	  }
 	  break; }
       case CPLXSXP: {
 	  Rcomplex x_val = COMPLEX_ELT(x, 0),
 	      *table_p = COMPLEX(table);
 	  for (int i=0; i < ntable; i++)
 	      if (cplx_eq(table_p[i], x_val)) {
 		  val = i + 1; break;
 	      }
 	  break; }
       case RAWSXP: {
 	  Rbyte x_val = RAW_ELT(x, 0),
 	      *table_p = RAW(table);
 	  for (int i=0; i < ntable; i++) if (table_p[i] == x_val) {
 		  val = i + 1; break;
 	      }
 	  break; }
       }
       PROTECT(ans = ScalarInteger(val)); nprot++;
     }
     else { // regular case
 	HashData data;
 	if (incomp) { PROTECT(incomp = coerceVector(incomp, type)); nprot++; }
 	data.nomatch = nmatch;
 	HashTableSetup(table, &data, NA_INTEGER);
 	if(type == STRSXP) {
 	    Rboolean useBytes = FALSE;
 	    Rboolean useUTF8 = FALSE;
 	    Rboolean useCache = TRUE;
 	    for(R_xlen_t i = 0; i < xlength(x); i++) {
 		SEXP s = STRING_ELT(x, i);
 		if(IS_BYTES(s)) {
 		    useBytes = TRUE;
 		    useUTF8 = FALSE;
 		    break;
 		}
 		if(ENC_KNOWN(s)) {
 		    useUTF8 = TRUE;
 		}
 		if(!IS_CACHED(s)) {
 		    useCache = FALSE;
 		    break;
 		}
 	    }
 	    if(!useBytes || useCache) {
 		for(int i = 0; i < LENGTH(table); i++) {
 		    SEXP s = STRING_ELT(table, i);
 		    if(IS_BYTES(s)) {
 			useBytes = TRUE;
 			useUTF8 = FALSE;
 			break;
 		    }
 		    if(ENC_KNOWN(s)) {
 			useUTF8 = TRUE;
 		    }
 		    if(!IS_CACHED(s)) {
 			useCache = FALSE;
 			break;
 		    }
 		}
 	    }
 	    data.useUTF8 = useUTF8;
 	    data.useCache = useCache;
 	}
 	PROTECT(data.HashTable); nprot++;
 	DoHashing(table, &data);
 	if (incomp) UndoHashing(incomp, table, &data);
 	ans = HashLookup(table, x, &data);
     }
     UNPROTECT(nprot);
     return ans;
 } // end{ match5 }

 SEXP matchE(SEXP itable, SEXP ix, int nmatch, SEXP env)
 {
     return match5(itable, ix, nmatch, NULL, env);
 }

 /* used from other code, not here: */
 SEXP match(SEXP itable, SEXP ix, int nmatch)
 {
     return match5(itable, ix, nmatch, NULL, R_BaseEnv);
 }


 // .Internal(match(x, table, nomatch, incomparables)) :
 SEXP attribute_hidden do_match(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     checkArity(op, args);

     if ((!isVector(CAR(args)) && !isNull(CAR(args)))
 	|| (!isVector(CADR(args)) && !isNull(CADR(args))))
 	error(_("'match' requires vector arguments"));

     int nomatch = asInteger(CADDR(args));
     SEXP incomp = CADDDR(args);

     if (isNull(incomp) || /* S has FALSE to mean empty */
 	(length(incomp) == 1 && isLogical(incomp) &&
 	 LOGICAL_ELT(incomp, 0) == 0))
 	return match5(CADR(args), CAR(args), nomatch, NULL, env);
     else
 	return match5(CADR(args), CAR(args), nomatch, incomp, env);
 }

 /* pmatch and charmatch return integer positions, so cannot be used
    for long vector tables */

 /* Partial Matching of Strings */
 /* Fully S Compatible version. */

 /* Hmm, this was not all S compatible!	The desired behaviour is:
  * First do exact matches, and mark elements as used as they are matched
  *   unless dup_ok is true.
  * Then do partial matching, from left to right, using up the table
  *   unless dup_ok is true.  Multiple partial matches are ignored.
  * Empty strings are unmatched			      BDR 2000/2/16
  */

 SEXP attribute_hidden do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     SEXP ans, input, target;
     int mtch, n_target, mtch_count, dups_ok, no_match;
     size_t temp;
     int *used = NULL, *ians;
     const char **in, **tar;
     Rboolean no_dups;
     Rboolean useBytes = FALSE, useUTF8 = FALSE;

     checkArity(op, args);
     input = CAR(args);
     R_xlen_t n_input = XLENGTH(input);
     target = CADR(args);
     n_target = LENGTH(target); // not allowed to be long
     no_match = asInteger(CADDR(args));
     dups_ok = asLogical(CADDDR(args));
     if (dups_ok == NA_LOGICAL)
 	error(_("invalid '%s' argument"), "duplicates.ok");
     no_dups = !dups_ok;

     if (!isString(input) || !isString(target))
 	error(_("argument is not of mode character"));

     if(no_dups) {
 	used = (int *) R_alloc((size_t) n_target, sizeof(int));
 	for (int j = 0; j < n_target; j++) used[j] = 0;
     }

     for(R_xlen_t i = 0; i < n_input; i++) {
 	if(IS_BYTES(STRING_ELT(input, i))) {
 	    useBytes = TRUE;
 	    useUTF8 = FALSE;
 	    break;
 	} else if(ENC_KNOWN(STRING_ELT(input, i))) {
 	    useUTF8 = TRUE;
 	}
     }
     if(!useBytes) {
 	for(R_xlen_t i = 0; i < n_target; i++) {
 	    if(IS_BYTES(STRING_ELT(target, i))) {
 		useBytes = TRUE;
 		useUTF8 = FALSE;
 		break;
 	    } else if(ENC_KNOWN(STRING_ELT(target, i))) {
 		useUTF8 = TRUE;
 	    }
 	}
     }

     in = (const char **) R_alloc((size_t) n_input, sizeof(char *));
     tar = (const char **) R_alloc((size_t) n_target, sizeof(char *));
     PROTECT(ans = allocVector(INTSXP, n_input));
     ians = INTEGER0(ans);
     if(useBytes) {
 	for(R_xlen_t i = 0; i < n_input; i++) {
 	    in[i] = CHAR(STRING_ELT(input, i));
 	    ians[i] = 0;
 	}
 	for(int j = 0; j < n_target; j++)
 	    tar[j] = CHAR(STRING_ELT(target, j));
     }
     else if(useUTF8) {
 	for(R_xlen_t i = 0; i < n_input; i++) {
 	    in[i] = translateCharUTF8(STRING_ELT(input, i));
 	    ians[i] = 0;
 	}
 	for(int j = 0; j < n_target; j++)
 	    tar[j] = translateCharUTF8(STRING_ELT(target, j));
     } else {
 	for(R_xlen_t i = 0; i < n_input; i++) {
 	    in[i] = translateChar(STRING_ELT(input, i));
 	    ians[i] = 0;
 	}
 	for(int j = 0; j < n_target; j++)
 	    tar[j] = translateChar(STRING_ELT(target, j));
     }
     /* First pass, exact matching */
     R_xlen_t nexact = 0;
     /* Compromise when hashing used changed in 3.2.0 (PR#15697) */
     if (n_input <= 100 || n_target <= 100) {
 	for (R_xlen_t i = 0; i < n_input; i++) {
 	    const char *ss = in[i];
 	    if (strlen(ss) == 0) continue;
 	    for (int j = 0; j < n_target; j++) {
 		if (no_dups && used[j]) continue;
 		if (strcmp(ss, tar[j]) == 0) {
 		    ians[i] = j + 1;
 		    if (no_dups) used[j] = 1;
 		    nexact++;
 		    break;
 		}
 	    }
 	}
     } else {
 	HashData data;
 	HashTableSetup(target, &data, NA_INTEGER);
 	data.useUTF8 = useUTF8;
 	data.nomatch = 0;
 	DoHashing(target, &data);
 	for (R_xlen_t i = 0; i < n_input; i++) {
 	    if (strlen(in[i]) == 0) /* don't look up "" */
 		continue;
 	    int j = Lookup(target, input, i, &data);
 	    if ((j == 0) || (no_dups && used[j - 1])) continue;
 	    if (no_dups) used[j - 1] = 1;
 	    ians[i] = j;
 	    nexact++;
 	}
     }

     if(nexact < n_input) {
 	/* Second pass, partial matching */
 	for (R_xlen_t i = 0; i < n_input; i++) {
 	    const char *ss;
 	    if (ians[i]) continue;
 	    ss = in[i];
 	    temp = strlen(ss);
 	    if (temp == 0) continue;
 	    mtch = 0;
 	    mtch_count = 0;
 	    for (int j = 0; j < n_target; j++) {
 		if (no_dups && used[j]) continue;
 		if (strncmp(ss, tar[j], temp) == 0) {
 		    mtch = j + 1;
 		    mtch_count++;
 		}
 	    }
 	    if (mtch > 0 && mtch_count == 1) {
 		if(no_dups) used[mtch - 1] = 1;
 		ians[i] = mtch;
 	    }
 	}
 	/* Third pass, set no matches */
 	for (R_xlen_t i = 0; i < n_input; i++)
 	    if(ians[i] == 0) ians[i] = no_match;

     }
     UNPROTECT(1);
     return ans;
 }


 /* Partial Matching of Strings */
 /* Based on Therneau's charmatch. */

 SEXP attribute_hidden do_charmatch(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     SEXP ans, input, target;
     const char *ss, *st;
     Rboolean useBytes = FALSE, useUTF8 = FALSE;

     checkArity(op, args);

     input = CAR(args);
     R_xlen_t n_input = LENGTH(input);
     target = CADR(args);
     int n_target = LENGTH(target);

     if (!isString(input) || !isString(target))
 	error(_("argument is not of mode character"));
     int no_match = asInteger(CADDR(args));

     for(R_xlen_t i = 0; i < n_input; i++) {
 	if(IS_BYTES(STRING_ELT(input, i))) {
 	    useBytes = TRUE;
 	    useUTF8 = FALSE;
 	    break;
 	} else if(ENC_KNOWN(STRING_ELT(input, i))) {
 	    useUTF8 = TRUE;
 	}
     }
     if(!useBytes) {
 	for(int i = 0; i < n_target; i++) {
 	    if(IS_BYTES(STRING_ELT(target, i))) {
 		useBytes = TRUE;
 		useUTF8 = FALSE;
 		break;
 	    } else if(ENC_KNOWN(STRING_ELT(target, i))) {
 		useUTF8 = TRUE;
 	    }
 	}
     }

     PROTECT(ans = allocVector(INTSXP, n_input));
     int *ians = INTEGER0(ans);

     const void *vmax = vmaxget();  // prudence: .Internal does this too.
     for(R_xlen_t i = 0; i < n_input; i++) {
 	if(useBytes)
 	    ss = CHAR(STRING_ELT(input, i));
 	else if(useUTF8)
 	    ss = translateCharUTF8(STRING_ELT(input, i));
 	else
 	    ss = translateChar(STRING_ELT(input, i));
 	size_t temp = strlen(ss);
 	int imatch = NA_INTEGER;
 	Rboolean perfect = FALSE;
 	/* we could reset vmax here too: worth it? */
 	for(int j = 0; j < n_target; j++) {
 	    if(useBytes)
 		st = CHAR(STRING_ELT(target, j));
 	    else if(useUTF8)
 		st = translateCharUTF8(STRING_ELT(target, j));
 	    else
 		st = translateChar(STRING_ELT(target, j));
 	    int k = strncmp(ss, st, temp);
 	    if (k == 0) {
 		if (strlen(st) == temp) {
 		    if (perfect)
 			imatch = 0;
 		    else {
 			perfect = TRUE;
 			imatch = j + 1;
 		    }
 		}
 		else if (!perfect) {
 		    if (imatch == NA_INTEGER)
 			imatch = j + 1;
 		    else
 			imatch = 0;
 		}
 	    }
 	}
 	ians[i] = (imatch == NA_INTEGER) ? no_match : imatch;
 	vmaxset(vmax);
     }
     UNPROTECT(1);
     return ans;
 }


 /* Functions for matching the supplied arguments to the */
 /* formal arguments of functions.  The returned value */
 /* is a list with all components named. */

 static SEXP StripUnmatched(SEXP s)
 {
     if (s == R_NilValue) return s;

     if (CAR(s) == R_MissingArg && !ARGUSED(s) ) {
 	return StripUnmatched(CDR(s));
     }
     else if (CAR(s) == R_DotsSymbol ) {
 	return StripUnmatched(CDR(s));
     }
     else {
 	SETCDR(s, StripUnmatched(CDR(s)));
 	return s;
     }
 }

 static SEXP ExpandDots(SEXP s, int expdots)
 {
     SEXP r;
     if (s == R_NilValue)
 	return s;
     if (TYPEOF(CAR(s)) == DOTSXP ) {
 	SET_TYPEOF(CAR(s), LISTSXP);	/* a safe mutation */
 	if (expdots) {
 	    r = CAR(s);
 	    while (CDR(r) != R_NilValue ) {
 		SET_ARGUSED(r, 1);
 		r = CDR(r);
 	    }
 	    SET_ARGUSED(r, 1);
 	    SETCDR(r, ExpandDots(CDR(s), expdots));
 	    return CAR(s);
 	}
     }
     else
 	SET_ARGUSED(s, 0);
     SETCDR(s, ExpandDots(CDR(s), expdots));
     return s;
 }
 static SEXP subDots(SEXP rho)
 {
     SEXP rval, dots, a, b, t;
     int len,i;

     dots = findVar(R_DotsSymbol, rho);

     if (dots == R_UnboundValue)
 	error(_("... used in a situation where it does not exist"));

     if (dots == R_MissingArg)
 	return dots;

     if (!isPairList(dots))
 	error(_("... is not a pairlist"));

     len = length(dots);
     PROTECT(dots);
     PROTECT(rval=allocList(len));
     for(a = dots, b = rval, i = 1; i <= len; a = CDR(a), b = CDR(b), i++) {
 	SET_TAG(b, TAG(a));
 	t = CAR(a);
 	while (TYPEOF(t) == PROMSXP)
 	    t = PREXPR(t);
 	if( isSymbol(t) || isLanguage(t) )
 	    SETCAR(b, installDDVAL(i));
 	else
 	    SETCAR(b, t);
     }
     UNPROTECT(2);
     return rval;
 }


 SEXP attribute_hidden do_matchcall(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     SEXP formals, actuals, rlist;
     SEXP funcall, f, b, rval, sysp, t1, t2, tail;
 //    RCNTXT *cptr;
     int expdots;

     checkArity(op,args);

     funcall = CADR(args);

     if (TYPEOF(funcall) == EXPRSXP)
 	funcall = VECTOR_ELT(funcall, 0);

     if (TYPEOF(funcall) != LANGSXP)
 	error(_("invalid '%s' argument"), "call");

     b = CAR(args);
     if (TYPEOF(b) != CLOSXP)
 	error(_("invalid '%s' argument"), "definition");

     sysp = CAR(CDDDR(args));
     if (!isEnvironment(sysp))
 	error(_("'envir' must be an environment"));

     /* Do we expand ... ? */

     expdots = asLogical(CAR(CDDR(args)));
     if (expdots == NA_LOGICAL)
 	error(_("invalid '%s' argument"), "expand.dots");

     /* Get the formals and match the actual args */

     formals = FORMALS(b);
     PROTECT(actuals = shallow_duplicate(CDR(funcall)));

     /* If there is a ... symbol then expand it out in the sysp env
        We need to take some care since the ... might be in the middle
        of the actuals  */

     t2 = R_MissingArg;
     for (t1=actuals ; t1!=R_NilValue ; t1 = CDR(t1) ) {
 	if (CAR(t1) == R_DotsSymbol) {
 	    t2 = subDots(sysp);
 	    break;
 	}
     }
     /* now to splice t2 into the correct spot in actuals */
     if (t2 != R_MissingArg ) {	/* so we did something above */
 	if( CAR(actuals) == R_DotsSymbol ) {
 	    UNPROTECT(1);
 	    actuals = listAppend(t2, CDR(actuals));
 	    PROTECT(actuals);
 	}
 	else {
 	    for(t1=actuals; t1!=R_NilValue; t1=CDR(t1)) {
 		if( CADR(t1) == R_DotsSymbol ) {
 		    tail = CDDR(t1);
 		    SETCDR(t1, t2);
 		    listAppend(actuals,tail);
 		    break;
 		}
 	    }
 	}
     } else { /* get rid of it */
 	if( CAR(actuals) == R_DotsSymbol ) {
 	    UNPROTECT(1);
 	    actuals = CDR(actuals);
 	    PROTECT(actuals);
 	}
 	else {
 	    for(t1=actuals; t1!=R_NilValue; t1=CDR(t1)) {
 		if( CADR(t1) == R_DotsSymbol ) {
 		    tail = CDDR(t1);
 		    SETCDR(t1, tail);
 		    break;
 		}
 	    }
 	}
     }
     rlist = matchArgs_RC(formals, actuals, call);

     /* Attach the argument names as tags */

     for (f = formals, b = rlist; b != R_NilValue; b = CDR(b), f = CDR(f)) {
 	SET_TAG(b, TAG(f));
     }


     /* Handle the dots */

     PROTECT(rlist = ExpandDots(rlist, expdots));

     /* Eliminate any unmatched formals and any that match R_DotSymbol */
     /* This needs to be after ExpandDots as the DOTSXP might match ... */

     rlist = StripUnmatched(rlist);

     PROTECT(rval = allocSExp(LANGSXP));
     SETCAR(rval, lazy_duplicate(CAR(funcall)));
     SETCDR(rval, rlist);
     UNPROTECT(3);
     return rval;
 }


 #include <R_ext/RS.h> /* for Memzero */

 #ifdef _AIX  /*some people just have to be different: is this still needed? */
 #    include <memory.h>
 #endif


 static SEXP
 rowsum(SEXP x, SEXP g, SEXP uniqueg, SEXP snarm, SEXP rn)
 {
     SEXP matches,ans;
     int n, p, ng, narm;
     R_xlen_t offset = 0, offsetg = 0;
     HashData data;
     data.nomatch = 0;

     n = LENGTH(g);
     ng = length(uniqueg);
     narm = asLogical(snarm);
     if(narm == NA_LOGICAL) error("'na.rm' must be TRUE or FALSE");
     if(isMatrix(x)) p = ncols(x); else p = 1;

     HashTableSetup(uniqueg, &data, NA_INTEGER);
     PROTECT(data.HashTable);
     DoHashing(uniqueg, &data);
     PROTECT(matches = HashLookup(uniqueg, g, &data));
     int *pmatches = INTEGER(matches);

     PROTECT(ans = allocMatrix(TYPEOF(x), ng, p));

     switch(TYPEOF(x)){
     case REALSXP:
 	Memzero(REAL0(ans), ng*p);
 	for(int i = 0; i < p; i++) {
 	    double *pa = REAL0(ans);
 	    for(int j = 0; j < n; j++) {
 		double xjpo = REAL_ELT(x, j + offset);
 		if(!narm || !ISNAN(xjpo))
 		    pa[pmatches[j] - 1 + offsetg] += xjpo;
 	    }
 	    offset += n;
 	    offsetg += ng;
 	}
 	break;
     case INTSXP:
 	Memzero(INTEGER0(ans), ng*p);
 	for(int i = 0; i < p; i++) {
 	    int *pa = INTEGER0(ans);
 	    for(int j = 0; j < n; j++) {
 		int xjpo = INTEGER_ELT(x, j + offset);
 		if (xjpo == NA_INTEGER) {
 		    if(!narm)
 			pa[pmatches[j] - 1 + offsetg] = NA_INTEGER;
 		} else if (pa[pmatches[j] - 1 + offsetg] != NA_INTEGER) {
 		    /* check for integer overflows */
 		    int itmp = pa[pmatches[j] - 1 + offsetg];
 		    double dtmp = itmp;
 		    dtmp += xjpo;
 		    if (dtmp < INT_MIN || dtmp > INT_MAX) itmp = NA_INTEGER;
 		    else itmp += xjpo;
 		    pa[pmatches[j] - 1 + offsetg] = itmp;
 		}
 	    }
 	    offset += n;
 	    offsetg += ng;
 	}
 	break;
     default:
 	error("non-numeric matrix in rowsum(): this should not happen");
     }
     if (TYPEOF(rn) != STRSXP) error("row names are not character");
     SEXP dn = allocVector(VECSXP, 2), dn2, dn3;
     setAttrib(ans, R_DimNamesSymbol, dn);
     SET_VECTOR_ELT(dn, 0, rn);
     dn2 = getAttrib(x, R_DimNamesSymbol);
     if(length(dn2) >= 2 &&
        !isNull(dn3 = VECTOR_ELT(dn2, 1))) SET_VECTOR_ELT(dn, 1, dn3);

     UNPROTECT(3); /* HashTable, matches, ans */
     return ans;
 }

 static SEXP
 rowsum_df(SEXP x, SEXP g, SEXP uniqueg, SEXP snarm, SEXP rn)
 {
     SEXP matches,ans,col,xcol;
     int p, narm;
     HashData data;
     data.nomatch = 0;

     R_xlen_t n = XLENGTH(g);
     p = LENGTH(x);
     R_xlen_t ng = XLENGTH(uniqueg);
     narm = asLogical(snarm);
     if(narm == NA_LOGICAL) error("'na.rm' must be TRUE or FALSE");

     HashTableSetup(uniqueg, &data, NA_INTEGER);
     PROTECT(data.HashTable);
     DoHashing(uniqueg, &data);
     PROTECT(matches = HashLookup(uniqueg, g, &data));
     int *pmatches = INTEGER(matches);

     PROTECT(ans = allocVector(VECSXP, p));

     for(int i = 0; i < p; i++) {
 	xcol = VECTOR_ELT(x,i);
 	if (!isNumeric(xcol))
 	    error(_("non-numeric data frame in rowsum"));
 	switch(TYPEOF(xcol)){
 	case REALSXP:
 	    PROTECT(col = allocVector(REALSXP,ng));
 	    Memzero(REAL0(col), ng);
 	    for(R_xlen_t j = 0; j < n; j++) {
 		double xj = REAL_ELT(xcol, j);
 		if(!narm || !ISNAN(xj))
 		    REAL0(col)[pmatches[j] - 1] += xj;
 	    }
 	    SET_VECTOR_ELT(ans,i,col);
 	    UNPROTECT(1);
 	    break;
 	case INTSXP:
 	    PROTECT(col = allocVector(INTSXP, ng));
 	    Memzero(INTEGER0(col), ng);
 	    for(R_xlen_t j = 0; j < n; j++) {
 		int xj = INTEGER_ELT(xcol, j);
 		if (xj == NA_INTEGER) {
 		    if(!narm)
 			INTEGER0(col)[pmatches[j] - 1] = NA_INTEGER;
 		} else if (INTEGER0(col)[pmatches[j] - 1] != NA_INTEGER) {
 		    int itmp = INTEGER0(col)[pmatches[j] - 1];
 		    double dtmp = itmp;
 		    dtmp += xj;
 		    if (dtmp < INT_MIN || dtmp > INT_MAX) itmp = NA_INTEGER;
 		    else itmp += xj;
 		    INTEGER0(col)[pmatches[j] - 1] = itmp;
 		}
 	    }
 	    SET_VECTOR_ELT(ans, i, col);
 	    UNPROTECT(1);
 	    break;

 	default:
 	    error(_("this cannot happen"));
 	}
     }
     namesgets(ans, getAttrib(x, R_NamesSymbol));
     if (TYPEOF(rn) != STRSXP) error("row names are not character");
     setAttrib(ans, R_RowNamesSymbol, rn);
     classgets(ans, mkString("data.frame"));

     UNPROTECT(3); /* HashTable, matches, ans */
     return ans;
 }

 SEXP attribute_hidden do_rowsum(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     checkArity(op, args);
     if(PRIMVAL(op) == 1)
 	return rowsum_df(CAR(args), CADR(args), CADDR(args), CADDDR(args),
 			 CAD4R(args));
     else
 	return rowsum(CAR(args), CADR(args), CADDR(args), CADDDR(args),
 		      CAD4R(args));
 }


 /* returns 1-based duplicate no */
 static int isDuplicated2(SEXP x, int indx, HashData *d)
 {
     int *h = HTDATA_INT(d);
     hlen i = d->hash(x, indx, d);
     while (h[i] != NIL) {
 	if (d->equal(x, h[i], x, indx))
 	    return h[i] + 1;
 	i = (i + 1) % d->M;
     }
     h[i] = indx;
     return 0;
 }

 static SEXP duplicated2(SEXP x, HashData *d)
 {
     SEXP ans;
     int i, n;

     n = LENGTH(x);
     HashTableSetup(x, d, NA_INTEGER);
     PROTECT(d->HashTable);
     PROTECT(ans = allocVector(INTSXP, n));

     int *h = HTDATA_INT(d);
     int *v = INTEGER0(ans);
     for (i = 0; i < d->M; i++) h[i] = NIL;
     for (i = 0; i < n; i++) {
 //	if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
 	v[i] = isDuplicated2(x, i, d);
     }
     UNPROTECT(2);
     return ans;
 }

 SEXP attribute_hidden do_makeunique(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     SEXP names, sep, ans, dup, newx;
     int i, cnt, *cnts, dp;
     int n, len, maxlen = 0;
     HashData data;
     const char *csep, *ss;
     const void *vmax;

     checkArity(op, args);
     names = CAR(args);
     if(!isString(names))
 	error(_("'names' must be a character vector"));
     n = LENGTH(names);
     sep = CADR(args);
     if(!isString(sep) || LENGTH(sep) != 1)
 	error(_("'%s' must be a character string"), "sep");
     csep = translateChar(STRING_ELT(sep, 0));
     PROTECT(ans = allocVector(STRSXP, n));
     vmax = vmaxget();
     for(i = 0; i < n; i++) {
 	SET_STRING_ELT(ans, i, STRING_ELT(names, i));
 	len = (int) strlen(translateChar(STRING_ELT(names, i)));
 	if(len > maxlen) maxlen = len;
 	vmaxset(vmax);
     }
     if(n > 1) {
 	/* +2 for terminator and rounding error */
 	char buf[maxlen + (int) strlen(csep)
 		 + (int) (log((double)n)/log(10.0)) + 2];
 	if(n < 10000) {
 	    R_CheckStack2((size_t)n * sizeof(int));
 	    cnts = (int *) alloca(((size_t) n) * sizeof(int));
 	} else {
 	    /* This is going to be slow so use expensive allocation
 	       that will be recovered if interrupted. */
 	    cnts = (int *) R_alloc((size_t) n,	sizeof(int));
 	}
 	for(i = 0; i < n; i++) cnts[i] = 1;
 	data.nomatch = 0;
 	PROTECT(newx = allocVector(STRSXP, 1));
 	PROTECT(dup = duplicated2(names, &data));
 	PROTECT(data.HashTable);
 	vmax = vmaxget();
 	for(i = 1; i < n; i++) { /* first cannot be a duplicate */
 	    dp = INTEGER_ELT(dup, i); /* 1-based number of first occurrence */
 	    if(dp == 0) continue;
 	    ss = translateChar(STRING_ELT(names, i));
 	    /* Try appending 1,2,3, ..., n-1 until it is not already in use */
 	    for(cnt = cnts[dp - 1]; cnt < n; cnt++) {
 		sprintf(buf, "%s%s%d", ss, csep, cnt);
 		SET_STRING_ELT(newx, 0, mkChar(buf));
 		if(Lookup(ans, newx, 0, &data) == data.nomatch) break;
 	    }
 	    SET_STRING_ELT(ans, i, STRING_ELT(newx, 0));
 	    /* insert it */ (void) isDuplicated(ans, i, &data);
 	    cnts[dp - 1] = cnt+1; /* cache the first unused cnt value */
 	    vmaxset(vmax);
 	}
 	UNPROTECT(3);
     }
     UNPROTECT(1);
     return ans;
 }

 /* Use hashing to improve object.size. Here we want equal CHARSXPs,
    not equal contents. */

 static int csequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
 {
     return STRING_ELT(x, i) == STRING_ELT(y, j);
 }

 static void HashTableSetup1(SEXP x, HashData *d)
 {
     d->hash = cshash;
     d->equal = csequal;
 #ifdef LONG_VECTOR_SUPPORT
     d->isLong = FALSE;
 #endif
     MKsetup(XLENGTH(x), d, NA_INTEGER);
     d->HashTable = allocVector(INTSXP, (R_xlen_t) d->M);
     for (R_xlen_t i = 0; i < d->M; i++) HTDATA_INT(d)[i] = NIL;
 }

 /* used in utils */
 SEXP Rf_csduplicated(SEXP x)
 {
     if(TYPEOF(x) != STRSXP)
 	error("C function 'csduplicated' not called on a STRSXP");
     R_xlen_t n = XLENGTH(x);
     HashData data;
     HashTableSetup1(x, &data);
     PROTECT(data.HashTable);
     SEXP ans = PROTECT(allocVector(LGLSXP, n));
     int *v = LOGICAL(ans);

     for (R_xlen_t i = 0; i < n; i++) v[i] = isDuplicated(x, i, &data);

     UNPROTECT(2);
     return ans;
 }

 #include <R_ext/Random.h>

 // sample.int(.) --> .Internal(sample2(n, size)) :
 SEXP attribute_hidden do_sample2(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     checkArity(op, args);
     SEXP ans;
     double dn = asReal(CAR(args));
     int k = asInteger(CADR(args));
     if (!R_FINITE(dn) || dn < 0 || dn > 4.5e15 || (k > 0 && dn == 0))
 	error(_("invalid first argument"));
     if (k < 0) error(_("invalid '%s' argument"), "size");
     if (k > dn/2) error("This algorithm is for size <= n/2");
     HashData data;
     GetRNGstate();
     if (dn > INT_MAX) {
 	ans = PROTECT(allocVector(REALSXP, k));
 	double *ry = REAL0(ans);
 	HashTableSetup(ans, &data, NA_INTEGER);
 	PROTECT(data.HashTable);
 	for (int i = 0; i < k; i++)
 	    for(int j = 0; j < 100; j++) { // average < 2
 		ry[i] = R_unif_index(dn) + 1;
 		if(!isDuplicated(ans, i, &data)) break;
 	    }
    } else {
 	ans = PROTECT(allocVector(INTSXP, k));
 	int *iy = INTEGER0(ans);
 	HashTableSetup(ans, &data, NA_INTEGER);
 	PROTECT(data.HashTable);
 	for (int i = 0; i < k; i++)
 	    for(int j = 0; j < 100; j++) { // average < 2
 		iy[i] = (int)(R_unif_index(dn) + 1);
 		if(!isDuplicated(ans, i, &data)) break;
 	    }
     }
     PutRNGstate();
     UNPROTECT(2);
     return ans;
 }