/*
 *  R : A Computer Language for Statistical Data Analysis
 *  Copyright (C) 1997--2019  The R Core Team
 *  Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, a copy is available at
 *  https://www.R-project.org/Licenses/
 */

/* This is currently restricted to vectors of length < 2^30 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#define R_USE_SIGNALS 1
#include <Defn.h>
#include <Internal.h>

#define NIL -1
#define ARGUSED(x) LEVELS(x)
#define SET_ARGUSED(x,v) SETLEVELS(x,v)

/* interval at which to check interrupts */
#define NINTERRUPT 1000000

typedef size_t hlen;

/* Hash function and equality test for keys */
typedef struct _HashData HashData;

struct _HashData {
    int K;
    hlen M;
    R_xlen_t nmax;
#ifdef LONG_VECTOR_SUPPORT
    Rboolean isLong;
#endif
    hlen (*hash)(SEXP, R_xlen_t, HashData *);
    int (*equal)(SEXP, R_xlen_t, SEXP, R_xlen_t);
    SEXP HashTable;

    int nomatch;
    Rboolean useUTF8;
    Rboolean useCache;
};

#define HTDATA_INT(d) (INTEGER0((d)->HashTable))
#define HTDATA_DBL(d) (REAL0((d)->HashTable))


/*
   Integer keys are hashed via a random number generator
   based on Knuth's recommendations.  The high order K bits
   are used as the hash code.

   NB: lots of this code relies on M being a power of two and
   on silent integer overflow mod 2^32.

   <FIXME> Integer keys are wasteful for logical and raw vectors, but
   the tables are small in that case.  It would be much easier to
   implement long vectors, though.
*/

/*  Currently the hash table is implemented as a (signed) integer
    array.  So there are two 31-bit restrictions, the length of the
    array and the values.  The values are initially NIL (-1).  O-based
    indices are inserted by isDuplicated, and invalidated by setting
    to NA_INTEGER.
*/

static hlen scatter(unsigned int key, HashData *d)
{
    return 3141592653U * key >> (32 - d->K);
}

static hlen lhash(SEXP x, R_xlen_t indx, HashData *d)
{
    int xi = LOGICAL_ELT(x, indx);
    if (xi == NA_LOGICAL) return 2U;
    return (hlen) xi;
}

static R_INLINE hlen ihash(SEXP x, R_xlen_t indx, HashData *d)
{
    int xi = INTEGER_ELT(x, indx);
    if (xi == NA_INTEGER) return 0;
    return scatter((unsigned int) xi, d);
}

/* We use unions here because Solaris gcc -O2 has trouble with
   casting + incrementing pointers.  We use tests here, but R currently
   assumes int is 4 bytes and double is 8 bytes.
 */
union foo {
    double d;
    unsigned int u[2];
};

static R_INLINE hlen rhash(SEXP x, R_xlen_t indx, HashData *d)
{
    /* There is a problem with signed 0s under IEC60559 */
    double xi = REAL_ELT(x, indx);
    double tmp = (xi == 0.0) ? 0.0 : xi;
    /* need to use both 32-byte chunks or endianness is an issue */
    /* we want all NaNs except NA equal, and all NAs equal */
    if (R_IsNA(tmp)) tmp = NA_REAL;
    else if (R_IsNaN(tmp)) tmp = R_NaN;
#if 2*SIZEOF_INT == SIZEOF_DOUBLE
    {
	union foo tmpu;
	tmpu.d = tmp;
	return scatter(tmpu.u[0] + tmpu.u[1], d);
    }
#else
    return scatter(*((unsigned int *) (&tmp)), d);
#endif
}

static Rcomplex unify_complex_na(Rcomplex z) {
    Rcomplex ans;
    ans.r = (z.r == 0.0) ? 0.0 : z.r;
    ans.i = (z.i == 0.0) ? 0.0 : z.i;
    if (R_IsNA(ans.r) || R_IsNA(ans.i))
	ans.r = ans.i = NA_REAL;
    else if (R_IsNaN(ans.r) || R_IsNaN(ans.i))
	ans.r = ans.i = R_NaN;
    return ans;
}

static hlen chash(SEXP x, R_xlen_t indx, HashData *d)
{
    Rcomplex tmp = unify_complex_na(COMPLEX_ELT(x, indx));

#if 2*SIZEOF_INT == SIZEOF_DOUBLE
    {
	unsigned int u;
	union foo tmpu;
	tmpu.d = tmp.r;
	u = tmpu.u[0] ^ tmpu.u[1];
	tmpu.d = tmp.i;
	u ^= tmpu.u[0] ^ tmpu.u[1];
	return scatter(u, d);
    }
#else
	return scatter((*((unsigned int *)(&tmp.r)) ^
			(*((unsigned int *)(&tmp.i)))), d);
#endif
}

/* Hash CHARSXP by address.  Hash values are int, For 64bit pointers,
 * we do (upper ^ lower) */
static R_INLINE hlen cshash(SEXP x, R_xlen_t indx, HashData *d)
{
    intptr_t z = (intptr_t) STRING_ELT(x, indx);
    unsigned int z1 = (unsigned int)(z & 0xffffffff), z2 = 0;
#if SIZEOF_LONG == 8
    z2 = (unsigned int)(z/0x100000000L);
#endif
    return scatter(z1 ^ z2, d);
}

static R_INLINE hlen shash(SEXP x, R_xlen_t indx, HashData *d)
{
    unsigned int k;
    const char *p;
    if(!d->useUTF8 && d->useCache) return cshash(x, indx, d);
    const void *vmax = vmaxget();
    /* Not having d->useCache really should not happen anymore. */
    p = translateCharUTF8(STRING_ELT(x, indx));
    k = 0;
    while (*p++)
	k = 11 * k + (unsigned int) *p; /* was 8 but 11 isn't a power of 2 */
    vmaxset(vmax); /* discard any memory used by translateChar */
    return scatter(k, d);
}

static int lequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
{
    if (i < 0 || j < 0) return 0;
    return (LOGICAL_ELT(x, i) == LOGICAL_ELT(y, j));
}


static R_INLINE int iequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
{
    if (i < 0 || j < 0) return 0;
    return (INTEGER_ELT(x, i) == INTEGER_ELT(y, j));
}

/* BDR 2002-1-17  We don't want NA and other NaNs to be equal */
static R_INLINE int requal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
{
    if (i < 0 || j < 0) return 0;
    double xi = REAL_ELT(x, i);
    double yj = REAL_ELT(y, j);
    if (!ISNAN(xi) && !ISNAN(yj))
	return (xi == yj);
    else if (R_IsNA(xi) && R_IsNA(yj)) return 1;
    else if (R_IsNaN(xi) && R_IsNaN(yj)) return 1;
    else return 0;
}

/* This is differentiating {NA,1}, {NA,0}, {NA, NaN}, {NA, NA},
 * but R's print() and format()  render all as "NA" */
static int cplx_eq(Rcomplex x, Rcomplex y)
{
    if (!ISNAN(x.r) && !ISNAN(x.i) && !ISNAN(y.r) && !ISNAN(y.i))
	return x.r == y.r && x.i == y.i;
    else if (R_IsNA(x.r) || R_IsNA(x.i)) // x is NA
	return (R_IsNA(y.r) || R_IsNA(y.i)) ? 1 : 0;
    else if (R_IsNA(y.r) || R_IsNA(y.i)) // y is NA but x is not
	return 0;
    // else : none is NA but there's at least one NaN;  hence  ISNAN(.) == R_IsNaN(.)
    return
	(((ISNAN(x.r) && ISNAN(y.r)) || (!ISNAN(x.r) && !ISNAN(y.r) && x.r == y.r)) && // Re
	 ((ISNAN(x.i) && ISNAN(y.i)) || (!ISNAN(x.i) && !ISNAN(y.i) && x.i == y.i))    // Im
	    ) ? 1 : 0;
}

static int cequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
{
    if (i < 0 || j < 0) return 0;
    return cplx_eq(COMPLEX_ELT(x, i), COMPLEX_ELT(y, j));
}

static R_INLINE int sequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
{
    if (i < 0 || j < 0) return 0;
    SEXP xi = STRING_ELT(x, i);
    SEXP yj = STRING_ELT(y, j);
    /* Two strings which have the same address must be the same,
       so avoid looking at the contents */
    if (xi == yj) return 1;
    /* Then if either is NA the other cannot be */
    /* Once all CHARSXPs are cached, Seql will handle this */
    if (xi == NA_STRING || yj == NA_STRING)
	return 0;
    /* another pre-test to avoid the call to Seql */
    if (IS_CACHED(xi) && IS_CACHED(yj) && ENC_KNOWN(xi) == ENC_KNOWN(yj))
	return 0;
    return Seql(xi, yj);
}

static hlen rawhash(SEXP x, R_xlen_t indx, HashData *d)
{
    return (hlen) RAW_ELT(x, indx);
}

static int rawequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
{
    if (i < 0 || j < 0) return 0;
    return (RAW_ELT(x, i) == RAW_ELT(y, j));
}

static hlen vhash(SEXP x, R_xlen_t indx, HashData *d)
{
    int i;
    unsigned int key;
    SEXP _this = VECTOR_ELT(x, indx);

    key = OBJECT(_this) + 2*TYPEOF(_this) + 100U*(unsigned int) length(_this);
    /* maybe we should also look at attributes, but that slows us down */
    switch (TYPEOF(_this)) {
    case LGLSXP:
	/* This is not too clever: pack into 32-bits and then scatter? */
	for(i = 0; i < LENGTH(_this); i++) {
	    key ^= lhash(_this, i, d);
	    key *= 97;
	}
	break;
    case INTSXP:
	for(i = 0; i < LENGTH(_this); i++) {
	    key ^= ihash(_this, i, d);
	    key *= 97;
	}
	break;
    case REALSXP:
	for(i = 0; i < LENGTH(_this); i++) {
	    key ^= rhash(_this, i, d);
	    key *= 97;
	}
	break;
    case CPLXSXP:
	for(i = 0; i < LENGTH(_this); i++) {
	    key ^= chash(_this, i, d);
	    key *= 97;
	}
	break;
    case STRSXP:
	for(i = 0; i < LENGTH(_this); i++) {
	    key ^= shash(_this, i, d);
	    key *= 97;
	}
	break;
    case RAWSXP:
	for(i = 0; i < LENGTH(_this); i++) {
	    key ^= scatter((unsigned int)rawhash(_this, i, d), d);
	    key *= 97;
	}
	break;
    case VECSXP:
	for(i = 0; i < LENGTH(_this); i++) {
	    key ^= vhash(_this, i, d);
	    key *= 97;
	}
	break;
    default:
	break;
    }
    return scatter(key, d);
}

static int vequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
{
    if (i < 0 || j < 0) return 0;
    return R_compute_identical(VECTOR_ELT(x, i), VECTOR_ELT(y, j), 0);
}

/*
  Choose M to be the smallest power of 2
  not less than 2*n and set K = log2(M).
  Need K >= 1 and hence M >= 2, and 2^M < 2^31-1, hence n <= 2^29.

  Dec 2004: modified from 4*n to 2*n, since in the worst case we have
  a 50% full table, and that is still rather efficient -- see
  R. Sedgewick (1998) Algorithms in C++ 3rd edition p.606.
*/
static void MKsetup(R_xlen_t n, HashData *d, R_xlen_t nmax)
{
#ifdef LONG_VECTOR_SUPPORT
    /* M = 2^32 is safe, hence n <= 2^31 -1 */
    if(n < 0) /* protect against overflow to -ve */
	error(_("length %d is too large for hashing"), n);
#else
    if(n < 0 || n >= 1073741824) /* protect against overflow to -ve */
	error(_("length %d is too large for hashing"), n);
#endif

    if (nmax != NA_INTEGER && nmax != 1) n = nmax;
    size_t n2 = 2U * (size_t) n;
    d->M = 2;
    d->K = 1;
    while (d->M < n2) {
	d->M *= 2;
	d->K++;
    }
    d->nmax = n;
}

#define IMAX 4294967296L
static void HashTableSetup(SEXP x, HashData *d, R_xlen_t nmax)
{
    d->useUTF8 = FALSE;
    d->useCache = TRUE;
    switch (TYPEOF(x)) {
    case LGLSXP:
	d->hash = lhash;
	d->equal = lequal;
	d->nmax = d->M = 4;
	d->K = 2; /* unused */
	break;
    case INTSXP:
    {
	d->hash = ihash;
	d->equal = iequal;
#ifdef LONG_VECTOR_SUPPORT
	R_xlen_t nn = XLENGTH(x);
	if (nn > IMAX) nn = IMAX;
	MKsetup(nn, d, nmax);
#else
	MKsetup(LENGTH(x), d, nmax);
#endif
    }
	break;
    case REALSXP:
	d->hash = rhash;
	d->equal = requal;
	MKsetup(XLENGTH(x), d, nmax);
	break;
    case CPLXSXP:
	d->hash = chash;
	d->equal = cequal;
	MKsetup(XLENGTH(x), d, nmax);
	break;
    case STRSXP:
	d->hash = shash;
	d->equal = sequal;
	MKsetup(XLENGTH(x), d, nmax);
	break;
    case RAWSXP:
	d->hash = rawhash;
	d->equal = rawequal;
	d->nmax = d->M = 256;
	d->K = 8; /* unused */
	break;
    case VECSXP:
	d->hash = vhash;
	d->equal = vequal;
	MKsetup(XLENGTH(x), d, nmax);
	break;
    default:
	UNIMPLEMENTED_TYPE("HashTableSetup", x);
    }
#ifdef LONG_VECTOR_SUPPORT
    d->isLong = IS_LONG_VEC(x);
    if (d->isLong) {
	d->HashTable = allocVector(REALSXP, (R_xlen_t) d->M);
	for (R_xlen_t i = 0; i < d->M; i++) HTDATA_DBL(d)[i] = NIL;
    } else
#endif
    {
	d->HashTable = allocVector(INTSXP, (R_xlen_t) d->M);
	for (R_xlen_t i = 0; i < d->M; i++) HTDATA_INT(d)[i] = NIL;
    }
}

/* Open address hashing */
/* Collision resolution is by linear probing */
/* The table is guaranteed large so this is sufficient */

static int isDuplicated(SEXP x, R_xlen_t indx, HashData *d)
{
#ifdef LONG_VECTOR_SUPPORT
    if (d->isLong) {
	double *h = HTDATA_DBL(d);
	hlen i = d->hash(x, indx, d);
	while (h[i] != NIL) {
	    if (d->equal(x, (R_xlen_t) h[i], x, indx))
		return h[i] >= 0 ? 1 : 0;
	    i = (i + 1) % d->M;
	}
	if (d->nmax-- < 0) error("hash table is full");
	h[i] = (double) indx;
    } else
#endif
    {
	int *h = HTDATA_INT(d);
	hlen i = d->hash(x, indx, d);
	while (h[i] != NIL) {
	    if (d->equal(x, h[i], x, indx))
		return h[i] >= 0 ? 1 : 0;
	    i = (i + 1) % d->M;
	}
	if (d->nmax-- < 0) error("hash table is full");
	h[i] = (int) indx;
    }
    return 0;
}

static void removeEntry(SEXP table, SEXP x, R_xlen_t indx, HashData *d)
{
#ifdef LONG_VECTOR_SUPPORT
    if (d->isLong) {
	double *h = HTDATA_DBL(d);
	hlen i = d->hash(x, indx, d);
	while (h[i] >= 0) {
	    if (d->equal(table, (R_xlen_t) h[i], x, indx)) {
		h[i] = NA_INTEGER;  /* < 0, only index values are inserted */
		return;
	    }
	    i = (i + 1) % d->M;
	}
    } else
#endif
    {
	int *h = HTDATA_INT(d);
	hlen i = d->hash(x, indx, d);
	while (h[i] >= 0) {
	    if (d->equal(table, h[i], x, indx)) {
		h[i] = NA_INTEGER;  /* < 0, only index values are inserted */
		return;
	    }
	    i = (i + 1) % d->M;
	}
    }
}

#define DUPLICATED_INIT						\
    HashData data;						\
    HashTableSetup(x, &data, nmax);				\
    if(TYPEOF(x) == STRSXP) {					\
	data.useUTF8 = FALSE; data.useCache = TRUE;		\
	for(i = 0; i < n; i++) {				\
	    if(IS_BYTES(STRING_ELT(x, i))) {			\
		data.useUTF8 = FALSE; break;			\
	    }							\
	    if(ENC_KNOWN(STRING_ELT(x, i))) {			\
		data.useUTF8 = TRUE;				\
	    }							\
	    if(!IS_CACHED(STRING_ELT(x, i))) {			\
		data.useCache = FALSE; break;			\
	    }							\
	}							\
    }

/* used in scan() */
SEXP duplicated(SEXP x, Rboolean from_last)
{
    SEXP ans;
    int *v, nmax = NA_INTEGER;

    if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
    R_xlen_t i, n = XLENGTH(x);
    DUPLICATED_INIT;

    PROTECT(data.HashTable);
    PROTECT(ans = allocVector(LGLSXP, n));

    v = LOGICAL(ans);

    if(from_last)
	for (i = n-1; i >= 0; i--) {
//	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
	    v[i] = isDuplicated(x, i, &data);
	}
    else
	for (i = 0; i < n; i++) {
//	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
	    v[i] = isDuplicated(x, i, &data);
	}

    UNPROTECT(2);
    return ans;
}

static SEXP Duplicated(SEXP x, Rboolean from_last, int nmax)
{
    SEXP ans;
    int *v;

    if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
    R_xlen_t i, n = XLENGTH(x);
    DUPLICATED_INIT;

    PROTECT(data.HashTable);
    PROTECT(ans = allocVector(LGLSXP, n));

    v = LOGICAL(ans);

    if(from_last)
	for (i = n-1; i >= 0; i--) {
//	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
	    v[i] = isDuplicated(x, i, &data);
	}
    else
	for (i = 0; i < n; i++) {
//	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
	    v[i] = isDuplicated(x, i, &data);
	}

    UNPROTECT(2);
    return ans;
}

/* simpler version of the above : return 1-based index of first, or 0 : */
R_xlen_t any_duplicated(SEXP x, Rboolean from_last)
{
    R_xlen_t result = 0;
    int nmax = NA_INTEGER;

    if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
    R_xlen_t i, n = XLENGTH(x);

    DUPLICATED_INIT;
    PROTECT(data.HashTable);

    if(from_last) {
	for (i = n-1; i >= 0; i--) {
	    if(isDuplicated(x, i, &data)) { result = ++i; break; }
	}
    } else {
	for (i = 0; i < n; i++) {
	    if(isDuplicated(x, i, &data)) { result = ++i; break; }
	}
    }
    UNPROTECT(1);
    return result;
}

static SEXP duplicated3(SEXP x, SEXP incomp, Rboolean from_last, int nmax)
{
    SEXP ans;
    int *v, j, m;

    if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
    R_xlen_t i, n = XLENGTH(x);
    DUPLICATED_INIT;

    PROTECT(data.HashTable);
    PROTECT(ans = allocVector(LGLSXP, n));

    v = LOGICAL(ans);

    if(from_last)
	for (i = n-1; i >= 0; i--) {
//	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
	    v[i] = isDuplicated(x, i, &data);
	}
    else
	for (i = 0; i < n; i++) {
//	    if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
	    v[i] = isDuplicated(x, i, &data);
	}

    if(length(incomp)) {
	PROTECT(incomp = coerceVector(incomp, TYPEOF(x)));
	m = length(incomp);
	for (i = 0; i < n; i++)
	    if(v[i]) {
		for(j = 0; j < m; j++)
		    if(data.equal(x, i, incomp, j)) {v[i] = 0; break;}
	    }
	UNPROTECT(1);
    }
    UNPROTECT(2);
    return ans;
}

/* return (1-based) index of first duplication, or 0 : */
R_xlen_t any_duplicated3(SEXP x, SEXP incomp, Rboolean from_last)
{
    int j, m = length(incomp), nmax = NA_INTEGER;

    if (!isVector(x)) error(_("'duplicated' applies only to vectors"));
    R_xlen_t i, n = XLENGTH(x);
    DUPLICATED_INIT;
    PROTECT(data.HashTable);

    if(!m) error(_("any_duplicated3(., <0-length incomp>)"));

    PROTECT(incomp = coerceVector(incomp, TYPEOF(x)));
    m = length(incomp);

    if(from_last)
	for (i = n-1; i >= 0; i--) {
#define IS_DUPLICATED_CHECK				\
	    if(isDuplicated(x, i, &data)) {		\
		Rboolean isDup = TRUE;			\
		for(j = 0; j < m; j++)			\
		    if(data.equal(x, i, incomp, j)) {	\
			isDup = FALSE; break;		\
		    }					\
		if(isDup) {				\
		    UNPROTECT(2);			\
		    return ++i;				\
		}					\
		/* else continue */			\
	    }
	    IS_DUPLICATED_CHECK;
	}
    else {
	for (i = 0; i < n; i++) {
	    IS_DUPLICATED_CHECK;
	}
    }

    UNPROTECT(2);
    return 0;
}

#undef IS_DUPLICATED_CHECK
#undef DUPLICATED_INIT


/* .Internal(duplicated(x))	  [op=0]
  .Internal(unique(x))		  [op=1]
   .Internal(anyDuplicated(x))	  [op=2]
*/
SEXP attribute_hidden do_duplicated(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP x, incomp, dup, ans;
    int fromLast, nmax = NA_INTEGER;
    R_xlen_t i, k, n;

    checkArity(op, args);
    x = CAR(args);
    incomp = CADR(args);
    if (length(CADDR(args)) < 1)
	error(_("'fromLast' must be length 1"));
    fromLast = asLogical(CADDR(args));
    if (fromLast == NA_LOGICAL)
	error(_("'fromLast' must be TRUE or FALSE"));

    Rboolean fL = (Rboolean) fromLast;

    /* handle zero length vectors, and NULL */
    if ((n = xlength(x)) == 0)
	return(PRIMVAL(op) <= 1
	       ? allocVector(PRIMVAL(op) != 1 ? LGLSXP : TYPEOF(x), 0)
	       : ScalarInteger(0));

    if (!isVector(x)) {
	error(_("%s() applies only to vectors"),
	      (PRIMVAL(op) == 0 ? "duplicated" :
	       (PRIMVAL(op) == 1 ? "unique" : /* 2 */ "anyDuplicated")));
    }
    if (PRIMVAL(op) <= 1) {
	nmax = asInteger(CADDDR(args));
	if (nmax != NA_INTEGER && nmax <= 0)
	    error(_("'nmax' must be positive"));
    }

    if(length(incomp) && /* S has FALSE to mean empty */
       !(isLogical(incomp) && length(incomp) == 1 &&
	 LOGICAL_ELT(incomp, 0) == 0)) {
	if(PRIMVAL(op) == 2) {
	    /* return R's 1-based index :*/
	    R_xlen_t ind  = any_duplicated3(x, incomp, fL);
	    if(ind > INT_MAX) return ScalarReal((double) ind);
	    else return ScalarInteger((int)ind);
	} else
	    dup = duplicated3(x, incomp, fL, nmax);
    }
    else {
	if(PRIMVAL(op) == 2) {
	    R_xlen_t ind  = any_duplicated(x, fL);
	    if(ind > INT_MAX) return ScalarReal((double) ind);
	    else return ScalarInteger((int)ind);
	} else
	    dup = Duplicated(x, fL, nmax);
    }
    if (PRIMVAL(op) == 0) /* "duplicated()" */
	return dup;
    /*	ELSE
	use the results of "duplicated" to get "unique" */

    /* count unique entries */
    k = 0;
    for (i = 0; i < n; i++)
	if (LOGICAL_ELT(dup, i) == 0)
	    k++;

    PROTECT(dup);
    PROTECT(ans = allocVector(TYPEOF(x), k));

    k = 0;
    switch (TYPEOF(x)) {
    case LGLSXP:
	for (i = 0; i < n; i++)
	    if (LOGICAL_ELT(dup, i) == 0)
		LOGICAL0(ans)[k++] = LOGICAL_ELT(x, i);
	break;
    case INTSXP:
	for (i = 0; i < n; i++)
	    if (LOGICAL_ELT(dup, i) == 0)
		INTEGER0(ans)[k++] = INTEGER_ELT(x, i);
	break;
    case REALSXP:
	for (i = 0; i < n; i++)
	    if (LOGICAL_ELT(dup, i) == 0)
		REAL0(ans)[k++] = REAL_ELT(x, i);
	break;
    case CPLXSXP:
	for (i = 0; i < n; i++)
	    if (LOGICAL_ELT(dup, i) == 0)
		COMPLEX0(ans)[k++] = COMPLEX_ELT(x, i);
	break;
    case STRSXP:
	for (i = 0; i < n; i++)
	    if (LOGICAL_ELT(dup, i) == 0)
		SET_STRING_ELT(ans, k++, STRING_ELT(x, i));
	break;
    case VECSXP:
	for (i = 0; i < n; i++)
	    if (LOGICAL_ELT(dup, i) == 0)
		SET_VECTOR_ELT(ans, k++, VECTOR_ELT(x, i));
	break;
    case RAWSXP:
	for (i = 0; i < n; i++)
	    if (LOGICAL_ELT(dup, i) == 0)
		RAW0(ans)[k++] = RAW_ELT(x, i);
	break;
    default:
	UNIMPLEMENTED_TYPE("duplicated", x);
    }
    UNPROTECT(2);
    return ans;
}

/* Build a hash table, ignoring information on duplication */
static void DoHashing(SEXP table, HashData *d)
{
    R_xlen_t i, n = XLENGTH(table);
    for (i = 0; i < n; i++) {
//	if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
	(void) isDuplicated(table, i, d);
    }
}

/* invalidate entries: normally few */
static void UndoHashing(SEXP x, SEXP table, HashData *d)
{
    for (R_xlen_t i = 0; i < XLENGTH(x); i++) removeEntry(table, x, i, d);
}

#define DEFLOOKUP(NAME, HASHFUN, EQLFUN)			\
    static R_INLINE int						\
    NAME(SEXP table, SEXP x, R_xlen_t indx, HashData *d)	\
    {								\
	int *h = HTDATA_INT(d);					\
	hlen i = HASHFUN(x, indx, d);				\
	while (h[i] != NIL) {					\
	    if (EQLFUN(table, h[i], x, indx))			\
		return h[i] >= 0 ? h[i] + 1 : d->nomatch;	\
	    i = (i + 1) % d->M;					\
	}							\
	return d->nomatch;					\
    }

/* definitions to help the C compiler to inline of most important cases */
DEFLOOKUP(iLookup, ihash, iequal)
DEFLOOKUP(rLookup, rhash, requal)
DEFLOOKUP(sLookup, shash, sequal)

/* definition for the general case */
DEFLOOKUP(Lookup, d->hash, d->equal)

/* Now do the table lookup */
static SEXP HashLookup(SEXP table, SEXP x, HashData *d)
{
    SEXP ans;
    R_xlen_t i, n;

    n = XLENGTH(x);
    PROTECT(ans = allocVector(INTSXP, n));
    int *pa = INTEGER0(ans);

    switch (TYPEOF(x)) {
    case INTSXP:
	for (i = 0; i < n; i++)
	    pa[i] = iLookup(table, x, i, d);
	break;
    case REALSXP:
	for (i = 0; i < n; i++)
	    pa[i] = rLookup(table, x, i, d);
	break;
    case STRSXP:
	for (i = 0; i < n; i++)
	    pa[i] = sLookup(table, x, i, d);
	break;
    default:
	for (i = 0; i < n; i++)
	    pa[i] = Lookup(table, x, i, d);
    }

    UNPROTECT(1);
    return ans;
}

static SEXP match_transform(SEXP s, SEXP env)
{
    if(OBJECT(s)) {
	if(inherits(s, "factor")) return asCharacterFactor(s);
	else if(inherits(s, "POSIXlt")) { /* and maybe more classes in the future:
					   * Call R's (generic)	 as.character(s) : */
	    SEXP call, r;
	    PROTECT(call = lang2(R_AsCharacterSymbol, s));
	    r = eval(call, env);
	    UNPROTECT(1);
	    return r;
	}
    }
    /* else */
    return duplicate(s);
}

// workhorse of R's match() and hence also  " ix %in% itable "
SEXP match5(SEXP itable, SEXP ix, int nmatch, SEXP incomp, SEXP env)
{
    R_xlen_t n = xlength(ix);
    /* handle zero length arguments */
    if (n == 0) return allocVector(INTSXP, 0);

    SEXP ans;
    if (length(itable) == 0) {
	ans = allocVector(INTSXP, n);
	int *pa = INTEGER0(ans);
	for (R_xlen_t i = 0; i < n; i++) pa[i] = nmatch;
	return ans;
    }

    int nprot = 0;
    SEXP x     = PROTECT(match_transform(ix,     env)); nprot++;
    SEXP table = PROTECT(match_transform(itable, env)); nprot++;
    /* or should we use PROTECT_WITH_INDEX and REPROTECT below ? */

    SEXPTYPE type;
    /* Coerce to a common type; type == NILSXP is ok here.
     * Note that above we coerce factors and "POSIXlt", only to character.
     * Hence, coerce to character or to `higher' type
     * (given that we have "Vector" or NULL) */
    if(TYPEOF(x) >= STRSXP || TYPEOF(table) >= STRSXP) type = STRSXP;
    else type = TYPEOF(x) < TYPEOF(table) ? TYPEOF(table) : TYPEOF(x);
    PROTECT(x	  = coerceVector(x,	type)); nprot++;
    PROTECT(table = coerceVector(table, type)); nprot++;

    // special case scalar x -- for speed only :
    if(XLENGTH(x) == 1 && !incomp) {
      int val = nmatch;
      int ntable = LENGTH(table);
      switch (type) {
      case STRSXP: {
	  SEXP x_val = STRING_ELT(x,0);
	  for (int i=0; i < ntable; i++) if (Seql(STRING_ELT(table,i), x_val)) {
		  val = i + 1; break;
	      }
	  break; }
      case LGLSXP:
      case INTSXP: {
	  int x_val = INTEGER_ELT(x, 0),
	      *table_p = INTEGER(table);
	  for (int i=0; i < ntable; i++) if (table_p[i] == x_val) {
		  val = i + 1; break;
	      }
	  break; }
      case REALSXP: {
	  double xv = REAL_ELT(x, 0);
	  // pblm with signed 0s under IEC60559
	  double x_val = (xv == 0.) ? 0. : xv;
	  double *table_p = REAL(table);
	  /* we want all NaNs except NA equal, and all NAs equal */
	  if (R_IsNA(x_val)) {
	      for (int i=0; i < ntable; i++) if (R_IsNA(table_p[i])) {
		      val = i + 1; break;
		  }
	  }
	  else if (R_IsNaN(x_val)) {
	      for (int i=0; i < ntable; i++) if (R_IsNaN(table_p[i])) {
		      val = i + 1; break;
		  }
	  }
	  else {
	      for (int i=0; i < ntable; i++) if (table_p[i] == x_val) {
		      val = i + 1; break;
	      }
	  }
	  break; }
      case CPLXSXP: {
	  Rcomplex x_val = COMPLEX_ELT(x, 0),
	      *table_p = COMPLEX(table);
	  for (int i=0; i < ntable; i++)
	      if (cplx_eq(table_p[i], x_val)) {
		  val = i + 1; break;
	      }
	  break; }
      case RAWSXP: {
	  Rbyte x_val = RAW_ELT(x, 0),
	      *table_p = RAW(table);
	  for (int i=0; i < ntable; i++) if (table_p[i] == x_val) {
		  val = i + 1; break;
	      }
	  break; }
      }
      PROTECT(ans = ScalarInteger(val)); nprot++;
    }
    else { // regular case
	HashData data;
	if (incomp) { PROTECT(incomp = coerceVector(incomp, type)); nprot++; }
	data.nomatch = nmatch;
	HashTableSetup(table, &data, NA_INTEGER);
	if(type == STRSXP) {
	    Rboolean useBytes = FALSE;
	    Rboolean useUTF8 = FALSE;
	    Rboolean useCache = TRUE;
	    for(R_xlen_t i = 0; i < xlength(x); i++) {
		SEXP s = STRING_ELT(x, i);
		if(IS_BYTES(s)) {
		    useBytes = TRUE;
		    useUTF8 = FALSE;
		    break;
		}
		if(ENC_KNOWN(s)) {
		    useUTF8 = TRUE;
		}
		if(!IS_CACHED(s)) {
		    useCache = FALSE;
		    break;
		}
	    }
	    if(!useBytes || useCache) {
		for(int i = 0; i < LENGTH(table); i++) {
		    SEXP s = STRING_ELT(table, i);
		    if(IS_BYTES(s)) {
			useBytes = TRUE;
			useUTF8 = FALSE;
			break;
		    }
		    if(ENC_KNOWN(s)) {
			useUTF8 = TRUE;
		    }
		    if(!IS_CACHED(s)) {
			useCache = FALSE;
			break;
		    }
		}
	    }
	    data.useUTF8 = useUTF8;
	    data.useCache = useCache;
	}
	PROTECT(data.HashTable); nprot++;
	DoHashing(table, &data);
	if (incomp) UndoHashing(incomp, table, &data);
	ans = HashLookup(table, x, &data);
    }
    UNPROTECT(nprot);
    return ans;
} // end{ match5 }

SEXP matchE(SEXP itable, SEXP ix, int nmatch, SEXP env)
{
    return match5(itable, ix, nmatch, NULL, env);
}

/* used from other code, not here: */
SEXP match(SEXP itable, SEXP ix, int nmatch)
{
    return match5(itable, ix, nmatch, NULL, R_BaseEnv);
}


// .Internal(match(x, table, nomatch, incomparables)) :
SEXP attribute_hidden do_match(SEXP call, SEXP op, SEXP args, SEXP env)
{
    checkArity(op, args);

    if ((!isVector(CAR(args)) && !isNull(CAR(args)))
	|| (!isVector(CADR(args)) && !isNull(CADR(args))))
	error(_("'match' requires vector arguments"));

    int nomatch = asInteger(CADDR(args));
    SEXP incomp = CADDDR(args);

    if (isNull(incomp) || /* S has FALSE to mean empty */
	(length(incomp) == 1 && isLogical(incomp) &&
	 LOGICAL_ELT(incomp, 0) == 0))
	return match5(CADR(args), CAR(args), nomatch, NULL, env);
    else
	return match5(CADR(args), CAR(args), nomatch, incomp, env);
}

/* pmatch and charmatch return integer positions, so cannot be used
   for long vector tables */

/* Partial Matching of Strings */
/* Fully S Compatible version. */

/* Hmm, this was not all S compatible!	The desired behaviour is:
 * First do exact matches, and mark elements as used as they are matched
 *   unless dup_ok is true.
 * Then do partial matching, from left to right, using up the table
 *   unless dup_ok is true.  Multiple partial matches are ignored.
 * Empty strings are unmatched			      BDR 2000/2/16
 */

SEXP attribute_hidden do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP ans, input, target;
    int mtch, n_target, mtch_count, dups_ok, no_match;
    size_t temp;
    int *used = NULL, *ians;
    const char **in, **tar;
    Rboolean no_dups;
    Rboolean useBytes = FALSE, useUTF8 = FALSE;

    checkArity(op, args);
    input = CAR(args);
    R_xlen_t n_input = XLENGTH(input);
    target = CADR(args);
    n_target = LENGTH(target); // not allowed to be long
    no_match = asInteger(CADDR(args));
    dups_ok = asLogical(CADDDR(args));
    if (dups_ok == NA_LOGICAL)
	error(_("invalid '%s' argument"), "duplicates.ok");
    no_dups = !dups_ok;

    if (!isString(input) || !isString(target))
	error(_("argument is not of mode character"));

    if(no_dups) {
	used = (int *) R_alloc((size_t) n_target, sizeof(int));
	for (int j = 0; j < n_target; j++) used[j] = 0;
    }

    for(R_xlen_t i = 0; i < n_input; i++) {
	if(IS_BYTES(STRING_ELT(input, i))) {
	    useBytes = TRUE;
	    useUTF8 = FALSE;
	    break;
	} else if(ENC_KNOWN(STRING_ELT(input, i))) {
	    useUTF8 = TRUE;
	}
    }
    if(!useBytes) {
	for(R_xlen_t i = 0; i < n_target; i++) {
	    if(IS_BYTES(STRING_ELT(target, i))) {
		useBytes = TRUE;
		useUTF8 = FALSE;
		break;
	    } else if(ENC_KNOWN(STRING_ELT(target, i))) {
		useUTF8 = TRUE;
	    }
	}
    }

    in = (const char **) R_alloc((size_t) n_input, sizeof(char *));
    tar = (const char **) R_alloc((size_t) n_target, sizeof(char *));
    PROTECT(ans = allocVector(INTSXP, n_input));
    ians = INTEGER0(ans);
    if(useBytes) {
	for(R_xlen_t i = 0; i < n_input; i++) {
	    in[i] = CHAR(STRING_ELT(input, i));
	    ians[i] = 0;
	}
	for(int j = 0; j < n_target; j++)
	    tar[j] = CHAR(STRING_ELT(target, j));
    }
    else if(useUTF8) {
	for(R_xlen_t i = 0; i < n_input; i++) {
	    in[i] = translateCharUTF8(STRING_ELT(input, i));
	    ians[i] = 0;
	}
	for(int j = 0; j < n_target; j++)
	    tar[j] = translateCharUTF8(STRING_ELT(target, j));
    } else {
	for(R_xlen_t i = 0; i < n_input; i++) {
	    in[i] = translateChar(STRING_ELT(input, i));
	    ians[i] = 0;
	}
	for(int j = 0; j < n_target; j++)
	    tar[j] = translateChar(STRING_ELT(target, j));
    }
    /* First pass, exact matching */
    R_xlen_t nexact = 0;
    /* Compromise when hashing used changed in 3.2.0 (PR#15697) */
    if (n_input <= 100 || n_target <= 100) {
	for (R_xlen_t i = 0; i < n_input; i++) {
	    const char *ss = in[i];
	    if (strlen(ss) == 0) continue;
	    for (int j = 0; j < n_target; j++) {
		if (no_dups && used[j]) continue;
		if (strcmp(ss, tar[j]) == 0) {
		    ians[i] = j + 1;
		    if (no_dups) used[j] = 1;
		    nexact++;
		    break;
		}
	    }
	}
    } else {
	HashData data;
	HashTableSetup(target, &data, NA_INTEGER);
	data.useUTF8 = useUTF8;
	data.nomatch = 0;
	DoHashing(target, &data);
	for (R_xlen_t i = 0; i < n_input; i++) {
	    if (strlen(in[i]) == 0) /* don't look up "" */
		continue;
	    int j = Lookup(target, input, i, &data);
	    if ((j == 0) || (no_dups && used[j - 1])) continue;
	    if (no_dups) used[j - 1] = 1;
	    ians[i] = j;
	    nexact++;
	}
    }

    if(nexact < n_input) {
	/* Second pass, partial matching */
	for (R_xlen_t i = 0; i < n_input; i++) {
	    const char *ss;
	    if (ians[i]) continue;
	    ss = in[i];
	    temp = strlen(ss);
	    if (temp == 0) continue;
	    mtch = 0;
	    mtch_count = 0;
	    for (int j = 0; j < n_target; j++) {
		if (no_dups && used[j]) continue;
		if (strncmp(ss, tar[j], temp) == 0) {
		    mtch = j + 1;
		    mtch_count++;
		}
	    }
	    if (mtch > 0 && mtch_count == 1) {
		if(no_dups) used[mtch - 1] = 1;
		ians[i] = mtch;
	    }
	}
	/* Third pass, set no matches */
	for (R_xlen_t i = 0; i < n_input; i++)
	    if(ians[i] == 0) ians[i] = no_match;

    }
    UNPROTECT(1);
    return ans;
}


/* Partial Matching of Strings */
/* Based on Therneau's charmatch. */

SEXP attribute_hidden do_charmatch(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP ans, input, target;
    const char *ss, *st;
    Rboolean useBytes = FALSE, useUTF8 = FALSE;

    checkArity(op, args);

    input = CAR(args);
    R_xlen_t n_input = LENGTH(input);
    target = CADR(args);
    int n_target = LENGTH(target);

    if (!isString(input) || !isString(target))
	error(_("argument is not of mode character"));
    int no_match = asInteger(CADDR(args));

    for(R_xlen_t i = 0; i < n_input; i++) {
	if(IS_BYTES(STRING_ELT(input, i))) {
	    useBytes = TRUE;
	    useUTF8 = FALSE;
	    break;
	} else if(ENC_KNOWN(STRING_ELT(input, i))) {
	    useUTF8 = TRUE;
	}
    }
    if(!useBytes) {
	for(int i = 0; i < n_target; i++) {
	    if(IS_BYTES(STRING_ELT(target, i))) {
		useBytes = TRUE;
		useUTF8 = FALSE;
		break;
	    } else if(ENC_KNOWN(STRING_ELT(target, i))) {
		useUTF8 = TRUE;
	    }
	}
    }

    PROTECT(ans = allocVector(INTSXP, n_input));
    int *ians = INTEGER0(ans);

    const void *vmax = vmaxget();  // prudence: .Internal does this too.
    for(R_xlen_t i = 0; i < n_input; i++) {
	if(useBytes)
	    ss = CHAR(STRING_ELT(input, i));
	else if(useUTF8)
	    ss = translateCharUTF8(STRING_ELT(input, i));
	else
	    ss = translateChar(STRING_ELT(input, i));
	size_t temp = strlen(ss);
	int imatch = NA_INTEGER;
	Rboolean perfect = FALSE;
	/* we could reset vmax here too: worth it? */
	for(int j = 0; j < n_target; j++) {
	    if(useBytes)
		st = CHAR(STRING_ELT(target, j));
	    else if(useUTF8)
		st = translateCharUTF8(STRING_ELT(target, j));
	    else
		st = translateChar(STRING_ELT(target, j));
	    int k = strncmp(ss, st, temp);
	    if (k == 0) {
		if (strlen(st) == temp) {
		    if (perfect)
			imatch = 0;
		    else {
			perfect = TRUE;
			imatch = j + 1;
		    }
		}
		else if (!perfect) {
		    if (imatch == NA_INTEGER)
			imatch = j + 1;
		    else
			imatch = 0;
		}
	    }
	}
	ians[i] = (imatch == NA_INTEGER) ? no_match : imatch;
	vmaxset(vmax);
    }
    UNPROTECT(1);
    return ans;
}


/* Functions for matching the supplied arguments to the */
/* formal arguments of functions.  The returned value */
/* is a list with all components named. */

static SEXP StripUnmatched(SEXP s)
{
    if (s == R_NilValue) return s;

    if (CAR(s) == R_MissingArg && !ARGUSED(s) ) {
	return StripUnmatched(CDR(s));
    }
    else if (CAR(s) == R_DotsSymbol ) {
	return StripUnmatched(CDR(s));
    }
    else {
	SETCDR(s, StripUnmatched(CDR(s)));
	return s;
    }
}

static SEXP ExpandDots(SEXP s, int expdots)
{
    SEXP r;
    if (s == R_NilValue)
	return s;
    if (TYPEOF(CAR(s)) == DOTSXP ) {
	SET_TYPEOF(CAR(s), LISTSXP);	/* a safe mutation */
	if (expdots) {
	    r = CAR(s);
	    while (CDR(r) != R_NilValue ) {
		SET_ARGUSED(r, 1);
		r = CDR(r);
	    }
	    SET_ARGUSED(r, 1);
	    SETCDR(r, ExpandDots(CDR(s), expdots));
	    return CAR(s);
	}
    }
    else
	SET_ARGUSED(s, 0);
    SETCDR(s, ExpandDots(CDR(s), expdots));
    return s;
}
static SEXP subDots(SEXP rho)
{
    SEXP rval, dots, a, b, t;
    int len,i;

    dots = findVar(R_DotsSymbol, rho);

    if (dots == R_UnboundValue)
	error(_("... used in a situation where it does not exist"));

    if (dots == R_MissingArg)
	return dots;

    if (!isPairList(dots))
	error(_("... is not a pairlist"));

    len = length(dots);
    PROTECT(dots);
    PROTECT(rval=allocList(len));
    for(a = dots, b = rval, i = 1; i <= len; a = CDR(a), b = CDR(b), i++) {
	SET_TAG(b, TAG(a));
	t = CAR(a);
	while (TYPEOF(t) == PROMSXP)
	    t = PREXPR(t);
	if( isSymbol(t) || isLanguage(t) )
	    SETCAR(b, installDDVAL(i));
	else
	    SETCAR(b, t);
    }
    UNPROTECT(2);
    return rval;
}


SEXP attribute_hidden do_matchcall(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP formals, actuals, rlist;
    SEXP funcall, f, b, rval, sysp, t1, t2, tail;
//    RCNTXT *cptr;
    int expdots;

    checkArity(op,args);

    funcall = CADR(args);

    if (TYPEOF(funcall) == EXPRSXP)
	funcall = VECTOR_ELT(funcall, 0);

    if (TYPEOF(funcall) != LANGSXP)
	error(_("invalid '%s' argument"), "call");

    b = CAR(args);
    if (TYPEOF(b) != CLOSXP)
	error(_("invalid '%s' argument"), "definition");

    sysp = CAR(CDDDR(args));
    if (!isEnvironment(sysp))
	error(_("'envir' must be an environment"));

    /* Do we expand ... ? */

    expdots = asLogical(CAR(CDDR(args)));
    if (expdots == NA_LOGICAL)
	error(_("invalid '%s' argument"), "expand.dots");

    /* Get the formals and match the actual args */

    formals = FORMALS(b);
    PROTECT(actuals = shallow_duplicate(CDR(funcall)));

    /* If there is a ... symbol then expand it out in the sysp env
       We need to take some care since the ... might be in the middle
       of the actuals  */

    t2 = R_MissingArg;
    for (t1=actuals ; t1!=R_NilValue ; t1 = CDR(t1) ) {
	if (CAR(t1) == R_DotsSymbol) {
	    t2 = subDots(sysp);
	    break;
	}
    }
    /* now to splice t2 into the correct spot in actuals */
    if (t2 != R_MissingArg ) {	/* so we did something above */
	if( CAR(actuals) == R_DotsSymbol ) {
	    UNPROTECT(1);
	    actuals = listAppend(t2, CDR(actuals));
	    PROTECT(actuals);
	}
	else {
	    for(t1=actuals; t1!=R_NilValue; t1=CDR(t1)) {
		if( CADR(t1) == R_DotsSymbol ) {
		    tail = CDDR(t1);
		    SETCDR(t1, t2);
		    listAppend(actuals,tail);
		    break;
		}
	    }
	}
    } else { /* get rid of it */
	if( CAR(actuals) == R_DotsSymbol ) {
	    UNPROTECT(1);
	    actuals = CDR(actuals);
	    PROTECT(actuals);
	}
	else {
	    for(t1=actuals; t1!=R_NilValue; t1=CDR(t1)) {
		if( CADR(t1) == R_DotsSymbol ) {
		    tail = CDDR(t1);
		    SETCDR(t1, tail);
		    break;
		}
	    }
	}
    }
    rlist = matchArgs_RC(formals, actuals, call);

    /* Attach the argument names as tags */

    for (f = formals, b = rlist; b != R_NilValue; b = CDR(b), f = CDR(f)) {
	SET_TAG(b, TAG(f));
    }


    /* Handle the dots */

    PROTECT(rlist = ExpandDots(rlist, expdots));

    /* Eliminate any unmatched formals and any that match R_DotSymbol */
    /* This needs to be after ExpandDots as the DOTSXP might match ... */

    rlist = StripUnmatched(rlist);

    PROTECT(rval = allocSExp(LANGSXP));
    SETCAR(rval, lazy_duplicate(CAR(funcall)));
    SETCDR(rval, rlist);
    UNPROTECT(3);
    return rval;
}


#include <R_ext/RS.h> /* for Memzero */

#ifdef _AIX  /*some people just have to be different: is this still needed? */
#    include <memory.h>
#endif


static SEXP
rowsum(SEXP x, SEXP g, SEXP uniqueg, SEXP snarm, SEXP rn)
{
    SEXP matches,ans;
    int n, p, ng, narm;
    R_xlen_t offset = 0, offsetg = 0;
    HashData data;
    data.nomatch = 0;

    n = LENGTH(g);
    ng = length(uniqueg);
    narm = asLogical(snarm);
    if(narm == NA_LOGICAL) error("'na.rm' must be TRUE or FALSE");
    if(isMatrix(x)) p = ncols(x); else p = 1;

    HashTableSetup(uniqueg, &data, NA_INTEGER);
    PROTECT(data.HashTable);
    DoHashing(uniqueg, &data);
    PROTECT(matches = HashLookup(uniqueg, g, &data));
    int *pmatches = INTEGER(matches);

    PROTECT(ans = allocMatrix(TYPEOF(x), ng, p));

    switch(TYPEOF(x)){
    case REALSXP:
	Memzero(REAL0(ans), ng*p);
	for(int i = 0; i < p; i++) {
	    double *pa = REAL0(ans);
	    for(int j = 0; j < n; j++) {
		double xjpo = REAL_ELT(x, j + offset);
		if(!narm || !ISNAN(xjpo))
		    pa[pmatches[j] - 1 + offsetg] += xjpo;
	    }
	    offset += n;
	    offsetg += ng;
	}
	break;
    case INTSXP:
	Memzero(INTEGER0(ans), ng*p);
	for(int i = 0; i < p; i++) {
	    int *pa = INTEGER0(ans);
	    for(int j = 0; j < n; j++) {
		int xjpo = INTEGER_ELT(x, j + offset);
		if (xjpo == NA_INTEGER) {
		    if(!narm)
			pa[pmatches[j] - 1 + offsetg] = NA_INTEGER;
		} else if (pa[pmatches[j] - 1 + offsetg] != NA_INTEGER) {
		    /* check for integer overflows */
		    int itmp = pa[pmatches[j] - 1 + offsetg];
		    double dtmp = itmp;
		    dtmp += xjpo;
		    if (dtmp < INT_MIN || dtmp > INT_MAX) itmp = NA_INTEGER;
		    else itmp += xjpo;
		    pa[pmatches[j] - 1 + offsetg] = itmp;
		}
	    }
	    offset += n;
	    offsetg += ng;
	}
	break;
    default:
	error("non-numeric matrix in rowsum(): this should not happen");
    }
    if (TYPEOF(rn) != STRSXP) error("row names are not character");
    SEXP dn = allocVector(VECSXP, 2), dn2, dn3;
    setAttrib(ans, R_DimNamesSymbol, dn);
    SET_VECTOR_ELT(dn, 0, rn);
    dn2 = getAttrib(x, R_DimNamesSymbol);
    if(length(dn2) >= 2 &&
       !isNull(dn3 = VECTOR_ELT(dn2, 1))) SET_VECTOR_ELT(dn, 1, dn3);

    UNPROTECT(3); /* HashTable, matches, ans */
    return ans;
}

static SEXP
rowsum_df(SEXP x, SEXP g, SEXP uniqueg, SEXP snarm, SEXP rn)
{
    SEXP matches,ans,col,xcol;
    int p, narm;
    HashData data;
    data.nomatch = 0;

    R_xlen_t n = XLENGTH(g);
    p = LENGTH(x);
    R_xlen_t ng = XLENGTH(uniqueg);
    narm = asLogical(snarm);
    if(narm == NA_LOGICAL) error("'na.rm' must be TRUE or FALSE");

    HashTableSetup(uniqueg, &data, NA_INTEGER);
    PROTECT(data.HashTable);
    DoHashing(uniqueg, &data);
    PROTECT(matches = HashLookup(uniqueg, g, &data));
    int *pmatches = INTEGER(matches);

    PROTECT(ans = allocVector(VECSXP, p));

    for(int i = 0; i < p; i++) {
	xcol = VECTOR_ELT(x,i);
	if (!isNumeric(xcol))
	    error(_("non-numeric data frame in rowsum"));
	switch(TYPEOF(xcol)){
	case REALSXP:
	    PROTECT(col = allocVector(REALSXP,ng));
	    Memzero(REAL0(col), ng);
	    for(R_xlen_t j = 0; j < n; j++) {
		double xj = REAL_ELT(xcol, j);
		if(!narm || !ISNAN(xj))
		    REAL0(col)[pmatches[j] - 1] += xj;
	    }
	    SET_VECTOR_ELT(ans,i,col);
	    UNPROTECT(1);
	    break;
	case INTSXP:
	    PROTECT(col = allocVector(INTSXP, ng));
	    Memzero(INTEGER0(col), ng);
	    for(R_xlen_t j = 0; j < n; j++) {
		int xj = INTEGER_ELT(xcol, j);
		if (xj == NA_INTEGER) {
		    if(!narm)
			INTEGER0(col)[pmatches[j] - 1] = NA_INTEGER;
		} else if (INTEGER0(col)[pmatches[j] - 1] != NA_INTEGER) {
		    int itmp = INTEGER0(col)[pmatches[j] - 1];
		    double dtmp = itmp;
		    dtmp += xj;
		    if (dtmp < INT_MIN || dtmp > INT_MAX) itmp = NA_INTEGER;
		    else itmp += xj;
		    INTEGER0(col)[pmatches[j] - 1] = itmp;
		}
	    }
	    SET_VECTOR_ELT(ans, i, col);
	    UNPROTECT(1);
	    break;

	default:
	    error(_("this cannot happen"));
	}
    }
    namesgets(ans, getAttrib(x, R_NamesSymbol));
    if (TYPEOF(rn) != STRSXP) error("row names are not character");
    setAttrib(ans, R_RowNamesSymbol, rn);
    classgets(ans, mkString("data.frame"));

    UNPROTECT(3); /* HashTable, matches, ans */
    return ans;
}

SEXP attribute_hidden do_rowsum(SEXP call, SEXP op, SEXP args, SEXP env)
{
    checkArity(op, args);
    if(PRIMVAL(op) == 1)
	return rowsum_df(CAR(args), CADR(args), CADDR(args), CADDDR(args),
			 CAD4R(args));
    else
	return rowsum(CAR(args), CADR(args), CADDR(args), CADDDR(args),
		      CAD4R(args));
}


/* returns 1-based duplicate no */
static int isDuplicated2(SEXP x, int indx, HashData *d)
{
    int *h = HTDATA_INT(d);
    hlen i = d->hash(x, indx, d);
    while (h[i] != NIL) {
	if (d->equal(x, h[i], x, indx))
	    return h[i] + 1;
	i = (i + 1) % d->M;
    }
    h[i] = indx;
    return 0;
}

static SEXP duplicated2(SEXP x, HashData *d)
{
    SEXP ans;
    int i, n;

    n = LENGTH(x);
    HashTableSetup(x, d, NA_INTEGER);
    PROTECT(d->HashTable);
    PROTECT(ans = allocVector(INTSXP, n));

    int *h = HTDATA_INT(d);
    int *v = INTEGER0(ans);
    for (i = 0; i < d->M; i++) h[i] = NIL;
    for (i = 0; i < n; i++) {
//	if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt();
	v[i] = isDuplicated2(x, i, d);
    }
    UNPROTECT(2);
    return ans;
}

SEXP attribute_hidden do_makeunique(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP names, sep, ans, dup, newx;
    int i, cnt, *cnts, dp;
    int n, len, maxlen = 0;
    HashData data;
    const char *csep, *ss;
    const void *vmax;

    checkArity(op, args);
    names = CAR(args);
    if(!isString(names))
	error(_("'names' must be a character vector"));
    n = LENGTH(names);
    sep = CADR(args);
    if(!isString(sep) || LENGTH(sep) != 1)
	error(_("'%s' must be a character string"), "sep");
    csep = translateChar(STRING_ELT(sep, 0));
    PROTECT(ans = allocVector(STRSXP, n));
    vmax = vmaxget();
    for(i = 0; i < n; i++) {
	SET_STRING_ELT(ans, i, STRING_ELT(names, i));
	len = (int) strlen(translateChar(STRING_ELT(names, i)));
	if(len > maxlen) maxlen = len;
	vmaxset(vmax);
    }
    if(n > 1) {
	/* +2 for terminator and rounding error */
	char buf[maxlen + (int) strlen(csep)
		 + (int) (log((double)n)/log(10.0)) + 2];
	if(n < 10000) {
	    R_CheckStack2((size_t)n * sizeof(int));
	    cnts = (int *) alloca(((size_t) n) * sizeof(int));
	} else {
	    /* This is going to be slow so use expensive allocation
	       that will be recovered if interrupted. */
	    cnts = (int *) R_alloc((size_t) n,	sizeof(int));
	}
	for(i = 0; i < n; i++) cnts[i] = 1;
	data.nomatch = 0;
	PROTECT(newx = allocVector(STRSXP, 1));
	PROTECT(dup = duplicated2(names, &data));
	PROTECT(data.HashTable);
	vmax = vmaxget();
	for(i = 1; i < n; i++) { /* first cannot be a duplicate */
	    dp = INTEGER_ELT(dup, i); /* 1-based number of first occurrence */
	    if(dp == 0) continue;
	    ss = translateChar(STRING_ELT(names, i));
	    /* Try appending 1,2,3, ..., n-1 until it is not already in use */
	    for(cnt = cnts[dp - 1]; cnt < n; cnt++) {
		sprintf(buf, "%s%s%d", ss, csep, cnt);
		SET_STRING_ELT(newx, 0, mkChar(buf));
		if(Lookup(ans, newx, 0, &data) == data.nomatch) break;
	    }
	    SET_STRING_ELT(ans, i, STRING_ELT(newx, 0));
	    /* insert it */ (void) isDuplicated(ans, i, &data);
	    cnts[dp - 1] = cnt+1; /* cache the first unused cnt value */
	    vmaxset(vmax);
	}
	UNPROTECT(3);
    }
    UNPROTECT(1);
    return ans;
}

/* Use hashing to improve object.size. Here we want equal CHARSXPs,
   not equal contents. */

static int csequal(SEXP x, R_xlen_t i, SEXP y, R_xlen_t j)
{
    return STRING_ELT(x, i) == STRING_ELT(y, j);
}

static void HashTableSetup1(SEXP x, HashData *d)
{
    d->hash = cshash;
    d->equal = csequal;
#ifdef LONG_VECTOR_SUPPORT
    d->isLong = FALSE;
#endif
    MKsetup(XLENGTH(x), d, NA_INTEGER);
    d->HashTable = allocVector(INTSXP, (R_xlen_t) d->M);
    for (R_xlen_t i = 0; i < d->M; i++) HTDATA_INT(d)[i] = NIL;
}

/* used in utils */
SEXP Rf_csduplicated(SEXP x)
{
    if(TYPEOF(x) != STRSXP)
	error("C function 'csduplicated' not called on a STRSXP");
    R_xlen_t n = XLENGTH(x);
    HashData data;
    HashTableSetup1(x, &data);
    PROTECT(data.HashTable);
    SEXP ans = PROTECT(allocVector(LGLSXP, n));
    int *v = LOGICAL(ans);

    for (R_xlen_t i = 0; i < n; i++) v[i] = isDuplicated(x, i, &data);

    UNPROTECT(2);
    return ans;
}

#include <R_ext/Random.h>

// sample.int(.) --> .Internal(sample2(n, size)) :
SEXP attribute_hidden do_sample2(SEXP call, SEXP op, SEXP args, SEXP env)
{
    checkArity(op, args);
    SEXP ans;
    double dn = asReal(CAR(args));
    int k = asInteger(CADR(args));
    if (!R_FINITE(dn) || dn < 0 || dn > 4.5e15 || (k > 0 && dn == 0))
	error(_("invalid first argument"));
    if (k < 0) error(_("invalid '%s' argument"), "size");
    if (k > dn/2) error("This algorithm is for size <= n/2");
    HashData data;
    GetRNGstate();
    if (dn > INT_MAX) {
	ans = PROTECT(allocVector(REALSXP, k));
	double *ry = REAL0(ans);
	HashTableSetup(ans, &data, NA_INTEGER);
	PROTECT(data.HashTable);
	for (int i = 0; i < k; i++)
	    for(int j = 0; j < 100; j++) { // average < 2
		ry[i] = R_unif_index(dn) + 1;
		if(!isDuplicated(ans, i, &data)) break;
	    }
   } else {
	ans = PROTECT(allocVector(INTSXP, k));
	int *iy = INTEGER0(ans);
	HashTableSetup(ans, &data, NA_INTEGER);
	PROTECT(data.HashTable);
	for (int i = 0; i < k; i++)
	    for(int j = 0; j < 100; j++) { // average < 2
		iy[i] = (int)(R_unif_index(dn) + 1);
		if(!isDuplicated(ans, i, &data)) break;
	    }
    }
    PutRNGstate();
    UNPROTECT(2);
    return ans;
}
