| /* |
| * R : A Computer Language for Statistical Data Analysis |
| * Copyright (C) 2016 The R Core Team |
| * |
| * Based on code donated from the data.table package |
| * (C) 2006-2015 Matt Dowle and Arun Srinivasan. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, a copy is available at |
| * https://www.R-project.org/Licenses/ |
| */ |
| |
| #ifdef HAVE_CONFIG_H |
| #include <config.h> |
| #endif |
| |
| #include <Defn.h> |
| #include <Internal.h> |
| |
| /* It would be better to find a way to avoid abusing TRUELENGTH, but |
| in the meantime replace TRUELENGTH/SET_TRUELENGTH with |
| TRLEN/SET_TRLEN that cast to int to avoid warnings. */ |
| #define TRLEN(x) ((int) TRUELENGTH(x)) |
| #define SET_TRLEN(x, v) SET_TRUELENGTH(x, ((int) (v))) |
| |
| // gs = groupsizes e.g.23, 12, 87, 2, 1, 34,... |
| static int *gs[2] = { NULL }; |
| //two vectors flip flopped:flip and 1 - flip |
| static int flip = 0; |
| //allocated stack size |
| static int gsalloc[2] = { 0 }; |
| static int gsngrp[2] = { 0 }; |
| //max grpn so far |
| static int gsmax[2] = { 0 }; |
| //max size of stack, set by do_radixsort to nrows |
| static int gsmaxalloc = 0; |
| //switched off for last arg unless retGrp==TRUE |
| static Rboolean stackgrps = TRUE; |
| // TRUE for setkey, FALSE for by= |
| static Rboolean sortStr = TRUE; |
| // used by do_radixsort and [i|d|c]sort to reorder order. |
| // not needed if narg==1 |
| static int *newo = NULL; |
| // =1, 0, -1 for TRUE, NA, FALSE respectively. |
| // Value rewritten inside do_radixsort(). |
| static int nalast = -1; |
| // =1, -1 for ascending and descending order respectively |
| static int order = 1; |
| |
| //replaced n < 200 with n < N_SMALL.Easier to change later |
| #define N_SMALL 200 |
| // range limit for counting sort. Should be less than INT_MAX |
| // (see setRange for details) |
| #define N_RANGE 100000 |
| |
| static SEXP *saveds = NULL; |
| static R_len_t *savedtl = NULL, nalloc = 0, nsaved = 0; |
| |
| static void savetl_init() |
| { |
| if (nsaved || nalloc || saveds || savedtl) |
| error("Internal error: savetl_init checks failed (%d %d %p %p).", |
| nsaved, nalloc, saveds, savedtl); |
| nsaved = 0; |
| nalloc = 100; |
| saveds = (SEXP *) malloc(nalloc * sizeof(SEXP)); |
| if (saveds == NULL) |
| error("Could not allocate saveds in savetl_init"); |
| savedtl = (R_len_t *) malloc(nalloc * sizeof(R_len_t)); |
| if (savedtl == NULL) { |
| free(saveds); |
| error("Could not allocate saveds in savetl_init"); |
| } |
| } |
| |
| static void savetl_end() |
| { |
| // Can get called if nothing has been saved yet (nsaved == 0), or |
| // even if _init() has not been called yet (pointers NULL). Such as |
| // to clear up before error. Also, it might be that nothing needed |
| // to be saved anyway. |
| for (int i = 0; i < nsaved; i++) |
| SET_TRLEN(saveds[i], savedtl[i]); |
| free(saveds); // does nothing on NULL input |
| free(savedtl); |
| nsaved = nalloc = 0; |
| saveds = NULL; |
| savedtl = NULL; |
| } |
| |
| |
| static void savetl(SEXP s) |
| { |
| if (nsaved >= nalloc) { |
| nalloc *= 2; |
| char *tmp; |
| tmp = (char *) realloc(saveds, nalloc * sizeof(SEXP)); |
| if (tmp == NULL) { |
| savetl_end(); |
| error("Could not realloc saveds in savetl"); |
| } |
| saveds = (SEXP *) tmp; |
| tmp = (char *) realloc(savedtl, nalloc * sizeof(R_len_t)); |
| if (tmp == NULL) { |
| savetl_end(); |
| error("Could not realloc savedtl in savetl"); |
| } |
| savedtl = (R_len_t *) tmp; |
| } |
| saveds[nsaved] = s; |
| savedtl[nsaved] = TRLEN(s); |
| nsaved++; |
| } |
| |
| // http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html#Swallowing-the-Semicolon |
| #define Error(...) do {savetl_end(); error(__VA_ARGS__);} while(0) |
| #undef warning |
| // since it can be turned to error via warn = 2 |
| #define warning(...) Do not use warning in this file |
| /* use malloc/realloc (not Calloc/Realloc) so we can trap errors |
| and call savetl_end() before the error(). */ |
| |
| static void growstack(uint64_t newlen) |
| { |
| // no link to icount range restriction, |
| // just 100,000 seems a good minimum at 0.4MB |
| if (newlen == 0) newlen = 100000; |
| if (newlen > gsmaxalloc) newlen = gsmaxalloc; |
| gs[flip] = realloc(gs[flip], newlen * sizeof(int)); |
| if (gs[flip] == NULL) |
| Error("Failed to realloc working memory stack to %d*4bytes (flip=%d)", |
| (int)newlen /* no bigger than gsmaxalloc */, flip); |
| gsalloc[flip] = (int)newlen; |
| } |
| |
| static void push(int x) |
| { |
| if (!stackgrps || x == 0) |
| return; |
| if (gsalloc[flip] == gsngrp[flip]) |
| growstack((uint64_t)(gsngrp[flip]) * 2); |
| gs[flip][gsngrp[flip]++] = x; |
| if (x > gsmax[flip]) |
| gsmax[flip] = x; |
| } |
| |
| static void mpush(int x, int n) |
| { |
| if (!stackgrps || x == 0) |
| return; |
| if (gsalloc[flip] < gsngrp[flip] + n) |
| growstack(((uint64_t)(gsngrp[flip]) + n) * 2); |
| for (int i = 0; i < n; i++) |
| gs[flip][gsngrp[flip]++] = x; |
| if (x > gsmax[flip]) |
| gsmax[flip] = x; |
| } |
| |
| static void flipflop() |
| { |
| flip = 1 - flip; |
| gsngrp[flip] = 0; |
| gsmax[flip] = 0; |
| if (gsalloc[flip] < gsalloc[1 - flip]) |
| growstack((uint64_t)(gsalloc[1 - flip]) * 2); |
| } |
| |
| static void gsfree() |
| { |
| free(gs[0]); |
| free(gs[1]); |
| gs[0] = NULL; |
| gs[1] = NULL; |
| flip = 0; |
| gsalloc[0] = gsalloc[1] = 0; |
| gsngrp[0] = gsngrp[1] = 0; |
| gsmax[0] = gsmax[1] = 0; |
| gsmaxalloc = 0; |
| } |
| |
| #ifdef TIMING_ON |
| // many calls to clock() can be expensive, |
| // hence compiled out rather than switch(verbose) |
| #include <time.h> |
| #define NBLOCK 20 |
| static clock_t tblock[NBLOCK], tstart; |
| static int nblock[NBLOCK]; |
| #define TBEG() tstart = clock(); |
| #define TEND(i) tblock[i] += clock()-tstart; nblock[i]++; tstart = clock(); |
| #else |
| #define TBEG() |
| #define TEND(i) |
| #endif |
| |
| static int range, xmin; // used by both icount and do_radixsort |
| static void setRange(int *x, int n) |
| { |
| xmin = NA_INTEGER; |
| int xmax = NA_INTEGER; |
| double overflow; |
| |
| int i = 0; |
| while(i < n && x[i] == NA_INTEGER) i++; |
| if (i < n) xmax = xmin = x[i]; |
| for (; i < n; i++) { |
| int tmp = x[i]; |
| if (tmp == NA_INTEGER) |
| continue; |
| if (tmp > xmax) |
| xmax = tmp; |
| else if (tmp < xmin) |
| xmin = tmp; |
| } |
| // all NAs, nothing to do |
| if (xmin == NA_INTEGER) { |
| range = NA_INTEGER; |
| return; |
| } |
| // ex: x=c(-2147483647L, NA_integer_, 1L) results in overflowing int range. |
| overflow = (double) xmax - (double) xmin + 1; |
| // detect and force iradix here, since icount is out of the picture |
| if (overflow > INT_MAX) { |
| range = INT_MAX; |
| return; |
| } |
| |
| range = xmax - xmin + 1; |
| |
| return; |
| } |
| |
| // x*order results in integer overflow when -1*NA, |
| // so careful to avoid that here : |
| static inline int icheck(int x) |
| { |
| // if nalast == 1, NAs must go last. |
| return ((nalast != 1) ? ((x != NA_INTEGER) ? x*order : x) : |
| ((x != NA_INTEGER) ? (x*order) - 1 : INT_MAX)); |
| } |
| |
| |
| static void icount(int *x, int *o, int n) |
| /* Counting sort: |
| 1. Places the ordering into o directly, overwriting whatever was there |
| 2. Doesn't change x |
| 3. Pushes group sizes onto stack |
| */ |
| { |
| int napos = range; // NA's always counted in last bin |
| // static is IMPORTANT, counting sort is called repetitively. |
| static unsigned int counts[N_RANGE + 1] = { 0 }; |
| /* counts are set back to 0 at the end efficiently. 1e5 = 0.4MB i.e |
| tiny. We'll only use the front part of it, as large as range. So it's |
| just reserving space, not using it. Have defined N_RANGE to be 100000.*/ |
| if (range > N_RANGE) |
| Error("Internal error: range = %d; isorted cannot handle range > %d", |
| range, N_RANGE); |
| for (int i = 0; i < n; i++) { |
| // For nalast=NA case, we won't remove/skip NAs, rather set 'o' indices |
| // to 0. subset will skip them. We can't know how many NAs to skip |
| // beforehand - i.e. while allocating "ans" vector |
| if (x[i] == NA_INTEGER) |
| counts[napos]++; |
| else |
| counts[x[i] - xmin]++; |
| } |
| |
| int tmp = 0; |
| if (nalast != 1 && counts[napos]) { |
| push(counts[napos]); |
| tmp += counts[napos]; |
| } |
| int w = (order==1) ? 0 : range-1; |
| for (int i = 0; i < range; i++) |
| /* no point in adding tmp < n && i <= range, since range includes max, |
| need to go to max, unlike 256 loops elsewhere in radixsort.c */ |
| { |
| if (counts[w]) { |
| // cumulate but not through 0's. |
| // Helps resetting zeros when n < range, below. |
| push(counts[w]); |
| counts[w] = (tmp += counts[w]); |
| } |
| w += order; // order is +1 or -1 |
| } |
| if (nalast == 1 && counts[napos]) { |
| push(counts[napos]); |
| counts[napos] = (tmp += counts[napos]); |
| } |
| for (int i = n - 1; i >= 0; i--) { |
| // This way na.last=TRUE/FALSE cases will have just a |
| // single if-check overhead. |
| o[--counts[(x[i] == NA_INTEGER) ? napos : |
| x[i] - xmin]] = (int) (i + 1); |
| } |
| // nalast = 1, -1 are both taken care already. |
| if (nalast == 0) |
| // nalast = 0 is dealt with separately as it just sets o to 0 |
| for (int i = 0; i < n; i++) |
| o[i] = (x[o[i] - 1] == NA_INTEGER) ? 0 : o[i]; |
| // at those indices where x is NA. x[o[i]-1] because x is not modifed here. |
| |
| /* counts were cumulated above so leaves non zero. |
| Faster to clear up now ready for next time. */ |
| if (n < range) { |
| /* Many zeros in counts already. Loop through n instead, |
| doesn't matter if we set to 0 several times on any repeats */ |
| counts[napos] = 0; |
| for (int i = 0; i < n; i++) { |
| if (x[i] != NA_INTEGER) |
| counts[x[i] - xmin] = 0; |
| } |
| } else |
| memset(counts, 0, (range + 1) * sizeof(int)); |
| return; |
| } |
| |
| static void iinsert(int *x, int *o, int n) |
| /* orders both x and o by reference in-place. Fast for small vectors, |
| low overhead. don't be tempted to binsearch backwards here, have |
| to shift anyway; many memmove would have overhead and do the same |
| thing. */ |
| /* when nalast == 0, iinsert will be called only from within iradix, |
| where o[.] = 0 for x[.]=NA is already taken care of */ |
| { |
| for (int i = 1; i < n; i++) { |
| int xtmp = x[i]; |
| if (xtmp < x[i - 1]) { |
| int j = i - 1; |
| int otmp = o[i]; |
| while (j >= 0 && xtmp < x[j]) { |
| x[j + 1] = x[j]; |
| o[j + 1] = o[j]; |
| j--; |
| } |
| x[j + 1] = xtmp; |
| o[j + 1] = otmp; |
| } |
| } |
| int tt = 0; |
| for (int i = 1; i < n; i++) |
| if (x[i] == x[i - 1]) |
| tt++; |
| else { |
| push(tt + 1); |
| tt = 0; |
| } |
| push(tt + 1); |
| } |
| |
| /* |
| iradix is a counting sort performed forwards from MSB to LSB, with |
| some tricks and short circuits building on Terdiman and Herf. |
| http://codercorner.com/RadixSortRevisited.htm |
| http://stereopsis.com/radix.html |
| |
| ~ Note they are LSD, but we do MSD here which is more complicated, |
| for efficiency. |
| ~ NAs need no special treatment as NA is the most negative integer |
| in R (checked in init.c once, for efficiency) so NA naturally sort |
| to the front. |
| ~ Using 4-pass 1-byte radix for the following reasons : |
| |
| * 11-bit (Herf) reduces to 3-passes (3*11=33) yes, and LSD need |
| random access to o vector in each pass 1:n so reduction in passes is |
| good, but Terdiman's idea to skip a radix if all values are equal |
| occurs less the wider the radix. A narrower radix benefits more from that. |
| * That's detected here using a single 'if', an improvement on |
| Terdiman's exposition of a single loop to find if any count==n |
| * The pass through counts bites when radix is wider, |
| because we repetitively call this iradix from fastorder forwards. |
| * Herf's parallel histogramming is neat. In 4-pass 1-byte it needs |
| 4*256 storage, that's tiny, and can be static. 4*256 << 3*2048. |
| 4-pass 1-byte is simpler and tighter code than 3-pass 11-bit, |
| giving modern optimizers and modern CPUs a better chance. |
| We may get lucky anyway, if one or two of the 4-passes are skipped. |
| |
| Recall: there are no comparisons at all in counting and radix, |
| there is wide random access in each LSD radix pass, though. |
| */ |
| |
| // 4 are used for iradix, 8 for dradix and i64radix |
| static unsigned int radixcounts[8][257] = { {0} }; |
| |
| static int skip[8]; |
| /* global because iradix and iradix_r interact and are called repetitively. |
| counts are set back to 0 after each use, to benefit from skipped radix. */ |
| static void *radix_xsub = NULL; |
| static size_t radix_xsuballoc = 0; |
| |
| static int *otmp = NULL, otmp_alloc = 0; |
| static void alloc_otmp(int n) |
| { |
| if (otmp_alloc >= n) |
| return; |
| otmp = (int *) realloc(otmp, n * sizeof(int)); |
| if (otmp == NULL) |
| Error("Failed to allocate working memory for otmp. Requested %d * %d bytes", |
| n, sizeof(int)); |
| otmp_alloc = n; |
| } |
| |
| // TO DO: save xtmp if possible, see allocs in do_radixsort |
| static void *xtmp = NULL; |
| static int xtmp_alloc = 0; |
| // TO DO: currently always the largest type (double) but |
| // could be int if that's all that's needed |
| static void alloc_xtmp(int n) |
| { |
| if (xtmp_alloc >= n) |
| return; |
| xtmp = (double *) realloc(xtmp, n * sizeof(double)); |
| if (xtmp == NULL) |
| Error("Failed to allocate working memory for xtmp. Requested %d * %d bytes", |
| n, sizeof(double)); |
| xtmp_alloc = n; |
| } |
| |
| static void iradix_r(int *xsub, int *osub, int n, int radix); |
| |
| static void iradix(int *x, int *o, int n) |
| /* As icount : |
| Places the ordering into o directly, overwriting whatever was there |
| Doesn't change x |
| Pushes group sizes onto stack */ |
| { |
| int nextradix, itmp, thisgrpn, maxgrpn; |
| unsigned int thisx = 0, shift, *thiscounts; |
| |
| for (int i = 0; i < n;i++) { |
| /* parallel histogramming pass; i.e. count occurrences of |
| 0:255 in each byte. Sequential so almost negligible. */ |
| // relies on overflow behaviour. And shouldn't -INT_MIN be up in iradix? |
| thisx = (unsigned int) (icheck(x[i])) - INT_MIN; |
| // unrolled since inside n-loop |
| radixcounts[0][thisx & 0xFF]++; |
| radixcounts[1][thisx >> 8 & 0xFF]++; |
| radixcounts[2][thisx >> 16 & 0xFF]++; |
| radixcounts[3][thisx >> 24 & 0xFF]++; |
| } |
| for (int radix = 0; radix < 4; radix++) { |
| /* any(count == n) => all radix must have been that value => |
| last x (still thisx) was that value */ |
| int i = thisx >> (radix*8) & 0xFF; |
| skip[radix] = radixcounts[radix][i] == n; |
| // clear it now, the other counts must be 0 already |
| if (skip[radix]) |
| radixcounts[radix][i] = 0; |
| } |
| |
| int radix = 3; // MSD |
| while (radix >= 0 && skip[radix]) radix--; |
| if (radix == -1) { // All radix are skipped; one number repeated n times. |
| if (nalast == 0 && x[0] == NA_INTEGER) |
| // all values are identical. return 0 if nalast=0 & all NA |
| // because of 'return', have to take care of it here. |
| for (int i = 0; i < n; i++) |
| o[i] = 0; |
| else |
| for (int i = 0; i < n; i++) |
| o[i] = (i + 1); |
| push(n); |
| return; |
| } |
| for (int i = radix - 1; i >= 0; i--) { |
| if (!skip[i]) |
| memset(radixcounts[i], 0, 257 * sizeof(unsigned int)); |
| /* clear the counts as we only needed the parallel pass for skip[] |
| and we're going to use radixcounts again below. Can't use parallel |
| lower counts in MSD radix, unlike LSD. */ |
| } |
| thiscounts = radixcounts[radix]; |
| shift = radix * 8; |
| |
| itmp = thiscounts[0]; |
| maxgrpn = itmp; |
| for (int i = 1; itmp < n && i < 256; i++) { |
| thisgrpn = thiscounts[i]; |
| if (thisgrpn) { |
| // don't cummulate through 0s, important below. |
| if (thisgrpn > maxgrpn) |
| maxgrpn = thisgrpn; |
| thiscounts[i] = (itmp += thisgrpn); |
| } |
| } |
| for (int i = n - 1; i >= 0; i--) { |
| thisx = ((unsigned int) (icheck(x[i])) - INT_MIN) >> shift & 0xFF; |
| o[--thiscounts[thisx]] = i + 1; |
| } |
| |
| if (radix_xsuballoc < maxgrpn) { |
| // The largest group according to the first non-skipped radix, |
| // so could be big (if radix is needed on first arg) |
| // TO DO: could include extra bits to divide the first radix |
| // up more. Often the MSD has groups in just 0-4 out of 256. |
| // free'd at the end of do_radixsort once we're done calling iradix |
| // repetitively |
| radix_xsub = (int *) realloc(radix_xsub, maxgrpn * sizeof(double)); |
| if (!radix_xsub) |
| Error("Failed to realloc working memory %d*8bytes (xsub in iradix), radix=%d", |
| maxgrpn, radix); |
| radix_xsuballoc = maxgrpn; |
| } |
| |
| // TO DO: can we leave this to do_radixsort and remove these calls?? |
| alloc_otmp(maxgrpn); |
| // TO DO: doesn't need to be sizeof(double) always, see inside |
| alloc_xtmp(maxgrpn); |
| |
| nextradix = radix - 1; |
| while (nextradix >= 0 && skip[nextradix]) nextradix--; |
| if (thiscounts[0] != 0) |
| Error("Internal error. thiscounts[0]=%d but should have been decremented to 0. dradix=%d", |
| thiscounts[0], radix); |
| thiscounts[256] = n; |
| itmp = 0; |
| for (int i = 1; itmp < n && i <= 256; i++) { |
| if (thiscounts[i] == 0) continue; |
| // undo cumulate; i.e. diff |
| thisgrpn = thiscounts[i] - itmp; |
| if (thisgrpn == 1 || nextradix == -1) { |
| push(thisgrpn); |
| } else { |
| for (int j = 0; j < thisgrpn; j++) |
| // this is why this xsub here can't be the same memory as |
| // xsub in do_radixsort. |
| ((int *)radix_xsub)[j] = icheck(x[o[itmp+j]-1]); |
| // changes xsub and o by reference recursively. |
| iradix_r(radix_xsub, o+itmp, thisgrpn, nextradix); |
| } |
| itmp = thiscounts[i]; |
| thiscounts[i] = 0; |
| } |
| if (nalast == 0) // nalast = 1, -1 are both taken care already. |
| // nalast = 0 is dealt with separately as it just sets o to 0 |
| for (int i = 0; i < n; i++) |
| o[i] = (x[o[i] - 1] == NA_INTEGER) ? 0 : o[i]; |
| // at those indices where x is NA. x[o[i]-1] because x is not |
| // modified by reference unlike iinsert or iradix_r |
| } |
| |
| static void iradix_r(int *xsub, int *osub, int n, int radix) |
| // xsub is a recursive offset into xsub working memory above in |
| // iradix, reordered by reference. osub is a an offset into the main |
| // answer o, reordered by reference. radix iterates 3,2,1,0 |
| { |
| int j, itmp, thisx, thisgrpn, nextradix, shift; |
| unsigned int *thiscounts; |
| |
| // N_SMALL=200 is guess based on limited testing. Needs |
| // calibrate(). Was 50 based on sum(1:50)=1275 worst -vs- 256 |
| // cummulate + 256 memset + allowance since reverse order is |
| // unlikely. when nalast==0, iinsert will be called only from |
| // within iradix. |
| if (n < N_SMALL) { |
| iinsert(xsub, osub, n); |
| return; |
| } |
| |
| shift = radix * 8; |
| thiscounts = radixcounts[radix]; |
| |
| for (int i = 0; i < n; i++) { |
| thisx = (unsigned int) xsub[i] - INT_MIN; // sequential in xsub |
| thiscounts[thisx >> shift & 0xFF]++; |
| } |
| itmp = thiscounts[0]; |
| for (int i = 1; itmp < n && i < 256; i++) |
| // don't cummulate through 0s, important below |
| if (thiscounts[i]) |
| thiscounts[i] = (itmp += thiscounts[i]); |
| for (int i = n - 1; i >= 0; i--) { |
| thisx = ((unsigned int) xsub[i] - INT_MIN) >> shift & 0xFF; |
| j = --thiscounts[thisx]; |
| otmp[j] = osub[i]; |
| ((int *) xtmp)[j] = xsub[i]; |
| } |
| memcpy(osub, otmp, n * sizeof(int)); |
| memcpy(xsub, xtmp, n * sizeof(int)); |
| |
| nextradix = radix - 1; |
| while (nextradix >= 0 && skip[nextradix]) nextradix--; |
| /* TO DO: If nextradix == -1 AND no further args from do_radixsort AND |
| !retGrp, we're done. We have o. Remember to memset thiscounts |
| before returning. */ |
| |
| if (thiscounts[0] != 0) |
| Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. radix=%d", |
| thiscounts[0], radix); |
| thiscounts[256] = n; |
| itmp = 0; |
| for (int i = 1; itmp < n && i <= 256; i++) { |
| if (thiscounts[i] == 0) |
| continue; |
| thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff |
| if (thisgrpn == 1 || nextradix == -1) { |
| push(thisgrpn); |
| } else { |
| iradix_r(xsub+itmp, osub+itmp, thisgrpn, nextradix); |
| } |
| itmp = thiscounts[i]; |
| thiscounts[i] = 0; |
| } |
| } |
| |
| // dradix from Arun's fastradixdouble.c |
| // + changed to MSD and hooked into do_radixsort framework here. |
| // + replaced tolerance with rounding s.f. |
| |
| static unsigned long long dmask1; |
| static unsigned long long dmask2; |
| |
| static void setNumericRounding(int dround) |
| { |
| dmask1 = dround ? 1 << (8 * dround - 1) : 0; |
| dmask2 = 0xffffffffffffffff << dround * 8; |
| } |
| |
| static union { |
| double d; |
| unsigned long long ull; |
| } u; |
| |
| static |
| unsigned long long dtwiddle(void *p, int i, int order) |
| { |
| u.d = order * ((double *)p)[i]; // take care of 'order' at the beginning |
| if (R_FINITE(u.d)) { |
| u.ull = (u.d != 0.0) ? u.ull + ((u.ull & dmask1) << 1) : 0; |
| } else if (ISNAN(u.d)) { |
| u.ull = 0; |
| return (nalast == 1 ? ~u.ull : u.ull); |
| } |
| unsigned long long mask = (u.ull & 0x8000000000000000) ? |
| // always flip sign bit and if negative (sign bit was set) |
| // flip other bits too |
| 0xffffffffffffffff : 0x8000000000000000; |
| return ((u.ull ^ mask) & dmask2); |
| } |
| |
| static Rboolean dnan(void *p, int i) |
| { |
| u.d = ((double *) p)[i]; |
| return (ISNAN(u.d)); |
| } |
| |
| static unsigned long long (*twiddle) (void *, int, int); |
| static Rboolean(*is_nan) (void *, int); |
| // the size of the arg type (4 or 8). Just 8 currently until iradix is |
| // merged in. |
| static size_t colSize = 8; |
| |
| static void dradix_r(unsigned char *xsub, int *osub, int n, int radix); |
| |
| #ifdef WORDS_BIGENDIAN |
| #define RADIX_BYTE colSize - radix - 1 |
| #else |
| #define RADIX_BYTE radix |
| #endif |
| |
| static void dradix(unsigned char *x, int *o, int n) |
| { |
| int radix, nextradix, itmp, thisgrpn, maxgrpn; |
| unsigned int *thiscounts; |
| unsigned long long thisx = 0; |
| // see comments in iradix for structure. This follows the same. |
| // TO DO: merge iradix in here (almost ready) |
| for (int i = 0; i < n; i++) { |
| thisx = twiddle(x, i, order); |
| for (radix = 0; radix < colSize; radix++) |
| // if dround == 2 then radix 0 and 1 will be all 0 here and skipped. |
| /* on little endian, 0 is the least significant bits (the right) |
| and 7 is the most including sign (the left); i.e. reversed. */ |
| radixcounts[radix][((unsigned char *)&thisx)[RADIX_BYTE]]++; |
| } |
| for (radix = 0; radix < colSize; radix++) { |
| // thisx is the last x after loop above |
| int i = ((unsigned char *) &thisx)[RADIX_BYTE]; |
| skip[radix] = radixcounts[radix][i] == n; |
| // clear it now, the other counts must be 0 already |
| if (skip[radix]) |
| radixcounts[radix][i] = 0; |
| } |
| radix = (int) colSize - 1; // MSD |
| while (radix >= 0 && skip[radix]) radix--; |
| if (radix == -1) { |
| // All radix are skipped; i.e. one number repeated n times. |
| if (nalast == 0 && is_nan(x, 0)) |
| // all values are identical. return 0 if nalast=0 & all NA |
| // because of 'return', have to take care of it here. |
| for (int i = 0; i < n; i++) |
| o[i] = 0; |
| else |
| for (int i = 0; i < n; i++) |
| o[i] = (i + 1); |
| push(n); |
| return; |
| } |
| for (int i = radix - 1; i >= 0; i--) { |
| // clear the lower radix counts, we only did them to know |
| // skip. will be reused within each group |
| if (!skip[i]) |
| memset(radixcounts[i], 0, 257 * sizeof(unsigned int)); |
| } |
| thiscounts = radixcounts[radix]; |
| itmp = thiscounts[0]; |
| maxgrpn = itmp; |
| for (int i = 1; itmp < n && i < 256; i++) { |
| thisgrpn = thiscounts[i]; |
| if (thisgrpn) { // don't cummulate through 0s, important below |
| if (thisgrpn > maxgrpn) |
| maxgrpn = thisgrpn; |
| thiscounts[i] = (itmp += thisgrpn); |
| } |
| } |
| for (int i = n - 1; i >= 0; i--) { |
| thisx = twiddle(x, i, order); |
| o[ --thiscounts[((unsigned char *)&thisx)[RADIX_BYTE]] ] = i + 1; |
| } |
| |
| if (radix_xsuballoc < maxgrpn) { |
| // TO DO: centralize this alloc |
| // The largest group according to the first non-skipped radix, |
| // so could be big (if radix is needed on first arg) TO DO: |
| // could include extra bits to divide the first radix up |
| // more. Often the MSD has groups in just 0-4 out of 256. |
| // free'd at the end of do_radixsort once we're done calling iradix |
| // repetitively |
| radix_xsub = (double *) realloc(radix_xsub, maxgrpn * sizeof(double)); |
| if (!radix_xsub) |
| Error("Failed to realloc working memory %d*8bytes (xsub in dradix), radix=%d", |
| maxgrpn, radix); |
| radix_xsuballoc = maxgrpn; |
| } |
| |
| alloc_otmp(maxgrpn); // TO DO: leave to do_radixsort and remove these? |
| alloc_xtmp(maxgrpn); |
| |
| nextradix = radix - 1; |
| while (nextradix >= 0 && skip[nextradix]) |
| nextradix--; |
| if (thiscounts[0] != 0) |
| Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. dradix=%d", |
| thiscounts[0], radix); |
| thiscounts[256] = n; |
| itmp = 0; |
| for (int i = 1; itmp < n && i <= 256; i++) { |
| if (thiscounts[i] == 0) |
| continue; |
| thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff |
| if (thisgrpn == 1 || nextradix == -1) { |
| push(thisgrpn); |
| } else { |
| if (colSize == 4) { // ready for merging in iradix ... |
| error("Not yet used, still using iradix instead"); |
| for (int j = 0; j < thisgrpn; j++) |
| ((int *)radix_xsub)[j] = (int)twiddle(x, o[itmp+j]-1, order); |
| // this is why this xsub here can't be the same memory |
| // as xsub in do_radixsort |
| } else |
| for (int j = 0; j < thisgrpn; j++) |
| ((unsigned long long *)radix_xsub)[j] = |
| twiddle(x, o[itmp+j]-1, order); |
| // changes xsub and o by reference recursively. |
| dradix_r(radix_xsub, o+itmp, thisgrpn, nextradix); |
| } |
| itmp = thiscounts[i]; |
| thiscounts[i] = 0; |
| } |
| if (nalast == 0) // nalast = 1, -1 are both taken care already. |
| for (int i = 0; i < n; i++) |
| o[i] = is_nan(x, o[i] - 1) ? 0 : o[i]; |
| // nalast = 0 is dealt with separately as it just sets o to 0 |
| // at those indices where x is NA. x[o[i]-1] because x is not |
| // modified by reference unlike iinsert or iradix_r |
| |
| } |
| |
| static void dinsert(unsigned long long *x, int *o, int n) |
| // orders both x and o by reference in-place. Fast for small vectors, |
| // low overhead. don't be tempted to binsearch backwards here, have |
| // to shift anyway; many memmove would have overhead and do the same |
| // thing 'dinsert' will not be called when nalast = 0 and o[0] = -1. |
| { |
| int otmp, tt; |
| unsigned long long xtmp; |
| for (int i = 1; i < n; i++) { |
| xtmp = x[i]; |
| if (xtmp < x[i - 1]) { |
| int j = i - 1; |
| otmp = o[i]; |
| while (j >= 0 && xtmp < x[j]) { |
| x[j + 1] = x[j]; |
| o[j + 1] = o[j]; |
| j--; |
| } |
| x[j + 1] = xtmp; |
| o[j + 1] = otmp; |
| } |
| } |
| tt = 0; |
| for (int i = 1; i < n; i++) |
| if (x[i] == x[i - 1]) |
| tt++; |
| else { |
| push(tt + 1); |
| tt = 0; |
| } |
| push(tt + 1); |
| } |
| |
| static void dradix_r(unsigned char *xsub, int *osub, int n, int radix) |
| /* xsub is a recursive offset into xsub working memory above in |
| dradix, reordered by reference. osub is a an offset into the main |
| answer o, reordered by reference. dradix iterates |
| 7,6,5,4,3,2,1,0 */ |
| { |
| int itmp, thisgrpn, nextradix; |
| unsigned int *thiscounts; |
| unsigned char *p; |
| if (n < 200) { |
| /* 200 is guess based on limited testing. Needs calibrate(). Was 50 |
| based on sum(1:50)=1275 worst -vs- 256 cummulate + 256 memset + |
| allowance since reverse order is unlikely */ |
| // order=1 here because it's already taken care of in iradix |
| dinsert((void *)xsub, osub, n); |
| |
| return; |
| } |
| thiscounts = radixcounts[radix]; |
| p = xsub + RADIX_BYTE; |
| for (int i = 0; i < n; i++) { |
| thiscounts[*p]++; |
| p += colSize; |
| } |
| itmp = thiscounts[0]; |
| for (int i = 1; itmp < n && i < 256; i++) |
| // don't cummulate through 0s, important below |
| if (thiscounts[i]) |
| thiscounts[i] = (itmp += thiscounts[i]); |
| p = xsub + (n - 1) * colSize; |
| if (colSize == 4) { |
| error("Not yet used, still using iradix instead"); |
| for (int i = n - 1; i >= 0; i--) { |
| int j = --thiscounts[*(p + RADIX_BYTE)]; |
| otmp[j] = osub[i]; |
| ((int *) xtmp)[j] = *(int *) p; |
| p -= colSize; |
| } |
| } else { |
| for (int i = n - 1; i >= 0; i--) { |
| int j = --thiscounts[*(p + RADIX_BYTE)]; |
| otmp[j] = osub[i]; |
| ((unsigned long long *) xtmp)[j] = *(unsigned long long *) p; |
| p -= colSize; |
| } |
| } |
| memcpy(osub, otmp, n * sizeof(int)); |
| memcpy(xsub, xtmp, n * colSize); |
| |
| nextradix = radix - 1; |
| while (nextradix >= 0 && skip[nextradix]) |
| nextradix--; |
| // TO DO: If nextradix==-1 and no further args from do_radixsort, |
| // we're done. We have o. Remember to memset thiscounts before |
| // returning. |
| |
| if (thiscounts[0] != 0) |
| Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. radix=%d", |
| thiscounts[0], radix); |
| thiscounts[256] = n; |
| itmp = 0; |
| for (int i = 1; itmp < n && i <= 256; i++) { |
| if (thiscounts[i] == 0) |
| continue; |
| thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff |
| if (thisgrpn == 1 || nextradix == -1) |
| push(thisgrpn); |
| else |
| dradix_r(xsub + itmp * colSize, osub + itmp, thisgrpn, |
| nextradix); |
| itmp = thiscounts[i]; |
| thiscounts[i] = 0; |
| } |
| } |
| |
| // TO DO?: dcount. Find step size, then range = (max-min)/step and |
| // proceed as icount. Many fixed precision floats (such as prices) may |
| // be suitable. Fixed precision such as 1.10, 1.15, 1.20, 1.25, 1.30 |
| // ... do use all bits so dradix skipping may not help. |
| |
| static int *cradix_counts = NULL; |
| static int cradix_counts_alloc = 0; |
| static int maxlen = 1; |
| static SEXP *cradix_xtmp = NULL; |
| static int cradix_xtmp_alloc = 0; |
| |
| // same as StrCmp but also takes into account 'decreasing' and 'na.last' args. |
| static int StrCmp2(SEXP x, SEXP y) |
| { |
| // same cached pointer (including NA_STRING == NA_STRING) |
| if (x == y) return 0; |
| // if x=NA, nalast=1 ? then x > y else x < y (Note: nalast == 0 is |
| // already taken care of in 'csorted', won't be 0 here) |
| if (x == NA_STRING) return nalast; |
| if (y == NA_STRING) return -nalast; // if y=NA, nalast=1 ? then y > x |
| return order*strcmp(CHAR(x), CHAR(y)); // same as explanation in StrCmp |
| } |
| |
| static int StrCmp(SEXP x, SEXP y) // also used by bmerge and chmatch |
| { |
| // same cached pointer (including NA_STRING == NA_STRING) |
| if (x == y) return 0; |
| if (x == NA_STRING) return -1; // x < y |
| if (y == NA_STRING) return 1; // x > y |
| // assumes strings are in same encoding |
| return strcmp(CHAR(x), CHAR(y)); |
| } |
| |
| #define CHAR_ENCODING(x) (IS_ASCII(x) ? CE_UTF8 : getCharCE(x)) |
| |
| static void checkEncodings(SEXP x) |
| { |
| cetype_t ce; |
| |
| int i; |
| for (i = 0; i < length(x) && STRING_ELT(x, i) == NA_STRING; i++); |
| |
| if (i < length(x)) { |
| ce = CHAR_ENCODING(STRING_ELT(x, i)); |
| if (ce == CE_NATIVE) { |
| error(_("Character encoding must be UTF-8, Latin-1 or bytes")); |
| } |
| } |
| |
| /* Disabled for now -- doubles the time (for already sorted vectors): why? |
| for (int i = 1; i < length(x); i++) { |
| if (ce != CHAR_ENCODING(STRING_ELT(x, i))) { |
| error(_("Mixed character encodings are not supported")); |
| } |
| } |
| */ |
| } |
| |
| static void cradix_r(SEXP * xsub, int n, int radix) |
| // xsub is a unique set of CHARSXP, to be ordered by reference |
| |
| // First time, radix == 0, and xsub == x. Then recursively moves SEXP together |
| // for L1 cache efficiency. |
| |
| // Quite different to iradix because |
| // 1) x is known to be unique so fits in cache |
| // (wide random access not an issue) |
| // 2) they're variable length character strings |
| // 3) no need to maintain o. Just simply reorder x. No grps or push. |
| |
| // Fortunately, UTF sorts in the same order if treated as ASCII, so we |
| // can simplify by doing it by bytes. |
| |
| // TO DO: confirm a forwards (MSD) radix for efficiency, although more |
| // complicated. |
| |
| // This part has nothing to do with truelength. The |
| // truelength stuff is to do with finding the unique strings. We may |
| // be able to improve CHARSXP derefencing by submitting patch to R to |
| // make R's string cache contiguous but would likely be difficult. If |
| // we strxfrm, then it'll then be contiguous and compact then anyway. |
| { |
| int itmp, *thiscounts, thisgrpn=0, thisx=0; |
| SEXP stmp; |
| |
| // TO DO?: chmatch to existing sorted vector, then grow it. |
| // TO DO?: if (n<N_SMALL = 200) insert sort, then loop through groups via == |
| if (n <= 1) return; |
| if (n == 2) { |
| if (StrCmp(xsub[1], xsub[0]) < 0) { |
| stmp = xsub[0]; |
| xsub[0] = xsub[1]; |
| xsub[1] = stmp; |
| } |
| return; |
| } |
| // TO DO: if (n < 50) cinsert (continuing from radix offset into |
| // CHAR) or using StrCmp. But 256 is narrow, so quick and not too |
| // much an issue. |
| |
| thiscounts = cradix_counts + radix * 256; |
| for (int i = 0; i < n; i++) { |
| thisx = xsub[i] == NA_STRING ? |
| 0 : (radix < LENGTH(xsub[i]) ? |
| (unsigned char) (CHAR(xsub[i])[radix]) : 1); |
| thiscounts[ thisx ]++; // 0 for NA, 1 for "" |
| } |
| // this also catches when subx has shorter strings than the rest, |
| // thiscounts[0] == n and we'll recurse very quickly through to the |
| // overall maxlen with no 256 overhead each time |
| if (thiscounts[thisx] == n && radix < maxlen - 1) { |
| cradix_r(xsub, n, radix + 1); |
| thiscounts[thisx] = 0; // the rest must be 0 already, save the memset |
| return; |
| } |
| itmp = thiscounts[0]; |
| for (int i = 1; i < 256; i++) |
| // don't cummulate through 0s, important below |
| if (thiscounts[i]) |
| thiscounts[i] = (itmp += thiscounts[i]); |
| for (int i = n - 1; i >= 0; i--) { |
| thisx = xsub[i] == NA_STRING ? |
| 0 : (radix < LENGTH(xsub[i]) ? |
| (unsigned char) (CHAR(xsub[i])[radix]) : 1); |
| int j = --thiscounts[thisx]; |
| cradix_xtmp[j] = xsub[i]; |
| } |
| memcpy(xsub, cradix_xtmp, n * sizeof(SEXP)); |
| if (radix == maxlen - 1) { |
| memset(thiscounts, 0, 256 * sizeof(int)); |
| return; |
| } |
| if (thiscounts[0] != 0) |
| Error("Logical error. counts[0]=%d in cradix but should have been decremented to 0. radix=%d", |
| thiscounts[0], radix); |
| itmp = 0; |
| for (int i = 1; i < 256; i++) { |
| if (thiscounts[i] == 0) |
| continue; |
| thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff |
| cradix_r(xsub + itmp, thisgrpn, radix + 1); |
| itmp = thiscounts[i]; |
| // set to 0 now since we're here, saves memset |
| // afterwards. Important to clear! Also more portable for |
| // machines where 0 isn't all bits 0 (?!) |
| thiscounts[i] = 0; |
| } |
| if (itmp < n - 1) |
| cradix_r(xsub + itmp, n - itmp, radix + 1); // final group |
| } |
| |
| static SEXP *ustr = NULL; |
| static int ustr_alloc = 0, ustr_n = 0; |
| |
| static void cgroup(SEXP * x, int *o, int n) |
| // As icount : |
| // Places the ordering into o directly, overwriting whatever was there |
| // Doesn't change x |
| // Pushes group sizes onto stack |
| |
| // Only run when sortStr == FALSE. Basically a counting sort, in first |
| // appearance order, directly. Since it doesn't sort the strings, the |
| // name is cgroup. there is no _pre for this. ustr created and |
| // cleared each time. |
| { |
| // savetl_init() is called once at the start of do_radixsort |
| if (ustr_n != 0) |
| Error |
| ("Internal error. ustr isn't empty when starting cgroup: ustr_n=%d, ustr_alloc=%d", |
| ustr_n, ustr_alloc); |
| for (int i = 0; i < n; i++) { |
| SEXP s = x[i]; |
| if (TRLEN(s) < 0) { // this case first as it's the most frequent |
| SET_TRLEN(s, TRLEN(s) - 1); |
| // use negative counts so as to detect R's own (positive) |
| // usage of tl on CHARSXP |
| continue; |
| } |
| if (TRLEN(s) > 0) { |
| // Save any of R's own usage of tl (assumed positive, so |
| // we can both count and save in one scan), to restore |
| // afterwards. From R 2.14.0, tl is initialized to 0, |
| // prior to that it was random so this step saved too much. |
| savetl(s); |
| SET_TRLEN(s, 0); |
| } |
| if (ustr_alloc <= ustr_n) { |
| // 10000 = 78k of 8byte pointers. Small initial guess, |
| // negligible time to alloc. |
| ustr_alloc = (ustr_alloc == 0) ? 10000 : ustr_alloc*2; |
| if (ustr_alloc > n) |
| ustr_alloc = n; |
| ustr = realloc(ustr, ustr_alloc * sizeof(SEXP)); |
| if (ustr == NULL) |
| Error("Unable to realloc %d * %d bytes in cgroup", ustr_alloc, |
| sizeof(SEXP)); |
| } |
| SET_TRLEN(s, -1); |
| ustr[ustr_n++] = s; |
| } |
| // TO DO: the same string in different encodings will be |
| // considered different here. Sweep through ustr and merge counts |
| // where equal (sort needed therefore, unfortunately?, only if |
| // there are any marked encodings present) |
| int cumsum = 0; |
| for (int i = 0; i < ustr_n; i++) { // 0.000 |
| push(-TRLEN(ustr[i])); |
| SET_TRLEN(ustr[i], cumsum += -TRLEN(ustr[i])); |
| } |
| int *target = (o[0] != -1) ? newo : o; |
| for (int i = n - 1; i >= 0; i--) { |
| SEXP s = x[i]; // 0.400 (page fetches on string cache) |
| int k = TRLEN(s) - 1; |
| SET_TRLEN(s, k); |
| target[k] = i + 1; // 0.800 (random access to o) |
| } |
| // The cummulate meant counts are left non zero, so reset for next |
| // time (0.00s). |
| for (int i = 0; i < ustr_n; i++) |
| SET_TRLEN(ustr[i], 0); |
| ustr_n = 0; |
| } |
| |
| static int *csort_otmp = NULL, csort_otmp_alloc = 0; |
| static void alloc_csort_otmp(int n) |
| { |
| if (csort_otmp_alloc >= n) |
| return; |
| csort_otmp = (int *) realloc(csort_otmp, n * sizeof(int)); |
| if (csort_otmp == NULL) |
| Error |
| ("Failed to allocate working memory for csort_otmp. Requested %d * %d bytes", |
| n, sizeof(int)); |
| csort_otmp_alloc = n; |
| } |
| |
| static void csort(SEXP * x, int *o, int n) |
| /* |
| As icount : |
| Places the ordering into o directly, overwriting whatever was there |
| Doesn't change x |
| Pushes group sizes onto stack |
| Requires csort_pre() to have created and sorted ustr already |
| */ |
| { |
| /* can't use otmp, since iradix might be called here and that uses |
| otmp (and xtmp). alloc_csort_otmp(n) is called from do_radixsort for |
| either n=nrow if 1st arg, or n=maxgrpn if onwards args */ |
| for (int i = 0; i < n; i++) |
| csort_otmp[i] = (x[i] == NA_STRING) ? NA_INTEGER : -TRLEN(x[i]); |
| if (nalast == 0 && n == 2) { |
| // special case for nalast == 0. n == 1 is handled inside |
| // do_radixsort. at least 1 will be NA here else use o from caller |
| // directly (not 1st arg) |
| if (o[0] == -1) |
| for (int i = 0; i < n; i++) |
| o[i] = i + 1; |
| for (int i = 0; i < n; i++) |
| if (csort_otmp[i] == NA_INTEGER) |
| o[i] = 0; |
| push(1); push(1); |
| return; |
| } |
| if (n < N_SMALL && nalast != 0) { // TO DO: calibrate() N_SMALL=200 |
| if (o[0] == -1) |
| for (int i = 0; i < n; i++) |
| o[i] = i + 1; |
| // else use o from caller directly (not 1st arg) |
| for (int i = 0; i < n; i++) |
| csort_otmp[i] = icheck(csort_otmp[i]); |
| iinsert(csort_otmp, o, n); |
| } else { |
| setRange(csort_otmp, n); |
| if (range == NA_INTEGER) |
| Error("Internal error. csort's otmp contains all-NA"); |
| int *target = (o[0] != -1) ? newo : o; |
| if (range <= N_RANGE) |
| // TO DO: calibrate(). radix was faster (9.2s |
| // "range<=10000" instead of 11.6s "range<=N_RANGE && |
| // range<n") for run(7) where range=N_RANGE n=10000000 |
| icount(csort_otmp, target, n); |
| else |
| iradix(csort_otmp, target, n); |
| } |
| // all i* push onto stack. Using their counts may be faster here |
| // than thrashing SEXP fetches over several passes as cgroup does |
| // (but cgroup needs that to keep orginal order, and cgroup saves |
| // the sort in csort_pre). |
| } |
| |
| static void csort_pre(SEXP * x, int n) |
| // Finds ustr and sorts it. Runs once for each arg (if |
| // sortStr == TRUE), then ustr is used by csort within each group ustr |
| // is grown on each character arg, to save sorting the same strings |
| // again if several args contain the same strings |
| { |
| SEXP s; |
| int old_un, new_un; |
| // savetl_init() is called once at the start of do_radixsort |
| old_un = ustr_n; |
| for (int i = 0; i < n; i++) { |
| s = x[i]; |
| // this case first as it's the most frequent. Already in ustr, |
| // this negative is its ordering. |
| if (TRLEN(s) < 0) |
| continue; |
| // Save any of R's own usage of tl (assumed positive, so we |
| // can both count and save in one scan), to restore |
| // afterwards. From R 2.14.0, tl is initialized to 0, prior to |
| // that it was random so this step saved too much. |
| if (TRLEN(s) > 0) { |
| savetl(s); |
| SET_TRLEN(s, 0); |
| } |
| if (ustr_alloc <= ustr_n) { |
| // 10000 = 78k of 8byte pointers. Small initial guess, |
| // negligible time to alloc. |
| ustr_alloc = (ustr_alloc == 0) ? 10000 : ustr_alloc*2; |
| if (ustr_alloc > old_un+n) |
| ustr_alloc = old_un + n; |
| ustr = realloc(ustr, ustr_alloc * sizeof(SEXP)); |
| if (ustr == NULL) |
| Error("Failed to realloc ustr. Requested %d * %d bytes", |
| ustr_alloc, sizeof(SEXP)); |
| } |
| SET_TRLEN(s, -1); // this -1 will become its ordering later below |
| ustr[ustr_n++] = s; |
| // length on CHARSXP is the nchar of char * (excluding \0), |
| // and treats marked encodings as if ascii. |
| if (s != NA_STRING && LENGTH(s) > maxlen) |
| maxlen = LENGTH(s); |
| } |
| new_un = ustr_n; |
| if (new_un == old_un) |
| return; |
| // No new strings observed, seen them all before in previous |
| // arg. ustr already sufficient. If we ever make ustr |
| // permanently held by data.table, we'll just need to make the |
| // final loop to set -i-1 before returning here. sort ustr. |
| |
| // TODO: just sort new ones and merge them in. These allocs are |
| // here, to save them being in the recursive cradix_r() |
| if (cradix_counts_alloc < maxlen) { |
| cradix_counts_alloc = maxlen + 10; // +10 to save too many reallocs |
| cradix_counts = (int *)realloc(cradix_counts, |
| cradix_counts_alloc * 256 * sizeof(int)); |
| if (!cradix_counts) |
| Error("Failed to alloc cradix_counts"); |
| memset(cradix_counts, 0, cradix_counts_alloc * 256 * sizeof(int)); |
| } |
| if (cradix_xtmp_alloc < ustr_n) { |
| cradix_xtmp = (SEXP *) realloc(cradix_xtmp, ustr_n * sizeof(SEXP)); |
| // TO DO: Reuse the one we have in do_radixsort. |
| // Does it need to be n length? |
| if (!cradix_xtmp) |
| Error("Failed to alloc cradix_tmp"); |
| cradix_xtmp_alloc = ustr_n; |
| } |
| // sorts ustr in-place by reference save ordering in the |
| // CHARSXP. negative so as to distinguish with R's own usage. |
| cradix_r(ustr, ustr_n, 0); |
| for (int i = 0; i < ustr_n; i++) |
| SET_TRLEN(ustr[i], -i - 1); |
| } |
| |
| // functions to test vectors for sortedness: isorted, dsorted and csorted |
| |
| // base:is.unsorted returns NA in the presence of any NA, but we need |
| // to consider na.last, and we also return -1 if x is sorted in |
| // _strictly_ reverse order; a common case we optimize. If a vector |
| // is in decreasing order *with ties*, then an in-place reverse (no |
| // sort) would result in instability of ties, so we are strict. We |
| // also save grouping information during the check; that information |
| // is required when sorting by multiple arguments. |
| |
| // TO DO: test in big steps first to return faster if unsortedness is |
| // at the end (a common case of rbind'ing data to end) These are all |
| // sequential access to x, so very quick and cache efficient. |
| |
| // order = 1 is ascending and order=-1 is descending; also takes care |
| // of na.last argument with check through 'icheck' Relies on |
| // NA_INTEGER == INT_MIN, checked in init.c |
| static int isorted(int *x, int n) |
| { |
| int i = 1, j = 0; |
| // when nalast = NA, |
| // all NAs ? return special value to replace all o's values with '0' |
| // any NAs ? return 0 = unsorted and leave it |
| // to sort routines to replace o's with 0's |
| // no NAs ? continue to check rest of isorted - the same routine as usual |
| if (nalast == 0) { |
| for (int k = 0; k < n; k++) |
| if (x[k] != NA_INTEGER) |
| j++; |
| if (j == 0) { |
| push(n); |
| return (-2); |
| } |
| if (j != n) |
| return (0); |
| } |
| if (n <= 1) { |
| push(n); |
| return (1); |
| } |
| if (icheck(x[1]) < icheck(x[0])) { |
| i = 2; |
| while (i < n && icheck(x[i]) < icheck(x[i - 1])) |
| i++; |
| // strictly opposite to expected 'order', no ties; |
| if (i == n) { |
| mpush(1, n); |
| return (-1); |
| } |
| // e.g. no more than one NA at the beginning/end (for order=-1/1) |
| else return (0); |
| } |
| int old = gsngrp[flip]; |
| int tt = 1; |
| for (int i = 1; i < n; i++) { |
| if (icheck(x[i]) < icheck(x[i - 1])) { |
| gsngrp[flip] = old; |
| return (0); |
| } |
| if (x[i] == x[i - 1]) |
| tt++; |
| else { |
| push(tt); tt = 1; |
| } |
| } |
| push(tt); |
| // same as 'order', NAs at the beginning for order=1, at end for |
| // order=-1, possibly with ties |
| return(1); |
| } |
| |
| // order=1 is ascending and -1 is descending |
| // also accounts for nalast=0 (=NA), =1 (TRUE), -1 (FALSE) (in twiddle) |
| static int dsorted(double *x, int n) |
| { |
| int i = 1, j = 0; |
| unsigned long long prev, this; |
| if (nalast == 0) { |
| // when nalast = NA, |
| // all NAs ? return special value to replace all o's values with '0' |
| // any NAs ? return 0 = unsorted and leave it to sort routines to |
| // replace o's with 0's |
| // no NAs ? continue to check the rest of isorted - |
| // the same routine as usual |
| for (int k = 0; k < n; k++) |
| if (!is_nan(x, k)) |
| j++; |
| if (j == 0) { |
| push(n); |
| return (-2); |
| } |
| if (j != n) |
| return (0); |
| } |
| if (n <= 1) { |
| push(n); |
| return (1); |
| } |
| prev = twiddle(x, 0, order); |
| this = twiddle(x, 1, order); |
| if (this < prev) { |
| i = 2; |
| prev = this; |
| while (i < n && (this = twiddle(x, i, order)) < prev) { |
| i++; |
| prev = this; |
| } |
| if (i == n) { |
| mpush(1, n); |
| return (-1); |
| } |
| // strictly opposite of expected 'order', no ties; e.g. no |
| // more than one NA at the beginning/end (for order=-1/1) |
| |
| // TO DO: improve to be stable for ties in reverse |
| else return(0); |
| } |
| int old = gsngrp[flip]; |
| int tt = 1; |
| for (int i = 1; i < n; i++) { |
| // TO DO: once we get past -Inf, NA and NaN at the bottom, and |
| // +Inf at the top, the middle only need be twiddled |
| // for tolerance (worth it?) |
| this = twiddle(x, i, order); |
| if (this < prev) { |
| gsngrp[flip] = old; |
| return (0); |
| } |
| if (this == prev) |
| tt++; |
| else { |
| push(tt); |
| tt = 1; |
| } |
| prev = this; |
| } |
| push(tt); |
| // exactly as expected in 'order' (1=increasing, -1=decreasing), |
| // possibly with ties |
| return (1); |
| } |
| |
| // order=1 is ascending and -1 is descending |
| // also accounts for nalast=0 (=NA), =1 (TRUE), -1 (FALSE) |
| static int csorted(SEXP *x, int n) |
| { |
| int i = 1, j = 0, tmp; |
| if (nalast == 0) { |
| // when nalast = NA, |
| // all NAs ? return special value to replace all o's values with '0' |
| // any NAs ? return 0 = unsorted and leave it to sort routines |
| // to replace o's with 0's |
| // no NAs ? continue to check the rest of isorted - |
| // the same routine as usual |
| for (int k = 0; k < n; k++) |
| if (x[k] != NA_STRING) |
| j++; |
| if (j == 0) { |
| push(n); |
| return (-2); |
| } |
| if (j != n) |
| return (0); |
| } |
| if (n <= 1) { |
| push(n); |
| return (1); |
| } |
| if (StrCmp2(x[1], x[0]) < 0) { |
| i = 2; |
| while (i < n && StrCmp2(x[i], x[i - 1]) < 0) |
| i++; |
| if (i == n) { |
| mpush(1, n); |
| return (-1); |
| } |
| // strictly opposite of expected 'order', no ties; |
| // e.g. no more than one NA at the beginning/end (for order=-1/1) |
| else |
| return (0); |
| } |
| int old = gsngrp[flip]; |
| int tt = 1; |
| for (int i = 1; i < n; i++) { |
| tmp = StrCmp2(x[i], x[i - 1]); |
| if (tmp < 0) { |
| gsngrp[flip] = old; |
| return (0); |
| } |
| if (tmp == 0) |
| tt++; |
| else { |
| push(tt); |
| tt = 1; |
| } |
| } |
| push(tt); |
| // exactly as expected in 'order', possibly with ties |
| return (1); |
| } |
| |
| static void isort(int *x, int *o, int n) |
| { |
| if (n <= 2) { |
| // nalast = 0 and n == 2 (check bottom of this file for explanation) |
| if (nalast == 0 && n == 2) { |
| if (o[0] == -1) { |
| o[0] = 1; |
| o[1] = 2; |
| } |
| for (int i = 0; i < n; i++) |
| if (x[i] == NA_INTEGER) |
| o[i] = 0; |
| push(1); push(1); |
| return; |
| } else Error("Internal error: isort received n=%d. isorted should have dealt with this (e.g. as a reverse sorted vector) already",n); |
| } |
| if (n < N_SMALL && o[0] != -1 && nalast != 0) { |
| // see comment above in iradix_r on N_SMALL=200. |
| /* if not o[0] then can't just populate with 1:n here, since x |
| is changed by ref too (so would need to be copied). */ |
| /* pushes inside too. Changes x and o by reference, so not |
| suitable in first arg when o hasn't been populated yet |
| and x is an actual argument (hence check on o[0]). */ |
| if (order != 1 || nalast != -1) |
| // so that default case, i.e., order=1, nalast=FALSE will |
| // not be affected (ex: `setkey`) |
| for (int i = 0; i < n; i++) |
| x[i] = icheck(x[i]); |
| iinsert(x, o, n); |
| } else { |
| /* Tighter range (e.g. copes better with a few abormally large |
| values in some groups), but also, when setRange was once at |
| arg level that caused an extra scan of (long) x |
| first. 10,000 calls to setRange takes just 0.04s |
| i.e. negligible. */ |
| setRange(x, n); |
| if (range == NA_INTEGER) |
| Error("Internal error: isort passed all-NA. isorted should have caught this before this point"); |
| int *target = (o[0] != -1) ? newo : o; |
| // was range < 10000 for subgroups, but 1e5 for the first |
| // arg, tried to generalise here. 1e4 rather than 1e5 here |
| // because iterated was (thisgrpn < 200 || range > 20000) then |
| // radix a short vector with large range can bite icount when |
| // iterated (BLOCK 4 and 6) |
| if (range <= N_RANGE && range <= n) { |
| icount(x, target, n); |
| } else { |
| iradix(x, target, n); |
| } |
| } |
| } |
| |
| static void dsort(double *x, int *o, int n) |
| { |
| if (n <= 2) { |
| if (nalast == 0 && n == 2) { |
| // don't have to twiddle here.. at least one will be NA |
| // and 'n' WILL BE 2. |
| if (o[0] == -1) { |
| o[0] = 1; |
| o[1] = 2; |
| } |
| for (int i = 0; i < n; i++) |
| if (is_nan(x, i)) |
| o[i] = 0; |
| push(1); push(1); |
| return; |
| } |
| Error("Internal error: dsort received n=%d. dsorted should have dealt with this (e.g. as a reverse sorted vector) already",n); |
| } |
| if (n < N_SMALL && o[0] != -1 && nalast != 0) { |
| // see comment above in iradix_r re N_SMALL=200, and isort for o[0] |
| for (int i = 0; i < n; i++) |
| ((unsigned long long *)x)[i] = twiddle(x, i, order); |
| // have to twiddle here anyways, can't speed up default case |
| // like in isort |
| dinsert((unsigned long long *)x, o, n); |
| } else { |
| dradix((unsigned char *) x, (o[0] != -1) ? newo : o, n); |
| } |
| } |
| |
| SEXP attribute_hidden do_radixsort(SEXP call, SEXP op, SEXP args, SEXP rho) |
| { |
| int n = -1, narg = 0, ngrp, tmp, *osub, thisgrpn; |
| R_xlen_t nl = n; |
| Rboolean isSorted = TRUE, retGrp; |
| void *xd; |
| int *o = NULL; |
| |
| /* ML: FIXME: Here are just two of the dangerous assumptions here */ |
| if (sizeof(int) != 4) { |
| error("radix sort assumes sizeof(int) == 4"); |
| } |
| if (sizeof(double) != 8) { |
| error("radix sort assumes sizeof(double) == 8"); |
| } |
| |
| nalast = (asLogical(CAR(args)) == NA_LOGICAL) ? 0 : |
| (asLogical(CAR(args)) == TRUE) ? 1 : -1; // 1=TRUE, -1=FALSE, 0=NA |
| args = CDR(args); |
| SEXP decreasing = CAR(args); |
| args = CDR(args); |
| |
| /* If TRUE, return starts of runs of identical values + max group size. */ |
| retGrp = asLogical(CAR(args)); |
| args = CDR(args); |
| |
| /* If FALSE, get order of strings in appearance order. Essentially |
| abuses the CHARSXP table to group strings without hashing |
| them. Only makes sense when retGrp=TRUE. |
| */ |
| sortStr = asLogical(CAR(args)); |
| args = CDR(args); |
| |
| /* When grouping, we round off doubles to account for imprecision */ |
| setNumericRounding(retGrp ? 2 : 0); |
| |
| if (args == R_NilValue) |
| return R_NilValue; |
| if (isVector(CAR(args))) |
| nl = XLENGTH(CAR(args)); |
| for (SEXP ap = args; ap != R_NilValue; ap = CDR(ap), narg++) { |
| if (!isVector(CAR(ap))) |
| error(_("argument %d is not a vector"), narg + 1); |
| //Rprintf("%d, %d\n", XLENGTH(CAR(ap)), nl); |
| if (XLENGTH(CAR(ap)) != nl) |
| error(_("argument lengths differ")); |
| } |
| |
| if (narg != length(decreasing)) |
| error(_("length(decreasing) must match the number of order arguments")); |
| for (int i = 0; i < narg; i++) { |
| if (LOGICAL(decreasing)[i] == NA_LOGICAL) |
| error(_("'decreasing' elements must be TRUE or FALSE")); |
| } |
| order = asLogical(decreasing) ? -1 : 1; |
| |
| SEXP x = CAR(args); |
| args = CDR(args); |
| |
| // (ML) FIXME: need to support long vectors |
| if (nl > INT_MAX) { |
| error(_("long vectors not supported")); |
| } |
| n = (int) nl; |
| |
| // upper limit for stack size (all size 1 groups). We'll detect |
| // and avoid that limit, but if just one non-1 group (say 2), that |
| // can't be avoided. |
| gsmaxalloc = n; |
| |
| // once for the result, needs to be length n. |
| |
| // TO DO: save allocation if NULL is returned (isSorted = =TRUE) so |
| // [i|c|d]sort know they can populate o directly with no working |
| // memory needed to reorder existing order had to repace this from |
| // '0' to '-1' because 'nalast = 0' replace 'o[.]' with 0 values. |
| |
| SEXP ans = PROTECT(allocVector(INTSXP, n)); |
| o = INTEGER(ans); |
| if (n > 0) |
| o[0] = -1; |
| xd = DATAPTR(x); |
| |
| stackgrps = narg > 1 || retGrp; |
| |
| if (TYPEOF(x) == STRSXP) { |
| checkEncodings(x); |
| } |
| |
| savetl_init(); // from now on use Error not error. |
| |
| switch (TYPEOF(x)) { |
| case INTSXP: |
| case LGLSXP: |
| tmp = isorted(xd, n); |
| break; |
| case REALSXP : |
| twiddle = &dtwiddle; |
| is_nan = &dnan; |
| tmp = dsorted(xd, n); |
| break; |
| case STRSXP : |
| tmp = csorted(xd, n); |
| break; |
| default : |
| Error("First arg is type '%s', not yet supported", |
| type2char(TYPEOF(x))); |
| } |
| if (tmp) { |
| // -1 or 1. NEW: or -2 in case of nalast == 0 and all NAs |
| if (tmp == 1) { |
| // same as expected in 'order' (1 = increasing, -1 = decreasing) |
| isSorted = TRUE; |
| for (int i = 0; i < n; i++) |
| o[i] = i + 1; |
| } else if (tmp == -1) { |
| // -1 (or -n for result of strcmp), strictly opposite to |
| // -expected 'order' |
| isSorted = FALSE; |
| for (int i = 0; i < n; i++) |
| o[i] = n - i; |
| } else if (nalast == 0 && tmp == -2) { |
| // happens only when nalast=NA/0. Means all NAs, replace |
| // with 0's therefore! |
| isSorted = FALSE; |
| for (int i = 0; i < n; i++) |
| o[i] = 0; |
| } |
| } else { |
| isSorted = FALSE; |
| switch (TYPEOF(x)) { |
| case INTSXP: |
| case LGLSXP: |
| isort(xd, o, n); |
| break; |
| case REALSXP : |
| dsort(xd, o, n); |
| break; |
| case STRSXP : |
| if (sortStr) { |
| csort_pre(xd, n); |
| alloc_csort_otmp(n); |
| csort(xd, o, n); |
| } else |
| cgroup(xd, o, n); |
| break; |
| default: |
| Error |
| ("Internal error: previous default should have caught unsupported type"); |
| } |
| } |
| |
| int maxgrpn = gsmax[flip]; // biggest group in the first arg |
| void *xsub = NULL; // local |
| int (*f) (); |
| void (*g) (); |
| |
| if (narg > 1 && gsngrp[flip] < n) { |
| // double is the largest type, 8 |
| xsub = (void *) malloc(maxgrpn * sizeof(double)); |
| if (xsub == NULL) |
| Error("Couldn't allocate xsub in do_radixsort, requested %d * %d bytes.", |
| maxgrpn, sizeof(double)); |
| // global variable, used by isort, dsort, sort and cgroup |
| newo = (int *) malloc(maxgrpn * sizeof(int)); |
| if (newo == NULL) |
| Error("Couldn't allocate newo in do_radixsort, requested %d * %d bytes.", |
| maxgrpn, sizeof(int)); |
| } |
| |
| for (int col = 2; col <= narg; col++) { |
| x = CAR(args); |
| args = CDR(args); |
| xd = DATAPTR(x); |
| ngrp = gsngrp[flip]; |
| if (ngrp == n && nalast != 0) |
| break; |
| flipflop(); |
| stackgrps = col != narg || retGrp; |
| order = LOGICAL(decreasing)[col - 1] ? -1 : 1; |
| switch (TYPEOF(x)) { |
| case INTSXP: |
| case LGLSXP: |
| f = &isorted; |
| g = &isort; |
| break; |
| case REALSXP: |
| twiddle = &dtwiddle; |
| is_nan = &dnan; |
| f = &dsorted; |
| g = &dsort; |
| break; |
| case STRSXP: |
| f = &csorted; |
| if (sortStr) { |
| csort_pre(xd, n); |
| alloc_csort_otmp(gsmax[1 - flip]); |
| g = &csort; |
| } |
| // no increasing/decreasing order required if sortStr = FALSE, |
| // just a dummy argument |
| else |
| g = &cgroup; |
| break; |
| default: |
| Error("Arg %d is type '%s', not yet supported", |
| col, type2char(TYPEOF(x))); |
| } |
| int i = 0; |
| for (int grp = 0; grp < ngrp; grp++) { |
| thisgrpn = gs[1 - flip][grp]; |
| if (thisgrpn == 1) { |
| if (nalast == 0) { |
| // this edge case had to be taken care of |
| // here.. (see the bottom of this file for |
| // more explanation) |
| switch (TYPEOF(x)) { |
| case INTSXP: |
| if (INTEGER(x)[o[i] - 1] == NA_INTEGER) { |
| isSorted = FALSE; |
| o[i] = 0; |
| } |
| break; |
| case LGLSXP: |
| if (LOGICAL(x)[o[i] - 1] == NA_LOGICAL) { |
| isSorted = FALSE; |
| o[i] = 0; |
| } |
| break; |
| case REALSXP: |
| if (ISNAN(REAL(x)[o[i] - 1])) { |
| isSorted = FALSE; |
| o[i] = 0; |
| } |
| break; |
| case STRSXP: |
| if (STRING_ELT(x, o[i] - 1) == NA_STRING) { |
| isSorted = FALSE; |
| o[i] = 0; |
| } break; |
| default : |
| Error("Internal error: previous default should have caught unsupported type"); |
| } |
| } |
| i++; |
| push(1); |
| continue; |
| } |
| osub = o+i; |
| // ** TO DO **: if isSorted, we can just point xsub |
| // into x directly. If (*f)() returns 0, |
| // though, will have to copy x at that point |
| // When doing this, xsub could be allocated at |
| // that point for the first time. |
| if (TYPEOF(x) == STRSXP) |
| for (int j = 0; j < thisgrpn; j++) |
| ((SEXP *) xsub)[j] = ((SEXP *) xd)[o[i++] - 1]; |
| else if (TYPEOF(x) == REALSXP) |
| for (int j = 0; j < thisgrpn; j++) |
| ((double *) xsub)[j] = ((double *) xd)[o[i++] - 1]; |
| else |
| for (int j = 0; j < thisgrpn; j++) |
| ((int *) xsub)[j] = ((int *) xd)[o[i++] - 1]; |
| |
| // continue; // BASELINE short circuit timing |
| // point. Up to here is the cost of creating xsub. |
| // [i|d|c]sorted(); very low cost, sequential |
| tmp = (*f)(xsub, thisgrpn); |
| if (tmp) { |
| // *sorted will have already push()'d the groups |
| if (tmp == -1) { |
| isSorted = FALSE; |
| for (int k = 0; k < thisgrpn / 2; k++) { |
| // reverse the order in-place using no |
| // function call or working memory |
| // isorted only returns -1 for |
| // _strictly_ decreasing order, |
| // otherwise ties wouldn't be stable |
| tmp = osub[k]; |
| osub[k] = osub[thisgrpn - 1 - k]; |
| osub[thisgrpn - 1 - k] = tmp; |
| } |
| } else if (nalast == 0 && tmp == -2) { |
| // all NAs, replace osub[.] with 0s. |
| isSorted = FALSE; |
| for (int k = 0; k < thisgrpn; k++) osub[k] = 0; |
| } |
| continue; |
| } |
| isSorted = FALSE; |
| // nalast=NA will result in newo[0] = 0. So had to change to -1. |
| newo[0] = -1; |
| // may update osub directly, or if not will put the |
| // result in global newo |
| (*g)(xsub, osub, thisgrpn); |
| |
| if (newo[0] != -1) { |
| if (nalast != 0) |
| for (int j = 0; j < thisgrpn; j++) |
| // reuse xsub to reorder osub |
| ((int *) xsub)[j] = osub[newo[j] - 1]; |
| else |
| for (int j = 0; j < thisgrpn; j++) |
| // final nalast case to handle! |
| ((int *) xsub)[j] = (newo[j] == 0) ? 0 : |
| osub[newo[j] - 1]; |
| memcpy(osub, xsub, thisgrpn * sizeof(int)); |
| } |
| } |
| } |
| |
| if (!sortStr && ustr_n != 0) |
| Error("Internal error: at the end of do_radixsort sortStr == FALSE but ustr_n !=0 [%d]", |
| ustr_n); |
| for(int i = 0; i < ustr_n; i++) |
| SET_TRLEN(ustr[i], 0); |
| maxlen = 1; // reset global. Minimum needed to count "" and NA |
| ustr_n = 0; |
| savetl_end(); |
| free(ustr); |
| ustr = NULL; |
| ustr_alloc = 0; |
| |
| if (retGrp) { |
| int maxgrpn = NA_INTEGER; |
| ngrp = gsngrp[flip]; |
| SEXP s_ends = install("ends"); |
| setAttrib(ans, s_ends, x = allocVector(INTSXP, ngrp)); |
| if (ngrp > 0) { |
| INTEGER(x)[0] = gs[flip][0]; |
| for (int i = 1; i < ngrp; i++) |
| INTEGER(x)[i] = INTEGER(x)[i - 1] + gs[flip][i]; |
| maxgrpn = gsmax[flip]; |
| } |
| SEXP s_maxgrpn = install("maxgrpn"); |
| setAttrib(ans, s_maxgrpn, ScalarInteger(maxgrpn)); |
| SEXP nms; |
| PROTECT(nms = allocVector(STRSXP, 2)); |
| SET_STRING_ELT(nms, 0, mkChar("grouping")); |
| SET_STRING_ELT(nms, 1, mkChar("integer")); |
| setAttrib(ans, R_ClassSymbol, nms); |
| UNPROTECT(1); |
| } |
| |
| Rboolean dropZeros = !retGrp && !isSorted && nalast == 0; |
| if (dropZeros) { |
| int zeros = 0; |
| for (int i = 0; i < n; i++) { |
| if (o[i] == 0) |
| zeros++; |
| } |
| if (zeros > 0) { |
| PROTECT(ans = allocVector(INTSXP, n - zeros)); |
| int *o2 = INTEGER(ans); |
| for (int i = 0, i2 = 0; i < n; i++) { |
| if (o[i] > 0) |
| o2[i2++] = o[i]; |
| } |
| UNPROTECT(1); |
| } |
| } |
| |
| gsfree(); |
| free(radix_xsub); radix_xsub=NULL; radix_xsuballoc=0; |
| free(xsub); free(newo); xsub=newo=NULL; |
| free(xtmp); xtmp=NULL; xtmp_alloc=0; |
| free(otmp); otmp=NULL; otmp_alloc=0; |
| free(csort_otmp); csort_otmp=NULL; csort_otmp_alloc=0; |
| |
| free(cradix_counts); cradix_counts=NULL; cradix_counts_alloc=0; |
| free(cradix_xtmp); cradix_xtmp=NULL; cradix_xtmp_alloc=0; |
| // TO DO: use xtmp already got |
| |
| UNPROTECT(1); |
| return ans; |
| } |