| /* |
| * R : A Computer Language for Statistical Data Analysis |
| * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka |
| * Copyright (C) 1998-2018 The R Core Team. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, a copy is available at |
| * https://www.R-project.org/Licenses/ |
| */ |
| |
| /* <UTF8> |
| byte-level access needed checks. |
| OK in UTF-8 provided quotes, comment, sep and dec chars are ASCII. |
| Also OK in DBCS. |
| |
| We use only ' ', tab, CR, LF as space chars. |
| There is also the possibility of other digits (which we should |
| probably continue to ignore). |
| */ |
| |
| #ifdef HAVE_CONFIG_H |
| #include <config.h> |
| #endif |
| |
| #define R_USE_SIGNALS 1 |
| #include <Defn.h> |
| #include <float.h> /* for DBL_DIG */ |
| #include <Fileio.h> |
| #include <Rconnections.h> |
| #include <errno.h> |
| #include <Print.h> |
| |
| #include <rlocale.h> /* for btowc */ |
| |
| #undef _ |
| #ifdef ENABLE_NLS |
| #include <libintl.h> |
| #define _(String) dgettext ("utils", String) |
| #else |
| #define _(String) (String) |
| #endif |
| |
| |
| /* The size of vector initially allocated by scan */ |
| #define SCAN_BLOCKSIZE 1000 |
| /* The size of the console buffer */ |
| /* NB: in Windows this also needs to be set in gnuwin32/getline/getline.c */ |
| #define CONSOLE_PROMPT_SIZE 256 |
| |
| #define NO_COMCHAR 100000 /* won't occur even in Unicode */ |
| |
| |
| /* The number of distinct strings to track */ |
| #define MAX_STRINGS 10000 |
| |
| |
| static unsigned char ConsoleBuf[CONSOLE_BUFFER_SIZE+1], *ConsoleBufp; |
| static int ConsoleBufCnt; |
| static char ConsolePrompt[CONSOLE_PROMPT_SIZE]; |
| |
| typedef struct { |
| SEXP NAstrings; |
| int quiet; |
| int sepchar; /* = 0 */ /* This gets compared to ints */ |
| char decchar; /* = '.' */ /* This only gets compared to chars */ |
| char *quoteset; /* = NULL */ |
| int comchar; /* = NO_COMCHAR */ |
| int ttyflag; /* = 0 */ |
| Rconnection con; /* = NULL */ |
| Rboolean wasopen; /* = FALSE */ |
| Rboolean escapes; /* = FALSE */ |
| int save; /* = 0; */ |
| Rboolean isLatin1; /* = FALSE */ |
| Rboolean isUTF8; /* = FALSE */ |
| Rboolean skipNul; |
| char convbuf[100]; |
| } LocalData; |
| |
| /* If mode = 0 use for numeric fields where "" is NA |
| If mode = 1 use for character fields where "" is verbatim unless |
| na.strings includes "" */ |
| static R_INLINE int isNAstring(const char *buf, int mode, LocalData *d) |
| { |
| int i; |
| |
| if(!mode && strlen(buf) == 0) return 1; |
| for (i = 0; i < length(d->NAstrings); i++) |
| if (!strcmp(CHAR(STRING_ELT(d->NAstrings, i)), buf)) return 1; |
| return 0; |
| } |
| |
| |
| static R_INLINE Rboolean Rspace(unsigned int c) |
| { |
| if (c == ' ' || c == '\t' || c == '\n' || c == '\r') return TRUE; |
| #ifdef Win32 |
| /* 0xa0 is NBSP in all 8-bit Windows locales */ |
| if(!mbcslocale && c == 0xa0) return TRUE; |
| #else |
| /* 0xa0 is NBSP in Latin-1 */ |
| if(known_to_be_latin1 && c == 0xa0) return TRUE; |
| #endif |
| return FALSE; |
| } |
| |
| |
| /* used by readline() and menu() */ |
| static int ConsoleGetchar(void) |
| { |
| if (--ConsoleBufCnt < 0) { |
| ConsoleBuf[CONSOLE_BUFFER_SIZE] = '\0'; |
| if (R_ReadConsole(ConsolePrompt, ConsoleBuf, |
| CONSOLE_BUFFER_SIZE, 0) == 0) { |
| R_ClearerrConsole(); |
| return R_EOF; |
| } |
| ConsoleBufp = ConsoleBuf; |
| ConsoleBufCnt = (int) strlen((char *)ConsoleBuf); |
| ConsoleBufCnt--; |
| } |
| /* at this point we need to use unsigned char or similar */ |
| return (int) *ConsoleBufp++; |
| } |
| |
| /* used by scan() */ |
| static int ConsoleGetcharWithPushBack(Rconnection con) |
| { |
| char *curLine; |
| int c; |
| |
| if(con->nPushBack > 0) { |
| curLine = con->PushBack[con->nPushBack-1]; |
| c = curLine[con->posPushBack++]; |
| if(con->posPushBack >= strlen(curLine)) { |
| /* last character on a line, so pop the line */ |
| free(curLine); |
| con->nPushBack--; |
| con->posPushBack = 0; |
| if(con->nPushBack == 0) free(con->PushBack); |
| } |
| return c; |
| } else |
| return ConsoleGetchar(); |
| } |
| |
| /* Like strtol, but for ints not longs and returns NA_INTEGER on overflow */ |
| static int Strtoi(const char *nptr, int base) |
| { |
| long res; |
| char *endp; |
| |
| errno = 0; |
| res = strtol(nptr, &endp, base); |
| if (*endp != '\0') res = NA_INTEGER; |
| /* next can happen on a 64-bit platform */ |
| if (res > INT_MAX || res < INT_MIN) res = NA_INTEGER; |
| if (errno == ERANGE) res = NA_INTEGER; |
| return (int) res; |
| } |
| |
| static double |
| Strtod (const char *nptr, char **endptr, Rboolean NA, LocalData *d, int i_exact) |
| { |
| return R_strtod5(nptr, endptr, d->decchar, NA, i_exact); |
| } |
| |
| static Rcomplex |
| strtoc(const char *nptr, char **endptr, Rboolean NA, LocalData *d, int i_exact) |
| { |
| Rcomplex z; |
| double x, y; |
| char *s, *endp; |
| |
| x = Strtod(nptr, &endp, NA, d, i_exact); |
| if (isBlankString(endp)) { |
| z.r = x; z.i = 0; |
| } else if (*endp == 'i') { |
| if (endp == nptr) { |
| z.r = NA_REAL; z.i = NA_REAL; |
| } |
| else { |
| z.r = 0; z.i = x; |
| endp++; |
| } |
| } else { |
| s = endp; |
| y = Strtod(s, &endp, NA, d, i_exact); |
| if (*endp == 'i') { |
| z.r = x; z.i = y; |
| endp++; |
| } else { |
| z.r = NA_REAL; z.i = NA_REAL; |
| endp = (char *) nptr; /* -Wall */ |
| } |
| } |
| *endptr = endp; |
| return z; |
| } |
| |
| |
| static R_INLINE int scanchar_raw(LocalData *d) |
| { |
| int c = (d->ttyflag) ? ConsoleGetcharWithPushBack(d->con) : |
| Rconn_fgetc(d->con); |
| if(c == 0) { |
| if(d->skipNul) { |
| do { |
| c = (d->ttyflag) ? ConsoleGetcharWithPushBack(d->con) : |
| Rconn_fgetc(d->con); |
| } while(c == 0); |
| } |
| } |
| return c; |
| } |
| |
| static R_INLINE void unscanchar(int c, LocalData *d) |
| { |
| d->save = c; |
| } |
| |
| /* For second bytes in a DBCS: |
| should not be called when a char is saved, but be cautious |
| */ |
| static R_INLINE int scanchar2(LocalData *d) |
| { |
| int next; |
| if (d->save) { |
| next = d->save; |
| d->save = 0; |
| } else |
| next = scanchar_raw(d); |
| return next; |
| } |
| |
| static int scanchar(Rboolean inQuote, LocalData *d) |
| { |
| int next; |
| if (d->save) { |
| next = d->save; |
| d->save = 0; |
| } else |
| next = scanchar_raw(d); |
| if(next == d->comchar && !inQuote) { |
| do |
| next = scanchar_raw(d); |
| while (next != '\n' && next != R_EOF); |
| } |
| if(next == '\\' && d->escapes) { |
| next = scanchar_raw(d); |
| if ('0' <= next && next <= '8') { |
| int octal = next - '0'; |
| if ('0' <= (next = scanchar_raw(d)) && next <= '8') { |
| octal = 8 * octal + next - '0'; |
| if ('0' <= (next = scanchar_raw(d)) && next <= '8') { |
| octal = 8 * octal + next - '0'; |
| } else unscanchar(next, d); |
| } else unscanchar(next, d); |
| next = octal; |
| } else |
| switch(next) { |
| case 'a': next = '\a'; break; |
| case 'b': next = '\b'; break; |
| case 'f': next = '\f'; break; |
| case 'n': next = '\n'; break; |
| case 'r': next = '\r'; break; |
| case 't': next = '\t'; break; |
| case 'v': next = '\v'; break; |
| case 'x': { |
| int val = 0; int i, ext; |
| for(i = 0; i < 2; i++) { |
| next = scanchar_raw(d); |
| if(next >= '0' && next <= '9') ext = next - '0'; |
| else if (next >= 'A' && next <= 'F') ext = next - 'A' + 10; |
| else if (next >= 'a' && next <= 'f') ext = next - 'a' + 10; |
| else {unscanchar(next, d); break;} |
| val = 16*val + ext; |
| } |
| next = val; |
| } |
| break; |
| default: |
| /* Any other char and even EOF escapes to itself, but we |
| need to preserve \" etc inside quotes. |
| */ |
| if(inQuote && strchr(d->quoteset, next)) { |
| unscanchar(next, d); |
| next = '\\'; |
| } |
| break; |
| } |
| } |
| return next; |
| } |
| |
| |
| #include "RBufferUtils.h" |
| |
| |
| SEXP countfields(SEXP args) |
| { |
| SEXP ans, file, sep, bns, quotes, comstr; |
| int nfields, nskip, i, c, inquote, quote = 0; |
| int blocksize, nlines, blskip; |
| const char *p; |
| Rboolean dbcslocale = (MB_CUR_MAX == 2); |
| LocalData data = {NULL, 0, 0, '.', NULL, NO_COMCHAR, 0, NULL, FALSE, |
| FALSE, 0, FALSE, FALSE}; |
| data.NAstrings = R_NilValue; |
| |
| args = CDR(args); |
| |
| file = CAR(args); args = CDR(args); |
| sep = CAR(args); args = CDR(args); |
| quotes = CAR(args); args = CDR(args); |
| nskip = asInteger(CAR(args)); args = CDR(args); |
| blskip = asLogical(CAR(args)); args = CDR(args); |
| comstr = CAR(args); |
| if (TYPEOF(comstr) != STRSXP || length(comstr) != 1) |
| error(_("invalid '%s' argument"), "comment.char"); |
| p = translateChar(STRING_ELT(comstr, 0)); |
| data.comchar = NO_COMCHAR; /* here for -Wall */ |
| if (strlen(p) > 1) |
| error(_("invalid '%s' argument"), "comment.char"); |
| else if (strlen(p) == 1) data.comchar = (unsigned char)*p; |
| |
| if (nskip < 0 || nskip == NA_INTEGER) nskip = 0; |
| if (blskip == NA_LOGICAL) blskip = 1; |
| |
| if (isString(sep) || isNull(sep)) { |
| if (length(sep) == 0) data.sepchar = 0; |
| else data.sepchar = (unsigned char) translateChar(STRING_ELT(sep, 0))[0]; |
| /* gets compared to chars: bug prior to 1.7.0 */ |
| } else error(_("invalid '%s' argument"), "sep"); |
| |
| if (isString(quotes)) { |
| const char *sc = translateChar(STRING_ELT(quotes, 0)); |
| if (strlen(sc)) data.quoteset = strdup(sc); |
| else data.quoteset = ""; |
| } else if (isNull(quotes)) |
| data.quoteset = ""; |
| else |
| error(_("invalid quote symbol set")); |
| |
| i = asInteger(file); |
| data.con = getConnection(i); |
| if(i == 0) { |
| data.ttyflag = 1; |
| } else { |
| data.ttyflag = 0; |
| data.wasopen = data.con->isopen; |
| if(!data.wasopen) { |
| strcpy(data.con->mode, "r"); |
| if(!data.con->open(data.con)) |
| error(_("cannot open the connection")); |
| if(!data.con->canread) { |
| data.con->close(data.con); |
| error(_("cannot read from this connection")); |
| } |
| } else { |
| if(!data.con->canread) |
| error(_("cannot read from this connection")); |
| } |
| for (i = 0; i < nskip; i++) /* MBCS-safe */ |
| while ((c = scanchar(FALSE, &data)) != '\n' && c != R_EOF); |
| } |
| |
| blocksize = SCAN_BLOCKSIZE; |
| PROTECT(ans = allocVector(INTSXP, blocksize)); |
| nlines = 0; |
| nfields = 0; |
| inquote = 0; |
| |
| data.save = 0; |
| |
| for (;;) { |
| c = scanchar(inquote, &data); |
| if (c == R_EOF) { |
| if (nfields != 0) |
| INTEGER(ans)[nlines] = nfields; |
| else nlines--; |
| goto donecf; |
| } |
| else if (c == '\n') { |
| if (inquote) { |
| INTEGER(ans)[nlines] = NA_INTEGER; |
| nlines++; |
| } else if (nfields || !blskip) { |
| INTEGER(ans)[nlines] = nfields; |
| nlines++; |
| nfields = 0; |
| inquote = 0; |
| } |
| if (nlines == blocksize) { |
| bns = ans; |
| blocksize = 2 * blocksize; |
| ans = allocVector(INTSXP, blocksize); |
| UNPROTECT(1); |
| PROTECT(ans); |
| copyVector(ans, bns); |
| } |
| continue; |
| } |
| else if (data.sepchar) { |
| if (nfields == 0) |
| nfields++; |
| if (inquote && c == R_EOF) { |
| if(!data.wasopen) data.con->close(data.con); |
| error(_("quoted string on line %d terminated by EOF"), inquote); |
| } |
| if (inquote && c == quote) |
| inquote = 0; |
| else if (strchr(data.quoteset, c)) { |
| inquote = nlines + 1; |
| quote = c; |
| } |
| if (c == data.sepchar && !inquote) |
| nfields++; |
| } |
| else if (!Rspace(c)) { |
| if (strchr(data.quoteset, c)) { |
| quote = c; |
| inquote = nlines + 1; |
| while ((c = scanchar(inquote, &data)) != quote) { |
| if (c == R_EOF) { |
| if(!data.wasopen) data.con->close(data.con); |
| error(_("quoted string on line %d terminated by EOF"), inquote); |
| } else if (c == '\n') { |
| INTEGER(ans)[nlines] = NA_INTEGER; |
| nlines++; |
| if (nlines == blocksize) { |
| bns = ans; |
| blocksize = 2 * blocksize; |
| ans = allocVector(INTSXP, blocksize); |
| UNPROTECT(1); |
| PROTECT(ans); |
| copyVector(ans, bns); |
| } |
| } |
| } |
| inquote = 0; |
| } else { |
| do { |
| if(dbcslocale && btowc(c) == WEOF) scanchar2(&data); |
| c = scanchar(FALSE, &data); |
| } while (!Rspace(c) && c != R_EOF); |
| if (c == R_EOF) c = '\n'; |
| unscanchar(c, &data); |
| } |
| nfields++; |
| } |
| |
| } |
| donecf: |
| /* we might have a character that was unscanchar-ed. |
| So pushback if possible */ |
| if (data.save && !data.ttyflag && data.wasopen) { |
| char line[2] = " "; |
| line[0] = (char) data.save; |
| con_pushback(data.con, FALSE, line); |
| } |
| if(!data.wasopen) data.con->close(data.con); |
| |
| if (nlines < 0) { |
| UNPROTECT(1); |
| return R_NilValue; |
| } |
| if (nlines == blocksize) { |
| UNPROTECT(1); |
| return ans; |
| } |
| |
| bns = allocVector(INTSXP, nlines+1); |
| for (i = 0; i <= nlines; i++) |
| INTEGER(bns)[i] = INTEGER(ans)[i]; |
| UNPROTECT(1); |
| if (data.quoteset[0]) free(data.quoteset); |
| return bns; |
| } |
| |
| /* A struct used by typeconvert to keep track of possible types for the input */ |
| typedef struct typecvt_possible_types { |
| unsigned int islogical : 1; |
| unsigned int isinteger : 1; |
| unsigned int isreal : 1; |
| unsigned int iscomplex : 1; |
| } Typecvt_Info; |
| |
| |
| /* Sets fields of typeInfo, ruling out possible types based on s. |
| * |
| * The typeInfo struct should be initialized with all fields TRUE. |
| */ |
| static void ruleout_types(const char *s, Typecvt_Info *typeInfo, LocalData *data, |
| Rboolean exact) |
| { |
| int res; |
| char *endp; |
| |
| if (typeInfo->islogical) { |
| if (strcmp(s, "F") == 0 || strcmp(s, "T") == 0 || |
| strcmp(s, "FALSE") == 0 || strcmp(s, "TRUE") == 0) { |
| typeInfo->isinteger = FALSE; |
| typeInfo->isreal = FALSE; |
| typeInfo->iscomplex = FALSE; |
| return; // short cut |
| } else { |
| typeInfo->islogical = FALSE; |
| } |
| } |
| |
| if (typeInfo->isinteger) { |
| res = Strtoi(s, 10); |
| if (res == NA_INTEGER) |
| typeInfo->isinteger = FALSE; |
| } |
| |
| if (typeInfo->isreal) { |
| Strtod(s, &endp, TRUE, data, exact); |
| if (!isBlankString(endp)) |
| typeInfo->isreal = FALSE; |
| } |
| |
| if (typeInfo->iscomplex) { |
| strtoc(s, &endp, TRUE, data, exact); |
| if (!isBlankString(endp)) |
| typeInfo->iscomplex = FALSE; |
| } |
| } |
| |
| |
| /* type.convert(char, na.strings, as.is, dec, numerals) */ |
| |
| /* This is a horrible hack which is used in read.table to take a |
| character variable, if possible to convert it to a logical, |
| integer, numeric or complex variable. If this is not possible, |
| the result is a character string if as.is == TRUE |
| or a factor if as.is == FALSE. */ |
| |
| |
| SEXP typeconvert(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| SEXP cvec, a, dup, levs, dims, names, dec, numerals; |
| SEXP rval = R_NilValue; /* -Wall */ |
| int i, j, len, asIs, i_exact; |
| Rboolean done = FALSE, exact; |
| char *endp; |
| const char *tmp = NULL; |
| LocalData data = {NULL, 0, 0, '.', NULL, NO_COMCHAR, 0, NULL, FALSE, |
| FALSE, 0, FALSE, FALSE}; |
| Typecvt_Info typeInfo; /* keep track of possible types of cvec */ |
| typeInfo.islogical = TRUE; /* we can't rule anything out initially */ |
| typeInfo.isinteger = TRUE; |
| typeInfo.isreal = TRUE; |
| typeInfo.iscomplex = TRUE; |
| data.NAstrings = R_NilValue; |
| |
| args = CDR(args); |
| |
| if (!isString(CAR(args))) |
| error(_("the first argument must be of mode character")); |
| |
| data.NAstrings = CADR(args); |
| if (TYPEOF(data.NAstrings) != STRSXP) |
| error(_("invalid '%s' argument"), "na.strings"); |
| |
| asIs = asLogical(CADDR(args)); |
| if (asIs == NA_LOGICAL) asIs = 0; |
| |
| dec = CADDDR(args); |
| if (isString(dec) || isNull(dec)) { |
| if (length(dec) == 0) |
| data.decchar = '.'; |
| else |
| data.decchar = translateChar(STRING_ELT(dec, 0))[0]; |
| } |
| |
| numerals = CAD4R(args); // string, one of c("allow.loss", "warn.loss", "no.loss") |
| if (isString(numerals)) { |
| tmp = CHAR(STRING_ELT(numerals, 0)); |
| if(strcmp(tmp, "allow.loss") == 0) { |
| i_exact = FALSE; |
| exact = FALSE; |
| } else if(strcmp(tmp, "warn.loss") == 0) { |
| i_exact = NA_INTEGER; |
| exact = FALSE; |
| } else if(strcmp(tmp, "no.loss") == 0) { |
| i_exact = TRUE; |
| exact = TRUE; |
| } else // should never happen |
| error(_("invalid 'numerals' string: \"%s\""), tmp); |
| |
| } else { // (currently never happens): use default |
| i_exact = FALSE; |
| exact = FALSE; |
| } |
| |
| cvec = CAR(args); |
| len = length(cvec); |
| |
| /* save the dim/dimnames attributes */ |
| |
| PROTECT(dims = getAttrib(cvec, R_DimSymbol)); |
| if (isArray(cvec)) |
| PROTECT(names = getAttrib(cvec, R_DimNamesSymbol)); |
| else |
| PROTECT(names = getAttrib(cvec, R_NamesSymbol)); |
| |
| /* Find the first non-NA entry (empty => NA) */ |
| for (i = 0; i < len; i++) { |
| tmp = CHAR(STRING_ELT(cvec, i)); |
| if (!(STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 |
| || isNAstring(tmp, 1, &data) || isBlankString(tmp))) |
| break; |
| } |
| if (i < len) { // Found non-NA entry; use it to screen: |
| ruleout_types(tmp, &typeInfo, &data, exact); |
| } |
| |
| if (typeInfo.islogical) { |
| PROTECT(rval = allocVector(LGLSXP, len)); |
| for (i = 0; i < len; i++) { |
| tmp = CHAR(STRING_ELT(cvec, i)); |
| if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 |
| || isNAstring(tmp, 1, &data) || isBlankString(tmp)) |
| LOGICAL(rval)[i] = NA_LOGICAL; |
| else { |
| if (strcmp(tmp, "F") == 0 || strcmp(tmp, "FALSE") == 0) |
| LOGICAL(rval)[i] = 0; |
| else if(strcmp(tmp, "T") == 0 || strcmp(tmp, "TRUE") == 0) |
| LOGICAL(rval)[i] = 1; |
| else { |
| typeInfo.islogical = FALSE; |
| ruleout_types(tmp, &typeInfo, &data, exact); |
| break; |
| } |
| } |
| } |
| if (typeInfo.islogical) done = TRUE; else UNPROTECT(1); |
| } |
| |
| if (!done && typeInfo.isinteger) { |
| PROTECT(rval = allocVector(INTSXP, len)); |
| for (i = 0; i < len; i++) { |
| tmp = CHAR(STRING_ELT(cvec, i)); |
| if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 |
| || isNAstring(tmp, 1, &data) || isBlankString(tmp)) |
| INTEGER(rval)[i] = NA_INTEGER; |
| else { |
| INTEGER(rval)[i] = Strtoi(tmp, 10); |
| if (INTEGER(rval)[i] == NA_INTEGER) { |
| typeInfo.isinteger = FALSE; |
| ruleout_types(tmp, &typeInfo, &data, exact); |
| break; |
| } |
| } |
| } |
| if(typeInfo.isinteger) done = TRUE; else UNPROTECT(1); |
| } |
| |
| if (!done && typeInfo.isreal) { |
| PROTECT(rval = allocVector(REALSXP, len)); |
| for (i = 0; i < len; i++) { |
| tmp = CHAR(STRING_ELT(cvec, i)); |
| if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 |
| || isNAstring(tmp, 1, &data) || isBlankString(tmp)) |
| REAL(rval)[i] = NA_REAL; |
| else { |
| REAL(rval)[i] = Strtod(tmp, &endp, FALSE, &data, i_exact); |
| if (!isBlankString(endp)) { |
| typeInfo.isreal = FALSE; |
| ruleout_types(tmp, &typeInfo, &data, exact); |
| break; |
| } |
| } |
| } |
| if(typeInfo.isreal) done = TRUE; else UNPROTECT(1); |
| } |
| |
| if (!done && typeInfo.iscomplex) { |
| PROTECT(rval = allocVector(CPLXSXP, len)); |
| for (i = 0; i < len; i++) { |
| tmp = CHAR(STRING_ELT(cvec, i)); |
| if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 |
| || isNAstring(tmp, 1, &data) || isBlankString(tmp)) |
| COMPLEX(rval)[i].r = COMPLEX(rval)[i].i = NA_REAL; |
| else { |
| COMPLEX(rval)[i] = strtoc(tmp, &endp, FALSE, &data, i_exact); |
| if (!isBlankString(endp)) { |
| typeInfo.iscomplex = FALSE; |
| /* this is not needed, unless other cases are added */ |
| ruleout_types(tmp, &typeInfo, &data, exact); |
| break; |
| } |
| } |
| } |
| if(typeInfo.iscomplex) done = TRUE; else UNPROTECT(1); |
| } |
| |
| if (!done) { |
| if (asIs) { |
| PROTECT(rval = duplicate(cvec)); |
| for (i = 0; i < len; i++) |
| if(isNAstring(CHAR(STRING_ELT(rval, i)), 1, &data)) |
| SET_STRING_ELT(rval, i, NA_STRING); |
| } |
| else { |
| PROTECT(dup = duplicated(cvec, FALSE)); |
| j = 0; |
| for (i = 0; i < len; i++) { |
| /* <NA> is never to be a level here */ |
| if (STRING_ELT(cvec, i) == NA_STRING) continue; |
| if (LOGICAL(dup)[i] == 0 && !isNAstring(CHAR(STRING_ELT(cvec, i)), 1, &data)) |
| j++; |
| } |
| |
| PROTECT(levs = allocVector(STRSXP,j)); |
| j = 0; |
| for (i = 0; i < len; i++) { |
| if (STRING_ELT(cvec, i) == NA_STRING) continue; |
| if (LOGICAL(dup)[i] == 0 && !isNAstring(CHAR(STRING_ELT(cvec, i)), 1, &data)) |
| SET_STRING_ELT(levs, j++, STRING_ELT(cvec, i)); |
| } |
| |
| /* We avoid an allocation by reusing dup, |
| * a LGLSXP of the right length |
| */ |
| rval = dup; |
| SET_TYPEOF(rval, INTSXP); |
| |
| /* put the levels in lexicographic order */ |
| |
| sortVector(levs, FALSE); |
| |
| PROTECT(a = matchE(levs, cvec, NA_INTEGER, env)); |
| for (i = 0; i < len; i++) |
| INTEGER(rval)[i] = INTEGER(a)[i]; |
| |
| setAttrib(rval, R_LevelsSymbol, levs); |
| PROTECT(a = mkString("factor")); |
| setAttrib(rval, R_ClassSymbol, a); |
| UNPROTECT(3); |
| } |
| } |
| |
| setAttrib(rval, R_DimSymbol, dims); |
| setAttrib(rval, isArray(cvec) ? R_DimNamesSymbol : R_NamesSymbol, names); |
| UNPROTECT(3); |
| return rval; |
| } |
| |
| |
| /* Works with digits, but OK in UTF-8 */ |
| SEXP menu(SEXP choices) |
| { |
| int c, j; |
| double first; |
| char buffer[MAXELTSIZE], *bufp = buffer; |
| LocalData data = {NULL, 0, 0, '.', NULL, NO_COMCHAR, 0, NULL, FALSE, |
| FALSE, 0, FALSE, FALSE}; |
| data.NAstrings = R_NilValue; |
| |
| |
| if (!isString(choices)) |
| error(_("invalid '%s' argument"), "choices"); |
| |
| snprintf(ConsolePrompt, CONSOLE_PROMPT_SIZE, _("Selection: ")); |
| |
| while ((c = ConsoleGetchar()) != '\n' && c != R_EOF) { |
| if (bufp >= &buffer[MAXELTSIZE - 2]) continue; |
| *bufp++ = (char) c; |
| } |
| *bufp++ = '\0'; |
| ConsolePrompt[0] = '\0'; |
| |
| bufp = buffer; |
| while (Rspace((int)*bufp)) bufp++; |
| first = LENGTH(choices) + 1; |
| if (isdigit((int)*bufp)) { |
| first = Strtod(buffer, NULL, TRUE, &data, /*exact*/FALSE); |
| } else { |
| for (j = 0; j < LENGTH(choices); j++) { |
| if (streql(translateChar(STRING_ELT(choices, j)), buffer)) { |
| first = j + 1; |
| break; |
| } |
| } |
| } |
| return ScalarInteger((int)first); |
| } |
| |
| /* readTableHead(file, nlines, comment.char, blank.lines.skip, quote, sep) */ |
| /* simplified version of readLines, with skip of blank lines and |
| comment-only lines */ |
| #define BUF_SIZE 1000 |
| SEXP readtablehead(SEXP args) |
| { |
| SEXP file, comstr, ans = R_NilValue, ans2, quotes, sep; |
| int nlines, i, c, quote = 0, nread, nbuf, buf_size = BUF_SIZE, |
| blskip, skipNul; |
| const char *p; char *buf; |
| Rboolean empty, skip, firstnonwhite; |
| LocalData data = {NULL, 0, 0, '.', NULL, NO_COMCHAR, 0, NULL, FALSE, |
| FALSE, 0, FALSE, FALSE, FALSE}; |
| data.NAstrings = R_NilValue; |
| |
| args = CDR(args); |
| |
| file = CAR(args); args = CDR(args); |
| nlines = asInteger(CAR(args)); args = CDR(args); |
| comstr = CAR(args); args = CDR(args); |
| blskip = asLogical(CAR(args)); args = CDR(args); |
| quotes = CAR(args); args = CDR(args); |
| sep = CAR(args); args = CDR(args); |
| skipNul = asLogical(CAR(args)); |
| |
| if (nlines <= 0 || nlines == NA_INTEGER) |
| error(_("invalid '%s' argument"), "nlines"); |
| if (blskip == NA_LOGICAL) blskip = 1; |
| if (isString(quotes)) { |
| const char *sc = translateChar(STRING_ELT(quotes, 0)); |
| /* FIXME: will leak memory at long jump */ |
| if (strlen(sc)) data.quoteset = strdup(sc); |
| else data.quoteset = ""; |
| } else if (isNull(quotes)) |
| data.quoteset = ""; |
| else |
| error(_("invalid quote symbol set")); |
| |
| if (TYPEOF(comstr) != STRSXP || length(comstr) != 1) |
| error(_("invalid '%s' argument"), "comment.char"); |
| p = translateChar(STRING_ELT(comstr, 0)); |
| data.comchar = NO_COMCHAR; /* here for -Wall */ |
| if (strlen(p) > 1) |
| error(_("invalid '%s' argument"), "comment.char"); |
| else if (strlen(p) == 1) data.comchar = (int)*p; |
| if (isString(sep) || isNull(sep)) { |
| if (length(sep) == 0) data.sepchar = 0; |
| else data.sepchar = (unsigned char) translateChar(STRING_ELT(sep, 0))[0]; |
| /* gets compared to chars: bug prior to 1.7.0 */ |
| } else error(_("invalid '%s' argument"), "sep"); |
| if (skipNul == NA_LOGICAL) error(_("invalid '%s' argument"), "skipNul"); |
| data.skipNul = skipNul; |
| |
| i = asInteger(file); |
| data.con = getConnection(i); |
| data.ttyflag = (i == 0); |
| data.wasopen = data.con->isopen; |
| if(!data.wasopen) { |
| strcpy(data.con->mode, "r"); |
| if(!data.con->open(data.con)) error(_("cannot open the connection")); |
| } else { /* for a non-blocking connection, more input may |
| have become available, so re-position */ |
| if(data.con->canseek && !data.con->blocking) |
| data.con->seek(data.con, data.con->seek(data.con, -1, 1, 1), 1, 1); |
| } |
| |
| /* FIXME: will leak memory at long jump */ |
| buf = (char *) malloc(buf_size); |
| if(!buf) |
| error(_("cannot allocate buffer in 'readTableHead'")); |
| |
| PROTECT(ans = allocVector(STRSXP, nlines)); |
| for(nread = 0; nread < nlines; ) { |
| nbuf = 0; empty = TRUE; skip = FALSE; firstnonwhite = TRUE; |
| if (data.ttyflag) |
| snprintf(ConsolePrompt, CONSOLE_PROMPT_SIZE, "%d: ", nread); |
| /* want to interpret comments here, not in scanchar */ |
| while((c = scanchar(TRUE, &data)) != R_EOF) { |
| if(nbuf >= buf_size - 3) { |
| buf_size *= 2; |
| /* FIXME: will leak memory at long jump */ |
| char *tmp = (char *) realloc(buf, buf_size); |
| if(!tmp) { |
| free(buf); |
| if (data.quoteset[0]) free(data.quoteset); |
| error(_("cannot allocate buffer in 'readTableHead'")); |
| } else buf = tmp; |
| } |
| /* Need to handle escaped embedded quotes, and how they are |
| escaped depends on 'sep' */ |
| if(quote) { |
| if(data.sepchar == 0 && c == '\\') { |
| /* all escapes should be passed through */ |
| /* fillBuffer would not copy a backslash preceding quote */ |
| buf[nbuf++] = (char) c; |
| c = scanchar(TRUE, &data); |
| if(c == R_EOF) { |
| free(buf); |
| if (data.quoteset[0]) free(data.quoteset); |
| error(_("\\ followed by EOF")); |
| } |
| buf[nbuf++] = (char) c; |
| continue; |
| } else if(quote && c == quote) { |
| if(data.sepchar == 0) |
| quote = 0; |
| else { /* need to check for doubled quote */ |
| char c2 = (char) scanchar(TRUE, &data); |
| if(c2 == quote) |
| buf[nbuf++] = (char) c; /* and c = c2 */ |
| else { |
| unscanchar(c2, &data); |
| quote = 0; |
| } |
| } |
| } |
| } else if(!skip && (firstnonwhite || data.sepchar != 0) && strchr(data.quoteset, c)) |
| quote = c; |
| else if (!skip && data.sepchar == 0 && Rspace(c)) |
| /* firstnonwhite stays true within quoted section */ |
| firstnonwhite = TRUE; |
| else if (c != ' ' && c != '\t') firstnonwhite = FALSE; |
| /* A line is empty only if it contains nothing before |
| EOL, EOF or a comment char. |
| A line containing just white space is not empty if sep="," |
| However foo\nEOF does not have a final empty line. |
| */ |
| if(empty && !skip) |
| if(c != '\n' && c != data.comchar) empty = FALSE; |
| if(!quote && !skip && c == data.comchar) skip = TRUE; |
| if(quote || c != '\n') buf[nbuf++] = (char) c; else break; |
| } |
| buf[nbuf] = '\0'; |
| if(data.ttyflag && empty) goto no_more_lines; |
| if(!empty || (c != R_EOF && !blskip)) { /* see previous comment */ |
| SET_STRING_ELT(ans, nread, mkChar(buf)); |
| nread++; |
| if (strlen(buf) < nbuf) // PR#15625 |
| warning("line %d appears to contain embedded nulls", nread); |
| } |
| if(c == R_EOF) goto no_more_lines; |
| } |
| UNPROTECT(1); |
| free(buf); |
| if(!data.wasopen) data.con->close(data.con); |
| if (data.quoteset[0]) free(data.quoteset); |
| return ans; |
| |
| no_more_lines: |
| if(!data.wasopen) data.con->close(data.con); |
| if(nbuf > 0) { /* incomplete last line */ |
| if(data.con->text && data.con->blocking) { |
| warning(_("incomplete final line found by readTableHeader on '%s'"), |
| data.con->description); |
| } else { |
| free(buf); |
| if (data.quoteset[0]) free(data.quoteset); |
| error(_("incomplete final line found by readTableHeader on '%s'"), |
| data.con->description); |
| } |
| } |
| free(buf); |
| PROTECT(ans2 = allocVector(STRSXP, nread)); |
| for(i = 0; i < nread; i++) |
| SET_STRING_ELT(ans2, i, STRING_ELT(ans, i)); |
| UNPROTECT(2); |
| if (data.quoteset[0]) free(data.quoteset); |
| return ans2; |
| } |
| |
| /* --------- write.table --------- */ |
| |
| /* write.table(x, file, nr, nc, rnames, sep, eol, na, dec, quote, qstring) |
| x is a matrix or data frame |
| file is a connection |
| sep eol dec qstring are character strings |
| quote is a numeric vector |
| */ |
| |
| static Rboolean isna(SEXP x, R_xlen_t indx) |
| { |
| Rcomplex rc; |
| switch(TYPEOF(x)) { |
| case LGLSXP: |
| return LOGICAL(x)[indx] == NA_LOGICAL; |
| break; |
| case INTSXP: |
| return INTEGER(x)[indx] == NA_INTEGER; |
| break; |
| case REALSXP: |
| return ISNAN(REAL(x)[indx]); |
| break; |
| case STRSXP: |
| return STRING_ELT(x, indx) == NA_STRING; |
| break; |
| case CPLXSXP: |
| rc = COMPLEX(x)[indx]; |
| return ISNAN(rc.r) || ISNAN(rc.i); |
| break; |
| default: |
| break; |
| } |
| return FALSE; |
| } |
| |
| /* a version of EncodeElement with different escaping of char strings */ |
| static const char |
| *EncodeElement2(SEXP x, R_xlen_t indx, Rboolean quote, |
| Rboolean qmethod, R_StringBuffer *buff, const char *dec) |
| { |
| int nbuf; |
| char *q; |
| const char *p, *p0; |
| |
| if (indx < 0 || indx >= xlength(x)) |
| error(_("index out of range")); |
| if(TYPEOF(x) == STRSXP) { |
| const void *vmax = vmaxget(); |
| p0 = translateChar(STRING_ELT(x, indx)); |
| if(!quote) return p0; |
| for(nbuf = 2, p = p0; *p; p++) /* find buffer length needed */ |
| nbuf += (*p == '"') ? 2 : 1; |
| R_AllocStringBuffer(nbuf, buff); |
| q = buff->data; *q++ = '"'; |
| for(p = p0; *p;) { |
| if(*p == '"') *q++ = qmethod ? '\\' : '"'; |
| *q++ = *p++; |
| } |
| *q++ = '"'; *q = '\0'; |
| vmaxset(vmax); |
| return buff->data; |
| } |
| return EncodeElement0(x, indx, quote ? '"' : 0, dec); |
| } |
| |
| typedef struct wt_info { |
| Rboolean wasopen; |
| Rconnection con; |
| R_StringBuffer *buf; |
| int savedigits; |
| } wt_info; |
| |
| /* utility to cleanup e.g. after interrupts */ |
| static void wt_cleanup(void *data) |
| { |
| wt_info *ld = data; |
| if(!ld->wasopen) { |
| errno = 0; |
| ld->con->close(ld->con); |
| if (ld->con->status != NA_INTEGER && ld->con->status < 0) { |
| int serrno = errno; |
| if (serrno) |
| warning(_("Problem closing connection: %s"), strerror(serrno)); |
| else |
| warning(_("Problem closing connection")); |
| } |
| } |
| R_FreeStringBuffer(ld->buf); |
| R_print.digits = ld->savedigits; |
| } |
| |
| SEXP writetable(SEXP call, SEXP op, SEXP args, SEXP env) |
| { |
| SEXP x, sep, rnames, eol, na, dec, quote, xj; |
| Rboolean wasopen, quote_rn = FALSE, *quote_col; |
| Rconnection con; |
| const char *csep, *ceol, *cna, *sdec, *tmp = NULL /* -Wall */; |
| SEXP *levels; |
| R_StringBuffer strBuf = {NULL, 0, MAXELTSIZE}; |
| wt_info wi; |
| RCNTXT cntxt; |
| |
| args = CDR(args); |
| |
| x = CAR(args); args = CDR(args); |
| /* this is going to be a connection open or openable for writing */ |
| if(!inherits(CAR(args), "connection")) |
| error(_("'file' is not a connection")); |
| con = getConnection(asInteger(CAR(args))); args = CDR(args); |
| if(!con->canwrite) |
| error(_("cannot write to this connection")); |
| wasopen = con->isopen; |
| if(!wasopen) { |
| strcpy(con->mode, "wt"); |
| if(!con->open(con)) error(_("cannot open the connection")); |
| } |
| int nr = asInteger(CAR(args)); args = CDR(args); |
| int nc = asInteger(CAR(args)); args = CDR(args); |
| rnames = CAR(args); args = CDR(args); |
| sep = CAR(args); args = CDR(args); |
| eol = CAR(args); args = CDR(args); |
| na = CAR(args); args = CDR(args); |
| dec = CAR(args); args = CDR(args); |
| quote = CAR(args); args = CDR(args); |
| int qmethod = asLogical(CAR(args)); |
| |
| if(nr == NA_INTEGER) error(_("invalid '%s' argument"), "nr"); |
| if(nc == NA_INTEGER) error(_("invalid '%s' argument"), "nc"); |
| if(!isNull(rnames) && !isString(rnames)) |
| error(_("invalid '%s' argument"), "rnames"); |
| if(!isString(sep)) error(_("invalid '%s' argument"), "sep"); |
| if(!isString(eol)) error(_("invalid '%s' argument"), "eol"); |
| if(!isString(na)) error(_("invalid '%s' argument"), "na"); |
| if(!isString(dec)) error(_("invalid '%s' argument"), "dec"); |
| if(qmethod == NA_LOGICAL) error(_("invalid '%s' argument"), "qmethod"); |
| csep = translateChar(STRING_ELT(sep, 0)); |
| ceol = translateChar(STRING_ELT(eol, 0)); |
| cna = translateChar(STRING_ELT(na, 0)); |
| sdec = translateChar(STRING_ELT(dec, 0)); |
| if(strlen(sdec) != 1) |
| error(_("'dec' must be a single character")); |
| quote_col = (Rboolean *) R_alloc(nc, sizeof(Rboolean)); |
| for(int j = 0; j < nc; j++) quote_col[j] = FALSE; |
| for(int i = 0; i < length(quote); i++) { /* NB, quote might be NULL */ |
| int this = INTEGER(quote)[i]; |
| if(this == 0) quote_rn = TRUE; |
| if(this > 0) quote_col[this - 1] = TRUE; |
| } |
| R_AllocStringBuffer(0, &strBuf); |
| PrintDefaults(); |
| wi.savedigits = R_print.digits; R_print.digits = DBL_DIG;/* MAX precision */ |
| wi.con = con; |
| wi.wasopen = wasopen; |
| wi.buf = &strBuf; |
| begincontext(&cntxt, CTXT_CCODE, call, R_BaseEnv, R_BaseEnv, |
| R_NilValue, R_NilValue); |
| cntxt.cend = &wt_cleanup; |
| cntxt.cenddata = &wi; |
| |
| if(isVectorList(x)) { /* A data frame */ |
| |
| /* handle factors internally, check integrity */ |
| levels = (SEXP *) R_alloc(nc, sizeof(SEXP)); |
| for(int j = 0; j < nc; j++) { |
| xj = VECTOR_ELT(x, j); |
| if(LENGTH(xj) != nr) |
| error(_("corrupt data frame -- length of column %d does not match nrows"), |
| j+1); |
| if(inherits(xj, "factor")) { |
| levels[j] = getAttrib(xj, R_LevelsSymbol); |
| } else levels[j] = R_NilValue; |
| } |
| |
| for(int i = 0; i < nr; i++) { |
| if(i % 1000 == 999) R_CheckUserInterrupt(); |
| if(!isNull(rnames)) |
| Rconn_printf(con, "%s%s", |
| EncodeElement2(rnames, i, quote_rn, qmethod, |
| &strBuf, sdec), csep); |
| for(int j = 0; j < nc; j++) { |
| xj = VECTOR_ELT(x, j); |
| if(j > 0) Rconn_printf(con, "%s", csep); |
| if(isna(xj, i)) tmp = cna; |
| else { |
| if(!isNull(levels[j])) { |
| /* We do not assume factors have integer levels, |
| although they should. */ |
| if(TYPEOF(xj) == INTSXP) |
| tmp = EncodeElement2(levels[j], INTEGER(xj)[i] - 1, |
| quote_col[j], qmethod, |
| &strBuf, sdec); |
| else if(TYPEOF(xj) == REALSXP) |
| tmp = EncodeElement2(levels[j], |
| (R_xlen_t) (REAL(xj)[i] - 1), |
| quote_col[j], qmethod, |
| &strBuf, sdec); |
| else |
| error(_("column %s claims to be a factor but does not have numeric codes"), |
| j+1); |
| } else { |
| tmp = EncodeElement2(xj, i, quote_col[j], qmethod, |
| &strBuf, sdec); |
| } |
| } |
| Rconn_printf(con, "%s", tmp); |
| } |
| Rconn_printf(con, "%s", ceol); |
| } |
| |
| } else { /* A matrix */ |
| |
| if(!isVectorAtomic(x)) |
| UNIMPLEMENTED_TYPE("write.table, matrix method", x); |
| /* quick integrity check */ |
| if(XLENGTH(x) != (R_xlen_t)nr * nc) |
| error(_("corrupt matrix -- dims do not match length")); |
| |
| for(int i = 0; i < nr; i++) { |
| if(i % 1000 == 999) R_CheckUserInterrupt(); |
| if(!isNull(rnames)) |
| Rconn_printf(con, "%s%s", |
| EncodeElement2(rnames, i, quote_rn, qmethod, |
| &strBuf, sdec), csep); |
| for(int j = 0; j < nc; j++) { |
| if(j > 0) Rconn_printf(con, "%s", csep); |
| if(isna(x, i + (R_xlen_t)j*nr)) tmp = cna; |
| else { |
| tmp = EncodeElement2(x, i + (R_xlen_t)j*nr, |
| quote_col[j], qmethod, |
| &strBuf, sdec); |
| } |
| Rconn_printf(con, "%s", tmp); |
| } |
| Rconn_printf(con, "%s", ceol); |
| } |
| |
| } |
| endcontext(&cntxt); |
| wt_cleanup(&wi); |
| return R_NilValue; |
| } |