blob: 204c83b696227313604269abfa5fcda2058a34b5 [file] [log] [blame]
/*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
* Copyright (C) 2001-2018 The R Core Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, a copy is available at
* https://www.R-project.org/Licenses/
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <Parse.h> // -> IOStuff.h, Defn.h
#include <Internal.h>
#include <Fileio.h>
#include <Rconnections.h>
SEXP attribute_hidden getParseContext(void)
{
int i, last = PARSE_CONTEXT_SIZE;
char context[PARSE_CONTEXT_SIZE+1];
SEXP ans = R_NilValue, ans2;
int nn, nread;
char c;
context[last] = '\0';
for (i=R_ParseContextLast; last>0 ; i += PARSE_CONTEXT_SIZE - 1) {
i = i % PARSE_CONTEXT_SIZE;
context[--last] = R_ParseContext[i];
if (!context[last]) {
last++;
break;
}
}
nn = 16; /* initially allocate space for 16 lines */
PROTECT(ans = allocVector(STRSXP, nn));
c = context[last];
nread = 0;
while(c) {
nread++;
if(nread >= nn) {
ans2 = allocVector(STRSXP, 2*nn);
for(i = 0; i < nn; i++)
SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
nn *= 2;
UNPROTECT(1); /* old ans */
PROTECT(ans = ans2);
}
i = last;
while((c = context[i++])) {
if(c == '\n') break;
}
context[i-1] = '\0';
SET_STRING_ELT(ans, nread-1, mkChar(context + last));
last = i;
}
/* get rid of empty line after last newline */
if (nread && !length(STRING_ELT(ans, nread-1))) {
nread--;
R_ParseContextLine--;
}
PROTECT(ans2 = allocVector(STRSXP, nread));
for(i = 0; i < nread; i++)
SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
UNPROTECT(2);
return ans2;
}
static void getParseFilename(char* buffer, size_t buflen)
{
buffer[0] = '\0';
if (R_ParseErrorFile) {
if (isEnvironment(R_ParseErrorFile)) {
SEXP filename;
PROTECT(filename = findVar(install("filename"), R_ParseErrorFile));
if (isString(filename) && length(filename)) {
strncpy(buffer, CHAR(STRING_ELT(filename, 0)), buflen - 1);
buffer[buflen - 1] = '\0';
}
UNPROTECT(1);
} else if (isString(R_ParseErrorFile) && length(R_ParseErrorFile)) {
strncpy(buffer, CHAR(STRING_ELT(R_ParseErrorFile, 0)), buflen - 1);
buffer[buflen - 1] = '\0';
}
}
}
static SEXP tabExpand(SEXP strings)
{
int i;
char buffer[200], *b;
const char *input;
SEXP result;
PROTECT(strings);
PROTECT(result = allocVector(STRSXP, length(strings)));
for (i = 0; i < length(strings); i++) {
input = CHAR(STRING_ELT(strings, i));
for (b = buffer; *input && (b-buffer < 192); input++) {
if (*input == '\t') do {
*b++ = ' ';
} while (((b-buffer) & 7) != 0);
else *b++ = *input;
}
*b = '\0';
SET_STRING_ELT(result, i, mkCharCE(buffer, Rf_getCharCE(STRING_ELT(strings, i))));
}
UNPROTECT(2);
return result;
}
void NORET parseError(SEXP call, int linenum)
{
SEXP context;
int len, width;
char filename[128], buffer[10];
PROTECT(context = tabExpand(getParseContext()));
len = length(context);
if (linenum) {
getParseFilename(filename, sizeof(filename)-2);
if (strlen(filename)) strcpy(filename + strlen(filename), ":");
switch (len) {
case 0:
error("%s%d:%d: %s",
filename, linenum, R_ParseErrorCol, R_ParseErrorMsg);
break;
case 1: // replaces use of %n
width = snprintf(buffer, 10, "%d: ", R_ParseContextLine);
error("%s%d:%d: %s\n%d: %s\n%*s",
filename, linenum, R_ParseErrorCol, R_ParseErrorMsg,
R_ParseContextLine, CHAR(STRING_ELT(context, 0)),
width+R_ParseErrorCol+1, "^");
break;
default:
width = snprintf(buffer, 10, "%d:", R_ParseContextLine);
error("%s%d:%d: %s\n%d: %s\n%d: %s\n%*s",
filename, linenum, R_ParseErrorCol, R_ParseErrorMsg,
R_ParseContextLine-1, CHAR(STRING_ELT(context, len-2)),
R_ParseContextLine, CHAR(STRING_ELT(context, len-1)),
width+R_ParseErrorCol+1, "^");
break;
}
} else {
switch (len) {
case 0:
error("%s", R_ParseErrorMsg);
break;
case 1:
error("%s in \"%s\"",
R_ParseErrorMsg, CHAR(STRING_ELT(context, 0)));
break;
default:
error("%s in:\n\"%s\n%s\"",
R_ParseErrorMsg, CHAR(STRING_ELT(context, len-2)),
CHAR(STRING_ELT(context, len-1)));
break;
}
}
UNPROTECT(1);
}
typedef struct parse_info {
Rconnection con;
Rboolean old_latin1;
Rboolean old_utf8;
} parse_cleanup_info;
static void parse_cleanup(void *data)
{
parse_cleanup_info *pci = (parse_cleanup_info *)data;
Rconnection con = pci->con;
if(con && con->isopen)
con->close(con);
known_to_be_latin1 = pci->old_latin1;
known_to_be_utf8 = pci->old_utf8;
}
/* "do_parse" - the user interface input/output to files.
The internal R_Parse.. functions are defined in ./gram.y (-> gram.c)
.Internal( parse(file, n, text, prompt, srcfile, encoding) )
If there is text then that is read and the other arguments are ignored.
*/
SEXP attribute_hidden do_parse(SEXP call, SEXP op, SEXP args, SEXP env)
{
checkArity(op, args);
if(!inherits(CAR(args), "connection"))
error(_("'file' must be a character string or connection"));
R_ParseError = 0;
R_ParseErrorMsg[0] = '\0';
int ifile = asInteger(CAR(args)); args = CDR(args);
Rconnection con = getConnection(ifile);
Rboolean wasopen = con->isopen;
int num = asInteger(CAR(args)); args = CDR(args);
if (num == 0)
return(allocVector(EXPRSXP, 0));
SEXP text = PROTECT(coerceVector(CAR(args), STRSXP));
if(length(CAR(args)) && !length(text))
error(_("coercion of 'text' to character was unsuccessful"));
args = CDR(args);
SEXP prompt = CAR(args); args = CDR(args);
SEXP source = CAR(args); args = CDR(args);
if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1)
error(_("invalid '%s' value"), "encoding");
const char *encoding = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */
parse_cleanup_info pci;
pci.con = NULL;
pci.old_latin1 = known_to_be_latin1;
pci.old_utf8 = known_to_be_utf8;
RCNTXT cntxt;
/* set up context to recover known_to_be_* and to close connection on
error if opened by do_parse */
begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
R_NilValue, R_NilValue);
cntxt.cend = &parse_cleanup;
cntxt.cenddata = &pci;
known_to_be_latin1 = known_to_be_utf8 = FALSE;
Rboolean allKnown = TRUE;
/* allow 'encoding' to override declaration on 'text'. */
if(streql(encoding, "latin1")) {
known_to_be_latin1 = TRUE;
allKnown = FALSE;
} else if(streql(encoding, "UTF-8")) {
known_to_be_utf8 = TRUE;
allKnown = FALSE;
} else if(!streql(encoding, "unknown") && !streql(encoding, "native.enc"))
warning(_("argument '%s = \"%s\"' will be ignored"), "encoding", encoding);
if (prompt == R_NilValue)
PROTECT(prompt);
else
PROTECT(prompt = coerceVector(prompt, STRSXP));
ParseStatus status;
SEXP s;
if (length(text) > 0) {
/* If 'text' has known encoding then we can be sure it will be
correctly re-encoded to the current encoding by
translateChar in the parser and so could mark the result in
a Latin-1 or UTF-8 locale.
A small complication is that different elements could have
different encodings, but all that matters is that all
non-ASCII elements have known encoding.
*/
for(int i = 0; i < length(text); i++)
if(!ENC_KNOWN(STRING_ELT(text, i)) &&
!IS_ASCII(STRING_ELT(text, i))) {
allKnown = FALSE;
break;
}
if(allKnown) {
known_to_be_latin1 = pci.old_latin1;
known_to_be_utf8 = pci.old_utf8;
}
if (num == NA_INTEGER) num = -1;
s = R_ParseVector(text, num, &status, source);
if (status != PARSE_OK) parseError(call, R_ParseError);
}
else if (ifile >= 3) {/* file != "" */
if (num == NA_INTEGER) num = -1;
if(!wasopen) {
if(!con->open(con)) error(_("cannot open the connection"));
pci.con = con; /* close the connection on error */
}
if(!con->canread) error(_("cannot read from this connection"));
s = R_ParseConn(con, num, &status, source);
if(!wasopen) {
PROTECT(s);
pci.con = NULL;
con->close(con);
UNPROTECT(1);
}
if (status != PARSE_OK) parseError(call, R_ParseError);
}
else {
if (num == NA_INTEGER) num = 1;
s = R_ParseBuffer(&R_ConsoleIob, num, &status, prompt, source);
if (status != PARSE_OK) parseError(call, R_ParseError);
}
known_to_be_latin1 = pci.old_latin1;
known_to_be_utf8 = pci.old_utf8;
PROTECT(s);
endcontext(&cntxt);
UNPROTECT(3);
return s;
}