src/main/source.c - R - Git at Google

 /*
  *  R : A Computer Language for Statistical Data Analysis
  *  Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
  *  Copyright (C) 2001-2018   The R Core Team
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, a copy is available at
  *  https://www.R-project.org/Licenses/
  */

 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif

 #include <Parse.h> // -> IOStuff.h, Defn.h
 #include <Internal.h>
 #include <Fileio.h>
 #include <Rconnections.h>

 SEXP attribute_hidden getParseContext(void)
 {
     int i, last = PARSE_CONTEXT_SIZE;
     char context[PARSE_CONTEXT_SIZE+1];

     SEXP ans = R_NilValue, ans2;
     int nn, nread;
     char c;

     context[last] = '\0';
     for (i=R_ParseContextLast; last>0 ; i += PARSE_CONTEXT_SIZE - 1) {
 	i = i % PARSE_CONTEXT_SIZE;
 	context[--last] = R_ParseContext[i];
 	if (!context[last]) {
 	    last++;
 	    break;
 	}
     }

     nn = 16; /* initially allocate space for 16 lines */
     PROTECT(ans = allocVector(STRSXP, nn));
     c = context[last];
     nread = 0;
     while(c) {
 	nread++;
 	if(nread >= nn) {
 	    ans2 = allocVector(STRSXP, 2*nn);
 	    for(i = 0; i < nn; i++)
 		SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
 	    nn *= 2;
 	    UNPROTECT(1); /* old ans */
 	    PROTECT(ans = ans2);
 	}
 	i = last;
 	while((c = context[i++])) {
 	    if(c == '\n') break;
 	}
 	context[i-1] = '\0';
 	SET_STRING_ELT(ans, nread-1, mkChar(context + last));
 	last = i;
     }
     /* get rid of empty line after last newline */
     if (nread && !length(STRING_ELT(ans, nread-1))) {
 	nread--;
 	R_ParseContextLine--;
     }
     PROTECT(ans2 = allocVector(STRSXP, nread));
     for(i = 0; i < nread; i++)
 	SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
     UNPROTECT(2);
     return ans2;
 }

 static void getParseFilename(char* buffer, size_t buflen)
 {
     buffer[0] = '\0';
     if (R_ParseErrorFile) {
 	if (isEnvironment(R_ParseErrorFile)) {
 	    SEXP filename;
 	    PROTECT(filename = findVar(install("filename"), R_ParseErrorFile));
 	    if (isString(filename) && length(filename)) {
 		strncpy(buffer, CHAR(STRING_ELT(filename, 0)), buflen - 1);
 		buffer[buflen - 1] = '\0';
 	    }
 	    UNPROTECT(1);
 	} else if (isString(R_ParseErrorFile) && length(R_ParseErrorFile)) {
 	    strncpy(buffer, CHAR(STRING_ELT(R_ParseErrorFile, 0)), buflen - 1);
 	    buffer[buflen - 1] = '\0';
 	}
     }
 }

 static SEXP tabExpand(SEXP strings)
 {
     int i;
     char buffer[200], *b;
     const char *input;
     SEXP result;
     PROTECT(strings);
     PROTECT(result = allocVector(STRSXP, length(strings)));
     for (i = 0; i < length(strings); i++) {
 	input = CHAR(STRING_ELT(strings, i));
 	for (b = buffer; *input && (b-buffer < 192); input++) {
 	    if (*input == '\t') do {
 		*b++ = ' ';
 	    } while (((b-buffer) & 7) != 0);
 	    else *b++ = *input;
 	}
 	*b = '\0';
 	SET_STRING_ELT(result, i, mkCharCE(buffer, Rf_getCharCE(STRING_ELT(strings, i))));
     }
     UNPROTECT(2);
     return result;
 }

 void NORET parseError(SEXP call, int linenum)
 {
     SEXP context;
     int len, width;
     char filename[128], buffer[10];
     PROTECT(context = tabExpand(getParseContext()));
     len = length(context);
     if (linenum) {
 	getParseFilename(filename, sizeof(filename)-2);
 	if (strlen(filename)) strcpy(filename + strlen(filename), ":");

 	switch (len) {
 	case 0:
 	    error("%s%d:%d: %s",
 		  filename, linenum, R_ParseErrorCol, R_ParseErrorMsg);
 	    break;
 	case 1: // replaces use of %n
 	    width = snprintf(buffer, 10, "%d: ", R_ParseContextLine);
 	    error("%s%d:%d: %s\n%d: %s\n%*s",
 		  filename, linenum, R_ParseErrorCol, R_ParseErrorMsg,
 		  R_ParseContextLine, CHAR(STRING_ELT(context, 0)),
 		  width+R_ParseErrorCol+1, "^");
 	    break;
 	default:
 	    width = snprintf(buffer, 10, "%d:", R_ParseContextLine);
 	    error("%s%d:%d: %s\n%d: %s\n%d: %s\n%*s",
 		  filename, linenum, R_ParseErrorCol, R_ParseErrorMsg,
 		  R_ParseContextLine-1, CHAR(STRING_ELT(context, len-2)),
 		  R_ParseContextLine, CHAR(STRING_ELT(context, len-1)),
 		  width+R_ParseErrorCol+1, "^");
 	    break;
 	}
     } else {
 	switch (len) {
 	case 0:
 	    error("%s", R_ParseErrorMsg);
 	    break;
 	case 1:
 	    error("%s in \"%s\"",
 		  R_ParseErrorMsg, CHAR(STRING_ELT(context, 0)));
 	    break;
 	default:
 	    error("%s in:\n\"%s\n%s\"",
 		  R_ParseErrorMsg, CHAR(STRING_ELT(context, len-2)),
 		  CHAR(STRING_ELT(context, len-1)));
 	    break;
 	}
     }
     UNPROTECT(1);
 }

 typedef struct parse_info {
     Rconnection con;
     Rboolean old_latin1;
     Rboolean old_utf8;
 }  parse_cleanup_info;

 static void parse_cleanup(void *data)
 {
     parse_cleanup_info *pci = (parse_cleanup_info *)data;
     Rconnection con = pci->con;
     if(con && con->isopen)
 	con->close(con);
     known_to_be_latin1 = pci->old_latin1;
     known_to_be_utf8 = pci->old_utf8;
 }

 /* "do_parse" - the user interface input/output to files.

  The internal R_Parse.. functions are defined in ./gram.y (-> gram.c)

  .Internal( parse(file, n, text, prompt, srcfile, encoding) )
  If there is text then that is read and the other arguments are ignored.
 */
 SEXP attribute_hidden do_parse(SEXP call, SEXP op, SEXP args, SEXP env)
 {
     checkArity(op, args);
     if(!inherits(CAR(args), "connection"))
 	error(_("'file' must be a character string or connection"));
     R_ParseError = 0;
     R_ParseErrorMsg[0] = '\0';

     int ifile = asInteger(CAR(args));                   args = CDR(args);
     Rconnection con = getConnection(ifile);
     Rboolean wasopen = con->isopen;
     int num = asInteger(CAR(args));			args = CDR(args);
     if (num == 0)
 	return(allocVector(EXPRSXP, 0));

     SEXP text = PROTECT(coerceVector(CAR(args), STRSXP));
     if(length(CAR(args)) && !length(text))
 	error(_("coercion of 'text' to character was unsuccessful"));
     args = CDR(args);
     SEXP prompt = CAR(args);				args = CDR(args);
     SEXP source = CAR(args);				args = CDR(args);
     if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1)
 	error(_("invalid '%s' value"), "encoding");
     const char *encoding = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */

     parse_cleanup_info pci;
     pci.con = NULL;
     pci.old_latin1 = known_to_be_latin1;
     pci.old_utf8 = known_to_be_utf8;
     RCNTXT cntxt;
     /* set up context to recover known_to_be_* and to close connection on
        error if opened by do_parse */
     begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
 		 R_NilValue, R_NilValue);
     cntxt.cend = &parse_cleanup;
     cntxt.cenddata = &pci;

     known_to_be_latin1 = known_to_be_utf8 = FALSE;
     Rboolean allKnown = TRUE;
     /* allow 'encoding' to override declaration on 'text'. */
     if(streql(encoding, "latin1")) {
 	known_to_be_latin1 = TRUE;
 	allKnown = FALSE;
     } else if(streql(encoding, "UTF-8"))  {
 	known_to_be_utf8 = TRUE;
 	allKnown = FALSE;
     } else if(!streql(encoding, "unknown") && !streql(encoding, "native.enc"))
 	warning(_("argument '%s = \"%s\"' will be ignored"), "encoding", encoding);

     if (prompt == R_NilValue)
 	PROTECT(prompt);
     else
 	PROTECT(prompt = coerceVector(prompt, STRSXP));

     ParseStatus status;
     SEXP s;
     if (length(text) > 0) {
 	/* If 'text' has known encoding then we can be sure it will be
 	   correctly re-encoded to the current encoding by
 	   translateChar in the parser and so could mark the result in
 	   a Latin-1 or UTF-8 locale.

 	   A small complication is that different elements could have
 	   different encodings, but all that matters is that all
 	   non-ASCII elements have known encoding.
 	*/
 	for(int i = 0; i < length(text); i++)
 	    if(!ENC_KNOWN(STRING_ELT(text, i)) &&
 	       !IS_ASCII(STRING_ELT(text, i))) {
 		allKnown = FALSE;
 		break;
 	    }
 	if(allKnown) {
 	    known_to_be_latin1 = pci.old_latin1;
 	    known_to_be_utf8 = pci.old_utf8;
 	}
 	if (num == NA_INTEGER) num = -1;
 	s = R_ParseVector(text, num, &status, source);
 	if (status != PARSE_OK) parseError(call, R_ParseError);
     }
     else if (ifile >= 3) {/* file != "" */
 	if (num == NA_INTEGER) num = -1;
 	if(!wasopen) {
 	    if(!con->open(con)) error(_("cannot open the connection"));
 	    pci.con = con; /* close the connection on error */
 	}
 	if(!con->canread) error(_("cannot read from this connection"));
 	s = R_ParseConn(con, num, &status, source);
 	if(!wasopen) {
 	    PROTECT(s);
 	    pci.con = NULL;
 	    con->close(con);
 	    UNPROTECT(1);
 	}
 	if (status != PARSE_OK) parseError(call, R_ParseError);
     }
     else {
 	if (num == NA_INTEGER) num = 1;
 	s = R_ParseBuffer(&R_ConsoleIob, num, &status, prompt, source);
 	if (status != PARSE_OK) parseError(call, R_ParseError);
     }
     known_to_be_latin1 = pci.old_latin1;
     known_to_be_utf8 = pci.old_utf8;
     PROTECT(s);
     endcontext(&cntxt);
     UNPROTECT(3);
     return s;
 }
	/*
	* R : A Computer Language for Statistical Data Analysis
	* Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
	* Copyright (C) 2001-2018 The R Core Team
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation; either version 2 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, a copy is available at
	* https://www.R-project.org/Licenses/
	*/

	#ifdef HAVE_CONFIG_H
	#include <config.h>
	#endif

	#include <Parse.h> // -> IOStuff.h, Defn.h
	#include <Internal.h>
	#include <Fileio.h>
	#include <Rconnections.h>

	SEXP attribute_hidden getParseContext(void)
	{
	int i, last = PARSE_CONTEXT_SIZE;
	char context[PARSE_CONTEXT_SIZE+1];

	SEXP ans = R_NilValue, ans2;
	int nn, nread;
	char c;

	context[last] = '\0';
	for (i=R_ParseContextLast; last>0 ; i += PARSE_CONTEXT_SIZE - 1) {
	i = i % PARSE_CONTEXT_SIZE;
	context[--last] = R_ParseContext[i];
	if (!context[last]) {
	last++;
	break;
	}
	}

	nn = 16; /* initially allocate space for 16 lines */
	PROTECT(ans = allocVector(STRSXP, nn));
	c = context[last];
	nread = 0;
	while(c) {
	nread++;
	if(nread >= nn) {
	ans2 = allocVector(STRSXP, 2*nn);
	for(i = 0; i < nn; i++)
	SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
	nn *= 2;
	UNPROTECT(1); /* old ans */
	PROTECT(ans = ans2);
	}
	i = last;
	while((c = context[i++])) {
	if(c == '\n') break;
	}
	context[i-1] = '\0';
	SET_STRING_ELT(ans, nread-1, mkChar(context + last));
	last = i;
	}
	/* get rid of empty line after last newline */
	if (nread && !length(STRING_ELT(ans, nread-1))) {
	nread--;
	R_ParseContextLine--;
	}
	PROTECT(ans2 = allocVector(STRSXP, nread));
	for(i = 0; i < nread; i++)
	SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
	UNPROTECT(2);
	return ans2;
	}

	static void getParseFilename(char* buffer, size_t buflen)
	{
	buffer[0] = '\0';
	if (R_ParseErrorFile) {
	if (isEnvironment(R_ParseErrorFile)) {
	SEXP filename;
	PROTECT(filename = findVar(install("filename"), R_ParseErrorFile));
	if (isString(filename) && length(filename)) {
	strncpy(buffer, CHAR(STRING_ELT(filename, 0)), buflen - 1);
	buffer[buflen - 1] = '\0';
	}
	UNPROTECT(1);
	} else if (isString(R_ParseErrorFile) && length(R_ParseErrorFile)) {
	strncpy(buffer, CHAR(STRING_ELT(R_ParseErrorFile, 0)), buflen - 1);
	buffer[buflen - 1] = '\0';
	}
	}
	}

	static SEXP tabExpand(SEXP strings)
	{
	int i;
	char buffer[200], *b;
	const char *input;
	SEXP result;
	PROTECT(strings);
	PROTECT(result = allocVector(STRSXP, length(strings)));
	for (i = 0; i < length(strings); i++) {
	input = CHAR(STRING_ELT(strings, i));
	for (b = buffer; *input && (b-buffer < 192); input++) {
	if (*input == '\t') do {
	*b++ = ' ';
	} while (((b-buffer) & 7) != 0);
	else b++ = input;
	}
	*b = '\0';
	SET_STRING_ELT(result, i, mkCharCE(buffer, Rf_getCharCE(STRING_ELT(strings, i))));
	}
	UNPROTECT(2);
	return result;
	}

	void NORET parseError(SEXP call, int linenum)
	{
	SEXP context;
	int len, width;
	char filename[128], buffer[10];
	PROTECT(context = tabExpand(getParseContext()));
	len = length(context);
	if (linenum) {
	getParseFilename(filename, sizeof(filename)-2);
	if (strlen(filename)) strcpy(filename + strlen(filename), ":");

	switch (len) {
	case 0:
	error("%s%d:%d: %s",
	filename, linenum, R_ParseErrorCol, R_ParseErrorMsg);
	break;
	case 1: // replaces use of %n
	width = snprintf(buffer, 10, "%d: ", R_ParseContextLine);
	error("%s%d:%d: %s\n%d: %s\n%*s",
	filename, linenum, R_ParseErrorCol, R_ParseErrorMsg,
	R_ParseContextLine, CHAR(STRING_ELT(context, 0)),
	width+R_ParseErrorCol+1, "^");
	break;
	default:
	width = snprintf(buffer, 10, "%d:", R_ParseContextLine);
	error("%s%d:%d: %s\n%d: %s\n%d: %s\n%*s",
	filename, linenum, R_ParseErrorCol, R_ParseErrorMsg,
	R_ParseContextLine-1, CHAR(STRING_ELT(context, len-2)),
	R_ParseContextLine, CHAR(STRING_ELT(context, len-1)),
	width+R_ParseErrorCol+1, "^");
	break;
	}
	} else {
	switch (len) {
	case 0:
	error("%s", R_ParseErrorMsg);
	break;
	case 1:
	error("%s in \"%s\"",
	R_ParseErrorMsg, CHAR(STRING_ELT(context, 0)));
	break;
	default:
	error("%s in:\n\"%s\n%s\"",
	R_ParseErrorMsg, CHAR(STRING_ELT(context, len-2)),
	CHAR(STRING_ELT(context, len-1)));
	break;
	}
	}
	UNPROTECT(1);
	}

	typedef struct parse_info {
	Rconnection con;
	Rboolean old_latin1;
	Rboolean old_utf8;
	} parse_cleanup_info;

	static void parse_cleanup(void *data)
	{
	parse_cleanup_info pci = (parse_cleanup_info )data;
	Rconnection con = pci->con;
	if(con && con->isopen)
	con->close(con);
	known_to_be_latin1 = pci->old_latin1;
	known_to_be_utf8 = pci->old_utf8;
	}

	/* "do_parse" - the user interface input/output to files.

	The internal R_Parse.. functions are defined in ./gram.y (-> gram.c)

	.Internal( parse(file, n, text, prompt, srcfile, encoding) )
	If there is text then that is read and the other arguments are ignored.
	*/
	SEXP attribute_hidden do_parse(SEXP call, SEXP op, SEXP args, SEXP env)
	{
	checkArity(op, args);
	if(!inherits(CAR(args), "connection"))
	error(_("'file' must be a character string or connection"));
	R_ParseError = 0;
	R_ParseErrorMsg[0] = '\0';

	int ifile = asInteger(CAR(args)); args = CDR(args);
	Rconnection con = getConnection(ifile);
	Rboolean wasopen = con->isopen;
	int num = asInteger(CAR(args)); args = CDR(args);
	if (num == 0)
	return(allocVector(EXPRSXP, 0));

	SEXP text = PROTECT(coerceVector(CAR(args), STRSXP));
	if(length(CAR(args)) && !length(text))
	error(_("coercion of 'text' to character was unsuccessful"));
	args = CDR(args);
	SEXP prompt = CAR(args); args = CDR(args);
	SEXP source = CAR(args); args = CDR(args);
	if(!isString(CAR(args)) \|\| LENGTH(CAR(args)) != 1)
	error(_("invalid '%s' value"), "encoding");
	const char encoding = CHAR(STRING_ELT(CAR(args), 0)); / ASCII */

	parse_cleanup_info pci;
	pci.con = NULL;
	pci.old_latin1 = known_to_be_latin1;
	pci.old_utf8 = known_to_be_utf8;
	RCNTXT cntxt;
	/* set up context to recover known_to_be_* and to close connection on
	error if opened by do_parse */
	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
	R_NilValue, R_NilValue);
	cntxt.cend = &parse_cleanup;
	cntxt.cenddata = &pci;

	known_to_be_latin1 = known_to_be_utf8 = FALSE;
	Rboolean allKnown = TRUE;
	/* allow 'encoding' to override declaration on 'text'. */
	if(streql(encoding, "latin1")) {
	known_to_be_latin1 = TRUE;
	allKnown = FALSE;
	} else if(streql(encoding, "UTF-8")) {
	known_to_be_utf8 = TRUE;
	allKnown = FALSE;
	} else if(!streql(encoding, "unknown") && !streql(encoding, "native.enc"))
	warning(_("argument '%s = \"%s\"' will be ignored"), "encoding", encoding);

	if (prompt == R_NilValue)
	PROTECT(prompt);
	else
	PROTECT(prompt = coerceVector(prompt, STRSXP));

	ParseStatus status;
	SEXP s;
	if (length(text) > 0) {
	/* If 'text' has known encoding then we can be sure it will be
	correctly re-encoded to the current encoding by
	translateChar in the parser and so could mark the result in
	a Latin-1 or UTF-8 locale.

	A small complication is that different elements could have
	different encodings, but all that matters is that all
	non-ASCII elements have known encoding.
	*/
	for(int i = 0; i < length(text); i++)
	if(!ENC_KNOWN(STRING_ELT(text, i)) &&
	!IS_ASCII(STRING_ELT(text, i))) {
	allKnown = FALSE;
	break;
	}
	if(allKnown) {
	known_to_be_latin1 = pci.old_latin1;
	known_to_be_utf8 = pci.old_utf8;
	}
	if (num == NA_INTEGER) num = -1;
	s = R_ParseVector(text, num, &status, source);
	if (status != PARSE_OK) parseError(call, R_ParseError);
	}
	else if (ifile >= 3) {/* file != "" */
	if (num == NA_INTEGER) num = -1;
	if(!wasopen) {
	if(!con->open(con)) error(_("cannot open the connection"));
	pci.con = con; /* close the connection on error */
	}
	if(!con->canread) error(_("cannot read from this connection"));
	s = R_ParseConn(con, num, &status, source);
	if(!wasopen) {
	PROTECT(s);
	pci.con = NULL;
	con->close(con);
	UNPROTECT(1);
	}
	if (status != PARSE_OK) parseError(call, R_ParseError);
	}
	else {
	if (num == NA_INTEGER) num = 1;
	s = R_ParseBuffer(&R_ConsoleIob, num, &status, prompt, source);
	if (status != PARSE_OK) parseError(call, R_ParseError);
	}
	known_to_be_latin1 = pci.old_latin1;
	known_to_be_utf8 = pci.old_utf8;
	PROTECT(s);
	endcontext(&cntxt);
	UNPROTECT(3);
	return s;
	}