| % File src/library/base/man/parse.Rd |
| % Part of the R package, https://www.R-project.org |
| % Copyright 1995-2019 R Core Team |
| % Distributed under GPL 2 or later |
| |
| \name{parse} |
| \title{Parse R Expressions} |
| \alias{parse} |
| \alias{str2lang} |
| \alias{str2expression} |
| \description{ |
| \code{parse()} returns the parsed but unevaluated expressions in an |
| \code{\link{expression}}, a \dQuote{list} of \code{\link{call}}s. |
| |
| \code{str2expression(s)} and \code{str2lang(s)} return special versions |
| of \code{parse(text=s, keep.source=FALSE)} and can therefore be regarded as |
| transforming character strings \code{s} to expressions, calls, etc. |
| } |
| \usage{ |
| parse(file = "", n = NULL, text = NULL, prompt = "?", |
| keep.source = getOption("keep.source"), srcfile, |
| encoding = "unknown") |
| |
| str2lang(s) |
| str2expression(text) |
| } |
| \arguments{ |
| \item{file}{a \link{connection}, or a character string giving the name of a |
| file or a URL to read the expressions from. |
| If \code{file} is \code{""} and \code{text} is missing or \code{NULL} |
| then input is taken from the console.} |
| \item{n}{integer (or coerced to integer). The maximum number of |
| expressions to parse. If \code{n} is \code{NULL} or negative or |
| \code{NA} the input is parsed in its entirety.} |
| \item{text}{character vector. The text to parse. Elements are treated |
| as if they were lines of a file. Other \R objects will be coerced |
| to character if possible.} |
| \item{prompt}{the prompt to print when parsing from the keyboard. |
| \code{NULL} means to use \R's prompt, \code{getOption("prompt")}.} |
| \item{keep.source}{a logical value; if \code{TRUE}, keep |
| source reference information.} |
| \item{srcfile}{\code{NULL}, a character vector, or a |
| \code{\link{srcfile}} object. See the \sQuote{Details} section.} |
| \item{encoding}{encoding to be assumed for input strings. If the |
| value is \code{"latin1"} or \code{"UTF-8"} it is used to mark |
| character strings as known to be in Latin-1 or UTF-8: it is not used |
| to re-encode the input. To do the latter, specify the encoding as |
| part of the connection passed as \code{file} or \emph{via} |
| \code{\link{options}(encoding=)}: see the example under |
| \code{\link{file}}.} |
| \item{s}{a \code{\link{character}} vector of length \code{1}, i.e., a |
| \dQuote{string}.} |
| } |
| \references{ |
| Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) |
| \emph{The New S Language}. |
| Wadsworth & Brooks/Cole. |
| |
| Murdoch, D. (2010). |
| \href{https://journal.r-project.org/archive/2010-2/RJournal_2010-2_Murdoch.pdf}{Source |
| References}. \emph{The R Journal} 2/2, 16-19. |
| } |
| \seealso{ |
| \code{\link{scan}}, \code{\link{source}}, \code{\link{eval}}, |
| \code{\link{deparse}}. |
| |
| The source reference information can be used for debugging (see |
| e.g.\sspace{}\code{\link{setBreakpoint}}) and profiling (see |
| \code{\link{Rprof}}). It can be examined by \code{\link{getSrcref}} |
| and related functions. More detailed information is available through |
| \code{\link{getParseData}}. |
| } |
| \details{ |
| \describe{ |
| \item{\code{parse(....)}: }{ |
| |
| If \code{text} has length greater than zero (after coercion) it is used in |
| preference to \code{file}. |
| |
| All versions of \R accept input from a connection with end of line |
| marked by LF (as used on Unix), CRLF (as used on DOS/Windows) |
| or CR (as used on classic Mac OS). The final line can be incomplete, |
| that is, missing the final EOL marker. |
| |
| When input is taken from the console, \code{n = NULL} is equivalent to |
| \code{n = 1}, and \code{n < 0} will read until an EOF character is |
| read. (The EOF character is Ctrl-Z for the Windows front-ends.) The |
| line-length limit is 4095 bytes when reading from the console (which |
| may impose a lower limit: see \sQuote{An Introduction to R}). |
| |
| The default for \code{srcfile} is set as follows. If |
| \code{keep.source} is not \code{TRUE}, \code{srcfile} |
| defaults to a character string, either \code{"<text>"} or one |
| derived from \code{file}. When \code{keep.source} is |
| \code{TRUE}, if \code{text} is used, \code{srcfile} will be set to a |
| \code{\link{srcfilecopy}} containing the text. If a character |
| string is used for \code{file}, a \code{\link{srcfile}} object |
| referring to that file will be used. |
| |
| When \code{srcfile} is a character string, error messages will |
| include the name, but source reference information will not be added |
| to the result. When \code{srcfile} is a \code{\link{srcfile}} |
| object, source reference information will be retained. |
| } |
| \item{\code{str2expression(s)}: }{for a \code{\link{character}} vector |
| \code{s}, \code{str2expression(s)} corresponds to |
| \code{\link{parse}(text = s, keep.source=FALSE)}, which is always of |
| type (\code{\link{typeof}}) and \code{\link{class}} \code{expression}. |
| } |
| \item{\code{str2lang(s)}: }{for a \code{\link{character}} string |
| \code{s}, \code{str2lang(s)} corresponds to |
| \code{\link{parse}(text = s, keep.source=FALSE)[[1]]} (plus a check |
| that both \code{s} and the \code{parse(*)} result are of length one) |
| which is typically a \code{call} but may also be a \code{symbol} aka |
| \code{\link{name}}, \code{\link{NULL}} or an atomic constant such as |
| \code{2}, \code{1L}, or \code{TRUE}. Put differently, the value of |
| \code{str2lang(.)} is a call or one of its parts, in short |
| \dQuote{a call or simpler}. |
| } |
| }% describe |
| Currently, encoding is not handled in \code{str2lang()} and |
| \code{str2expression()}. |
| } |
| \section{Partial parsing}{ |
| When a syntax error occurs during parsing, \code{parse} |
| signals an error. The partial parse data will be stored in the |
| \code{srcfile} argument if it is a \code{\link{srcfile}} object |
| and the \code{text} argument was used to supply the text. In other |
| cases it will be lost when the error is triggered. |
| |
| The partial parse data can be retrieved using |
| \code{\link{getParseData}} applied to the \code{srcfile} object. |
| Because parsing was incomplete, it will typically include references |
| to \code{"parent"} entries that are not present. |
| } |
| \value{ |
| \code{parse()} and \code{str2expression()} return an object of type |
| \code{"\link{expression}"}, for \code{parse()} with up to \code{n} |
| elements if specified as a non-negative integer. |
| |
| \code{str2lang(s)}, \code{s} a string, returns \dQuote{a |
| \code{\link{call}} or simpler}, see the \sQuote{Details} section. |
| |
| When \code{srcfile} is non-\code{NULL}, a \code{"srcref"} attribute |
| will be attached to the result containing a list of |
| \code{\link{srcref}} records corresponding to each element, a |
| \code{"srcfile"} attribute will be attached containing a copy of |
| \code{srcfile}, and a \code{"wholeSrcref"} attribute will be |
| attached containing a \code{\link{srcref}} record corresponding to |
| all of the parsed text. Detailed parse information will be stored in |
| the \code{"srcfile"} attribute, to be retrieved by |
| \code{\link{getParseData}}. |
| |
| A syntax error (including an incomplete expression) will throw an error. |
| |
| Character strings in the result will have a declared encoding if |
| \code{encoding} is \code{"latin1"} or \code{"UTF-8"}, or if |
| \code{text} is supplied with every element of known encoding in a |
| Latin-1 or UTF-8 locale. |
| } |
| \note{ |
| Using \code{parse(text = *, ..)} or its simplified and hence more |
| efficient versions \code{str2lang()} or \code{str2expression()} is at |
| least an order of magnitude less efficient than \code{\link{call}(..)} or |
| \code{\link{as.call}()}. |
| } |
| \examples{ |
| fil <- tempfile(fileext = ".Rdmped") |
| cat("x <- c(1, 4)\n x ^ 3 -10 ; outer(1:7, 5:9)\n", file = fil) |
| # parse 3 statements from our temp file |
| parse(file = fil, n = 3) |
| unlink(fil) |
| |
| ## str2lang(<string>) || str2expression(<character>) : |
| stopifnot(exprs = { |
| identical( str2lang("x[3] <- 1+4"), quote(x[3] <- 1+4)) |
| identical( str2lang("log(y)"), quote(log(y)) ) |
| identical( str2lang("abc" ), quote(abc) -> qa) |
| is.symbol(qa) & !is.call(qa) # a symbol/name, not a call |
| identical( str2lang("1.375" ), 1.375) # just a number, not a call |
| }) |
| |
| # A partial parse with a syntax error |
| txt <- " |
| x <- 1 |
| an error |
| " |
| sf <- srcfile("txt") |
| try(parse(text = txt, srcfile = sf)) |
| getParseData(sf) |
| } |
| \keyword{file} |
| \keyword{programming} |
| \keyword{connection} |