blob: 0c2d3be85d38563ce00a04f84f975c4bedf1986a [file] [log] [blame]
# File src/library/tools/R/parseRd.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2016 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
parse_Rd <- function(file, srcfile = NULL, encoding = "unknown",
verbose = FALSE, fragment = FALSE,
warningCalls = TRUE,
macros = file.path(R.home("share"), "Rd", "macros", "system.Rd"),
permissive = FALSE)
{
if(is.character(file)) {
file0 <- file
if(file == "") {
file <- stdin()
} else {
## keep.source is FALSE in batch use
## encoding issues here, for now use file encoding
if (missing(srcfile)) ## && isTRUE(getOption("keep.source")))
srcfile <- srcfile(file)
}
} else file0 <- "<connection>"
lines <- readLines(file, warn = FALSE)
if(is.character(macros))
macros <- initialRdMacros(macros = macros)
## remove old-style marking for data, keep line nos
lines[lines == "\\non_function{}"] <- ""
## Extract the encoding if marked in the file:
## do this in two steps to minimize warnings in MBCS locales
## Note this is required to be on a line by itself,
## but some people have preceding whitespace
enc <- grep("\\encoding{", lines, fixed = TRUE, useBytes=TRUE)
enc <- grep("^[[:space:]]*\\\\encoding\\{([^}]*)\\}.*", lines[enc],
value = TRUE)
if(length(enc)) {
if(length(enc) > 1L)
warning(file0, ": multiple \\encoding lines, using the first",
domain = NA, call. = warningCalls)
## keep first one
enc <- enc[1L]
enc <- sub("^[[:space:]]*\\\\encoding\\{([^}]*)\\}.*", "\\1", enc)
if(verbose) message("found encoding ", enc, domain = NA)
encoding <- if(enc %in% c("UTF-8", "utf-8", "utf8")) "UTF-8" else enc
}
if (encoding == "unknown") encoding <- ""
## the internal function must get some sort of srcfile
if (!inherits(srcfile, "srcfile"))
srcfile <- srcfile(file0)
basename <- basename(srcfile$filename)
srcfile$encoding <- encoding
srcfile$Enc <- "UTF-8"
if (encoding == "ASCII") {
if (any(is.na(iconv(lines, "", "ASCII"))))
stop(file0, ": non-ASCII input and no declared encoding",
domain = NA, call. = warningCalls)
} else {
if (encoding != "UTF-8")
lines <- iconv(lines, encoding, "UTF-8", sub = "byte")
## Strip UTF-8 BOM if necessary.
bytes <- charToRaw(lines[1L])
if(identical(as.integer(bytes[1L : 3L]),
c(0xefL, 0xbbL, 0xbfL)))
lines[1L] <- rawToChar(bytes[-(1L : 3L)])
}
tcon <- file()
writeLines(lines, tcon, useBytes = TRUE)
on.exit(close(tcon))
warndups <- config_val_to_logical(Sys.getenv("_R_WARN_DUPLICATE_RD_MACROS_", "FALSE"))
if (permissive)
# FIXME: this should test for a special class of warning rather than testing the
# message, but those are currently not easily generated from C code.
result <- withCallingHandlers(.External2(C_parseRd, tcon, srcfile, "UTF-8",
verbose, basename, fragment,
warningCalls, macros, warndups),
warning = function(w)
if (grepl("unknown macro", conditionMessage(w)))
invokeRestart("muffleWarning") )
else
result <- .External2(C_parseRd, tcon, srcfile, "UTF-8",
verbose, basename, fragment, warningCalls,
macros, warndups)
result <- expandDynamicFlags(result)
if (permissive)
result <- permissify(result)
result
}
print.Rd <- function(x, deparse = FALSE, ...)
{
cat(as.character.Rd(x, deparse = deparse), sep = "", collapse = "")
invisible(x)
}
as.character.Rd <- function(x, deparse = FALSE, ...)
{
ZEROARG <- c("\\cr", "\\dots", "\\ldots", "\\R", "\\tab") # Only these cause trouble when {} is added
TWOARG <- c("\\section", "\\subsection", "\\item", "\\enc",
"\\method", "\\S3method", "\\S4method", "\\tabular")
USERMACROS <- c("USERMACRO", "\\newcommand", "\\renewcommand")
EQN <- c("\\deqn", "\\eqn", "\\figure")
modes <- c(RLIKE = 1L, LATEXLIKE = 2L, VERBATIM = 3L, INOPTION = 4L, COMMENTMODE = 5L, UNKNOWNMODE = 6L)
tags <- c(RCODE = 1L, TEXT = 2L, VERB = 3L, COMMENT = 5L, UNKNOWN = 6L)
state <- c(braceDepth = 0L, inRString = 0L)
needBraces <- FALSE # if next character is alphabetic, separate by braces.
inEqn <- 0L
pr <- function(x, quoteBraces) {
tag <- attr(x, "Rd_tag")
if (is.null(tag) || tag == "LIST") tag <- ""
if (is.list(x)) {
savestate <- state
state <<- c(0L, 0L)
needBraces <<- FALSE
if (tag == "Rd") { # a whole file
result <- character()
for (i in seq_along(x))
result <- c(result, pr(x[[i]], quoteBraces))
} else if (startsWith(tag, "#")) {
if (deparse) {
dep <- deparseRdElement(x[[1L]][[1L]],
c(state, modes["LATEXLIKE"],
inEqn,
as.integer(quoteBraces)))
result <- c(tag, dep[[1L]])
} else
result <- c(tag, x[[1L]][[1L]])
for (i in seq_along(x[[2L]]))
result <- c(result, pr(x[[2L]][[i]], quoteBraces))
result <- c(result, "#endif\n")
} else if (tag %in% ZEROARG) {
result <- tag
needBraces <<- TRUE
} else if (tag %in% TWOARG) {
result <- tag
for (i in seq_along(x))
result <- c(result, pr(x[[i]], quoteBraces))
} else if (tag %in% EQN) {
result <- tag
inEqn <<- 1L
result <- c(result, pr(x[[1L]], quoteBraces))
inEqn <<- 0L
if (length(x) > 1L)
result <- c(result, pr(x[[2L]], quoteBraces))
} else {
result <- tag
if (!is.null(option <- attr(x, "Rd_option")))
result <- c(result, "[", pr(option, quoteBraces), "]")
result <- c(result, "{")
for (i in seq_along(x))
result <- c(result, pr(x[[i]], quoteBraces))
result <- c(result, "}")
}
if (state[1L]) # If braces didn't match within the list, try again, quoting them
result <- pr(x, TRUE)
state <<- savestate
} else if (tag %in% USERMACROS) {
result <- c()
} else {
if (deparse) {
dep <- deparseRdElement(as.character(x), c(state, tags[tag], inEqn, as.integer(quoteBraces)))
result <- dep[[1L]]
state <<- dep[[2L]][1L:2L]
} else {
if (inherits(x, "Rd"))
class(x) <- setdiff(class(x), "Rd") # Avoid infinite recursion from misuse (PR#16448)
result <- as.character(x)
}
if (needBraces) {
if (grepl("^[[:alpha:]]", result)) result <- c("{}", result)
needBraces <<- FALSE
}
}
result
}
if (is.null(attr(x, "Rd_tag"))) attr(x, "Rd_tag") <- "Rd"
pr(x, quoteBraces = FALSE)
}
deparseRdElement <- function(element, state)
.Call(C_deparseRd, element, state)
# Convert unknown tags into text displaying the tag with braces if necessary
# This allows unknown LateX macros to be embedded in the text, and to be just passed
# through.
permissify <- function(Rd)
{
tags <- RdTags(Rd)
oldclass <- class(Rd)
oldsrcref <- utils::getSrcref(Rd)
oldtag <- attr(Rd, "Rd_tag")
i <- 0
while (i < length(tags)) {
i <- i+1
if (tags[i] == "UNKNOWN") {
Rd[[i]] <- tagged(Rd[[i]], "TEXT", utils::getSrcref(Rd[[i]]))
while (i < length(tags)) {
if (tags[i+1] == "LIST") {
Rd <- c(Rd[seq_len(i)],
list(tagged("{", "TEXT", utils::getSrcref(Rd[[i+1]]))),
permissify(Rd[[i+1]]),
list(tagged("}", "TEXT", utils::getSrcref(Rd[[i+1]]))),
Rd[seq_along(Rd)[-seq_len(i+1)]])
tags <- RdTags(Rd)
i <- i+3
} else if (tags[i+1] == "TEXT" && grepl("^ *$", Rd[[i+1]]))
i <- i + 1
else
break
}
} else if (is.recursive(Rd[[i]]))
Rd[[i]] <- permissify(Rd[[i]])
}
class(Rd) <- oldclass
attr(Rd, "srcref") <- oldsrcref
attr(Rd, "Rd_tag") <- oldtag
Rd
}