# File src/library/tools/R/Rdtools.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2014 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
RdTextFilter <-
function(ifile, encoding = "unknown", keepSpacing = TRUE,
drop = character(), keep = character(),
macros = file.path(R.home("share"), "Rd", "macros", "system.Rd"))
{
if(inherits(ifile, "srcfile"))
ifile <- ifile$filename
if (inherits(ifile, "Rd")) {
# Undo sorting done in prepare2_Rd
srcrefs <- sapply(ifile, function(s) attr(s, "srcref"))
p <- ifile[ order(srcrefs[1,], srcrefs[2,]) ]
class(p) <- class(ifile)
} else
p <- parse_Rd(ifile, encoding = encoding, macros = macros)
tags <- RdTags(p)
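## If the Rd source declares an \encoding, re-parse the file with that
## encoding (unless an already-parsed Rd object was supplied); the value
## is also used at the end to turn latin1 results back from UTF-8.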
if ("\\encoding" %in% tags) {
encoding <- p[[which.max(tags == "\\encoding")]][[1L]]
if (encoding %in% c("UTF-8", "utf-8", "utf8")) encoding <- "UTF-8"
if (!inherits(ifile, "Rd"))
p <- parse_Rd(ifile, encoding=encoding, macros = macros)
} else
encoding <- ""
## Directly using a text connection to accumulate the filtered
## output seems to be faster than using .eval_with_capture(): to use
## the latter, change mycat to cat, or use mycat <- cat, and create
## out via
## out <- .eval_with_capture({
## show(p)
## mycat("\n")
## })$output
## myval <- character()
mycon <- textConnection("myval", open = "w", local = TRUE,
encoding = "UTF-8")
on.exit(close(mycon))
mycat <- function(...) cat(..., file = mycon)
prevline <- 1L
prevcol <- 0L
doPartialMarkup <- function(x, tags, i) { # handle things like \bold{pre}fix
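## Worked example (comment only): for the Rd fragment \bold{pre}fix,
## "pre" is the TEXT child of \bold and "fix" is the TEXT element that
## immediately follows the markup.  Emitting "pre" and then "fix"
## shifted one column to the left makes the filtered output read
## "prefix", so the spell checker sees one word rather than "pre" "fix".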
result <- FALSE
if (i < length(tags)
&& tags[i+1L] == "TEXT"
&& length(x[[i]]) == 1L
&& tags[i] %in% c("\\bold", "\\emph", "\\strong", "\\link")
&& (tags[i] %notin% drop)
&& RdTags(x[[i]]) == "TEXT") {
text1 <- x[[i]][[1L]]
if (length(grep("[^[:space:]]$", text1))) { # Ends in non-blank
text2 <- x[[i+1L]]
if (length(grep("^[^[:space:]]", text2))) { # Starts non-blank
show(text1)
prevcol <<- prevcol+1L # Shift text2 left by one column
saveline <- prevline
show(text2)
if (prevline == saveline)
prevcol <<- prevcol-1L
result <- TRUE
}
}
}
result
}
show <- function(x) {
srcref <- attr(x, "srcref")
firstline <- srcref[1L]
lastline <- srcref[3L]
firstcol <- srcref[5L]
lastcol <- srcref[6L]
tag <- attr(x, "Rd_tag")
if (is.null(tag)) tag <- "NULL"
if (tag %in% drop) tag <- "DROP"
else if (tag %in% keep) tag <- "KEEPLIST" # Include both text and lists
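## Tags listed in 'drop' are remapped to DROP, which emits nothing;
## tags listed in 'keep' are remapped to KEEPLIST, so the default branch
## below recurses into their contents (or relabels a leaf as KEEP) and
## their text is emitted even for tags that would otherwise be dropped.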
switch(tag,
KEEP =,
TEXT = {
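## When keepSpacing is TRUE, pad with newlines and blanks so that each
## piece of text keeps its original line and column; positions reported
## by a spell checker then map straight back to the Rd source.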
if (prevline < firstline) {
prevcol <<- 0L
mycat(rep.int("\n",
if(keepSpacing) firstline - prevline else 1L))
}
if (keepSpacing)
mycat(rep.int(" ", firstcol - prevcol - 1L), sep = "")
x <- as.character(srcref) # go back to original form
mycat(x, sep = "")
prevcol <<- lastcol
prevline <<- lastline
},
"\\S3method"=,
"\\S4method"=,
"\\command"=,
"\\docType"=,
"\\email"=,
"\\encoding"=,
"\\file"=,
"\\keyword"=,
"\\link"=,
"\\linkS4class"=,
"\\method"=,
"\\pkg"=,
"\\var"=,
DROP = {}, # do nothing
"\\tabular"=,
"#ifdef"=,
"#ifndef"={ # Ignore the first arg, process the second
show(x[[2L]])
},
"\\item"={ # Ignore the first arg of a two-arg item
if (length(x) == 2L) show(x[[2L]])
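## e.g. for \item{na.rm}{a logical value} only "a logical value" is
## kept, so argument names are not flagged by the spell checker.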
},
{ # default
if (is.list(x)) {
tags <- RdTags(x)
i <- 0L
while (i < length(x)) {
i <- i + 1L
if (doPartialMarkup(x, tags, i))
i <- i + 1L
else
show(x[[i]])
}
} else if (tag == "KEEPLIST") {
attr(x, "Rd_tag") <- "KEEP"
show(x)
}
})# {switch}
}# end show()
show(p)
mycat("\n")
out <- textConnectionValue(mycon)
## Ideally, we would always canonicalize to UTF-8.
## However, when using RdTextFilter() for aspell(), it is not clear
## whether this is a good idea: the aspell program does not need to
## have full UTF-8 support (and what precisely holds is not clear:
## the manual says that aspell
## can easily check documents in UTF-8 without having to use a
## special dictionary.
## but also
## If Aspell is compiled with a version of the curses library that
## support wide characters then Aspell can also check UTF-8 text.)
## So at least until this can be resolved, turn filter results for
## Rd files originally in latin1 back to latin1.
if(encoding == "latin1")
out <- iconv(out, "UTF-8", "latin1")
out
}
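## A minimal usage sketch (left as comments, since this file is sourced
## when the package is installed; "man/foo.Rd" is a hypothetical path):
##   txt <- RdTextFilter("man/foo.Rd", drop = "\\examples")
##   utils::aspell("man/foo.Rd", filter = RdTextFilter)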