blob: 29e92f2d503dced608a63674f944026472affdcc [file] [log] [blame]
# File src/library/utils/R/aspell.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2016 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
aspell <-
function(files, filter, control = list(), encoding = "unknown",
program = NULL, dictionaries = character())
{
## Take the given files and feed them through spell checker in
## Ispell pipe mode.
## Think about options and more command line options eventually.
program <- aspell_find_program(program)
if(is.na(program))
stop("No suitable spell-checker program found")
## Be nice.
if(inherits(files, "Rd"))
files <- list(files)
files_are_names <- is.character(files)
filter_args <- list()
if(missing(filter) || is.null(filter)) {
filter <- if(!files_are_names) {
function(ifile, encoding) {
if(inherits(ifile, "srcfile"))
readLines(ifile$filename, encoding = encoding,
warn = FALSE)
else if(inherits(ifile, "connection"))
readLines(ifile, encoding = encoding, warn = FALSE)
else {
## What should this do with encodings?
as.character(ifile)
}
}
}
else NULL
}
else if(is.character(filter)) {
## Look up filter in aspell filter db.
filter_name <- filter[1L]
filter <- aspell_filter_db[[filter_name]]
## Warn if the filter was not found in the db.
if(is.null(filter))
warning(gettextf("Filter '%s' is not available.",
filter_name),
domain = NA)
}
else if(is.list(filter)) {
## Support
## list("Rd", drop = "\\references"
## at least for now.
filter_name <- filter[[1L]][1L]
filter_args <- filter[-1L]
filter <- aspell_filter_db[[filter_name]]
## Warn if the filter was not found in the db.
if(is.null(filter))
warning(gettextf("Filter '%s' is not available.",
filter_name),
domain = NA)
}
else if(!is.function(filter))
stop("Invalid 'filter' argument.")
encoding <- rep_len(encoding, length(files))
verbose <- getOption("verbose")
db <- data.frame(Original = character(), File = character(),
Line = integer(), Column = integer(),
stringsAsFactors = FALSE)
db$Suggestions <- list()
tfile <- tempfile("aspell")
on.exit(unlink(tfile))
if(length(dictionaries)) {
paths <- aspell_find_dictionaries(dictionaries)
ind <- paths == ""
if(any(ind)) {
warning(gettextf("The following dictionaries were not found:\n%s",
paste(sprintf(" %s", dictionaries[ind]),
collapse = "\n")),
domain = NA)
paths <- paths[!ind]
}
if(length(paths)) {
words <- unlist(lapply(paths, readRDS), use.names = FALSE)
personal <- tempfile("aspell_personal")
on.exit(unlink(personal), add = TRUE)
## <FIXME>
## How can we get the right language set (if needed)?
## Maybe aspell() needs an additional 'language' arg?
aspell_write_personal_dictionary_file(words, personal,
program = program)
## </FIXME>
control <- c(control, "-p", shQuote(personal))
}
}
## No special expansion of control argument for now.
control <- as.character(control)
fnames <- names(files)
files <- as.list(files)
for (i in seq_along(files)) {
file <- files[[i]]
if(files_are_names)
fname <- file
else {
## Try srcfiles and srcrefs ...
fname <- if(inherits(file, "srcfile"))
file$filename
else
attr(attr(file, "srcref"), "srcfile")$filename
## As a last resort, try the names of the files argument.
if(is.null(fname))
fname <- fnames[i]
## If unknown ...
if(is.null(fname))
fname <- "<unknown>"
}
enc <- encoding[i]
if(verbose)
message(gettextf("Processing file %s", fname),
domain = NA)
lines <- if(is.null(filter))
readLines(file, encoding = enc, warn = FALSE)
else {
## Assume that filter takes an input file (and additional
## arguments) and return a character vector.
do.call(filter, c(list(file, encoding = enc), filter_args))
}
## Allow filters to pass additional control arguments, in case
## these need to be inferred from the file contents.
control <- c(control, attr(lines, "control"))
## Need to escape all lines with carets to ensure Aspell handles
## them as data: the Aspell docs say
## It is recommended that programmatic interfaces prefix every
## data line with an uparrow to protect themselves against
## future changes in Aspell.
writeLines(paste0("^", lines), tfile)
## Note that this re-encodes character strings with marked
## encodings to the current encoding (which is definitely fine
## if this is UTF-8 and Aspell was compiled with full UTF-8
## support). Alternatively, we could try using something along
## the lines of
## writeLines(paste0("^", lines), tfile,
## useBytes = TRUE)
## and pass the encoding info to Aspell in case we know it.
out <- tools:::.system_with_capture(program, c("-a", control),
stdin = tfile)
if(out$status != 0L)
stop(gettextf("Running aspell failed with diagnostics:\n%s",
paste(out$stderr, collapse = "\n")),
domain = NA)
## Hopefully everything worked ok.
lines <- out$stdout[-1L]
pos <- cumsum(lines == "") + 1L
## Format is as follows.
## First line is a header.
## Blank lines separate the results for each line.
## Results for the word on each line are given as follows.
## * If the word was found in the main dictionary, or your personal
## dictionary, then the line contains only a `*'.
## * If the word is not in the dictionary, but there are
## suggestions, then the line contains an `&', a space, the
## misspelled word, a space, the number of near misses, the number
## of characters between the beginning of the line and the
## beginning of the misspelled word, a colon, another space, and a
## list of the suggestions separated by commas and spaces.
## * If the word does not appear in the dictionary, and there are no
## suggestions, then the line contains a `#', a space, the
## misspelled word, a space, and the character offset from the
## beginning of the line.
## This can be summarized as follows:
## OK: *
## Suggestions: & original count offset: miss, miss, ...
## None: # original offset
## Look at words not in dictionary with suggestions.
if(any(ind <- startsWith(lines, "&"))) {
info <- strsplit(lines[ind], ": ", fixed = TRUE)
one <- strsplit(sapply(info, `[`, 1L), " ", fixed = TRUE)
two <- strsplit(sapply(info, `[`, 2L), ", ", fixed = TRUE)
db1 <- data.frame(Original =
as.character(sapply(one, `[`, 2L)),
File = fname,
Line = pos[ind],
Column =
as.integer(sapply(one, `[`, 4L)),
stringsAsFactors = FALSE)
db1$Suggestions <- two
db <- rbind(db, db1)
}
## Looks at words not in dictionary with no suggestions.
if(any(ind <- startsWith(lines, "#"))) {
one <- strsplit(lines[ind], " ", fixed = TRUE)
db1 <- data.frame(Original =
as.character(sapply(one, `[`, 2L)),
File = fname,
Line = pos[ind],
Column =
as.integer(sapply(one, `[`, 3L)),
stringsAsFactors = FALSE)
db1$Suggestions <- vector("list", length(one))
db <- rbind(db, db1)
}
}
class(db) <- c("aspell", "data.frame")
db
}
format.aspell <-
function(x, sort = TRUE, verbose = FALSE, indent = 2L, ...)
{
if(!nrow(x)) return(character())
if(sort)
x <- x[order(x$Original, x$File, x$Line, x$Column), ]
from <- split(sprintf("%s:%d:%d", x$File, x$Line, x$Column),
x$Original)
if(verbose) {
unlist(Map(function(w, f, s) {
sprintf("Word: %s\nFrom: %s\n%s",
w,
paste0(c("", rep.int(" ", length(f) - 1L)),
f, collapse = "\n"),
paste(strwrap(paste("Suggestions:",
paste(s[[1L]], collapse = " ")),
exdent = 6L, indent = 0L),
collapse = "\n"))
},
names(from),
from,
split(x$Suggestions, x$Original)))
} else {
sep <- sprintf("\n%s", strrep(" ", indent))
paste(names(from),
vapply(from, paste, "", collapse = sep),
sep = sep)
}
}
print.aspell <-
function(x, ...)
{
if(nrow(x))
writeLines(paste(format(x, ...), collapse = "\n\n"))
invisible(x)
}
summary.aspell <-
function(object, ...)
{
words <- sort(unique(object$Original))
if(length(words)) {
writeLines("Possibly mis-spelled words:")
print(words)
}
invisible(words)
}
aspell_filter_db <- new.env(hash = FALSE) # small
aspell_filter_db$Rd <- tools::RdTextFilter
aspell_filter_db$Sweave <- tools::SweaveTeXFilter
aspell_find_program <-
function(program = NULL)
{
check <- !is.null(program) || !is.null(names(program))
if(is.null(program))
program <- getOption("aspell_program")
if(is.null(program))
program <- c("aspell", "hunspell", "ispell")
program <- Filter(nzchar, Sys.which(program))[1L]
if(!is.na(program) && check) {
out <- c(system(sprintf("%s -v", program),
intern = TRUE), "")[1L]
if(grepl("really Aspell", out))
names(program) <- "aspell"
else if(grepl("really Hunspell", out))
names(program) <- "hunspell"
else if(grepl("International Ispell", out))
names(program) <- "ispell"
else
names(program) <- NA_character_
}
program
}
aspell_dictionaries_R <- "en_stats"
aspell_find_dictionaries <-
function(dictionaries, dirnames = character())
{
dictionaries <- as.character(dictionaries)
if(!(n <- length(dictionaries))) return(character())
## Always search the R system dictionary directory first.
dirnames <- c(file.path(R.home("share"), "dictionaries"), dirnames)
## For now, all dictionary files should be .rds files.
if(any(ind <- !endsWith(dictionaries, ".rds")))
dictionaries[ind] <- sprintf("%s.rds", dictionaries[ind])
out <- character(n)
## Dictionaries with no path separators are looked for in the given
## dictionary directories (by default, the R system dictionary
## directory).
ind <- grepl(.Platform$file.sep, dictionaries, fixed = TRUE)
## (Equivalently, could check where paths == basename(paths).)
if(length(pos <- which(ind))) {
pos <- pos[file_test("-f", dictionaries[pos])]
out[pos] <- normalizePath(dictionaries[pos], "/")
}
if(length(pos <- which(!ind))) {
out[pos] <- find_files_in_directories(dictionaries[pos],
dirnames)
}
out
}
### Utilities.
aspell_inspect_context <-
function(x)
{
x <- split(x, x$File)
y <- Map(function(f, x) {
lines <- readLines(f, warn = FALSE)[x$Line]
cbind(f,
x$Line,
substring(lines, 1L, x$Column - 1L),
x$Original,
substring(lines, x$Column + nchar(x$Original)))
},
names(x), x)
y <- data.frame(do.call(rbind, y), stringsAsFactors = FALSE)
names(y) <- c("File", "Line", "Left", "Original", "Right")
class(y) <- c("aspell_inspect_context", "data.frame")
y
}
print.aspell_inspect_context <-
function(x, ...)
{
s <- split(x, x$File)
nms <- names(s)
for(i in seq_along(s)) {
e <- s[[i]]
writeLines(c(sprintf("File '%s':", nms[i]),
sprintf(" Line %s: \"%s\", \"%s\", \"%s\"",
format(e$Line),
gsub("\"", "\\\"", e$Left),
e$Original,
gsub("\"", "\\\"", e$Right)),
""))
}
invisible(x)
}
## For spell-checking the R manuals:
## This can really only be done with Aspell as the other checkers have
## no texinfo mode.
aspell_control_R_manuals <-
list(aspell =
c("--master=en_US",
"--add-extra-dicts=en_GB",
"--mode=texinfo",
"--add-texinfo-ignore=acronym",
"--add-texinfo-ignore=deftypefun",
"--add-texinfo-ignore=deftypefunx",
"--add-texinfo-ignore=findex",
"--add-texinfo-ignore=enindex",
"--add-texinfo-ignore=include",
"--add-texinfo-ignore=ifclear",
"--add-texinfo-ignore=ifset",
"--add-texinfo-ignore=math",
"--add-texinfo-ignore=macro",
"--add-texinfo-ignore=multitable",
"--add-texinfo-ignore=node",
"--add-texinfo-ignore=pkg",
"--add-texinfo-ignore=printindex",
"--add-texinfo-ignore=set",
"--add-texinfo-ignore=vindex",
"--add-texinfo-ignore-env=menu",
"--add-texinfo-ignore=CRANpkg"
),
hunspell =
c("-d en_US,en_GB"))
aspell_R_manuals <-
function(which = NULL, dir = NULL, program = NULL,
dictionaries = aspell_dictionaries_R)
{
if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
## Allow specifying 'R-exts' and alikes, or full paths.
files <- if(is.null(which)) {
Sys.glob(file.path(dir, "doc", "manual", "*.texi"))
} else {
ind <- which(which ==
basename(tools::file_path_sans_ext(which)))
which[ind] <-
file.path(dir, "doc", "manual",
sprintf("%s.texi", which[ind]))
which
}
program <- aspell_find_program(program)
aspell(files,
control = aspell_control_R_manuals[[names(program)]],
program = program,
dictionaries = dictionaries)
}
## For spell-checking the R Rd files:
aspell_control_R_Rd_files <-
list(aspell =
c("--master=en_US",
"--add-extra-dicts=en_GB"),
hunspell =
c("-d en_US,en_GB"))
aspell_R_Rd_files <-
function(which = NULL, dir = NULL, drop = "\\references",
program = NULL, dictionaries = aspell_dictionaries_R)
{
files <- character()
if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
if(is.null(which)) {
which <- tools:::.get_standard_package_names()$base
# CHANGES.Rd could be dropped from checks in the future;
# it will not be updated post 2.15.0
files <- c(file.path(dir, "doc", "NEWS.Rd"),
file.path(dir, "src", "gnuwin32", "CHANGES.Rd"))
files <- files[file_test("-f", files)]
}
files <-
c(files,
unlist(lapply(file.path(dir, "src", "library", which, "man"),
tools::list_files_with_type,
"docs", OS_subdirs = c("unix", "windows")),
use.names = FALSE))
program <- aspell_find_program(program)
aspell(files,
filter = list("Rd", drop = drop),
control = aspell_control_R_Rd_files[[names(program)]],
program = program,
dictionaries = dictionaries)
}
## For spell-checking Rd files in a package:
aspell_package_Rd_files <-
function(dir, drop = c("\\author", "\\references"),
control = list(), program = NULL, dictionaries = character())
{
dir <- normalizePath(dir, "/")
subdir <- file.path(dir, "man")
files <- if(dir.exists(subdir))
tools::list_files_with_type(subdir,
"docs",
OS_subdirs = c("unix", "windows"))
else character()
meta <- tools:::.get_package_metadata(dir, installed = FALSE)
if(is.na(encoding <- meta["Encoding"]))
encoding <- "unknown"
defaults <- .aspell_package_defaults(dir, encoding)$Rd_files
if(!is.null(defaults)) {
## Direct settings currently override (could add a list add =
## TRUE mechanism eventually).
if(!is.null(d <- defaults$drop))
drop <- d
if(!is.null(d <- defaults$control))
control <- d
if(!is.null(d <- defaults$program))
program <- d
if(!is.null(d <- defaults$dictionaries)) {
dictionaries <-
aspell_find_dictionaries(d, file.path(dir, ".aspell"))
}
## <FIXME>
## Deprecated in favor of specifying R level dictionaries.
## Maybe give a warning (in particular if both are given)?
if(!is.null(d <- defaults$personal))
control <- c(control,
sprintf("-p %s",
shQuote(file.path(dir, ".aspell", d))))
## </FIXME>
}
macros <- tools::loadPkgRdMacros(dir,
macros = file.path(R.home("share"),
"Rd", "macros",
"system.Rd"))
aspell(files,
filter = list("Rd", drop = drop, macros = macros),
control = control,
encoding = encoding,
program = program,
dictionaries = dictionaries)
}
## For spell-checking the R vignettes:
## This should really be done with Aspell as the other checkers have far
## less powerful TeX modes.
aspell_control_R_vignettes <-
list(aspell =
c("--mode=tex",
"--master=en_US",
"--add-extra-dicts=en_GB",
"--add-tex-command='code p'",
"--add-tex-command='pkg p'",
"--add-tex-command='CRANpkg p'"
),
hunspell =
c("-t", "-d en_US,en_GB"))
aspell_R_vignettes <-
function(program = NULL, dictionaries = aspell_dictionaries_R)
{
files <- Sys.glob(file.path(tools:::.R_top_srcdir_from_Rd(),
"src", "library", "*", "vignettes",
"*.Rnw"))
program <- aspell_find_program(program)
aspell(files,
filter = "Sweave",
control = aspell_control_R_vignettes[[names(program)]],
program = program,
dictionaries = dictionaries)
}
## For spell-checking vignettes in a package:
## This should really be done with Aspell as the other checkers have far
## less powerful TeX modes.
aspell_control_package_vignettes <-
list(aspell =
c("--add-tex-command='citep oop'",
"--add-tex-command='Sexpr p'",
"--add-tex-command='code p'",
"--add-tex-command='pkg p'",
"--add-tex-command='proglang p'",
"--add-tex-command='samp p'"
))
aspell_package_vignettes <-
function(dir,
control = list(), program = NULL, dictionaries = character())
{
dir <- tools::file_path_as_absolute(dir)
vinfo <- tools::pkgVignettes(dir = dir)
files <- vinfo$docs
if(!length(files)) return(aspell(character()))
## We need the package encoding to read the defaults file ...
meta <- tools:::.get_package_metadata(dir, installed = FALSE)
if(is.na(encoding <- meta["Encoding"]))
encoding <- "unknown"
defaults <- .aspell_package_defaults(dir, encoding)$vignettes
if(!is.null(defaults)) {
if(!is.null(d <- defaults$control))
control <- d
if(!is.null(d <- defaults$program))
program <- d
if(!is.null(d <- defaults$dictionaries)) {
dictionaries <-
aspell_find_dictionaries(d, file.path(dir, ".aspell"))
}
## <FIXME>
## Deprecated in favor of specifying R level dictionaries.
## Maybe give a warning (in particular if both are given)?
if(!is.null(d <- defaults$personal))
control <- c(control,
sprintf("-p %s",
shQuote(file.path(dir, ".aspell", d))))
## </FIXME>
}
program <- aspell_find_program(program)
fgroups <- split(files, vinfo$engines)
egroups <- split(vinfo$encodings, vinfo$engines)
do.call(rbind,
Map(function(fgroup, egroup, engine) {
engine <- tools::vignetteEngine(engine)
aspell(fgroup,
filter = engine$aspell$filter,
control =
c(engine$aspell$control,
aspell_control_package_vignettes[[names(program)]],
control),
encoding = egroup,
program = program,
dictionaries = dictionaries)
},
fgroups,
egroups,
names(fgroups)
)
)
}
## Spell-checking R files.
aspell_filter_db$R <-
function(ifile, encoding = "unknown", ignore = character())
{
pd <- get_parse_data_for_message_strings(ifile, encoding)
if(is.null(pd) || !NROW(pd)) return(character())
## Strip the string delimiters.
pd$text <- substring(pd$text, 2L, nchar(pd$text) - 1L)
## Replace whitespace C backslash escape sequences by whitespace.
pd$text <- gsub("(^|[^\\])\\\\[fnrt]", "\\1 ", pd$text)
pd$text <- gsub( "([^\\])\\\\[fnrt]", "\\1 ", pd$text)
## (Do this twice for now because in e.g.
## \n\t\tInformation on package %s
## the first \t is not matched the first time. Alternatively, we
## could match with
## (^|[^\\])((\\\\[fnrt])+)
## but then computing the replacement (\\1 plus as many blanks as
## the characters in \\2) is not straightforward.
## For gettextf() calls, replace basic percent escape sequences by
## whitespace.
ind <- pd$caller == "gettextf"
if(any(ind)) {
pd$text[ind] <-
gsub("(^|[^%])%[dioxXfeEgGaAs]", "\\1 ", pd$text[ind])
pd$text[ind] <-
gsub(" ([^%])%[dioxXfeEgGaAs]", "\\1 ", pd$text[ind])
## (See above for doing this twice.)
}
lines <- readLines(ifile, encoding = encoding, warn = FALSE)
## Column positions in the parse data have tabs expanded to tab
## stops using a tab width of 8, so for lines with tabs we need to
## map the column positions back to character positions.
lines_in_pd <- sort(unique(c(pd$line1, pd$line2)))
tab <- Map(function(tp, nc) {
if(tp[1L] == -1L) return(NULL)
widths <- rep.int(1, nc)
for(i in tp) {
cols <- cumsum(widths)
widths[i] <- 8 - (cols[i] - 1) %% 8
}
cumsum(widths)
},
gregexpr("\t", lines[lines_in_pd], fixed = TRUE),
nchar(lines[lines_in_pd]))
names(tab) <- lines_in_pd
lines[lines_in_pd] <- gsub("[^\t]", " ", lines[lines_in_pd])
lines[-lines_in_pd] <- ""
for(entry in split(pd, seq_len(NROW(pd)))) {
line1 <- entry$line1
line2 <- entry$line2
col1 <- entry$col1
col2 <- entry$col2
if(line1 == line2) {
if(length(ptab <- tab[[as.character(line1)]])) {
col1 <- which(ptab == col1) + 1L
col2 <- which(ptab == col2) - 1L
}
substring(lines[line1], col1, col2) <- entry$text
} else {
texts <- unlist(strsplit(entry$text, "\n", fixed = TRUE))
n <- length(texts)
if(length(ptab <- tab[[as.character(line1)]])) {
col1 <- which(ptab == col1) + 1L
}
substring(lines[line1], col1) <- texts[1L]
pos <- seq.int(from = 2L, length.out = n - 2L)
if(length(pos))
lines[line1 + pos - 1] <- texts[pos]
if(length(ptab <- tab[[as.character(line2)]])) {
col2 <- which(ptab == col2) - 1L
}
substring(lines[line2], 1L, col2) <- texts[n]
}
}
blank_out_ignores_in_lines(lines, ignore)
}
get_parse_data_for_message_strings <-
function(file, encoding = "unknown")
{
## The message strings considered are the string constants subject to
## translation in gettext-family calls (see below for details).
exprs <-
suppressWarnings(tools:::.parse_code_file(file = file,
encoding = encoding,
keep.source = TRUE))
if(!length(exprs)) return(NULL)
pd <- getParseData(exprs)
## Function for computing grandparent ids.
parents <- pd$parent
names(parents) <- pd$id
gpids <- function(ids)
parents[as.character(parents[as.character(ids)])]
ind <- (pd$token == "SYMBOL_FUNCTION_CALL") &
!is.na(match(pd$text,
c("warning", "stop",
"message", "packageStartupMessage",
"gettext", "gettextf", "ngettext")))
funs <- pd$text[ind]
ids <- gpids(pd$id[ind])
calls <- getParseText(pd, ids)
table <- pd[pd$token == "STR_CONST", ]
## Could have run into truncation ...
table$text <- getParseText(table, table$id)
pos <- match(gpids(table$id), ids)
ind <- !is.na(pos)
table <- split(table[ind, ], factor(pos[ind], seq_along(ids)))
## We have synopses
## message(..., domain = NULL, appendLF = TRUE)
## packageStartupMessage(..., domain = NULL, appendLF = TRUE)
## warning(..., call. = TRUE, immediate. = FALSE, domain = NULL)
## stop(..., call. = TRUE, domain = NULL)
## gettext(..., domain = NULL)
## ngettext(n, msg1, msg2, domain = NULL)
## gettextf(fmt, ..., domain = NULL)
## For the first five, we simply take all unnamed strings.
## (Could make this more precise, of course.)
## For the latter two, we take the msg1/msg2 and fmt arguments,
## provided these are strings.
## <NOTE>
## Using domain = NA inhibits translation: perhaps it should
## optionally also inhibit spell checking?
## </NOTE>
extract_message_strings <- function(fun, call, table) {
## Matching a call containing ... gives
## Error in match.call(message, call) :
## ... used in a situation where it doesn't exist
## so eliminate these.
## (Note that we also drop "..." strings.)
call <- parse(text = call)[[1L]]
call <- call[ as.character(call) != "..." ]
mc <- as.list(match.call(get(fun, envir = .BaseNamespaceEnv),
call))
args <- if(fun == "gettextf")
mc["fmt"]
else if(fun == "ngettext")
mc[c("msg1", "msg2")]
else {
if(!is.null(names(mc)))
mc <- mc[!nzchar(names(mc))]
mc[-1L]
}
strings <- as.character(args[vapply(args, is.character, TRUE)])
## Need to canonicalize to match string constants before and
## after parsing ...
texts <- vapply(parse(text = table$text), as.character, "")
pos <- which(!is.na(match(texts, strings)))
cbind(table[pos, ], caller = rep.int(fun, length(pos)))
}
do.call(rbind,
Map(extract_message_strings,
as.list(funs), as.list(calls), table))
}
## For spell-checking the R R files.
aspell_R_R_files <-
function(which = NULL, dir = NULL,
ignore = c("[ \t]'[^']*'[ \t[:punct:]]",
"[ \t][[:alnum:]_.]*\\(\\)[ \t[:punct:]]"),
program = NULL, dictionaries = aspell_dictionaries_R)
{
if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
if(is.null(which))
which <- tools:::.get_standard_package_names()$base
files <-
unlist(lapply(file.path(dir, "src", "library", which, "R"),
tools::list_files_with_type,
"code",
OS_subdirs = c("unix", "windows")),
use.names = FALSE)
program <- aspell_find_program(program)
aspell(files,
filter = list("R", ignore = ignore),
control = aspell_control_R_Rd_files[[names(program)]],
program = program,
dictionaries = dictionaries)
}
## For spell-checking R files in a package.
aspell_package_R_files <-
function(dir, ignore = character(),
control = list(), program = NULL, dictionaries = character())
{
dir <- tools::file_path_as_absolute(dir)
subdir <- file.path(dir, "R")
files <- if(dir.exists(subdir))
tools::list_files_with_type(subdir,
"code",
OS_subdirs = c("unix", "windows"))
else character()
meta <- tools:::.get_package_metadata(dir, installed = FALSE)
if(is.na(encoding <- meta["Encoding"]))
encoding <- "unknown"
defaults <- .aspell_package_defaults(dir, encoding)$R_files
if(!is.null(defaults)) {
if(!is.null(d <- defaults$ignore))
ignore <- d
if(!is.null(d <- defaults$control))
control <- d
if(!is.null(d <- defaults$program))
program <- d
if(!is.null(d <- defaults$dictionaries)) {
dictionaries <-
aspell_find_dictionaries(d, file.path(dir, ".aspell"))
}
}
program <- aspell_find_program(program)
aspell(files,
filter = list("R", ignore = ignore),
control = control,
encoding = encoding,
program = program,
dictionaries = dictionaries)
}
## Spell-checking pot files.
## (Of course, directly analyzing the message strings would be more
## useful, but require writing appropriate text filters.)
## See also tools:::checkPoFile().
aspell_filter_db$pot <-
function (ifile, encoding = "unknown", ignore = character())
{
lines <- readLines(ifile, encoding = encoding, warn = FALSE)
ind <- grepl("^msgid[ \t]", lines)
do_entry <- function(s) {
out <- character(length(s))
i <- 1L
out[i] <- blank_out_regexp_matches(s[i], "^msgid[ \t]+\"")
while(startsWith(s[i <- i + 1L], '"'))
out[i] <- sub("^\"", " ", s[i])
if(grepl("^msgid_plural[ \t]", s[i])) {
out[i] <- blank_out_regexp_matches(s[i], "^msgid_plural[ \t]+\"")
while(startsWith(s[i <- i + 1L], '"'))
out[i] <- sub("^\"", " ", s[i])
}
out
}
entries <- split(lines, cumsum(ind))
lines <- c(character(length(entries[[1L]])),
as.character(do.call(c, lapply(entries[-1L], do_entry))))
lines <- sub("\"[ \t]*$", " ", lines)
## <FIXME>
## Could replace backslash escapes for blanks and percent escapes by
## blanks, similar to what the R text filter does.
## </FIXME>
blank_out_ignores_in_lines(lines, ignore)
}
## For spell-checking all pot files in a package.
aspell_package_pot_files <-
function(dir, ignore = character(),
control = list(), program = NULL, dictionaries = character())
{
dir <- tools::file_path_as_absolute(dir)
subdir <- file.path(dir, "po")
files <- if(dir.exists(subdir))
Sys.glob(file.path(subdir, "*.pot"))
else character()
meta <- tools:::.get_package_metadata(dir, installed = FALSE)
if(is.na(encoding <- meta["Encoding"]))
encoding <- "unknown"
program <- aspell_find_program(program)
aspell(files,
filter = list("pot", ignore = ignore),
control = control,
encoding = encoding,
program = program,
dictionaries = dictionaries)
}
## For spell-checking the R C files.
aspell_R_C_files <-
function(which = NULL, dir = NULL,
ignore = c("[ \t]'[[:alnum:]_.]*'[ \t[:punct:]]",
"[ \t][[:alnum:]_.]*\\(\\)[ \t[:punct:]]"),
program = NULL, dictionaries = aspell_dictionaries_R)
{
if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
if(is.null(which))
which <- tools:::.get_standard_package_names()$base
if(!is.na(pos <- match("base", which)))
which[pos] <- "R"
files <- sprintf("%s.pot",
file.path(dir, "src", "library",
which, "po", which))
files <- files[file_test("-f", files)]
program <- aspell_find_program(program)
aspell(files,
filter = list("pot", ignore = ignore),
control = aspell_control_R_Rd_files[[names(program)]],
program = program,
dictionaries = dictionaries)
}
## For spell-checking package C files.
aspell_package_C_files <-
function(dir, ignore = character(),
control = list(), program = NULL, dictionaries = character())
{
dir <- tools::file_path_as_absolute(dir)
## Assume that the package C message template file is shipped as
## 'po/PACKAGE.pot'.
files <- file.path(dir, "po",
paste(basename(dir), "pot", collapse = "."))
files <- files[file_test("-f", files)]
meta <- tools:::.get_package_metadata(dir, installed = FALSE)
if(is.na(encoding <- meta["Encoding"]))
encoding <- "unknown"
defaults <- .aspell_package_defaults(dir, encoding)$C_files
if(!is.null(defaults)) {
if(!is.null(d <- defaults$ignore))
ignore <- d
if(!is.null(d <- defaults$control))
control <- d
if(!is.null(d <- defaults$program))
program <- d
if(!is.null(d <- defaults$dictionaries)) {
dictionaries <-
aspell_find_dictionaries(d, file.path(dir, ".aspell"))
}
}
program <- aspell_find_program(program)
aspell(files,
filter = list("pot", ignore = ignore),
control = control,
encoding = encoding,
program = program,
dictionaries = dictionaries)
}
## Spell-checking DCF files.
aspell_filter_db$dcf <-
function(ifile, encoding, keep = c("Title", "Description"),
ignore = character())
{
lines <- readLines(ifile, encoding = encoding, warn = FALSE)
line_has_tags <- grepl("^[^[:blank:]][^:]*:", lines)
tags <- sub(":.*", "", lines[line_has_tags])
lines[line_has_tags] <-
blank_out_regexp_matches(lines[line_has_tags], "^[^:]*:")
lines <- split(lines, cumsum(line_has_tags))
ind <- is.na(match(tags, keep))
lines[ind] <- lapply(lines[ind], function(s) rep.int("", length(s)))
ind <- !ind
lines[ind] <- lapply(lines[ind], paste0, " ")
lines <- unlist(lines, use.names = FALSE)
blank_out_ignores_in_lines(lines, ignore)
}
## For spell-checking package DESCRIPTION files.
aspell_package_description <-
function(dir, ignore = character(),
control = list(), program = NULL, dictionaries = character())
{
dir <- tools::file_path_as_absolute(dir)
files <- file.path(dir, "DESCRIPTION")
meta <- tools:::.get_package_metadata(dir, installed = FALSE)
if(is.na(encoding <- meta["Encoding"]))
encoding <- "unknown"
## Allow providing package defaults but make this controllable via
## _R_ASPELL_USE_DEFAULTS_FOR_PACKAGE_DESCRIPTION_
## to safeguard against possible mis-use for CRAN incoming checks.
defaults <-
Sys.getenv("_R_ASPELL_USE_DEFAULTS_FOR_PACKAGE_DESCRIPTION_",
"TRUE")
defaults <- if(tools:::config_val_to_logical(defaults)) {
.aspell_package_defaults(dir, encoding)$description
} else NULL
if(!is.null(defaults)) {
if(!is.null(d <- defaults$ignore))
ignore <- d
if(!is.null(d <- defaults$control))
control <- d
if(!is.null(d <- defaults$program))
program <- d
if(!is.null(d <- defaults$dictionaries)) {
dictionaries <-
aspell_find_dictionaries(d, file.path(dir, ".aspell"))
}
}
program <- aspell_find_program(program)
aspell(files,
filter = list("dcf", ignore = ignore),
control = control,
encoding = encoding,
program = program,
dictionaries = dictionaries)
}
## Spell-checking Markdown files.
aspell_filter_db$md <-
function(ifile, encoding = "UTF-8")
{
x <- readLines(ifile, encoding = encoding, warn = FALSE)
n <- nchar(x)
y <- strrep(rep.int(" ", length(x)), n)
## Determine positions of 'texts' along the lines of
## spelling::parse_text_md () by Jeroen Ooms.
md <- commonmark::markdown_xml(x, extensions = TRUE,
sourcepos = TRUE)
doc <- xml2::xml_ns_strip(xml2::read_xml(md))
pos <- strsplit(xml2::xml_attr(xml2::xml_find_all(doc,
"//text[@sourcepos]"),
"sourcepos"),
"[:-]")
## Now use the following idea.
## Each elt of pos now has positions for l1:c1 to l2:c2.
## If l1 < l2
## Lines in (l1, l2) are taken as a whole
## Line l1 from c1 to nchar for l1
## Line l2 from 1 to c1
## otherwise
## Line l1 from c1 to c2.
for(p in pos) {
p <- as.integer(p)
## Legibility ...
l1 <- p[1L]; c1 <- p[2L]; l2 <- p[3L]; c2 <- p[4L]
if(l1 < l2) {
substring(y[l1], c1, n[l1]) <- substring(x[l1], c1, n[l1])
if(l1 + 1L < l2) {
w <- seq.int(from = l1 + 1L, to = l2 - 1L)
y[w] <- x[w]
}
substring(y[l2], 1L, c2) <- substring(x[l2], 1L, c2)
} else {
substring(y[l1], c1, c2) <- substring(x[l1], c1, c2)
}
}
y
}
## For spell checking packages.
aspell_package <-
function(dir,
control = list(), program = NULL, dictionaries = character())
{
args <- list(dir = dir,
program = program,
control = control,
dictionaries = dictionaries)
a <- rbind(do.call(aspell_package_description, args),
do.call(aspell_package_Rd_files, args),
do.call(aspell_package_vignettes, args),
do.call(aspell_package_R_files, args),
do.call(aspell_package_C_files, args))
if(nrow(a)) {
a$File <- tools:::.file_path_relative_to_dir(a$File,
dirname(dir))
}
a
}
## For writing personal dictionaries:
aspell_write_personal_dictionary_file <-
function(x, out, language = "en", program = NULL)
{
if(inherits(x, "aspell"))
x <- sort(unique(x$Original))
program <- aspell_find_program(program)
if(is.na(program))
stop("No suitable spell check program found.")
## <NOTE>
## Ispell and Hunspell take simple word lists as personal dictionary
## files, but Aspell requires a special format, see e.g.
## http://aspell.net/man-html/Format-of-the-Personal-and-Replacement-Dictionaries.html
## and one has to create these by hand, as
## aspell --lang=en create personal ./foo "a b c"
## gives: Sorry "create/merge personal" is currently unimplemented.
## Encodings are a nightmare.
## Try to canonicalize to UTF-8 for Aspell (which allows recording
## the encoding in the personal dictionary).
## <FIXME>
## What should we do for Hunspell (which can handle UTF-8, but has
## no encoding information in the personal dictionary), or Ispell
## (which cannot handle UTF-8)?
## </FIXME>
if(names(program) == "aspell") {
header <- sprintf("personal_ws-1.1 %s %d UTF-8",
language, length(x))
x <- enc2utf8(x)
}
else {
header <- NULL
}
writeLines(c(header, x), out, useBytes = TRUE)
}
## For reading package defaults:
.aspell_package_defaults <-
function(dir, encoding = "unknown")
{
dfile <- file.path(dir, ".aspell", "defaults.R")
if(!file_test("-f", dfile))
return(NULL)
exprs <- parse(dfile, encoding = encoding)
envir <- new.env()
for(e in exprs) eval(e, envir)
as.list(envir)
}
## Utilities.
blank_out_regexp_matches <-
function(s, re, ...)
{
m <- gregexpr(re, s, ...)
regmatches(s, m) <-
Map(function(n) strrep(" ", n),
lapply(regmatches(s, m), nchar))
s
}
blank_out_ignores_in_lines <-
function(lines, ignore)
{
args <- list()
if(is.list(ignore)) {
args <- ignore[-1L]
ignore <- ignore[[1L]]
}
for(re in ignore[nzchar(ignore)])
lines <- do.call(blank_out_regexp_matches,
c(list(lines, re), args))
lines
}
find_files_in_directories <-
function(basenames, dirnames)
{
dirnames <- dirnames[dir.exists(dirnames)]
dirnames <- normalizePath(dirnames, "/")
out <- character(length(basenames))
pos <- seq_along(out)
for(dir in dirnames) {
paths <- file.path(dir, basenames[pos])
ind <- file_test("-f", paths)
out[pos[ind]] <- paths[ind]
pos <- pos[!ind]
if(!length(pos)) break
}
out
}