src/library/utils/R/aspell.R - R - Git at Google

 #  File src/library/utils/R/aspell.R
 #  Part of the R package, https://www.R-project.org
 #
 #  Copyright (C) 1995-2016 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
 #  the Free Software Foundation; either version 2 of the License, or
 #  (at your option) any later version.
 #
 #  This program is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #  GNU General Public License for more details.
 #
 #  A copy of the GNU General Public License is available at
 #  https://www.R-project.org/Licenses/

 aspell <-
 function(files, filter, control = list(), encoding = "unknown",
          program = NULL, dictionaries = character())
 {
     ## Take the given files and feed them through spell checker in
     ## Ispell pipe mode.

     ## Think about options and more command line options eventually.

     program <- aspell_find_program(program)
     if(is.na(program))
         stop("No suitable spell-checker program found")

     ## Be nice.
     if(inherits(files, "Rd"))
         files <- list(files)

     files_are_names <- is.character(files)

     filter_args <- list()
     if(missing(filter) || is.null(filter)) {
         filter <- if(!files_are_names) {
             function(ifile, encoding) {
                 if(inherits(ifile, "srcfile"))
                     readLines(ifile$filename, encoding = encoding,
                               warn = FALSE)
                 else if(inherits(ifile, "connection"))
                     readLines(ifile, encoding = encoding, warn = FALSE)
                 else {
                     ## What should this do with encodings?
                     as.character(ifile)
                 }
             }
         }
         else NULL
     }
     else if(is.character(filter)) {
         ## Look up filter in aspell filter db.
         filter_name <- filter[1L]
         filter <- aspell_filter_db[[filter_name]]
         ## Warn if the filter was not found in the db.
         if(is.null(filter))
             warning(gettextf("Filter '%s' is not available.",
                              filter_name),
                     domain = NA)
     }
     else if(is.list(filter)) {
         ## Support
         ##   list("Rd", drop = "\\references"
         ## at least for now.
         filter_name <- filter[[1L]][1L]
         filter_args <- filter[-1L]
         filter <- aspell_filter_db[[filter_name]]
         ## Warn if the filter was not found in the db.
         if(is.null(filter))
             warning(gettextf("Filter '%s' is not available.",
                              filter_name),
                     domain = NA)
     }
     else if(!is.function(filter))
         stop("Invalid 'filter' argument.")

     encoding <- rep_len(encoding, length(files))

     verbose <- getOption("verbose")

     db <- data.frame(Original = character(), File = character(),
                      Line = integer(), Column = integer(),
                      stringsAsFactors = FALSE)
     db$Suggestions <- list()

     tfile <- tempfile("aspell")
     on.exit(unlink(tfile))

     if(length(dictionaries)) {
         paths <- aspell_find_dictionaries(dictionaries)
         ind <- paths == ""
         if(any(ind)) {
             warning(gettextf("The following dictionaries were not found:\n%s",
                              paste(sprintf("  %s", dictionaries[ind]),
                                    collapse = "\n")),
                     domain = NA)
             paths <- paths[!ind]
         }
         if(length(paths)) {
             words <- unlist(lapply(paths, readRDS), use.names = FALSE)
             personal <- tempfile("aspell_personal")
             on.exit(unlink(personal), add = TRUE)
             ## <FIXME>
             ## How can we get the right language set (if needed)?
             ## Maybe aspell() needs an additional 'language' arg?
             aspell_write_personal_dictionary_file(words, personal,
                                                   program = program)
             ## </FIXME>
             control <- c(control, "-p", shQuote(personal))
         }
     }

     ## No special expansion of control argument for now.
     control <- as.character(control)

     fnames <- names(files)
     files <- as.list(files)

     for (i in seq_along(files)) {

         file <- files[[i]]
         if(files_are_names)
             fname <- file
         else {
             ## Try srcfiles and srcrefs ...
             fname <- if(inherits(file, "srcfile"))
                 file$filename
             else
                 attr(attr(file, "srcref"), "srcfile")$filename
             ## As a last resort, try the names of the files argument.
             if(is.null(fname))
                 fname <- fnames[i]
             ## If unknown ...
             if(is.null(fname))
                 fname <- "<unknown>"
         }

         enc <- encoding[i]

         if(verbose)
             message(gettextf("Processing file %s", fname),
                     domain = NA)

         lines <- if(is.null(filter))
             readLines(file, encoding = enc, warn = FALSE)
         else {
             ## Assume that filter takes an input file (and additional
             ## arguments) and return a character vector.
             do.call(filter, c(list(file, encoding = enc), filter_args))
         }

         ## Allow filters to pass additional control arguments, in case
         ## these need to be inferred from the file contents.
         control <- c(control, attr(lines, "control"))

         ## Need to escape all lines with carets to ensure Aspell handles
         ## them as data: the Aspell docs say
         ##   It is recommended that programmatic interfaces prefix every
         ##   data line with an uparrow to protect themselves against
         ##   future changes in Aspell.
         writeLines(paste0("^", lines), tfile)
         ## Note that this re-encodes character strings with marked
         ## encodings to the current encoding (which is definitely fine
         ## if this is UTF-8 and Aspell was compiled with full UTF-8
         ## support).  Alternatively, we could try using something along
         ## the lines of
         ##   writeLines(paste0("^", lines), tfile,
         ##              useBytes = TRUE)
         ## and pass the encoding info to Aspell in case we know it.

         out <- tools:::.system_with_capture(program, c("-a", control),
                                             stdin = tfile)

 	if(out$status != 0L)
 	    stop(gettextf("Running aspell failed with diagnostics:\n%s",
 			  paste(out$stderr, collapse = "\n")),
                  domain = NA)

 	## Hopefully everything worked ok.
 	lines <- out$stdout[-1L]
 	pos <- cumsum(lines == "") + 1L

 	## Format is as follows.
 	## First line is a header.
 	## Blank lines separate the results for each line.
 	## Results for the word on each line are given as follows.
 	## * If the word was found in the main dictionary, or your personal
 	##   dictionary, then the line contains only a `*'.
 	## * If the word is not in the dictionary, but there are
 	##   suggestions, then the line contains an `&', a space, the
 	##   misspelled word, a space, the number of near misses, the number
 	##   of characters between the beginning of the line and the
 	##   beginning of the misspelled word, a colon, another space, and a
 	##   list of the suggestions separated by commas and spaces.
 	## * If the word does not appear in the dictionary, and there are no
 	##   suggestions, then the line contains a `#', a space, the
 	##   misspelled word, a space, and the character offset from the
 	##   beginning of the line.
 	## This can be summarized as follows:
 	##   OK: *
 	##   Suggestions: & original count offset: miss, miss, ...
 	##   None: # original offset

 	## Look at words not in dictionary with suggestions.

 	if(any(ind <- startsWith(lines, "&"))) {
 	    info <- strsplit(lines[ind], ": ", fixed = TRUE)
 	    one <- strsplit(sapply(info, `[`, 1L), " ",  fixed = TRUE)
 	    two <- strsplit(sapply(info, `[`, 2L), ", ", fixed = TRUE)
 	    db1 <- data.frame(Original =
 			      as.character(sapply(one, `[`, 2L)),
 			      File = fname,
 			      Line = pos[ind],
 			      Column =
 			      as.integer(sapply(one, `[`, 4L)),
 			      stringsAsFactors = FALSE)
 	    db1$Suggestions <- two
 	    db <- rbind(db, db1)
 	}
 	## Looks at words not in dictionary with no suggestions.
 	if(any(ind <- startsWith(lines, "#"))) {
 	    one <- strsplit(lines[ind], " ", fixed = TRUE)
 	    db1 <- data.frame(Original =
 			      as.character(sapply(one, `[`, 2L)),
 			      File = fname,
 			      Line = pos[ind],
 			      Column =
 			      as.integer(sapply(one, `[`, 3L)),
 			      stringsAsFactors = FALSE)
 	    db1$Suggestions <- vector("list", length(one))
 	    db <- rbind(db, db1)
 	}
     }

     class(db) <- c("aspell", "data.frame")
     db
 }

 format.aspell <-
 function(x, sort = TRUE, verbose = FALSE, indent = 2L, ...)
 {
     if(!nrow(x)) return(character())

     if(sort)
         x <- x[order(x$Original, x$File, x$Line, x$Column), ]

     from <- split(sprintf("%s:%d:%d", x$File, x$Line, x$Column),
                   x$Original)

     if(verbose) {
         unlist(Map(function(w, f, s) {
             sprintf("Word: %s\nFrom: %s\n%s",
                     w,
                     paste0(c("", rep.int("      ", length(f) - 1L)),
                            f, collapse = "\n"),
                     paste(strwrap(paste("Suggestions:",
                                         paste(s[[1L]], collapse = " ")),
                                   exdent = 6L, indent = 0L),
                           collapse = "\n"))
         },
                    names(from),
                    from,
                    split(x$Suggestions, x$Original)))
     } else {
         sep <- sprintf("\n%s", strrep(" ", indent))
         paste(names(from),
               vapply(from, paste, "", collapse = sep),
               sep = sep)
     }
 }

 print.aspell <-
 function(x, ...)
 {
     if(nrow(x))
         writeLines(paste(format(x, ...), collapse = "\n\n"))
     invisible(x)
 }

 summary.aspell <-
 function(object, ...)
 {
     words <- sort(unique(object$Original))
     if(length(words)) {
         writeLines("Possibly mis-spelled words:")
         print(words)
     }
     invisible(words)
 }

 aspell_filter_db <- new.env(hash = FALSE) # small
 aspell_filter_db$Rd <- tools::RdTextFilter
 aspell_filter_db$Sweave <- tools::SweaveTeXFilter

 aspell_find_program <-
 function(program = NULL)
 {
     check <- !is.null(program) || !is.null(names(program))
     if(is.null(program))
         program <- getOption("aspell_program")
     if(is.null(program))
         program <- c("aspell", "hunspell", "ispell")
     program <- Filter(nzchar, Sys.which(program))[1L]
     if(!is.na(program) && check) {
         out <- c(system(sprintf("%s -v", program),
                         intern = TRUE), "")[1L]
         if(grepl("really Aspell", out))
             names(program) <- "aspell"
         else if(grepl("really Hunspell", out))
             names(program) <- "hunspell"
         else if(grepl("International Ispell", out))
             names(program) <- "ispell"
         else
             names(program) <- NA_character_
     }
     program
 }

 aspell_dictionaries_R <- "en_stats"

 aspell_find_dictionaries <-
 function(dictionaries, dirnames = character())
 {
     dictionaries <- as.character(dictionaries)
     if(!(n <- length(dictionaries))) return(character())

     ## Always search the R system dictionary directory first.
     dirnames <- c(file.path(R.home("share"), "dictionaries"), dirnames)

     ## For now, all dictionary files should be .rds files.
     if(any(ind <- !endsWith(dictionaries, ".rds")))
         dictionaries[ind] <- sprintf("%s.rds", dictionaries[ind])

     out <- character(n)
     ## Dictionaries with no path separators are looked for in the given
     ## dictionary directories (by default, the R system dictionary
     ## directory).
     ind <- grepl(.Platform$file.sep, dictionaries, fixed = TRUE)
     ## (Equivalently, could check where paths == basename(paths).)
     if(length(pos <- which(ind))) {
         pos <- pos[file_test("-f", dictionaries[pos])]
         out[pos] <- normalizePath(dictionaries[pos], "/")
     }
     if(length(pos <- which(!ind))) {
         out[pos] <- find_files_in_directories(dictionaries[pos],
                                               dirnames)
     }
     out
 }

 ### Utilities.

 aspell_inspect_context <-
 function(x)
 {
     x <- split(x, x$File)
     y <- Map(function(f, x) {
         lines <- readLines(f, warn = FALSE)[x$Line]
         cbind(f,
               x$Line,
               substring(lines, 1L, x$Column - 1L),
               x$Original,
               substring(lines, x$Column + nchar(x$Original)))
     },
              names(x), x)
     y <- data.frame(do.call(rbind, y), stringsAsFactors = FALSE)
     names(y) <- c("File", "Line", "Left", "Original", "Right")
     class(y) <- c("aspell_inspect_context", "data.frame")
     y
 }

 print.aspell_inspect_context <-
 function(x, ...)
 {
     s <- split(x, x$File)
     nms <- names(s)
     for(i in seq_along(s)) {
         e <- s[[i]]
         writeLines(c(sprintf("File '%s':", nms[i]),
                      sprintf("  Line %s: \"%s\", \"%s\", \"%s\"",
                              format(e$Line),
                              gsub("\"", "\\\"", e$Left),
                              e$Original,
                              gsub("\"", "\\\"", e$Right)),
                      ""))
     }
     invisible(x)
 }


 ## For spell-checking the R manuals:

 ## This can really only be done with Aspell as the other checkers have
 ## no texinfo mode.

 aspell_control_R_manuals <-
     list(aspell =
          c("--master=en_US",
            "--add-extra-dicts=en_GB",
            "--mode=texinfo",
            "--add-texinfo-ignore=acronym",
            "--add-texinfo-ignore=deftypefun",
            "--add-texinfo-ignore=deftypefunx",
            "--add-texinfo-ignore=findex",
            "--add-texinfo-ignore=enindex",
            "--add-texinfo-ignore=include",
            "--add-texinfo-ignore=ifclear",
            "--add-texinfo-ignore=ifset",
            "--add-texinfo-ignore=math",
            "--add-texinfo-ignore=macro",
            "--add-texinfo-ignore=multitable",
            "--add-texinfo-ignore=node",
            "--add-texinfo-ignore=pkg",
            "--add-texinfo-ignore=printindex",
            "--add-texinfo-ignore=set",
            "--add-texinfo-ignore=vindex",
            "--add-texinfo-ignore-env=menu",
            "--add-texinfo-ignore=CRANpkg"
            ),
          hunspell =
          c("-d en_US,en_GB"))

 aspell_R_manuals <-
 function(which = NULL, dir = NULL, program = NULL,
          dictionaries = aspell_dictionaries_R)
 {
     if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
     ## Allow specifying 'R-exts' and alikes, or full paths.
     files <- if(is.null(which)) {
         Sys.glob(file.path(dir, "doc", "manual", "*.texi"))
     } else {
         ind <- which(which ==
                      basename(tools::file_path_sans_ext(which)))
         which[ind] <-
             file.path(dir, "doc", "manual",
                       sprintf("%s.texi", which[ind]))
         which
     }

     program <- aspell_find_program(program)

     aspell(files,
            control = aspell_control_R_manuals[[names(program)]],
            program = program,
            dictionaries = dictionaries)
 }

 ## For spell-checking the R Rd files:

 aspell_control_R_Rd_files <-
     list(aspell =
          c("--master=en_US",
            "--add-extra-dicts=en_GB"),
          hunspell =
          c("-d en_US,en_GB"))

 aspell_R_Rd_files <-
 function(which = NULL, dir = NULL, drop = "\\references",
          program = NULL, dictionaries = aspell_dictionaries_R)
 {
     files <- character()

     if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()

     if(is.null(which)) {
         which <- tools:::.get_standard_package_names()$base
         # CHANGES.Rd could be dropped from checks in the future;
         # it will not be updated post 2.15.0
         files <- c(file.path(dir, "doc", "NEWS.Rd"),
                    file.path(dir, "src", "gnuwin32", "CHANGES.Rd"))
         files <- files[file_test("-f", files)]
     }

     files <-
         c(files,
           unlist(lapply(file.path(dir, "src", "library", which, "man"),
                         tools::list_files_with_type,
                         "docs", OS_subdirs = c("unix", "windows")),
                  use.names = FALSE))

     program <- aspell_find_program(program)

     aspell(files,
            filter = list("Rd", drop = drop),
            control = aspell_control_R_Rd_files[[names(program)]],
            program = program,
            dictionaries = dictionaries)
 }

 ## For spell-checking Rd files in a package:

 aspell_package_Rd_files <-
 function(dir, drop = c("\\author", "\\references"),
          control = list(), program = NULL, dictionaries = character())
 {
     dir <- normalizePath(dir, "/")

     subdir <- file.path(dir, "man")
     files <- if(dir.exists(subdir))
         tools::list_files_with_type(subdir,
                                     "docs",
                                     OS_subdirs = c("unix", "windows"))
     else character()

     meta <- tools:::.get_package_metadata(dir, installed = FALSE)
     if(is.na(encoding <- meta["Encoding"]))
         encoding <- "unknown"

     defaults <- .aspell_package_defaults(dir, encoding)$Rd_files
     if(!is.null(defaults)) {
         ## Direct settings currently override (could add a list add =
         ## TRUE mechanism eventually).
         if(!is.null(d <- defaults$drop))
             drop <- d
         if(!is.null(d <- defaults$control))
             control <- d
         if(!is.null(d <- defaults$program))
             program <- d
         if(!is.null(d <- defaults$dictionaries)) {
             dictionaries <-
                 aspell_find_dictionaries(d, file.path(dir, ".aspell"))
         }
         ## <FIXME>
         ## Deprecated in favor of specifying R level dictionaries.
         ## Maybe give a warning (in particular if both are given)?
         if(!is.null(d <- defaults$personal))
             control <- c(control,
                          sprintf("-p %s",
                                  shQuote(file.path(dir, ".aspell", d))))
         ## </FIXME>
     }

     macros <- tools::loadPkgRdMacros(dir,
                                      macros = file.path(R.home("share"),
                                                         "Rd", "macros",
                                                         "system.Rd"))

     aspell(files,
            filter = list("Rd", drop = drop, macros = macros),
            control = control,
            encoding = encoding,
            program = program,
            dictionaries = dictionaries)
 }

 ## For spell-checking the R vignettes:

 ## This should really be done with Aspell as the other checkers have far
 ## less powerful TeX modes.

 aspell_control_R_vignettes <-
     list(aspell =
          c("--mode=tex",
            "--master=en_US",
            "--add-extra-dicts=en_GB",
            "--add-tex-command='code p'",
            "--add-tex-command='pkg p'",
            "--add-tex-command='CRANpkg p'"
            ),
          hunspell =
          c("-t", "-d en_US,en_GB"))

 aspell_R_vignettes <-
 function(program = NULL, dictionaries = aspell_dictionaries_R)
 {
     files <- Sys.glob(file.path(tools:::.R_top_srcdir_from_Rd(),
                                 "src", "library", "*", "vignettes",
                                 "*.Rnw"))

     program <- aspell_find_program(program)

     aspell(files,
            filter = "Sweave",
            control = aspell_control_R_vignettes[[names(program)]],
            program = program,
            dictionaries = dictionaries)
 }

 ## For spell-checking vignettes in a package:

 ## This should really be done with Aspell as the other checkers have far
 ## less powerful TeX modes.

 aspell_control_package_vignettes <-
     list(aspell =
          c("--add-tex-command='citep oop'",
            "--add-tex-command='Sexpr p'",
            "--add-tex-command='code p'",
            "--add-tex-command='pkg p'",
            "--add-tex-command='proglang p'",
            "--add-tex-command='samp p'"
            ))

 aspell_package_vignettes <-
 function(dir,
          control = list(), program = NULL, dictionaries = character())
 {
     dir <- tools::file_path_as_absolute(dir)

     vinfo <- tools::pkgVignettes(dir = dir)
     files <- vinfo$docs
     if(!length(files)) return(aspell(character()))

     ## We need the package encoding to read the defaults file ...
     meta <- tools:::.get_package_metadata(dir, installed = FALSE)
     if(is.na(encoding <- meta["Encoding"]))
         encoding <- "unknown"

     defaults <- .aspell_package_defaults(dir, encoding)$vignettes
     if(!is.null(defaults)) {
         if(!is.null(d <- defaults$control))
             control <- d
         if(!is.null(d <- defaults$program))
             program <- d
         if(!is.null(d <- defaults$dictionaries)) {
             dictionaries <-
                 aspell_find_dictionaries(d, file.path(dir, ".aspell"))
         }
         ## <FIXME>
         ## Deprecated in favor of specifying R level dictionaries.
         ## Maybe give a warning (in particular if both are given)?
         if(!is.null(d <- defaults$personal))
             control <- c(control,
                          sprintf("-p %s",
                                  shQuote(file.path(dir, ".aspell", d))))
         ## </FIXME>
     }

     program <- aspell_find_program(program)

     fgroups <- split(files, vinfo$engines)
     egroups <- split(vinfo$encodings, vinfo$engines)

     do.call(rbind,
             Map(function(fgroup, egroup, engine) {
                 engine <- tools::vignetteEngine(engine)
                 aspell(fgroup,
                        filter = engine$aspell$filter,
                        control =
                        c(engine$aspell$control,
                          aspell_control_package_vignettes[[names(program)]],
                          control),
                        encoding = egroup,
                        program = program,
                        dictionaries = dictionaries)
             },
                 fgroups,
                 egroups,
                 names(fgroups)
                 )
             )
 }

 ## Spell-checking R files.

 aspell_filter_db$R <-
 function(ifile, encoding = "unknown", ignore = character())
 {
     pd <- get_parse_data_for_message_strings(ifile, encoding)
     if(is.null(pd) || !NROW(pd)) return(character())

     ## Strip the string delimiters.
     pd$text <- substring(pd$text, 2L, nchar(pd$text) - 1L)
     ## Replace whitespace C backslash escape sequences by whitespace.
     pd$text <- gsub("(^|[^\\])\\\\[fnrt]", "\\1  ", pd$text)
     pd$text <- gsub(  "([^\\])\\\\[fnrt]", "\\1  ", pd$text)
     ## (Do this twice for now because in e.g.
     ##    \n\t\tInformation on package %s
     ## the first \t is not matched the first time.  Alternatively, we
     ## could match with
     ##    (^|[^\\])((\\\\[fnrt])+)
     ## but then computing the replacement (\\1 plus as many blanks as
     ## the characters in \\2) is not straightforward.
     ## For gettextf() calls, replace basic percent escape sequences by
     ## whitespace.
     ind <- pd$caller == "gettextf"
     if(any(ind)) {
         pd$text[ind] <-
             gsub("(^|[^%])%[dioxXfeEgGaAs]", "\\1  ", pd$text[ind])
         pd$text[ind] <-
             gsub("  ([^%])%[dioxXfeEgGaAs]", "\\1  ", pd$text[ind])
         ## (See above for doing this twice.)
     }

     lines <- readLines(ifile, encoding = encoding, warn = FALSE)

     ## Column positions in the parse data have tabs expanded to tab
     ## stops using a tab width of 8, so for lines with tabs we need to
     ## map the column positions back to character positions.
     lines_in_pd <- sort(unique(c(pd$line1, pd$line2)))
     tab <- Map(function(tp, nc) {
         if(tp[1L] == -1L) return(NULL)
         widths <- rep.int(1, nc)
         for(i in tp) {
             cols <- cumsum(widths)
             widths[i] <- 8 - (cols[i] - 1) %% 8
         }
         cumsum(widths)
     },
                gregexpr("\t", lines[lines_in_pd], fixed = TRUE),
                nchar(lines[lines_in_pd]))
     names(tab) <- lines_in_pd

     lines[lines_in_pd] <- gsub("[^\t]", " ", lines[lines_in_pd])
     lines[-lines_in_pd] <- ""

     for(entry in split(pd, seq_len(NROW(pd)))) {
         line1 <- entry$line1
         line2 <- entry$line2
         col1 <- entry$col1
         col2 <- entry$col2
         if(line1 == line2) {
             if(length(ptab <- tab[[as.character(line1)]])) {
                 col1 <- which(ptab == col1) + 1L
                 col2 <- which(ptab == col2) - 1L
             }
             substring(lines[line1], col1, col2) <- entry$text
         } else {
             texts <- unlist(strsplit(entry$text, "\n", fixed = TRUE))
             n <- length(texts)
             if(length(ptab <- tab[[as.character(line1)]])) {
                 col1 <- which(ptab == col1) + 1L
             }
             substring(lines[line1], col1) <- texts[1L]
             pos <- seq.int(from = 2L, length.out = n - 2L)
             if(length(pos))
                 lines[line1 + pos - 1] <- texts[pos]
             if(length(ptab <- tab[[as.character(line2)]])) {
                 col2 <- which(ptab == col2) - 1L
             }
             substring(lines[line2], 1L, col2) <- texts[n]
         }
     }

     blank_out_ignores_in_lines(lines, ignore)
 }

 get_parse_data_for_message_strings <-
 function(file, encoding = "unknown")
 {
     ## The message strings considered are the string constants subject to
     ## translation in gettext-family calls (see below for details).

     exprs <-
         suppressWarnings(tools:::.parse_code_file(file = file,
                                                   encoding = encoding,
                                                   keep.source = TRUE))
     if(!length(exprs)) return(NULL)

     pd <- getParseData(exprs)

     ## Function for computing grandparent ids.
     parents <- pd$parent
     names(parents) <- pd$id
     gpids <- function(ids)
         parents[as.character(parents[as.character(ids)])]

     ind <- (pd$token == "SYMBOL_FUNCTION_CALL") &
         !is.na(match(pd$text,
                      c("warning", "stop",
                        "message", "packageStartupMessage",
                        "gettext", "gettextf", "ngettext")))

     funs <- pd$text[ind]

     ids <- gpids(pd$id[ind])
     calls <- getParseText(pd, ids)

     table <- pd[pd$token == "STR_CONST", ]
     ## Could have run into truncation ...
     table$text <- getParseText(table, table$id)
     pos <- match(gpids(table$id), ids)
     ind <- !is.na(pos)
     table <- split(table[ind, ], factor(pos[ind], seq_along(ids)))

     ## We have synopses
     ##   message(..., domain = NULL, appendLF = TRUE)
     ##   packageStartupMessage(..., domain = NULL, appendLF = TRUE)
     ##   warning(..., call. = TRUE, immediate. = FALSE, domain = NULL)
     ##   stop(..., call. = TRUE, domain = NULL)
     ##   gettext(..., domain = NULL)
     ##   ngettext(n, msg1, msg2, domain = NULL)
     ##   gettextf(fmt, ..., domain = NULL)
     ## For the first five, we simply take all unnamed strings.
     ## (Could make this more precise, of course.)
     ## For the latter two, we take the msg1/msg2 and fmt arguments,
     ## provided these are strings.

     ## <NOTE>
     ## Using domain = NA inhibits translation: perhaps it should
     ## optionally also inhibit spell checking?
     ## </NOTE>

     extract_message_strings <- function(fun, call, table) {
         ## Matching a call containing ... gives
         ##   Error in match.call(message, call) :
         ##   ... used in a situation where it doesn't exist
         ## so eliminate these.
         ## (Note that we also drop "..." strings.)
         call <- parse(text = call)[[1L]]
         call <- call[ as.character(call) != "..." ]
         mc <- as.list(match.call(get(fun, envir = .BaseNamespaceEnv),
                                  call))
         args <- if(fun == "gettextf")
             mc["fmt"]
         else if(fun == "ngettext")
             mc[c("msg1", "msg2")]
         else {
             if(!is.null(names(mc)))
                 mc <- mc[!nzchar(names(mc))]
             mc[-1L]
         }
         strings <- as.character(args[vapply(args, is.character, TRUE)])
         ## Need to canonicalize to match string constants before and
         ## after parsing ...
         texts <- vapply(parse(text = table$text), as.character, "")
         pos <- which(!is.na(match(texts, strings)))
         cbind(table[pos, ], caller = rep.int(fun, length(pos)))
     }

     do.call(rbind,
             Map(extract_message_strings,
                 as.list(funs), as.list(calls), table))
 }

 ## For spell-checking the R R files.

 aspell_R_R_files <-
 function(which = NULL, dir = NULL,
          ignore = c("[ \t]'[^']*'[ \t[:punct:]]",
                     "[ \t][[:alnum:]_.]*\\(\\)[ \t[:punct:]]"),
          program = NULL, dictionaries = aspell_dictionaries_R)
 {
     if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
     if(is.null(which))
         which <- tools:::.get_standard_package_names()$base

     files <-
         unlist(lapply(file.path(dir, "src", "library", which, "R"),
                       tools::list_files_with_type,
                       "code",
                       OS_subdirs = c("unix", "windows")),
                use.names = FALSE)

     program <- aspell_find_program(program)

     aspell(files,
            filter = list("R", ignore = ignore),
            control = aspell_control_R_Rd_files[[names(program)]],
            program = program,
            dictionaries = dictionaries)
 }

 ## For spell-checking R files in a package.

 aspell_package_R_files <-
 function(dir, ignore = character(),
          control = list(), program = NULL, dictionaries = character())
 {
     dir <- tools::file_path_as_absolute(dir)

     subdir <- file.path(dir, "R")
     files <- if(dir.exists(subdir))
         tools::list_files_with_type(subdir,
                                     "code",
                                     OS_subdirs = c("unix", "windows"))
     else character()

     meta <- tools:::.get_package_metadata(dir, installed = FALSE)
     if(is.na(encoding <- meta["Encoding"]))
         encoding <- "unknown"

     defaults <- .aspell_package_defaults(dir, encoding)$R_files
     if(!is.null(defaults)) {
         if(!is.null(d <- defaults$ignore))
             ignore <- d
         if(!is.null(d <- defaults$control))
             control <- d
         if(!is.null(d <- defaults$program))
             program <- d
         if(!is.null(d <- defaults$dictionaries)) {
             dictionaries <-
                 aspell_find_dictionaries(d, file.path(dir, ".aspell"))
         }
     }

     program <- aspell_find_program(program)

     aspell(files,
            filter = list("R", ignore = ignore),
            control = control,
            encoding = encoding,
            program = program,
            dictionaries = dictionaries)
 }

 ## Spell-checking pot files.

 ## (Of course, directly analyzing the message strings would be more
 ## useful, but require writing appropriate text filters.)

 ## See also tools:::checkPoFile().

 aspell_filter_db$pot <-
 function (ifile, encoding = "unknown", ignore = character())
 {
     lines <- readLines(ifile, encoding = encoding, warn = FALSE)

     ind <- grepl("^msgid[ \t]", lines)

     do_entry <- function(s) {
         out <- character(length(s))
         i <- 1L
         out[i] <- blank_out_regexp_matches(s[i], "^msgid[ \t]+\"")
         while(startsWith(s[i <- i + 1L], '"'))
             out[i] <- sub("^\"", " ", s[i])
         if(grepl("^msgid_plural[ \t]", s[i])) {
             out[i] <- blank_out_regexp_matches(s[i], "^msgid_plural[ \t]+\"")
             while(startsWith(s[i <- i + 1L], '"'))
                 out[i] <- sub("^\"", " ", s[i])
         }
         out
     }

     entries <- split(lines, cumsum(ind))
     lines <- c(character(length(entries[[1L]])),
                as.character(do.call(c, lapply(entries[-1L], do_entry))))

     lines <- sub("\"[ \t]*$", " ", lines)

     ## <FIXME>
     ## Could replace backslash escapes for blanks and percent escapes by
     ## blanks, similar to what the R text filter does.
     ## </FIXME>

     blank_out_ignores_in_lines(lines, ignore)
 }

 ## For spell-checking all pot files in a package.

 aspell_package_pot_files <-
 function(dir, ignore = character(),
          control = list(), program = NULL, dictionaries = character())
 {
     dir <- tools::file_path_as_absolute(dir)
     subdir <- file.path(dir, "po")
     files <- if(dir.exists(subdir))
         Sys.glob(file.path(subdir, "*.pot"))
     else character()

     meta <- tools:::.get_package_metadata(dir, installed = FALSE)
     if(is.na(encoding <- meta["Encoding"]))
         encoding <- "unknown"

     program <- aspell_find_program(program)

     aspell(files,
            filter = list("pot", ignore = ignore),
            control = control,
            encoding = encoding,
            program = program,
            dictionaries = dictionaries)
 }

 ## For spell-checking the R C files.

 aspell_R_C_files <-
 function(which = NULL, dir = NULL,
          ignore = c("[ \t]'[[:alnum:]_.]*'[ \t[:punct:]]",
                     "[ \t][[:alnum:]_.]*\\(\\)[ \t[:punct:]]"),
          program = NULL, dictionaries = aspell_dictionaries_R)
 {
     if(is.null(dir)) dir <- tools:::.R_top_srcdir_from_Rd()
     if(is.null(which))
         which <- tools:::.get_standard_package_names()$base
     if(!is.na(pos <- match("base", which)))
         which[pos] <- "R"

     files <- sprintf("%s.pot",
                      file.path(dir, "src", "library",
                                which, "po", which))
     files <- files[file_test("-f", files)]

     program <- aspell_find_program(program)

     aspell(files,
            filter = list("pot", ignore = ignore),
            control = aspell_control_R_Rd_files[[names(program)]],
            program = program,
            dictionaries = dictionaries)
 }

 ## For spell-checking package C files.

 aspell_package_C_files <-
 function(dir, ignore = character(),
          control = list(), program = NULL, dictionaries = character())
 {
     dir <- tools::file_path_as_absolute(dir)
     ## Assume that the package C message template file is shipped as
     ## 'po/PACKAGE.pot'.
     files <- file.path(dir, "po",
                        paste(basename(dir), "pot", collapse = "."))
     files <- files[file_test("-f", files)]

     meta <- tools:::.get_package_metadata(dir, installed = FALSE)
     if(is.na(encoding <- meta["Encoding"]))
         encoding <- "unknown"

     defaults <- .aspell_package_defaults(dir, encoding)$C_files
     if(!is.null(defaults)) {
         if(!is.null(d <- defaults$ignore))
             ignore <- d
         if(!is.null(d <- defaults$control))
             control <- d
         if(!is.null(d <- defaults$program))
             program <- d
         if(!is.null(d <- defaults$dictionaries)) {
             dictionaries <-
                 aspell_find_dictionaries(d, file.path(dir, ".aspell"))
         }
     }

     program <- aspell_find_program(program)

     aspell(files,
            filter = list("pot", ignore = ignore),
            control = control,
            encoding = encoding,
            program = program,
            dictionaries = dictionaries)
 }

 ## Spell-checking DCF files.

 aspell_filter_db$dcf <-
 function(ifile, encoding, keep = c("Title", "Description"),
          ignore = character())
 {
     lines <- readLines(ifile, encoding = encoding, warn = FALSE)
     line_has_tags <- grepl("^[^[:blank:]][^:]*:", lines)
     tags <- sub(":.*", "", lines[line_has_tags])
     lines[line_has_tags] <-
         blank_out_regexp_matches(lines[line_has_tags], "^[^:]*:")
     lines <- split(lines, cumsum(line_has_tags))
     ind <- is.na(match(tags, keep))
     lines[ind] <- lapply(lines[ind], function(s) rep.int("", length(s)))
     ind <- !ind
     lines[ind] <- lapply(lines[ind], paste0, " ")
     lines <- unlist(lines, use.names = FALSE)
     blank_out_ignores_in_lines(lines, ignore)
 }

 ## For spell-checking package DESCRIPTION files.

 aspell_package_description <-
 function(dir, ignore = character(),
          control = list(), program = NULL, dictionaries = character())
 {
     dir <- tools::file_path_as_absolute(dir)
     files <- file.path(dir, "DESCRIPTION")

     meta <- tools:::.get_package_metadata(dir, installed = FALSE)
     if(is.na(encoding <- meta["Encoding"]))
         encoding <- "unknown"

     ## Allow providing package defaults but make this controllable via
     ##   _R_ASPELL_USE_DEFAULTS_FOR_PACKAGE_DESCRIPTION_
     ## to safeguard against possible mis-use for CRAN incoming checks.
     defaults <-
         Sys.getenv("_R_ASPELL_USE_DEFAULTS_FOR_PACKAGE_DESCRIPTION_",
                    "TRUE")
     defaults <- if(tools:::config_val_to_logical(defaults)) {
                     .aspell_package_defaults(dir, encoding)$description
                 } else NULL
     if(!is.null(defaults)) {
         if(!is.null(d <- defaults$ignore))
             ignore <- d
         if(!is.null(d <- defaults$control))
             control <- d
         if(!is.null(d <- defaults$program))
             program <- d
         if(!is.null(d <- defaults$dictionaries)) {
             dictionaries <-
                 aspell_find_dictionaries(d, file.path(dir, ".aspell"))
         }
     }

     program <- aspell_find_program(program)

     aspell(files,
            filter = list("dcf", ignore = ignore),
            control = control,
            encoding = encoding,
            program = program,
            dictionaries = dictionaries)
 }

 ## Spell-checking Markdown files.

 aspell_filter_db$md <-
 function(ifile, encoding = "UTF-8")
 {
     x <- readLines(ifile, encoding = encoding, warn = FALSE)
     n <- nchar(x)
     y <- strrep(rep.int(" ", length(x)), n)
     ## Determine positions of 'texts' along the lines of
     ## spelling::parse_text_md () by Jeroen Ooms.
     md <- commonmark::markdown_xml(x, extensions = TRUE,
                                    sourcepos = TRUE)
     doc <- xml2::xml_ns_strip(xml2::read_xml(md))
     pos <- strsplit(xml2::xml_attr(xml2::xml_find_all(doc,
                                                       "//text[@sourcepos]"),
                                    "sourcepos"),
                     "[:-]")
     ## Now use the following idea.
     ## Each elt of pos now has positions for l1:c1 to l2:c2.
     ## If l1 < l2
     ##   Lines in (l1, l2) are taken as a whole
     ##   Line l1 from c1 to nchar for l1
     ##   Line l2 from  1 to c1
     ## otherwise
     ##   Line l1 from c1 to c2.
     for(p in pos) {
         p <- as.integer(p)
         ## Legibility ...
         l1 <- p[1L]; c1 <- p[2L]; l2 <- p[3L]; c2 <- p[4L]
         if(l1 < l2) {
             substring(y[l1], c1, n[l1]) <- substring(x[l1], c1, n[l1])
             if(l1 + 1L < l2) {
                 w <- seq.int(from = l1 + 1L, to = l2 - 1L)
                 y[w] <- x[w]
             }
             substring(y[l2], 1L, c2) <- substring(x[l2], 1L, c2)
         } else {
             substring(y[l1], c1, c2) <- substring(x[l1], c1, c2)
         }
     }
     y
 }

 ## For spell checking packages.

 aspell_package <-
 function(dir,
          control = list(), program = NULL, dictionaries = character())
 {
     args <- list(dir = dir,
                  program = program,
                  control = control,
                  dictionaries = dictionaries)
     a <- rbind(do.call(aspell_package_description, args),
                do.call(aspell_package_Rd_files, args),
                do.call(aspell_package_vignettes, args),
                do.call(aspell_package_R_files, args),
                do.call(aspell_package_C_files, args))
     if(nrow(a)) {
         a$File <- tools:::.file_path_relative_to_dir(a$File,
                                                      dirname(dir))
     }
     a
 }

 ## For writing personal dictionaries:

 aspell_write_personal_dictionary_file <-
 function(x, out, language = "en", program = NULL)
 {
     if(inherits(x, "aspell"))
         x <- sort(unique(x$Original))

     program <- aspell_find_program(program)
     if(is.na(program))
         stop("No suitable spell check program found.")

     ## <NOTE>
     ## Ispell and Hunspell take simple word lists as personal dictionary
     ## files, but Aspell requires a special format, see e.g.
     ## http://aspell.net/man-html/Format-of-the-Personal-and-Replacement-Dictionaries.html
     ## and one has to create these by hand, as
     ##   aspell --lang=en create personal ./foo "a b c"
     ## gives: Sorry "create/merge personal" is currently unimplemented.

     ## Encodings are a nightmare.
     ## Try to canonicalize to UTF-8 for Aspell (which allows recording
     ## the encoding in the personal dictionary).
     ## <FIXME>
     ## What should we do for Hunspell (which can handle UTF-8, but has
     ## no encoding information in the personal dictionary), or Ispell
     ## (which cannot handle UTF-8)?
     ## </FIXME>

     if(names(program) == "aspell") {
         header <- sprintf("personal_ws-1.1 %s %d UTF-8",
                           language, length(x))
         x <- enc2utf8(x)
     }
     else {
         header <- NULL
     }

     writeLines(c(header, x), out, useBytes = TRUE)
 }

 ## For reading package defaults:

 .aspell_package_defaults <-
 function(dir, encoding = "unknown")
 {
     dfile <- file.path(dir, ".aspell", "defaults.R")
     if(!file_test("-f", dfile))
         return(NULL)
     exprs <- parse(dfile, encoding = encoding)
     envir <- new.env()
     for(e in exprs) eval(e, envir)
     as.list(envir)
 }

 ## Utilities.

 blank_out_regexp_matches <-
 function(s, re, ...)
 {
     m <- gregexpr(re, s, ...)
     regmatches(s, m) <-
         Map(function(n) strrep(" ", n),
             lapply(regmatches(s, m), nchar))
     s
 }

 blank_out_ignores_in_lines <-
 function(lines, ignore)
 {
     args <- list()
     if(is.list(ignore)) {
         args <- ignore[-1L]
         ignore <- ignore[[1L]]
     }
     for(re in ignore[nzchar(ignore)])
         lines <- do.call(blank_out_regexp_matches,
                          c(list(lines, re), args))
     lines
 }

 find_files_in_directories <-
 function(basenames, dirnames)
 {
     dirnames <- dirnames[dir.exists(dirnames)]
     dirnames <- normalizePath(dirnames, "/")

     out <- character(length(basenames))
     pos <- seq_along(out)

     for(dir in dirnames) {
         paths <- file.path(dir, basenames[pos])
         ind <- file_test("-f", paths)
         out[pos[ind]] <- paths[ind]
         pos <- pos[!ind]
         if(!length(pos)) break
     }

     out
 }