blob: c37255616e4430eb4e0638d26b1bd4877f1de08d [file] [log] [blame]
# File src/library/tools/R/rtags.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2013, 2015 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
### These utilities are intended to read R code files and produce tags
### in Emacs' etags format. Doing so in R allows us to use R's
### parser. Support for vi-style tags could be useful, but it needs
### the tags file needs to be sorted, making file-by-file processing
### difficult. It may be easier to write a script to convert an etags
### format file (see http://https://en.wikipedia.org/wiki/Ctags).
### * shorten.to.string
## The etags format requires the initial part of a matching line to be
## recorded in the TAGS file, with an optional entry for the `token'
## that is to be matched. Exact matches to the token are preferred,
## but it seems that subsequent non-exact matches look in the initial
## string as well as other tokens with equal priority. Such matches
## seem pointless, so an attempt is made to shorten the matching line.
## It is not clear to me whether there are restrictions on what this
## part could be, but a completely blank string doesn't seem to work.
## For now, I'm just keeping the first letter. May change if problems
## arise.
shorten.to.string <-
function(line, token)
{
if (FALSE) {
ans <- regexpr(strsplit("token", ",", fixed = TRUE)[[1L]][1L],
line, fixed = TRUE)
if (ans == -1L) line
else substr(line, 1L, ans + attr(ans, "match.length") - 1L)
}
else {
## can we just put essentially nothing? Seems to work
substr(line, 1L, 1L)
}
}
### * write.etags
## this function is responsible for formatting the output for a single
## file given the relevant information. The format was inferred from
## the "Ctags" wikipedia entry and by studying etags output.
write.etags <-
function(src,
tokens, startlines, lines, nchars,
...,
shorten.lines = c("token", "simple", "none"))
{
## extra 1 for newline
shorten.lines <- match.arg(shorten.lines)
offsets <- (cumsum(nchars + 1L) - (nchars + 1L))[startlines]
lines <-
switch(shorten.lines,
none = lines,
simple = sapply(strsplit(lines, "function", fixed = TRUE), "[", 1),
token = mapply(shorten.to.string, lines, tokens))
tag.lines <-
paste(sprintf("%s\x7f%s\x01%d,%d",
lines, tokens, startlines,
as.integer(offsets)),
collapse = "\n")
## simpler format: tag.lines <- paste(sprintf("%s\x7f%d,%d", lines, startlines, as.integer(offsets)), collapse = "\n")
tagsize <- nchar(tag.lines, type = "bytes") + 1L
cat("\x0c\n", src, ",", tagsize, "\n", tag.lines, "\n", sep = "", ...)
}
### * expr2token
## this computes the tag name from an expression. Currently, this
## returns the second thing in the expression; so
##
## foo <- function(x) ... ==> `<-`, foo, ...
## setMethod("foo", "bar" ... ==> setMethod, foo, ...
## setGeneric("foo", "bar" ... ==> setGeneric, foo, ...
##
## which covers the typical uses. We match against a list to restrict
## types of expressions that are tagged. To reject things like
##
## x[i] <- 10
##
## the second component is required to have length 1. One limitation
## is that things like
##
## if (require(pkg)) foo <- ... else foo <- ...
##
## will not be handled.
expr2token <-
function(x,
ok = c("<-", "=", "<<-", "assign",
"setGeneric", "setGroupGeneric", "setMethod",
"setClass", "setClassUnion"),
extended = TRUE)
{
id <- ""
value <-
if ((length(x) > 1L) &&
(length(token <- as.character(x[[2L]])) == 1L) &&
(length(id <- as.character(x[[1L]])) == 1L) &&
(id %in% ok)) token
else
character(0L)
if (extended && identical(id, "setMethod"))
{
## try to add the signature, comma separated
sig <- tryCatch(eval(x[[3L]]), error = identity)
if (!inherits(sig, "error") && is.character(sig))
value <- paste(c(value, sig), collapse=",")
}
value
}
### * rtags.file
## Handles a single file
rtags.file <-
function(src, ofile = "", append = FALSE,
write.fun = write.etags) ## getOption("writeTags")
{
## FIXME: do we need to worry about encoding etc.?
elist <- parse(src, srcfile = srcfile(src))
if (length(elist) == 0) return(invisible())
lines <- readLines(src)
tokens <- lapply(elist, expr2token)
startlines <- sapply(attr(elist, "srcref"), "[", 1L)
if (length(tokens) != length(startlines))
stop("length mismatch: bug in code!", domain = NA)
keep <- lengths(tokens) == 1L
if (!any(keep)) return(invisible())
tokens <- unlist(tokens[keep])
startlines <- startlines[keep]
write.fun(src = src,
tokens = tokens,
startlines = startlines,
lines = lines[startlines],
nchars = nchar(lines, type = "bytes"),
file = ofile, append = append)
}
### * rtags
## Public interface. Tags files under a specified directory, using
## regular expressions to filter out inappropriate files.
rtags <-
function(path = ".", pattern = "\\.[RrSs]$",
recursive = FALSE,
src = list.files(path = path,
pattern = pattern,
full.names = TRUE,
recursive = recursive),
keep.re = NULL,
ofile = "", append = FALSE,
verbose = getOption("verbose"))
{
if (nzchar(ofile) && !append) {
if (!file.create(ofile, showWarnings = FALSE))
stop(gettextf("Could not create file %s, aborting", ofile),
domain = NA)
}
if (!missing(keep.re))
src <- grep(keep.re, src, value = TRUE)
for (s in src)
{
if (verbose) message(gettextf("Processing file %s", s), domain = NA)
tryCatch(
rtags.file(s, ofile = ofile, append = TRUE),
error = function(e) NULL)
}
invisible()
}
### Local variables: ***
### mode: outline-minor ***
### outline-regexp: "### [*]+" ***
### End: ***