blob: 8eb9fa026b9fe316008eb18569cb5cd40ec8a0c7 [file] [log] [blame]
# File src/library/tools/R/read.00Index.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2012 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
## Read a package '00Index'-style file into a two-column character matrix.
##
## Arguments:
##   file  a character string naming the file to read ("" means stdin()),
##         or a connection.  close() is only called on a file connection
##         that is opened here from a file name.
##
## Value:
##   A character matrix with columns "Item" and "Description", holding
##   one row per entry found in the paragraphs recognized as data base
##   chunks (zero rows if none is found).
##
## Errors:
##   Signals an error if 'file' is neither a character string nor a
##   connection.
read.00Index <-
function(file)
{
    if(is.character(file)) {
        if(file == "") file <- stdin()
        else {
            file <- file(file, "r")
            on.exit(close(file), add = TRUE)
        }
    }
    if(!inherits(file, "connection"))
        ## Report the *name* of the offending argument, not its value.
        stop(gettextf("argument '%s' must be a character string or connection",
                      "file"),
             domain = NA)

    y <- matrix("", nrow = 0L, ncol = 2L)
    text <- paste(readLines(file), collapse = "\n")

    ## <FIXME>
    ## We cannot necessarily assume that the 00Index-style file to be
    ## read in was generated by @code{Rdindex()} or by R using
    ## formatDL(style = "table").  In particular, some packages have
    ## 00Index files with (section) headers and footers in addition to
    ## the data base chunks which are description lists rendered in
    ## tabular form.  Hence, we need some heuristic for identifying the
    ## db chunks.  Easy to the human eye (is there a column for aligning
    ## entries?) but far from trivial ... as a first approximation we
    ## consider chunks containing at least one tab or three consecutive
    ## spaces a db chunk.  (A better heuristic would be the following:
    ## entries rendered in one line have item and description separated
    ## by at least 3 spaces or tabs; entries with a line break have
    ## continuation lines starting with whitespace (without testing for
    ## alignment).  If a chunk is made of such entries only it is
    ## considered a db chunk.  But not all current packages follow this
    ## scheme.  Argh.)
    ## Clearly we need to move to something better in future versions.
    ## </FIXME>

    ## First split into paragraph chunks separated by whitespace-only
    ## lines.
    for(chunk in unlist(strsplit(text, "\n[ \t\n]*\n"))) {
        ## Parsing is best-effort: a chunk that cannot be split into
        ## item/description pairs raises an error which is caught here,
        ## and the chunk is then skipped below.
        entries <- tryCatch({
            ## A db chunk must contain a tab or a run of three spaces;
            ## anything else (e.g., a prose header) is ignored.
            if(!grepl("([ ]{3}|\t)", chunk))
                NULL
            else {
                ## Combine entries with continuation lines.
                chunk <- gsub("\n[ \t]+", "\t", chunk)
                ## Split into lines and then according to whitespace.
                fields <- strsplit(unlist(strsplit(chunk, "\n")), "[ \t]")
                ## The first field is the item; the remaining non-empty
                ## fields, re-joined by single spaces, the description.
                cbind(vapply(fields, function(t) t[[1L]], "",
                             USE.NAMES = FALSE),
                      vapply(fields, function(t) {
                          paste(t[-c(1L, which(!nzchar(t)))],
                                collapse = " ")
                      }, "", USE.NAMES = FALSE))
            }
        },
        error = identity)
        if(!inherits(entries, "error") && NCOL(entries) == 2L)
            y <- rbind(y, entries)
    }
    colnames(y) <- c("Item", "Description")
    y
}