| # File src/library/tools/R/read.00Index.R |
| # Part of the R package, https://www.R-project.org |
| # |
| # Copyright (C) 1995-2012 The R Core Team |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 2 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # A copy of the GNU General Public License is available at |
| # https://www.R-project.org/Licenses/ |
| |
## Read a 00Index-style file and return its entries as a two-column
## character matrix.
##
## Arguments:
##   file  a character string naming the file to read ("" means
##         stdin()), or a connection.
##
## Value:
##   A character matrix with columns "Item" and "Description", one row
##   per entry found in the paragraphs recognized as data base chunks
##   (zero rows if none is recognized).
##
## Errors:
##   Signals an error if 'file' is neither a character string nor a
##   connection.
read.00Index <-
function(file)
{
    if(is.character(file)) {
        if(file == "") file <- stdin()
        else {
            file <- file(file, "r")
            ## Only connections opened here are closed here.
            on.exit(close(file), add = TRUE)
        }
    }
    if(!inherits(file, "connection"))
        ## Report the argument *name*: the offending value is by
        ## definition not a string and need not be formattable by '%s'.
        stop(gettextf("argument '%s' must be a character string or connection",
                      "file"),
             domain = NA)

    text <- paste(readLines(file), collapse = "\n")

    ## <FIXME>
    ## We cannot necessarily assume that the 00Index-style file to be
    ## read in was generated by @code{Rdindex()} or by R using
    ## formatDL(style = "table").  In particular, some packages have
    ## 00Index files with (section) headers and footers in addition to
    ## the data base chunks which are description lists rendered in
    ## tabular form.  Hence, we need some heuristic for identifying the
    ## db chunks.  Easy to the human eye (is there a column for aligning
    ## entries?) but far from trivial ... as a first approximation we
    ## consider chunks containing at least one tab or three consecutive
    ## spaces a db chunk.  (A better heuristic would be the following:
    ## entries rendered in one line have item and description separated
    ## by at least 3 spaces or tabs; entries with a line break have
    ## continuation lines starting with whitespace (no test for
    ## alignment).  If a chunk is made of such entries only it is
    ## considered a db chunk.  But not all current packages follow this
    ## scheme.  Argh.)
    ## Clearly we need to move to something better in future versions.
    ## </FIXME>

    ## Turn one paragraph into a 2-column matrix of (item, description),
    ## or NULL if the paragraph does not look like a db chunk.
    parse_chunk <- function(chunk) {
        ## Heuristic from above: a db chunk has a tab or a run of three
        ## spaces somewhere.  The run is spelled ' {3}' so that it
        ## cannot be lost to whitespace normalization of this file.
        if(!grepl("( {3}|\t)", chunk))
            return(NULL)
        ## Combine entries with continuation lines.
        chunk <- gsub("\n[ \t]+", "\t", chunk)
        ## Split into lines and then according to whitespace.
        fields <- strsplit(unlist(strsplit(chunk, "\n")), "[ \t]")
        items <- unlist(lapply(fields, "[[", 1L))
        ## The description is every non-empty field after the first,
        ## re-joined with single spaces.
        descriptions <-
            unlist(lapply(fields, function(t) {
                paste(t[-c(1L, which(!nzchar(t)))], collapse = " ")
            }))
        cbind(items, descriptions, deparse.level = 0L)
    }

    ## First split into paragraph chunks separated by whitespace-only
    ## lines, then parse each one.  A chunk whose parsing fails (e.g.,
    ## because it contains an empty line with no first field) is
    ## dropped as a whole rather than aborting the read.
    chunks <- unlist(strsplit(text, "\n[ \t\n]*\n"))
    pieces <- lapply(chunks, function(chunk) {
        entries <- tryCatch(parse_chunk(chunk), error = function(e) NULL)
        if(!is.null(entries) && NCOL(entries) == 2L) entries else NULL
    })

    ## Bind everything once; the empty seed matrix guarantees a
    ## 0 x 2 character matrix when no chunk qualifies.
    y <- do.call(rbind,
                 c(list(matrix("", nrow = 0L, ncol = 2L)), pieces))
    colnames(y) <- c("Item", "Description")
    y
}