#  src/library/tools/R/read.00Index.R - R - Git at Google

 #  File src/library/tools/R/read.00Index.R
 #  Part of the R package, https://www.R-project.org
 #
 #  Copyright (C) 1995-2012 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
 #  the Free Software Foundation; either version 2 of the License, or
 #  (at your option) any later version.
 #
 #  This program is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #  GNU General Public License for more details.
 #
 #  A copy of the GNU General Public License is available at
 #  https://www.R-project.org/Licenses/

 ## Read a 00Index-style file into a two-column character matrix.
 ##
 ## Arguments:
 ##   file  a character string naming the file to read ("" means
 ##         stdin()), or a connection.
 ##
 ## Value:
 ##   A character matrix with columns "Item" and "Description", one row
 ##   per entry found in the paragraphs recognised as data base chunks
 ##   (zero rows if none is recognised).
 ##
 ## An error is signalled if 'file' is neither a character string nor a
 ## connection.
 read.00Index <-
 function(file)
 {
     if(is.character(file)) {
         if(file == "") file <- stdin()
         else {
             file <- file(file, "r")
             ## Only a connection opened here is closed on exit; one
             ## supplied by the caller is left open.
             on.exit(close(file), add = TRUE)
         }
     }
     if(!inherits(file, "connection"))
         ## Name the argument in the message rather than interpolating
         ## its (invalid, possibly NULL or non-scalar) value.
         stop(gettextf("argument '%s' must be a character string or connection",
                       "file"),
              domain = NA)

     text <- paste(readLines(file), collapse = "\n")

     ## <FIXME>
     ## We cannot necessarily assume that the 00Index-style file to be
     ## read in was generated by @code{Rdindex()} or by R using
     ## formatDL(style = "table").  In particular, some packages have
     ## 00Index files with (section) headers and footers in addition to
     ## the data base chunks which are description lists rendered in
     ## tabular form.  Hence, we need some heuristic for identifying the
     ## db chunks.  Easy to the human eye (is there a column for aligning
     ## entries?) but far from trivial ... as a first approximation we
     ## consider chunks containing at least one tab or three spaces a
     ## db chunk.  (A better heuristic would be the following: entries
     ## rendered in one line have item and description separated by at
     ## least 3 spaces or tabs; entries with a line break have
     ## continuation lines starting with whitespace (without testing
     ## for alignment).  If a chunk is made of such entries only it is
     ## considered a db chunk.  But not all current packages follow this
     ## scheme.  Argh.)
     ## Clearly we need to move to something better in future versions.
     ## </FIXME>

     ## Turn one paragraph chunk into a two-column matrix of items and
     ## descriptions, or NULL if it does not look like a db chunk.
     parse_chunk <- function(chunk) {
         if(!grepl("(   |\t)", chunk))
             return(NULL)
         ## Combine entries with continuation lines.
         chunk <- gsub("\n[ \t]+", "\t", chunk)
         lines <- unlist(strsplit(chunk, "\n"))
         ## A single blank line at the very start of the file is not
         ## matched by the paragraph separator and shows up here as an
         ## empty line; drop it so that it cannot invalidate the chunk.
         lines <- lines[nzchar(lines)]
         ## Split each entry according to whitespace: the first field
         ## is the item, the remaining non-empty fields make up the
         ## description.
         fields <- strsplit(lines, "[ \t]")
         cbind(vapply(fields, `[[`, "", 1L),
               vapply(fields,
                      function(t)
                          paste(t[-c(1L, which(!nzchar(t)))],
                                collapse = " "),
                      ""))
     }

     ## First split into paragraph chunks separated by whitespace-only
     ## lines, then parse each chunk.  Parsing is best-effort: a chunk
     ## which cannot be parsed contributes no entries.
     chunks <- unlist(strsplit(text, "\n[ \t\n]*\n"))
     parsed <- lapply(chunks,
                      function(chunk)
                          tryCatch(parse_chunk(chunk),
                                   error = function(e) NULL))

     ## Bind all chunks at once; NULL results are ignored by rbind().
     y <- do.call(rbind,
                  c(list(matrix("", nrow = 0L, ncol = 2L)), parsed))
     colnames(y) <- c("Item", "Description")
     y
 }
	# File src/library/tools/R/read.00Index.R
	# Part of the R package, https://www.R-project.org
	#
	# Copyright (C) 1995-2012 The R Core Team
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 2 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# A copy of the GNU General Public License is available at
	# https://www.R-project.org/Licenses/

	## Read a 00Index-style file into a two-column character matrix.
	##
	## Arguments:
	##   file  a character string naming the file to read ("" means
	##         stdin()), or a connection.
	##
	## Value:
	##   A character matrix with columns "Item" and "Description", one row
	##   per entry found in the paragraphs recognised as data base chunks
	##   (zero rows if none is recognised).
	##
	## An error is signalled if 'file' is neither a character string nor a
	## connection.
	read.00Index <-
	function(file)
	{
	    if(is.character(file)) {
	        if(file == "") file <- stdin()
	        else {
	            file <- file(file, "r")
	            ## Only a connection opened here is closed on exit; one
	            ## supplied by the caller is left open.
	            on.exit(close(file), add = TRUE)
	        }
	    }
	    if(!inherits(file, "connection"))
	        ## Name the argument in the message rather than interpolating
	        ## its (invalid, possibly NULL or non-scalar) value.
	        stop(gettextf("argument '%s' must be a character string or connection",
	                      "file"),
	             domain = NA)

	    text <- paste(readLines(file), collapse = "\n")

	    ## <FIXME>
	    ## We cannot necessarily assume that the 00Index-style file to be
	    ## read in was generated by @code{Rdindex()} or by R using
	    ## formatDL(style = "table").  In particular, some packages have
	    ## 00Index files with (section) headers and footers in addition to
	    ## the data base chunks which are description lists rendered in
	    ## tabular form.  Hence, we need some heuristic for identifying the
	    ## db chunks.  Easy to the human eye (is there a column for aligning
	    ## entries?) but far from trivial ... as a first approximation we
	    ## consider chunks containing at least one tab or three spaces a
	    ## db chunk.  (A better heuristic would be the following: entries
	    ## rendered in one line have item and description separated by at
	    ## least 3 spaces or tabs; entries with a line break have
	    ## continuation lines starting with whitespace (without testing
	    ## for alignment).  If a chunk is made of such entries only it is
	    ## considered a db chunk.  But not all current packages follow this
	    ## scheme.  Argh.)
	    ## Clearly we need to move to something better in future versions.
	    ## </FIXME>

	    ## Turn one paragraph chunk into a two-column matrix of items and
	    ## descriptions, or NULL if it does not look like a db chunk.
	    parse_chunk <- function(chunk) {
	        ## A db chunk contains at least one tab or three spaces.
	        if(!grepl("(   |\t)", chunk))
	            return(NULL)
	        ## Combine entries with continuation lines.
	        chunk <- gsub("\n[ \t]+", "\t", chunk)
	        lines <- unlist(strsplit(chunk, "\n"))
	        ## A single blank line at the very start of the file is not
	        ## matched by the paragraph separator and shows up here as an
	        ## empty line; drop it so that it cannot invalidate the chunk.
	        lines <- lines[nzchar(lines)]
	        ## Split each entry according to whitespace: the first field
	        ## is the item, the remaining non-empty fields make up the
	        ## description.
	        fields <- strsplit(lines, "[ \t]")
	        cbind(vapply(fields, `[[`, "", 1L),
	              vapply(fields,
	                     function(t)
	                         paste(t[-c(1L, which(!nzchar(t)))],
	                               collapse = " "),
	                     ""))
	    }

	    ## First split into paragraph chunks separated by whitespace-only
	    ## lines, then parse each chunk.  Parsing is best-effort: a chunk
	    ## which cannot be parsed contributes no entries.
	    chunks <- unlist(strsplit(text, "\n[ \t\n]*\n"))
	    parsed <- lapply(chunks,
	                     function(chunk)
	                         tryCatch(parse_chunk(chunk),
	                                  error = function(e) NULL))

	    ## Bind all chunks at once; NULL results are ignored by rbind().
	    y <- do.call(rbind,
	                 c(list(matrix("", nrow = 0L, ncol = 2L)), parsed))
	    colnames(y) <- c("Item", "Description")
	    y
	}