# File src/library/tools/R/Rdtools.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2014 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
RdTextFilter <-
function(ifile, encoding = "unknown", keepSpacing = TRUE,
drop = character(), keep = character(),
macros = file.path(R.home("share"), "Rd", "macros", "system.Rd"))
{
if(inherits(ifile, "srcfile"))
ifile <- ifile$filename
if (inherits(ifile, "Rd")) {
# Undo sorting done in prepare2_Rd
srcrefs <- sapply(ifile, function(s) attr(s, "srcref"))
p <- ifile[ order(srcrefs[1,], srcrefs[2,]) ]
class(p) <- class(ifile)
} else
p <- parse_Rd(ifile, encoding = encoding, macros = macros)
tags <- RdTags(p)
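## If the Rd source declares an \encoding, re-parse the file with that
## encoding (unless an already-parsed Rd object was supplied); the value
## is also used at the end to turn latin1 results back from UTF-8.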
if ("\\encoding" %in% tags) {
encoding <- p[[which.max(tags == "\\encoding")]][[1L]]
if (encoding %in% c("UTF-8", "utf-8", "utf8")) encoding <- "UTF-8"
if (!inherits(ifile, "Rd"))
p <- parse_Rd(ifile, encoding=encoding, macros = macros)
} else
encoding <- ""
## Directly using a text connection to accumulate the filtered
## output seems to be faster than using .eval_with_capture(): to use
## the latter, change mycat to cat, or use mycat <- cat, and create
## out via
## out <- .eval_with_capture({
## show(p)
## mycat("\n")
## })$output
## myval <- character()
mycon <- textConnection("myval", open = "w", local = TRUE,
encoding = "UTF-8")
on.exit(close(mycon))
mycat <- function(...) cat(..., file = mycon)
prevline <- 1L
prevcol <- 0L
doPartialMarkup <- function(x, tags, i) { # handle things like \bold{pre}fix
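## Worked example (comment only): for the Rd fragment \bold{pre}fix,
## "pre" is the TEXT child of \bold and "fix" is the TEXT element that
## immediately follows the markup.  Emitting "pre" and then "fix"
## shifted one column to the left makes the filtered output read
## "prefix", so the spell checker sees one word rather than "pre" "fix".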
result <- FALSE
if (i < length(tags)
&& tags[i+1L] == "TEXT"
&& length(x[[i]]) == 1L
&& tags[i] %in% c("\\bold", "\\emph", "\\strong", "\\link")
&& (tags[i] %notin% drop)
&& RdTags(x[[i]]) == "TEXT") {
text1 <- x[[i]][[1L]]
if (length(grep("[^[:space:]]$", text1))) { # Ends in non-blank
text2 <- x[[i+1L]]
if (length(grep("^[^[:space:]]", text2))) { # Starts non-blank
show(text1)
prevcol <<- prevcol+1L # Shift text2 left by one column
saveline <- prevline
show(text2)
if (prevline == saveline)
prevcol <<- prevcol-1L
result <- TRUE
}
}
}
result
}
show <- function(x) {
srcref <- attr(x, "srcref")
firstline <- srcref[1L]
lastline <- srcref[3L]
firstcol <- srcref[5L]
lastcol <- srcref[6L]
tag <- attr(x, "Rd_tag")
if (is.null(tag)) tag <- "NULL"
if (tag %in% drop) tag <- "DROP"
else if (tag %in% keep) tag <- "KEEPLIST" # Include both text and lists
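## Tags listed in 'drop' are remapped to DROP, which emits nothing;
## tags listed in 'keep' are remapped to KEEPLIST, so the default branch
## below recurses into their contents (or relabels a leaf as KEEP) and
## their text is emitted even for tags that would otherwise be dropped.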
switch(tag,
KEEP =,
TEXT = {
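## When keepSpacing is TRUE, pad with newlines and blanks so that each
## piece of text keeps its original line and column; positions reported
## by a spell checker then map straight back to the Rd source.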
if (prevline < firstline) {
prevcol <<- 0L
mycat(rep.int("\n",
if(keepSpacing) firstline - prevline else 1L))
}
if (keepSpacing)
mycat(rep.int(" ", firstcol - prevcol - 1L), sep = "")
x <- as.character(srcref) # go back to original form
mycat(x, sep = "")
prevcol <<- lastcol
prevline <<- lastline
},
"\\S3method"=,
"\\S4method"=,
"\\command"=,
"\\docType"=,
"\\email"=,
"\\encoding"=,
"\\file"=,
"\\keyword"=,
"\\link"=,
"\\linkS4class"=,
"\\method"=,
"\\pkg"=,
"\\var"=,
DROP = {}, # do nothing
"\\tabular"=,
"#ifdef"=,
"#ifndef"={ # Ignore the first arg, process the second
show(x[[2L]])
},
"\\item"={ # Ignore the first arg of a two-arg item
if (length(x) == 2L) show(x[[2L]])
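## e.g. for \item{na.rm}{a logical value} only "a logical value" is
## kept, so argument names are not flagged by the spell checker.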
},
{ # default
if (is.list(x)) {
tags <- RdTags(x)
i <- 0L
while (i < length(x)) {
i <- i + 1L
if (doPartialMarkup(x, tags, i))
i <- i + 1L
else
show(x[[i]])
}
} else if (tag == "KEEPLIST") {
attr(x, "Rd_tag") <- "KEEP"
show(x)
}
})# {switch}
}# end show()
show(p)
mycat("\n")
out <- textConnectionValue(mycon)
## Ideally, we would always canonicalize to UTF-8.
## However, when using RdTextFilter() for aspell(), it is not clear
## whether this is a good idea: the aspell program does not need to
## have full UTF-8 support (and what precisely holds is not clear:
## the manual says that aspell
## can easily check documents in UTF-8 without having to use a
## special dictionary.
## but also
## If Aspell is compiled with a version of the curses library that
## support wide characters then Aspell can also check UTF-8 text.)
## So at least until this can be resolved, turn filter results for
## Rd files originally in latin1 back to latin1.
if(encoding == "latin1")
out <- iconv(out, "UTF-8", "latin1")
out
}
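## A minimal usage sketch (left as comments, since this file is sourced
## when the package is installed; "man/foo.Rd" is a hypothetical path):
##   txt <- RdTextFilter("man/foo.Rd", drop = "\\examples")
##   utils::aspell("man/foo.Rd", filter = RdTextFilter)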