# File src/library/base/R/strwrap.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2018 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
strtrim <- function(x, width)
{
if(!is.character(x)) x <- as.character(x)
.Internal(strtrim(x, width))
}
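## Illustrative example (comment only): strtrim() truncates each element
## to the requested display width, recycling 'width' as needed, e.g.
##   strtrim(c("abcdef", "ghij"), c(3, 2))   ## gives "abc" "gh"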
strwrap <-
function(x, width = 0.9 * getOption("width"), indent = 0, exdent = 0,
prefix = "", simplify = TRUE, initial = prefix)
{
if(!is.character(x)) x <- as.character(x)
## Useful variables.
indentString <- strrep(" ", indent)
exdentString <- strrep(" ", exdent)
y <- list() # return value
## We use strsplit() to tokenize input into paras and words, and
## hence need to tweak how it handles/transforms encodings. To
## preserve encodings, it seems "best" to canonicalize to UTF-8
## (ensuring valid UTF-8), and at the end convert back to latin1
## where we originally had latin1.
enc <- Encoding(x)
x <- enc2utf8(x)
if(any(ind <- !validEnc(x)))
x[ind] <- iconv(x[ind], "UTF-8", "UTF-8", sub = "byte")
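    ## E.g. a latin1-marked input such as
    ##   x <- "caf\xe9 noir"; Encoding(x) <- "latin1"
    ## is tokenized and wrapped as UTF-8, then re-marked "latin1" at the end.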
z <- lapply(strsplit(x, "\n[ \t\n]*\n", perl = TRUE),
strsplit, "[ \t\n]", perl = TRUE)
## Now z[[i]][[j]] is a character vector of all "words" in
## paragraph j of x[i].
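    ## E.g. for x <- "One two\n\nThree", z[[1]][[1]] is c("One", "two")
    ## and z[[1]][[2]] is "Three".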
for(i in seq_along(z)) {
yi <- character()
for(j in seq_along(z[[i]])) {
## Format paragraph j in x[i].
words <- z[[i]][[j]]
nc <- nchar(words, type="w")
if(anyNA(nc)) {
## use byte count as a reasonable substitute
nc0 <- nchar(words, type="b")
nc[is.na(nc)] <- nc0[is.na(nc)]
}
            ## Remove zero-length "words" (arising from runs of extra
            ## white space) unless they follow a period which hopefully
            ## ends a sentence.
            ## Add ? ! as other possible ends, and there might be
            ## quoted and parenthesised sentences.
            ## NB, input could be invalid here.
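            ## E.g. in "End.  Next  word" the empty "word" after "End."
            ## is kept (preserving the double space after the sentence)
            ## while the one after "Next" is dropped.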
if(any(nc == 0L)) {
zLenInd <- which(nc == 0L)
zLenInd <- zLenInd[!(zLenInd %in%
(grep("[.?!][)\"']{0,1}$", words,
perl = TRUE, useBytes = TRUE) + 1L))]
if(length(zLenInd)) {
words <- words[-zLenInd]
nc <- nc[-zLenInd]
}
}
if(!length(words)) {
yi <- c(yi, "", initial)
next
}
currentIndex <- 0L
lowerBlockIndex <- 1L
upperBlockIndex <- integer()
lens <- cumsum(nc + 1L)
first <- TRUE
maxLength <- width - nchar(initial, type="w") - indent
## Recursively build a sequence of lower and upper indices
## such that the words in line k are the ones in the k-th
## index block.
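            ## E.g. wrapping "The quick brown fox jumps over the lazy
            ## dog." at width = 20 gives lowerBlockIndex c(1, 5, 9) and
            ## upperBlockIndex c(4, 8, 9): lines of words 1-4, 5-8, 9.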
while(length(lens)) {
k <- max(sum(lens <= maxLength), 1L)
if(first) {
first <- FALSE
maxLength <- width - nchar(prefix, type="w") - exdent
}
currentIndex <- currentIndex + k
if(nc[currentIndex] == 0L)
## Are we sitting on a space?
upperBlockIndex <- c(upperBlockIndex,
currentIndex - 1L)
else
upperBlockIndex <- c(upperBlockIndex,
currentIndex)
if(length(lens) > k) {
## Are we looking at a space?
if(nc[currentIndex + 1L] == 0L) {
currentIndex <- currentIndex + 1L
k <- k + 1L
}
lowerBlockIndex <- c(lowerBlockIndex,
currentIndex + 1L)
}
if(length(lens) > k)
lens <- lens[-seq_len(k)] - lens[k]
else
lens <- NULL
}
nBlocks <- length(upperBlockIndex)
s <- paste0(c(initial, rep.int(prefix, nBlocks - 1L)),
c(indentString, rep.int(exdentString, nBlocks - 1L)))
initial <- prefix
for(k in seq_len(nBlocks))
s[k] <- paste0(s[k], paste(words[lowerBlockIndex[k] :
upperBlockIndex[k]],
collapse = " "))
yi <- c(yi, s, prefix)
}
y <- if(length(yi))
c(y, list(yi[-length(yi)]))
else
c(y, "")
}
if(length(pos <- which(enc == "latin1"))) {
y[pos] <-
lapply(y[pos],
function(s) {
e <- Encoding(s)
if(length(p <- which(e == "UTF-8")))
s[p] <- iconv(s[p], "UTF-8", "latin1",
sub = "byte")
s
})
}
if(simplify) y <- as.character(unlist(y))
y
}
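## Illustrative example (comment only): with an explicit width of 20,
##   strwrap("The quick brown fox jumps over the lazy dog.", width = 20)
## gives c("The quick brown fox", "jumps over the lazy", "dog.").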
formatDL <-
function(x, y, style = c("table", "list"),
width = 0.9 * getOption("width"), indent = NULL)
{
if(is.list(x)) {
if(length(x) == 2L && diff(lengths(x)) == 0L) {
y <- x[[2L]]; x <- x[[1L]]
}
else
stop("incorrect value for 'x'")
}
else if(is.matrix(x)) {
if(NCOL(x) == 2L) {
y <- x[, 2L]; x <- x[, 1L]
}
else
stop("incorrect value for 'x'")
}
else if(missing(y) && !is.null(nms <- names(x))) {
y <- x
x <- nms
}
else if(length(x) != length(y))
stop("'x' and 'y' must have the same length")
x <- as.character(x)
if(!length(x)) return(x)
y <- as.character(y)
style <- match.arg(style)
if(is.null(indent))
indent <- switch(style, table = width / 3, list = width / 9)
    ## change 2017-03-12 suggested by Bill Dunlap
## https://stat.ethz.ch/pipermail/r-devel/2017-March/073873.html
## if(indent > 0.5 * width)
## warning("'indent' is too large for 'width' and will be reduced")
indent <- min(indent, 0.5*width)
indentString <- strrep(" ", indent)
if(style == "table") {
i <- (nchar(x, type="w") > indent - 3L)
if(any(i))
x[i] <- paste0(x[i], "\n", indentString)
i <- !i
if(any(i))
x[i] <- formatC(x[i], width = indent, flag = "-")
y <- lapply(strwrap(y, width = width - indent,
simplify = FALSE),
paste,
collapse = paste0("\n", indentString))
r <- paste0(x, unlist(y))
}
else if(style == "list") {
y <- strwrap(paste0(x, ": ", y), exdent = indent,
width = width, simplify = FALSE)
r <- unlist(lapply(y, paste, collapse = "\n"))
}
r
}
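## Illustrative example (comment only): in the default "table" style,
##   formatDL(c("alpha", "beta"), c("first item", "second item"), width = 30)
## pads each term to the indent (width/3 = 10 columns) and wraps the
## descriptions to the remaining 20 columns, giving
##   "alpha     first item"  and  "beta      second item".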
trimws <- function(x, which = c("both", "left", "right"),
whitespace = "[ \t\r\n]")
{
which <- match.arg(which)
mysub <- function(re, x) sub(re, "", x, perl = TRUE)
switch(which,
"left" = mysub(paste0("^", whitespace, "+"), x),
"right"= mysub(paste0(whitespace, "+$"), x),
"both" = mysub(paste0(whitespace, "+$"),
mysub(paste0("^", whitespace, "+"), x)))
}
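## Illustrative example (comment only):
##   trimws("  hello\t\n")                 ## gives "hello"
##   trimws("  hello ", which = "left")    ## gives "hello "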