| % File src/library/base/man/regmatches.Rd |
| % Part of the R package, https://www.R-project.org |
| % Copyright 1995-2014 R Core Team |
| % Distributed under GPL 2 or later |
| |
| \name{regmatches} |
| \alias{regmatches} |
| \alias{regmatches<-} |
| \title{Extract or Replace Matched Substrings} |
| \description{ |
| Extract or replace matched substrings from match data obtained by |
| \code{\link{regexpr}}, \code{\link{gregexpr}} or |
| \code{\link{regexec}}. |
| } |
| \usage{ |
| regmatches(x, m, invert = FALSE) |
| regmatches(x, m, invert = FALSE) <- value |
| } |
| \arguments{ |
| \item{x}{a character vector} |
| \item{m}{an object with match data} |
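| \item{invert}{a logical: if \code{TRUE}, extract or replace the |
| non-matched substrings; if \code{NA}, \code{regmatches} extracts both |
| the non-matched and the matched substrings (see \sQuote{Details}).} |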
| \item{value}{an object with suitable replacement values for the |
| matched or non-matched substrings (see \sQuote{Details}).} |
| } |
| \details{ |
| If \code{invert} is \code{FALSE} (default), \code{regmatches} extracts |
| the matched substrings as specified by the match data. For vector |
| match data (as obtained from \code{\link{regexpr}}), empty matches are |
| dropped; for list match data, empty matches give empty components |
| (zero-length character vectors). |
| |
| If \code{invert} is \code{TRUE}, \code{regmatches} extracts the |
| non-matched substrings, i.e., the strings are split according to the |
| matches similar to \code{\link{strsplit}} (for vector match data, at |
| most a single split is performed). |
| |
| If \code{invert} is \code{NA}, \code{regmatches} extracts both |
| non-matched and matched substrings, always starting and ending with a |
| non-match (empty if the match occurred at the beginning or the end, |
| respectively). |
| |
| Note that the match data can be obtained from regular expression |
| matching on a modified version of \code{x} with the same numbers of |
| characters. |
| |
| The replacement function can be used for replacing the matched or |
| non-matched substrings. For vector match data, if \code{invert} is |
| \code{FALSE}, \code{value} should be a character vector whose length is |
| the number of matched elements in \code{m}. Otherwise, it should be a |
| list of character vectors with the same length as \code{m}, each as |
| long as the number of replacements needed. Replacement coerces values |
| to character or list and generously recycles values as needed. |
| Missing replacement values are not allowed. |
| } |
| \value{ |
| For \code{regmatches}, a character vector with the matched substrings |
| if \code{m} is a vector and \code{invert} is \code{FALSE}. Otherwise, |
| a list with the matched and/or non-matched substrings. |
| |
| For \code{regmatches<-}, the updated character vector. |
| } |
| \examples{ |
| x <- c("A and B", "A, B and C", "A, B, C and D", "foobar") |
| pattern <- "[[:space:]]*(,|and)[[:space:]]" |
| ## Match data from regexpr() |
| m <- regexpr(pattern, x) |
| regmatches(x, m) |
| regmatches(x, m, invert = TRUE) |
| ## Match data from gregexpr() |
| m <- gregexpr(pattern, x) |
| regmatches(x, m) |
| regmatches(x, m, invert = TRUE) |
| |
| ## Consider |
| x <- "John (fishing, hunting), Paul (hiking, biking)" |
| ## Suppose we want to split at the comma (plus spaces) between the |
| ## persons, but not at the commas in the parenthesized hobby lists. |
| ## One idea is to "blank out" the parenthesized parts to match the |
| ## parts to be used for splitting, and extract the persons as the |
| ## non-matched parts. |
| ## First, match the parenthesized hobby lists. |
| m <- gregexpr("\\\\([^)]*\\\\)", x) |
| ## Create blank strings with given numbers of characters. |
| blanks <- function(n) strrep(" ", n) |
| ## Create a copy of x with the parenthesized parts blanked out. |
| s <- x |
| regmatches(s, m) <- Map(blanks, lapply(regmatches(s, m), nchar)) |
| s |
| ## Compute the positions of the split matches (note that we cannot call |
| ## strsplit() on x with match data from s). |
| m <- gregexpr(", *", s) |
| ## And finally extract the non-matched parts. |
| regmatches(x, m, invert = TRUE) |
| } |
| \keyword{character} |
| \keyword{utilities} |