blob: 0afc8c56d9a651954e109273fed36d94ee97110e [file] [log] [blame]
## File src/library/utils/R/strcapture.R
## Part of the R package, https://www.R-project.org
##
## Copyright (C) 1995-2016 The R Core Team
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## A copy of the GNU General Public License is available at
## https://www.R-project.org/Licenses/
strcapture <- function(pattern, x, proto, perl = FALSE, useBytes = FALSE) {
m <- regexec(pattern, x, perl=perl, useBytes=useBytes)
str <- regmatches(x, m)
ntokens <- length(proto) + 1L
nomatch <- lengths(str) == 0L
str[nomatch] <- list(rep.int(NA_character_, ntokens))
if (length(str) > 0L && length(str[[1L]]) != ntokens) {
### FIXME: this will not always detect an error when there are no matches
stop("The number of captures in 'pattern' != 'length(proto)'")
}
mat <- matrix(as.character(unlist(str)), ncol=ntokens,
byrow=TRUE)[,-1L,drop=FALSE]
conformToProto(mat, proto)
}
## Not yet exported
strextract <- function(pattern, x, perl = FALSE, useBytes = FALSE) {
m <- regexec(pattern, x, perl=perl, useBytes=useBytes)
unlist(regmatches(x, m))
}
conformToProto <- function(mat, proto) {
ans <- lapply(seq_along(proto), function(i) {
if (isS4(proto[[i]])) {
methods::as(mat[,i], class(proto[[i]]))
} else {
fun <- match.fun(paste0("as.", class(proto[[i]])))
fun(mat[,i])
}
})
names(ans) <- names(proto)
if (isS4(proto)) {
methods::as(ans, class(proto))
} else {
as.data.frame(ans, optional=TRUE, stringsAsFactors=FALSE)
}
}
## Not yet exported
strslice <- function(x, split, proto, fixed = FALSE, perl = FALSE,
useBytes = FALSE)
{
str <- strsplit(x, split, fixed=fixed, perl=perl, useBytes=useBytes)
ntokens <- length(proto)
if (length(str) > 0L) {
if (length(str[[1L]]) != ntokens) {
stop("The number of tokens != 'length(proto)'")
} else if (length(unique(lengths(str))) > 1L) {
stop("The number of tokens is not consistent across 'x'")
}
}
mat <- matrix(as.character(unlist(str)), ncol=ntokens, byrow=TRUE)
conformToProto(mat, proto)
}