% blob: a09d920c3cb5ba70b77cd9013784761edcfc9476 [file] [log] [blame]
% File src/library/utils/man/aregexec.Rd
% Part of the R package, https://www.R-project.org
% Copyright 2011-2 R Core Team
% Distributed under GPL 2 or later
\name{aregexec}
\alias{aregexec}
\title{Approximate String Match Positions}
\description{
Determine positions of approximate string matches.
}
\usage{
aregexec(pattern, text, max.distance = 0.1, costs = NULL,
ignore.case = FALSE, fixed = FALSE, useBytes = FALSE)
}
\arguments{
\item{pattern}{a non-empty character string to be matched.
For \code{fixed = FALSE} (the default), it is interpreted as a
regular expression.
Coerced by \code{\link{as.character}} to a string if possible.}
\item{text}{character vector where matches are sought.
Coerced by \code{\link{as.character}} to a character vector if
possible.}
\item{max.distance}{maximum distance allowed for a match.
See \code{\link{agrep}}.}
\item{costs}{cost of transformations.
See \code{\link{agrep}}.}
\item{ignore.case}{a logical. If \code{TRUE}, case is ignored for
computing the distances.}
\item{fixed}{a logical. If \code{TRUE}, the pattern is matched
literally (as is).
Otherwise (default), it is matched as a regular expression.}
\item{useBytes}{a logical. If \code{TRUE}, comparisons are
byte-by-byte rather than character-by-character.}
}
\details{
\code{aregexec} provides a different interface to approximate string
matching than \code{\link{agrep}}, in the same way that
\code{\link{regexec}} provides a different interface to exact string
matching than \code{\link{grep}}.
Note that by default, \code{\link{agrep}} performs literal matches,
whereas \code{aregexec} performs regular expression matches.
See \code{\link{agrep}} and \code{\link{adist}} for more information
about approximate string matching and distances.
Comparisons are byte-by-byte if \code{pattern} or any element of
\code{text} is marked as \code{"bytes"}.
}
\value{
A list of the same length as \code{text}, each element of which is
either \eqn{-1} if there is no match, or a sequence of integers giving
the starting positions of the match and of all substrings corresponding
to parenthesized subexpressions of \code{pattern}, with attribute
\code{"match.length"}, an integer vector giving the lengths of the
matches (or \eqn{-1} for no match).
}
\seealso{
\code{\link{regmatches}} for extracting the matched substrings.
}
\examples{
## Cf. the examples for agrep.
x <- c("1 lazy", "1", "1 LAZY")
## Match positions within a maximum distance of 2.
aregexec("laysy", x, max.distance = 2)
## Parenthesized subexpressions are reported in addition to the overall match.
aregexec("(lay)(sy)", x, max.distance = 2)
## Ignore case when computing the distances.
aregexec("(lay)(sy)", x, max.distance = 2, ignore.case = TRUE)
## Extract the matched substrings.
m <- aregexec("(lay)(sy)", x, max.distance = 2)
regmatches(x, m)
}
\keyword{character}