blob: f14f37058aa6d68c6ce2fe431773727f0b8b72a6 [file] [log] [blame]
/* Pattern Matcher for Fixed String search.
Copyright (C) 1992, 1998, 2000, 2005-2006, 2010, 2013, 2020 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
/* Specification. */
#include "libgrep.h"
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
/* We can handle multibyte string. */
# define MBS_SUPPORT
# include <wchar.h>
# include <wctype.h>
#endif
#include "error.h"
#include "exitfail.h"
#include "xalloc.h"
#include "kwset.h"
#include "gettext.h"
#define _(str) gettext (str)
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif
#define ISUPPER(C) (IN_CTYPE_DOMAIN (C) && isupper (C))
#define TOLOWER(C) (ISUPPER(C) ? tolower(C) : (C))
#define ISALNUM(C) (IN_CTYPE_DOMAIN (C) && isalnum (C))
#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
#define NCHAR (UCHAR_MAX + 1)
struct compiled_kwset {
kwset_t kwset;
char *trans;
bool match_words;
bool match_lines;
char eolbyte;
};
static void
kwsinit (struct compiled_kwset *ckwset,
bool match_icase, bool match_words, bool match_lines, char eolbyte)
{
if (match_icase)
{
int i;
ckwset->trans = XNMALLOC (NCHAR, char);
for (i = 0; i < NCHAR; i++)
ckwset->trans[i] = TOLOWER (i);
ckwset->kwset = kwsalloc (ckwset->trans);
}
else
{
ckwset->trans = NULL;
ckwset->kwset = kwsalloc (NULL);
}
if (ckwset->kwset == NULL)
error (exit_failure, 0, _("memory exhausted"));
ckwset->match_words = match_words;
ckwset->match_lines = match_lines;
ckwset->eolbyte = eolbyte;
}
static void *
Fcompile (const char *pattern, size_t pattern_size,
bool match_icase, bool match_words, bool match_lines,
char eolbyte)
{
struct compiled_kwset *ckwset;
const char *beg;
const char *err;
ckwset = XMALLOC (struct compiled_kwset);
kwsinit (ckwset, match_icase, match_words, match_lines, eolbyte);
beg = pattern;
do
{
const char *lim;
for (lim = beg; lim < pattern + pattern_size && *lim != '\n'; ++lim)
;
if ((err = kwsincr (ckwset->kwset, beg, lim - beg)) != NULL)
error (exit_failure, 0, "%s", err);
if (lim < pattern + pattern_size)
++lim;
beg = lim;
}
while (beg < pattern + pattern_size);
if ((err = kwsprep (ckwset->kwset)) != NULL)
error (exit_failure, 0, "%s", err);
return ckwset;
}
#ifdef MBS_SUPPORT
/* This function allocate the array which correspond to "buf".
Then this check multibyte string and mark on the positions which
are not singlebyte character nor the first byte of a multibyte
character. Caller must free the array. */
static char*
check_multibyte_string (const char *buf, size_t buf_size)
{
char *mb_properties = (char *) malloc (buf_size);
mbstate_t cur_state;
int i;
memset (&cur_state, 0, sizeof (mbstate_t));
memset (mb_properties, 0, sizeof (char) * buf_size);
for (i = 0; i < buf_size ;)
{
size_t mbclen;
mbclen = mbrlen (buf + i, buf_size - i, &cur_state);
if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
{
/* An invalid sequence, or a truncated multibyte character.
We treat it as a singlebyte character. */
mbclen = 1;
}
mb_properties[i] = mbclen;
i += mbclen;
}
return mb_properties;
}
#endif
static size_t
Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
size_t *match_size, bool exact)
{
const struct compiled_kwset *ckwset =
(const struct compiled_kwset *) compiled_pattern;
char eol = ckwset->eolbyte;
register const char *buflim = buf + buf_size;
register const char *beg;
register size_t len;
#ifdef MBS_SUPPORT
char *mb_properties;
if (MB_CUR_MAX > 1)
mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */
for (beg = buf; beg <= buflim; ++beg)
{
struct kwsmatch kwsmatch;
size_t offset = kwsexec (ckwset->kwset, beg, buflim - beg, &kwsmatch);
if (offset == (size_t) -1)
{
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1)
free (mb_properties);
#endif /* MBS_SUPPORT */
return offset;
}
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
continue; /* It is a part of multibyte character. */
#endif /* MBS_SUPPORT */
beg += offset;
len = kwsmatch.size[0];
if (exact)
{
*match_size = len;
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1)
free (mb_properties);
#endif /* MBS_SUPPORT */
return beg - buf;
}
if (ckwset->match_lines)
{
if (beg > buf && beg[-1] != eol)
continue;
if (beg + len < buflim && beg[len] != eol)
continue;
goto success;
}
else if (ckwset->match_words)
{
register const char *curr;
for (curr = beg; len; )
{
if (curr > buf && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
break;
if (curr + len < buflim
&& IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
{
offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
if (offset == (size_t) -1)
{
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1)
free (mb_properties);
#endif /* MBS_SUPPORT */
return offset;
}
curr = beg + offset;
len = kwsmatch.size[0];
}
else
goto success;
}
}
else
goto success;
}
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1)
free (mb_properties);
#endif /* MBS_SUPPORT */
return -1;
success:
{
register const char *end;
end = (const char *) memchr (beg + len, eol, buflim - (beg + len));
if (end != NULL)
end++;
else
end = buflim;
while (buf < beg && beg[-1] != eol)
--beg;
*match_size = end - beg;
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1)
free (mb_properties);
#endif /* MBS_SUPPORT */
return beg - buf;
}
}
static void
Ffree (void *compiled_pattern)
{
struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
free (ckwset->trans);
free (ckwset);
}
matcher_t matcher_fgrep =
{
Fcompile,
Fexecute,
Ffree
};