blob: cf80719b47c15052c7a6f487d46498d07903f1e1 [file] [log] [blame]
/*
* BRLTTY - A background process providing access to the console screen (when in
* text mode) for a blind person using a refreshable braille display.
*
* Copyright (C) 1995-2023 by The BRLTTY Developers.
*
* BRLTTY comes with ABSOLUTELY NO WARRANTY.
*
* This is free software, placed under the terms of the
* GNU Lesser General Public License, as published by the Free Software
* Foundation; either version 2.1 of the License, or (at your option) any
* later version. Please see the file LICENSE-LGPL for details.
*
* Web Page: http://brltty.app/
*
* This software is maintained by Dave Mielke <dave@mielke.cc>.
*/
#include "prologue.h"
#include <string.h>
#include "log.h"
#include "rgx.h"
#include "rgx_internal.h"
#include "utf8.h"
#include "queue.h"
#include "strfmt.h"
#define RGX_UTF8_TO_CHARACTERS \
size_t size = strlen(string) + 1; \
wchar_t characters[size]; \
size_t count; \
{ \
const char *from = string; \
wchar_t *to = characters; \
convertUtf8ToWchars(&from, &to, size); \
count = to - characters; \
}
#define RGX_CHARACTERS_TO_INTERNAL \
RGX_CharacterType internal[length + 1]; \
internal[length] = 0; \
for (unsigned int index=0; index<length; index+=1) { \
internal[index] = characters[index]; \
}
struct RGX_ObjectStruct {
void *data;
Queue *matchers;
RGX_OptionsType options;
};
struct RGX_MatcherStruct {
void *data;
RGX_MatchHandler *handler;
RGX_OptionsType options;
struct {
wchar_t *characters;
size_t length;
} pattern;
struct {
RGX_CodeType *code;
RGX_DataType *data;
} compiled;
};
static void
rgxLogError (int error, const RGX_Matcher *matcher, RGX_OffsetType *offset) {
char log[0X100];
STR_BEGIN(log, sizeof(log));
STR_PRINTF("regular expression error");
if (offset) STR_PRINTF(" at offset %"PRIu32, (uint32_t)*offset);
STR_PRINTF(": ");
{
size_t oldLength = STR_LENGTH;
STR_FORMAT(rgxFormatErrorMessage, error);
if (STR_LENGTH == oldLength) STR_PRINTF("unrecognized error %d", error);
}
if (matcher) {
STR_PRINTF(
": %.*"PRIws, (int)matcher->pattern.length, matcher->pattern.characters
);
}
STR_END;
logMessage(LOG_WARNING, "%s", log);
}
static void
rgxDeallocateMatcher (void *item, void *data) {
RGX_Matcher *matcher = item;
rgxDeallocateData(matcher->compiled.data);
rgxDeallocateCode(matcher->compiled.code);
free(matcher->pattern.characters);
free(matcher);
}
RGX_Matcher *
rgxAddPatternCharacters (
RGX_Object *rgx,
const wchar_t *characters, size_t length,
RGX_MatchHandler *handler, void *data
) {
RGX_Matcher *matcher;
if ((matcher = malloc(sizeof(*matcher)))) {
memset(matcher, 0, sizeof(*matcher));
matcher->data = data;
matcher->handler = handler;
matcher->options = 0;
matcher->pattern.characters = calloc(
(matcher->pattern.length = length) + 1,
sizeof(*matcher->pattern.characters)
);
if (matcher->pattern.characters) {
wmemcpy(matcher->pattern.characters, characters, length);
matcher->pattern.characters[length] = 0;
RGX_CHARACTERS_TO_INTERNAL;
int error;
RGX_OffsetType offset;
matcher->compiled.code = rgxCompilePattern(
internal, length, rgx->options, &offset, &error
);
if (matcher->compiled.code) {
matcher->compiled.data = rgxAllocateData(matcher->compiled.code);
if (matcher->compiled.data) {
if (enqueueItem(rgx->matchers, matcher)) {
return matcher;
}
rgxDeallocateData(matcher->compiled.data);
} else {
logMallocError();
}
rgxDeallocateCode(matcher->compiled.code);
} else {
rgxLogError(error, matcher, &offset);
}
free(matcher->pattern.characters);
} else {
logMallocError();
}
free(matcher);
} else {
logMallocError();
}
return NULL;
}
RGX_Matcher *
rgxAddPatternString (
RGX_Object *rgx,
const wchar_t *string,
RGX_MatchHandler *handler, void *data
) {
return rgxAddPatternCharacters(rgx, string, wcslen(string), handler, data);
}
RGX_Matcher *
rgxAddPatternUTF8 (
RGX_Object *rgx,
const char *string,
RGX_MatchHandler *handler, void *data
) {
RGX_UTF8_TO_CHARACTERS;
return rgxAddPatternCharacters(rgx, characters, count, handler, data);
}
static int
rgxTestMatcher (const void *item, void *data) {
const RGX_Matcher *matcher = item;
RGX_Match *match = data;
int error;
int matched = rgxMatchText(
match->text.internal, match->text.length,
matcher->compiled.code, matcher->compiled.data,
matcher->options, &match->capture.count, &error
);
if (!matched) {
if (error != RGX_NO_MATCH) rgxLogError(error, matcher, NULL);
return 0;
}
RGX_MatchHandler *handler = matcher->handler;
if (!handler) return 1;
match->matcher = matcher;
match->data.pattern = matcher->data;
match->pattern.characters = matcher->pattern.characters;
match->pattern.length = matcher->pattern.length;
return handler(match);
}
RGX_Matcher *
rgxMatchTextCharacters (
RGX_Object *rgx,
const wchar_t *characters, size_t length,
RGX_Match **result, void *data
) {
RGX_CHARACTERS_TO_INTERNAL;
RGX_Match match = {
.text = {
.internal = internal,
.characters = characters,
.length = length
},
.data = {
.object = rgx->data,
.match = data
}
};
Element *element = findElement(rgx->matchers, rgxTestMatcher, &match);
if (!element) return NULL;
if (result) {
typedef struct {
RGX_Match match;
wchar_t text[];
} Block;
Block *block;
size_t size = sizeof(*block);
size += (length + 1) * sizeof(block->text[0]);
if (!(block = malloc(size))) {
logMallocError();
return NULL;
}
block->match = match;
block->match.text.internal = NULL;
wmemcpy(block->text, match.text.characters, length);
block->text[length] = 0;
block->match.text.characters = block->text;
*result = &block->match;
}
return getElementItem(element);
}
RGX_Matcher *
rgxMatchTextString (
RGX_Object *rgx,
const wchar_t *string,
RGX_Match **result, void *data
) {
return rgxMatchTextCharacters(rgx, string, wcslen(string), result, data);
}
RGX_Matcher *
rgxMatchTextUTF8 (
RGX_Object *rgx,
const char *string,
RGX_Match **result, void *data
) {
RGX_UTF8_TO_CHARACTERS;
return rgxMatchTextCharacters(rgx, characters, count, result, data);
}
int
rgxGetNameNumberCharacters (
const RGX_Matcher *matcher,
const wchar_t *characters, size_t length,
size_t *number
) {
RGX_CHARACTERS_TO_INTERNAL;
int error;
if (rgxNameNumber(matcher->compiled.code, internal, number, &error)) return 1;
if (error != RGX_NO_NAME) rgxLogError(error, matcher, NULL);
return 0;
}
int
rgxGetNameNumberString (
const RGX_Matcher *matcher,
const wchar_t *string,
size_t *number
) {
return rgxGetNameNumberCharacters(matcher, string, wcslen(string), number);
}
int
rgxGetNameNumberUTF8 (
const RGX_Matcher *matcher,
const char *string,
size_t *number
) {
RGX_UTF8_TO_CHARACTERS;
return rgxGetNameNumberCharacters(matcher, characters, count, number);
}
size_t
rgxGetCaptureCount (
const RGX_Match *match
) {
return match->capture.count;
}
int
rgxGetCaptureBounds (
const RGX_Match *match,
size_t number, size_t *from, size_t *to
) {
if (number > match->capture.count) return 0;
return rgxCaptureBounds(match->matcher->compiled.data, number, from, to);
}
int
rgxGetCaptureText (
const RGX_Match *match,
size_t number, const wchar_t **characters, size_t *length
) {
size_t from, to;
if (!rgxGetCaptureBounds(match, number, &from, &to)) return 0;
*characters = &match->text.characters[from];
*length = to - from;
return 1;
}
RGX_Object *
rgxNewObject (void *data) {
RGX_Object *rgx;
if ((rgx = malloc(sizeof(*rgx)))) {
memset(rgx, 0, sizeof(*rgx));
rgx->data = data;
rgx->options = 0;
if ((rgx->matchers = newQueue(rgxDeallocateMatcher, NULL))) {
return rgx;
}
free(rgx);
} else {
logMallocError();
}
return NULL;
}
void
rgxDestroyObject (RGX_Object *rgx) {
deallocateQueue(rgx->matchers);
free(rgx);
}
static int
rgxOption (
RGX_OptionAction action, int option,
RGX_OptionsType *bits, const RGX_OptionMap *map
) {
RGX_OptionsType bit = ((option >= 0) && (option < map->count))? map->array[option]: 0;
int wasSet = !!(*bits & bit);
if (action == RGX_OPTION_TOGGLE) {
action = wasSet? RGX_OPTION_CLEAR: RGX_OPTION_SET;
}
switch (action) {
case RGX_OPTION_SET:
*bits |= bit;
break;
case RGX_OPTION_CLEAR:
*bits &= ~bit;
break;
default:
logMessage(LOG_WARNING, "unimplemented regular expression option action: %d", action);
/* fall through */
case RGX_OPTION_TEST:
break;
}
return wasSet;
}
int
rgxCompileOption (
RGX_Object *rgx,
RGX_OptionAction action,
RGX_CompileOption option
) {
return rgxOption(action, option, &rgx->options, &rgxCompileOptionsMap);
}
int
rgxMatchOption (
RGX_Matcher *matcher,
RGX_OptionAction action,
RGX_MatchOption option
) {
return rgxOption(action, option, &matcher->options, &rgxMatchOptionsMap);
}