blob: d9af3762f9accb4a6ce2fbe2373b285d86d18771 [file] [log] [blame]
/*
tre-ast.h - Abstract syntax tree (AST) definitions
This software is released under a BSD-style license.
See the file LICENSE for details and copyright.
*/
#ifndef TRE_AST_H
#define TRE_AST_H 1
#include "tre-mem.h"
#include "tre-internal.h"
#include "tre-compile.h"
/* The different AST node types. */
typedef enum {
LITERAL,
CATENATION,
ITERATION,
UNION
} tre_ast_type_t;
/* Special subtypes of TRE_LITERAL. */
#define EMPTY -1 /* Empty leaf (denotes empty string). */
#define ASSERTION -2 /* Assertion leaf. */
#define TAG -3 /* Tag leaf. */
#define BACKREF -4 /* Back reference leaf. */
#define PARAMETER -5 /* Parameter. */
#define IS_SPECIAL(x) ((x)->code_min < 0)
#define IS_EMPTY(x) ((x)->code_min == EMPTY)
#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
#define IS_TAG(x) ((x)->code_min == TAG)
#define IS_BACKREF(x) ((x)->code_min == BACKREF)
#define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
/* A generic AST node. All AST nodes consist of this node on the top
level with `obj' pointing to the actual content. */
typedef struct {
tre_ast_type_t type; /* Type of the node. */
void *obj; /* Pointer to actual node. */
int nullable;
int submatch_id;
int num_submatches;
int num_tags;
tre_pos_and_tags_t *firstpos;
tre_pos_and_tags_t *lastpos;
} tre_ast_node_t;
/* A "literal" node. These are created for assertions, back references,
tags, matching parameter settings, and all expressions that match one
character. */
typedef struct {
long code_min;
long code_max;
int position;
union {
tre_ctype_t class;
int *params;
} u;
tre_ctype_t *neg_classes;
} tre_literal_t;
/* A "catenation" node. These are created when two regexps are concatenated.
If there are more than one subexpressions in sequence, the `left' part
holds all but the last, and `right' part holds the last subexpression
(catenation is left associative). */
typedef struct {
tre_ast_node_t *left;
tre_ast_node_t *right;
} tre_catenation_t;
/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
operators. */
typedef struct {
/* Subexpression to match. */
tre_ast_node_t *arg;
/* Minimum number of consecutive matches. */
int min;
/* Maximum number of consecutive matches. */
int max;
/* If 0, match as many characters as possible, if 1 match as few as
possible. Note that this does not always mean the same thing as
matching as many/few repetitions as possible. */
unsigned int minimal:1;
/* Approximate matching parameters (or NULL). */
int *params;
} tre_iteration_t;
/* An "union" node. These are created for the "|" operator. */
typedef struct {
tre_ast_node_t *left;
tre_ast_node_t *right;
} tre_union_t;
tre_ast_node_t *
tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
tre_ast_node_t *
tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
int minimal);
tre_ast_node_t *
tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
tre_ast_node_t *
tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
tre_ast_node_t *right);
#ifdef TRE_DEBUG
void
tre_ast_print(tre_ast_node_t *tree);
/* XXX - rethink AST printing API */
void
tre_print_params(int *params);
#endif /* TRE_DEBUG */
#endif /* TRE_AST_H */
/* EOF */