/*
 * parser classes for MySpell
 *
 * implemented: text, HTML, TeX
 *
 * Copyright (C) 2002, Laszlo Nemeth
 *
 */

// NOTE(review): identifiers starting with an underscore followed by an
// uppercase letter are reserved for the implementation; the guard name is
// kept unchanged here because other files may test it.
#ifndef _TEXTPARSER_HXX_
#define _TEXTPARSER_HXX_

// Number of line buffers kept by the parser: the current line plus the
// previous ones (this is the size of TextParser::line).
#define MAXPREVLINE 4

// Only defined here when the includer has not already defined it.
// Not used in this header; presumably a maximum line length -- confirm
// against the implementation.
#ifndef MAXLNLEN
#define MAXLNLEN 8192
#endif

/*
 * Base Text Parser
 *
 * Declaration only: every member function is defined outside this header.
 *
 * NOTE(review): the class holds raw pointers (line[], urlline,
 * wordchars_utf16) and declares neither a copy constructor nor a copy
 * assignment operator.  If the destructor releases these buffers, copying
 * a TextParser would free them twice -- confirm against the implementation
 * before copying instances.
 */
class TextParser
{

protected:
  void init(const char *wc);
  void init(unsigned short *wordchars, int len);

  int wordcharacters[256];        // for detection of the word boundaries

  // NOTE: space for line and urlline is allocated dynamically according to
  // the input length, to avoid a segfault when the input is too long.
  char *line[MAXPREVLINE];        // parsed (current) line and previous lines
  char *urlline;                  // mask for URL detection
  int checkurl;                   // presumably the flag set by
                                  // set_url_checking() -- TODO confirm
  int actual;                     // the "actual" (i.e. current) line
  int head;                       // head position
  int token;                      // beginning of the token
  int state;                      // state of the automaton
  int utf8;                       // UTF-8 character encoding

  int next_char(char *line, int *pos);

  unsigned short *wordchars_utf16; // presumably the UTF-16 word characters
                                   // handed to init()/the constructor
                                   // -- TODO confirm
  int wclen;                       // presumably the length of
                                   // wordchars_utf16 -- TODO confirm

public:

  TextParser();
  TextParser(unsigned short *wordchars, int len, int text_len);
  TextParser(const char *wc);
  virtual ~TextParser();

  // Line handling.
  void put_line(char *line);
  char *get_line();
  char *get_prevline(int n);

  // Token handling; next_token() is virtual so that derived parsers can
  // replace the tokenizer.
  virtual char *next_token();
  int change_token(const char *word);
  void set_url_checking(int check);

  int get_tokenpos();
  int is_wordchar(char *w);
  const char *get_latin1(char *s);
  char *next_char();

  // URL handling.
  int tokenize_urls();
  void check_urls();
  int get_url(int token_pos, int *head);
  char *alloc_token(int token, int *head);
};

#endif
