| Change 50722709 by junyangl@junyangl:latency:20:citc on 2013/08/12 11:06:20 |
| |
| Wrap the parser for Hunspell check. |
| Use a parameter/dynamic flag to control which parser to use. |
| Fix a bug in the Hunspell TextParser so that it accepts very long input. |
| |
| PRESUBMIT=passed |
| R=lianglin,shine,jiho |
| CC=caribou-backend-reviews,caribou-prod-reviews |
| APPROVED=jiho,shine |
| DELTA=527 (420 added, 51 deleted, 56 changed) |
| OCL=50412080 |
| |
| Affected files ... |
| |
| ... //depot/google3/caribou/medley/lib/internal/medley_server.cc#239 edit |
| ... //depot/google3/caribou/spell/BUILD#46 edit |
| ... //depot/google3/caribou/spell/server_main.cc#7 edit |
| ... //depot/google3/caribou/spell/spell_server.cc#15 edit |
| ... //depot/google3/caribou/spell/spell_server.h#5 edit |
| ... //depot/google3/caribou/spell/spellcheck.cc#61 edit |
| ... //depot/google3/caribou/spell/spellcheck.h#24 edit |
| ... //depot/google3/caribou/spell/spellcheck_benchmark.cc#2 edit |
| ... //depot/google3/caribou/spell/spellcheck_parser.cc#1 add |
| ... //depot/google3/caribou/spell/spellcheck_parser.h#1 add |
| ... //depot/google3/caribou/spell/spellcheck_parser_test.cc#1 add |
| ... //depot/google3/caribou/spell/spellcheck_unittest.cc#56 edit |
| ... //depot/google3/caribou/spell/stats.cc#9 edit |
| ... //depot/google3/caribou/spell/stats.h#8 edit |
| ... //depot//README.google#4 edit |
| ... //depot//src/parsers/htmlparser.cxx#3 edit |
| ... //depot//src/parsers/htmlparser.hxx#2 edit |
| ... //depot//src/parsers/textparser.cxx#2 edit |
| ... //depot//src/parsers/textparser.hxx#2 edit |
| |
| ==== //depot//src/parsers/htmlparser.cxx#2 - /google/src/files/50722709/depot//src/parsers/htmlparser.cxx ==== |
| --- /google/src/files/49864191/depot//src/parsers/htmlparser.cxx 2013-07-24 23:12:12.000000000 -0400 |
| +++ /google/src/files/50722709/depot//src/parsers/htmlparser.cxx 2013-08-12 14:06:20.000000000 -0400 |
| @@ -43,10 +43,8 @@ |
| init(wordchars); |
| } |
| |
| -HTMLParser::HTMLParser(unsigned short * wordchars, int len) |
| -{ |
| - init(wordchars, len); |
| -} |
| +HTMLParser::HTMLParser(unsigned short *wordchars, int len, int text_len) |
| + : TextParser(wordchars, len, text_len) {} |
| |
| HTMLParser::~HTMLParser() |
| { |
| ==== //depot//src/parsers/htmlparser.hxx#1 - /google/src/files/50722709/depot//src/parsers/htmlparser.hxx ==== |
| --- /google/src/files/37375269/depot//src/parsers/htmlparser.hxx 2012-10-31 14:23:51.000000000 -0400 |
| +++ /google/src/files/50722709/depot//src/parsers/htmlparser.hxx 2013-08-12 14:06:20.000000000 -0400 |
| @@ -23,7 +23,7 @@ |
| public: |
| |
| HTMLParser(const char * wc); |
| - HTMLParser(unsigned short * wordchars, int len); |
| + HTMLParser(unsigned short * wordchars, int len, int text_len); |
| virtual ~HTMLParser(); |
| |
| virtual char * next_token(); |
| ==== //depot//src/parsers/textparser.cxx#1 - /google/src/files/50722709/depot//src/parsers/textparser.cxx ==== |
| --- /google/src/files/37375269/depot//src/parsers/textparser.cxx 2012-10-31 14:23:51.000000000 -0400 |
| +++ /google/src/files/50722709/depot//src/parsers/textparser.cxx 2013-08-12 14:06:20.000000000 -0400 |
| @@ -55,13 +55,25 @@ |
| init(wordchars); |
| } |
| |
| -TextParser::TextParser(unsigned short * wordchars, int len) |
| +TextParser::TextParser(unsigned short * wordchars, int len, int text_len) |
| { |
| - init(wordchars, len); |
| + // Note(junyangl@google.com): Dynamically allocate space according to text_len |
| + // to avoid a segfault when the input is too long. |
| + for (int i = 0; i < MAXPREVLINE; i++) { |
| + line[i] = (char*)malloc(text_len + 1); |
| + } |
| + urlline = (char*)malloc(text_len + 1); |
| + |
| + init(wordchars, len); |
| } |
| |
| TextParser::~TextParser() |
| { |
| + // Note(junyangl@google.com): Free dynamically allocated space. |
| + for (int i = 0; i < MAXPREVLINE; i++) { |
| + free(line[i]); |
| + } |
| + free(urlline); |
| } |
| |
| int TextParser::is_wordchar(char * w) |
| ==== //depot//src/parsers/textparser.hxx#1 - /google/src/files/50722709/depot//src/parsers/textparser.hxx ==== |
| --- /google/src/files/37375269/depot//src/parsers/textparser.hxx 2012-10-31 14:23:51.000000000 -0400 |
| +++ /google/src/files/50722709/depot//src/parsers/textparser.hxx 2013-08-12 14:06:20.000000000 -0400 |
| @@ -29,8 +29,10 @@ |
| void init(const char *); |
| void init(unsigned short * wordchars, int len); |
| int wordcharacters[256]; // for detection of the word boundaries |
| - char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines |
| - char urlline[MAXLNLEN]; // mask for url detection |
| + // Note(junyangl@google.com): Dynamically allocate space for line and urlline |
| + // according to input length, to avoid a segfault when the input is too long. |
| + char * line[MAXPREVLINE]; // parsed and previous lines |
| + char * urlline; // mask for url detection |
| int checkurl; |
| int actual; // actual line |
| int head; // head position |
| @@ -44,7 +46,7 @@ |
| public: |
| |
| TextParser(); |
| - TextParser(unsigned short * wordchars, int len); |
| + TextParser(unsigned short * wordchars, int len, int text_len); |
| TextParser(const char * wc); |
| virtual ~TextParser(); |
| |