blob: 3e4469a7ffab0b8a419f483b8695cfec9914a375 [file] [log] [blame]
Change 50722709 by junyangl@junyangl:latency:20:citc on 2013/08/12 11:06:20
Wrap the parser for Hunspell check.
Use a parameter/dynamic flag to control which parser to use.
Fix bug in hunspell TextParser to accept very long input.
PRESUBMIT=passed
R=lianglin,shine,jiho
CC=caribou-backend-reviews,caribou-prod-reviews
APPROVED=jiho,shine
DELTA=527 (420 added, 51 deleted, 56 changed)
OCL=50412080
Affected files ...
... //depot/google3/caribou/medley/lib/internal/medley_server.cc#239 edit
... //depot/google3/caribou/spell/BUILD#46 edit
... //depot/google3/caribou/spell/server_main.cc#7 edit
... //depot/google3/caribou/spell/spell_server.cc#15 edit
... //depot/google3/caribou/spell/spell_server.h#5 edit
... //depot/google3/caribou/spell/spellcheck.cc#61 edit
... //depot/google3/caribou/spell/spellcheck.h#24 edit
... //depot/google3/caribou/spell/spellcheck_benchmark.cc#2 edit
... //depot/google3/caribou/spell/spellcheck_parser.cc#1 add
... //depot/google3/caribou/spell/spellcheck_parser.h#1 add
... //depot/google3/caribou/spell/spellcheck_parser_test.cc#1 add
... //depot/google3/caribou/spell/spellcheck_unittest.cc#56 edit
... //depot/google3/caribou/spell/stats.cc#9 edit
... //depot/google3/caribou/spell/stats.h#8 edit
... //depot//README.google#4 edit
... //depot//src/parsers/htmlparser.cxx#3 edit
... //depot//src/parsers/htmlparser.hxx#2 edit
... //depot//src/parsers/textparser.cxx#2 edit
... //depot//src/parsers/textparser.hxx#2 edit
==== //depot//src/parsers/htmlparser.cxx#2 - /google/src/files/50722709/depot//src/parsers/htmlparser.cxx ====
--- /google/src/files/49864191/depot//src/parsers/htmlparser.cxx 2013-07-24 23:12:12.000000000 -0400
+++ /google/src/files/50722709/depot//src/parsers/htmlparser.cxx 2013-08-12 14:06:20.000000000 -0400
@@ -43,10 +43,8 @@
init(wordchars);
}
-HTMLParser::HTMLParser(unsigned short * wordchars, int len)
-{
- init(wordchars, len);
-}
+HTMLParser::HTMLParser(unsigned short *wordchars, int len, int text_len)
+ : TextParser(wordchars, len, text_len) {}
HTMLParser::~HTMLParser()
{
==== //depot//src/parsers/htmlparser.hxx#1 - /google/src/files/50722709/depot//src/parsers/htmlparser.hxx ====
--- /google/src/files/37375269/depot//src/parsers/htmlparser.hxx 2012-10-31 14:23:51.000000000 -0400
+++ /google/src/files/50722709/depot//src/parsers/htmlparser.hxx 2013-08-12 14:06:20.000000000 -0400
@@ -23,7 +23,7 @@
public:
HTMLParser(const char * wc);
- HTMLParser(unsigned short * wordchars, int len);
+ HTMLParser(unsigned short * wordchars, int len, int text_len);
virtual ~HTMLParser();
virtual char * next_token();
==== //depot//src/parsers/textparser.cxx#1 - /google/src/files/50722709/depot//src/parsers/textparser.cxx ====
--- /google/src/files/37375269/depot//src/parsers/textparser.cxx 2012-10-31 14:23:51.000000000 -0400
+++ /google/src/files/50722709/depot//src/parsers/textparser.cxx 2013-08-12 14:06:20.000000000 -0400
@@ -55,13 +55,25 @@
init(wordchars);
}
-TextParser::TextParser(unsigned short * wordchars, int len)
+TextParser::TextParser(unsigned short * wordchars, int len, int text_len)
{
- init(wordchars, len);
+ // Note(junyangl@google.com): Allocate line[] and urlline dynamically, sized
+ // from text_len, to avoid a segfault on very long input.
+ for (int i = 0; i < MAXPREVLINE; i++) {
+ line[i] = (char*)malloc(text_len + 1);
+ }
+ urlline = (char*)malloc(text_len + 1);
+
+ init(wordchars, len);
}
TextParser::~TextParser()
{
+ // Note(junyangl@google.com): Free dynamically allocated space.
+ for (int i = 0; i < MAXPREVLINE; i++) {
+ free(line[i]);
+ }
+ free(urlline);
}
int TextParser::is_wordchar(char * w)
==== //depot//src/parsers/textparser.hxx#1 - /google/src/files/50722709/depot//src/parsers/textparser.hxx ====
--- /google/src/files/37375269/depot//src/parsers/textparser.hxx 2012-10-31 14:23:51.000000000 -0400
+++ /google/src/files/50722709/depot//src/parsers/textparser.hxx 2013-08-12 14:06:20.000000000 -0400
@@ -29,8 +29,10 @@
void init(const char *);
void init(unsigned short * wordchars, int len);
int wordcharacters[256]; // for detection of the word boundaries
- char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines
- char urlline[MAXLNLEN]; // mask for url detection
+ // Note(junyangl@google.com): Dynamically allocate space for line and urlline
+ // according to input length, to avoid a segfault when the input is too long.
+ char * line[MAXPREVLINE]; // parsed and previous lines
+ char * urlline; // mask for url detection
int checkurl;
int actual; // actual line
int head; // head position
@@ -44,7 +46,7 @@
public:
TextParser();
- TextParser(unsigned short * wordchars, int len);
+ TextParser(unsigned short * wordchars, int len, int text_len);
TextParser(const char * wc);
virtual ~TextParser();