blob: da174aca38908fad8fba04ce5d31df39684e092e [file] [log] [blame] [edit]
Change 18870443 by jiho@jiho-earthsea-medley-work-git5 on 2011/01/06 23:29:34
Fixes a buffer overrun problem in hunspell-1.2.12 that occurs when a Korean word
is longer than MAXWORDUTF8LEN/3 bytes, by changing the input length checking
code. Hunspell uses internal buffers that are MAXWORDUTF8LEN bytes long, but a
very long Korean word can overrun those buffers because the Korean dictionary
converts one UTF-8 character to 3 UTF-8 characters.
PRESUBMIT=passed
BUG=2962572
R=jayr,jaisunda
CC=caribou-backend-reviews
APPROVED=jaisunda
DELTA=55 (44 added, 0 deleted, 11 changed)
OCL=18854064
Affected files ...
... //depot/google3/caribou/spell/spellcheck_unittest.cc#43 edit
... //depot//hunspell_1_2_12/README.google#2 edit
... //depot//hunspell_1_2_12/src/hunspell/hunspell.cxx#2 edit
... //depot//hunspell_1_2_12/src/hunspell/replist.cxx#2 edit
... //depot//hunspell_1_2_12/src/hunspell/replist.hxx#2 edit
==== //depot//src/hunspell/hunspell.cxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/hunspell.cxx ====
--- /google/src/files/17885847/depot//src/hunspell/hunspell.cxx 2010-10-27 21:25:42.000000000 -0400
+++ /google/src/files/18870443/depot//src/hunspell/hunspell.cxx 2011-01-07 02:29:34.000000000 -0500
@@ -346,8 +346,8 @@
// input conversion
RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+ if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
int info2 = 0;
if (wl == 0 || maxdic == 0) return 1;
@@ -685,8 +687,8 @@
// input conversion
RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+ if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
if (wl == 0) return 0;
int ns = 0;
@@ -971,7 +975,7 @@
// output conversion
rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
for (int j = 0; rl && j < ns; j++) {
- if (rl->conv((*slst)[j], wspace)) {
+ if (rl->conv((*slst)[j], wspace, sizeof(wspace))) {
free((*slst)[j]);
(*slst)[j] = mystrdup(wspace);
}
@@ -1346,8 +1350,8 @@
// input conversion
RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+ if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
if (wl == 0) {
if (abbv) {
==== //depot//src/hunspell/replist.cxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/replist.cxx ====
--- /google/src/files/17885847/depot//src/hunspell/replist.cxx 2010-10-27 21:25:42.000000000 -0400
+++ /google/src/files/18870443/depot//src/hunspell/replist.cxx 2011-01-07 02:29:34.000000000 -0500
@@ -69,7 +69,10 @@
return 0;
}
-int RepList::conv(const char * word, char * dest) {
+int RepList::conv(const char * word, char * dest, unsigned int dest_size) {
+ // NOTE: Changed to check the size of dest buffer.
+ // Korean dictionary converts one utf-8 character to 3 utf-8 characters, and
+ // it can lead to buffer overrun errors.
int stl = 0;
int change = 0;
for (size_t i = 0; i < strlen(word); i++) {
@@ -77,11 +80,20 @@
int n = near(word + i);
int l = match(word + i, n);
if (l) {
+ int pattern_len = strlen(dat[n]->pattern2);
+ if ((stl + pattern_len) >= dest_size) {
+ break;
+ }
strcpy(dest + stl, dat[n]->pattern2);
- stl += strlen(dat[n]->pattern2);
+ stl += pattern_len;
i += l - 1;
change = 1;
- } else dest[stl++] = word[i];
+ } else {
+ if ((stl + 1) >= dest_size) {
+ break;
+ }
+ dest[stl++] = word[i];
+ }
}
dest[stl] = '\0';
return change;
==== //depot//src/hunspell/replist.hxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/replist.hxx ====
--- /google/src/files/17885847/depot//src/hunspell/replist.hxx 2010-10-27 21:25:42.000000000 -0400
+++ /google/src/files/18870443/depot//src/hunspell/replist.hxx 2011-01-07 02:29:34.000000000 -0500
@@ -22,6 +22,6 @@
replentry * item(int n);
int near(const char * word);
int match(const char * word, int n);
- int conv(const char * word, char * dest);
+ int conv(const char * word, char * dest, unsigned int dest_size);
};
#endif