| Change 18870443 by jiho@jiho-earthsea-medley-work-git5 on 2011/01/06 23:29:34 |
| |
| Fixes a buffer overrun in hunspell-1.2.12 that occurs when a Korean word is |
| longer than MAXWORDUTF8LEN/3 bytes, by changing the input length checking |
| code. Hunspell uses MAXWORDUTF8LEN-byte internal buffers, but a very long |
| Korean word can overrun those buffers because the Korean dictionary converts |
| one UTF-8 character to 3 UTF-8 characters. |
| |
| PRESUBMIT=passed |
| BUG=2962572 |
| R=jayr,jaisunda |
| CC=caribou-backend-reviews |
| APPROVED=jaisunda |
| DELTA=55 (44 added, 0 deleted, 11 changed) |
| OCL=18854064 |
| |
| Affected files ... |
| |
| ... //depot/google3/caribou/spell/spellcheck_unittest.cc#43 edit |
| ... //depot//hunspell_1_2_12/README.google#2 edit |
| ... //depot//hunspell_1_2_12/src/hunspell/hunspell.cxx#2 edit |
| ... //depot//hunspell_1_2_12/src/hunspell/replist.cxx#2 edit |
| ... //depot//hunspell_1_2_12/src/hunspell/replist.hxx#2 edit |
| |
| ==== //depot//src/hunspell/hunspell.cxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/hunspell.cxx ==== |
| --- /google/src/files/17885847/depot//src/hunspell/hunspell.cxx 2010-10-27 21:25:42.000000000 -0400 |
| +++ /google/src/files/18870443/depot//src/hunspell/hunspell.cxx 2011-01-07 02:29:34.000000000 -0500 |
| @@ -346,8 +346,8 @@ |
| |
| // input conversion |
| RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; |
| - if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); |
| - else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); |
| + if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); |
| + else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); |
| |
| int info2 = 0; |
| if (wl == 0 || maxdic == 0) return 1; |
| @@ -685,8 +687,8 @@ |
| |
| // input conversion |
| RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; |
| - if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); |
| - else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); |
| + if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); |
| + else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); |
| |
| if (wl == 0) return 0; |
| int ns = 0; |
| @@ -971,7 +975,7 @@ |
| // output conversion |
| rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; |
| for (int j = 0; rl && j < ns; j++) { |
| - if (rl->conv((*slst)[j], wspace)) { |
| + if (rl->conv((*slst)[j], wspace, sizeof(wspace))) { |
| free((*slst)[j]); |
| (*slst)[j] = mystrdup(wspace); |
| } |
| @@ -1346,8 +1350,8 @@ |
| |
| // input conversion |
| RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; |
| - if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); |
| - else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); |
| + if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); |
| + else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); |
| |
| if (wl == 0) { |
| if (abbv) { |
| ==== //depot//src/hunspell/replist.cxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/replist.cxx ==== |
| --- /google/src/files/17885847/depot//src/hunspell/replist.cxx 2010-10-27 21:25:42.000000000 -0400 |
| +++ /google/src/files/18870443/depot//src/hunspell/replist.cxx 2011-01-07 02:29:34.000000000 -0500 |
| @@ -69,7 +69,10 @@ |
| return 0; |
| } |
| |
| -int RepList::conv(const char * word, char * dest) { |
| +int RepList::conv(const char * word, char * dest, unsigned int dest_size) { |
| + // NOTE: Changed to check the size of dest buffer. |
| + // Korean dictionary converts one utf-8 character to 3 utf-8 characters, and |
| + // it can lead to buffer overrun errors. |
| int stl = 0; |
| int change = 0; |
| for (size_t i = 0; i < strlen(word); i++) { |
| @@ -77,11 +80,20 @@ |
| int n = near(word + i); |
| int l = match(word + i, n); |
| if (l) { |
| + int pattern_len = strlen(dat[n]->pattern2); |
| + if ((stl + pattern_len) >= dest_size) { |
| + break; |
| + } |
| strcpy(dest + stl, dat[n]->pattern2); |
| - stl += strlen(dat[n]->pattern2); |
| + stl += pattern_len; |
| i += l - 1; |
| change = 1; |
| - } else dest[stl++] = word[i]; |
| + } else { |
| + if ((stl + 1) >= dest_size) { |
| + break; |
| + } |
| + dest[stl++] = word[i]; |
| + } |
| } |
| dest[stl] = '\0'; |
| return change; |
| ==== //depot//src/hunspell/replist.hxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/replist.hxx ==== |
| --- /google/src/files/17885847/depot//src/hunspell/replist.hxx 2010-10-27 21:25:42.000000000 -0400 |
| +++ /google/src/files/18870443/depot//src/hunspell/replist.hxx 2011-01-07 02:29:34.000000000 -0500 |
| @@ -22,6 +22,6 @@ |
| replentry * item(int n); |
| int near(const char * word); |
| int match(const char * word, int n); |
| - int conv(const char * word, char * dest); |
| + int conv(const char * word, char * dest, unsigned int dest_size); |
| }; |
| #endif |