patches/02.buffer_overrun.patch - hunspell - Git at Google

 Change 18870443 by jiho@jiho-earthsea-medley-work-git5 on 2011/01/06 23:29:34

 	Fixes a buffer overrun in hunspell-1.2.12 that occurs when a Korean word is
 	longer than MAXWORDUTF8LEN/3 bytes, by changing the input length checking
 	code. Hunspell uses internal buffers that are MAXWORDUTF8LEN bytes long, but
 	a very long Korean word can overrun those buffers because the Korean
 	dictionary converts one UTF-8 character to 3 UTF-8 characters.

 	PRESUBMIT=passed
 	BUG=2962572
 	R=jayr,jaisunda
 	CC=caribou-backend-reviews
 	APPROVED=jaisunda
 	DELTA=55  (44 added, 0 deleted, 11 changed)
 	OCL=18854064

 Affected files ...

 ... //depot/google3/caribou/spell/spellcheck_unittest.cc#43 edit
 ... //depot//hunspell_1_2_12/README.google#2 edit
 ... //depot//hunspell_1_2_12/src/hunspell/hunspell.cxx#2 edit
 ... //depot//hunspell_1_2_12/src/hunspell/replist.cxx#2 edit
 ... //depot//hunspell_1_2_12/src/hunspell/replist.hxx#2 edit

 ==== //depot//src/hunspell/hunspell.cxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/hunspell.cxx ====
 --- /google/src/files/17885847/depot//src/hunspell/hunspell.cxx	2010-10-27 21:25:42.000000000 -0400
 +++ /google/src/files/18870443/depot//src/hunspell/hunspell.cxx	2011-01-07 02:29:34.000000000 -0500
 @@ -346,8 +346,8 @@

    // input conversion
    RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
 -  if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 -  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 +  if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 +  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

    int info2 = 0;
    if (wl == 0 || maxdic == 0) return 1;
 @@ -685,8 +687,8 @@

    // input conversion
    RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
 -  if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 -  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 +  if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 +  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

    if (wl == 0) return 0;
    int ns = 0;
 @@ -971,7 +975,7 @@
    // output conversion
    rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
    for (int j = 0; rl && j < ns; j++) {
 -    if (rl->conv((*slst)[j], wspace)) {
 +    if (rl->conv((*slst)[j], wspace, sizeof(wspace))) {
        free((*slst)[j]);
        (*slst)[j] = mystrdup(wspace);
      }
 @@ -1346,8 +1350,8 @@

    // input conversion
    RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
 -  if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 -  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 +  if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 +  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

    if (wl == 0) {
        if (abbv) {
 ==== //depot//src/hunspell/replist.cxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/replist.cxx ====
 --- /google/src/files/17885847/depot//src/hunspell/replist.cxx	2010-10-27 21:25:42.000000000 -0400
 +++ /google/src/files/18870443/depot//src/hunspell/replist.cxx	2011-01-07 02:29:34.000000000 -0500
 @@ -69,7 +69,10 @@
      return 0;
  }

 -int RepList::conv(const char * word, char * dest) {
 +int RepList::conv(const char * word, char * dest, unsigned int dest_size) {
 +    // NOTE: Changed to check the size of dest buffer.
 +    // Korean dictionary converts one utf-8 character to 3 utf-8 characters, and
 +    // it can lead to buffer overrun errors.
      int stl = 0;
      int change = 0;
      for (size_t i = 0; i < strlen(word); i++) {
 @@ -77,11 +80,20 @@
          int n = near(word + i);
          int l = match(word + i, n);
          if (l) {
 +          int pattern_len = strlen(dat[n]->pattern2);
 +          if ((stl + pattern_len) >= dest_size) {
 +            break;
 +          }
            strcpy(dest + stl, dat[n]->pattern2);
 -          stl += strlen(dat[n]->pattern2);
 +          stl += pattern_len;
            i += l - 1;
            change = 1;
 -        } else dest[stl++] = word[i];
 +        } else {
 +          if ((stl + 1) >= dest_size) {
 +            break;
 +          }
 +          dest[stl++] = word[i];
 +        }
      }
      dest[stl] = '\0';
      return change;
 ==== //depot//src/hunspell/replist.hxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/replist.hxx ====
 --- /google/src/files/17885847/depot//src/hunspell/replist.hxx	2010-10-27 21:25:42.000000000 -0400
 +++ /google/src/files/18870443/depot//src/hunspell/replist.hxx	2011-01-07 02:29:34.000000000 -0500
 @@ -22,6 +22,6 @@
      replentry * item(int n);
      int near(const char * word);
      int match(const char * word, int n);
 -    int conv(const char * word, char * dest);
 +    int conv(const char * word, char * dest, unsigned int dest_size);
  };
  #endif
	Change 18870443 by jiho@jiho-earthsea-medley-work-git5 on 2011/01/06 23:29:34

	Fixes a buffer overrun in hunspell-1.2.12 that occurs when a Korean word is
	longer than MAXWORDUTF8LEN/3 bytes, by changing the input length checking
	code. Hunspell uses internal buffers that are MAXWORDUTF8LEN bytes long, but
	a very long Korean word can overrun those buffers because the Korean
	dictionary converts one UTF-8 character to 3 UTF-8 characters.

	PRESUBMIT=passed
	BUG=2962572
	R=jayr,jaisunda
	CC=caribou-backend-reviews
	APPROVED=jaisunda
	DELTA=55 (44 added, 0 deleted, 11 changed)
	OCL=18854064

	Affected files ...

	... //depot/google3/caribou/spell/spellcheck_unittest.cc#43 edit
	... //depot//hunspell_1_2_12/README.google#2 edit
	... //depot//hunspell_1_2_12/src/hunspell/hunspell.cxx#2 edit
	... //depot//hunspell_1_2_12/src/hunspell/replist.cxx#2 edit
	... //depot//hunspell_1_2_12/src/hunspell/replist.hxx#2 edit

	==== //depot//src/hunspell/hunspell.cxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/hunspell.cxx ====
	--- /google/src/files/17885847/depot//src/hunspell/hunspell.cxx 2010-10-27 21:25:42.000000000 -0400
	+++ /google/src/files/18870443/depot//src/hunspell/hunspell.cxx 2011-01-07 02:29:34.000000000 -0500
	@@ -346,8 +346,8 @@

	// input conversion
	RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
	- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
	- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
	+ if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
	+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

	int info2 = 0;
	if (wl == 0 || maxdic == 0) return 1;
	@@ -685,8 +687,8 @@

	// input conversion
	RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
	- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
	- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
	+ if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
	+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

	if (wl == 0) return 0;
	int ns = 0;
	@@ -971,7 +975,7 @@
	// output conversion
	rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
	for (int j = 0; rl && j < ns; j++) {
	- if (rl->conv((*slst)[j], wspace)) {
	+ if (rl->conv((*slst)[j], wspace, sizeof(wspace))) {
	free((*slst)[j]);
	(*slst)[j] = mystrdup(wspace);
	}
	@@ -1346,8 +1350,8 @@

	// input conversion
	RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
	- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
	- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
	+ if (rl && rl->conv(word, wspace, sizeof(wspace))) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
	+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

	if (wl == 0) {
	if (abbv) {
	==== //depot//src/hunspell/replist.cxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/replist.cxx ====
	--- /google/src/files/17885847/depot//src/hunspell/replist.cxx 2010-10-27 21:25:42.000000000 -0400
	+++ /google/src/files/18870443/depot//src/hunspell/replist.cxx 2011-01-07 02:29:34.000000000 -0500
	@@ -69,7 +69,10 @@
	return 0;
	}

	-int RepList::conv(const char * word, char * dest) {
	+int RepList::conv(const char * word, char * dest, unsigned int dest_size) {
	+ // NOTE: Changed to check the size of dest buffer.
	+ // Korean dictionary converts one utf-8 character to 3 utf-8 characters, and
	+ // it can lead to buffer overrun errors.
	int stl = 0;
	int change = 0;
	for (size_t i = 0; i < strlen(word); i++) {
	@@ -77,11 +80,20 @@
	int n = near(word + i);
	int l = match(word + i, n);
	if (l) {
	+ int pattern_len = strlen(dat[n]->pattern2);
	+ if ((stl + pattern_len) >= dest_size) {
	+ break;
	+ }
	strcpy(dest + stl, dat[n]->pattern2);
	- stl += strlen(dat[n]->pattern2);
	+ stl += pattern_len;
	i += l - 1;
	change = 1;
	- } else dest[stl++] = word[i];
	+ } else {
	+ if ((stl + 1) >= dest_size) {
	+ break;
	+ }
	+ dest[stl++] = word[i];
	+ }
	}
	dest[stl] = '\0';
	return change;
	==== //depot//src/hunspell/replist.hxx#1 - /google/src/files/18870443/depot//hunspell_1_2_12/src/hunspell/replist.hxx ====
	--- /google/src/files/17885847/depot//src/hunspell/replist.hxx 2010-10-27 21:25:42.000000000 -0400
	+++ /google/src/files/18870443/depot//src/hunspell/replist.hxx 2011-01-07 02:29:34.000000000 -0500
	@@ -22,6 +22,6 @@
	replentry * item(int n);
	int near(const char * word);
	int match(const char * word, int n);
	- int conv(const char * word, char * dest);
	+ int conv(const char * word, char * dest, unsigned int dest_size);
	};
	#endif