patches/01.korean_affix.patch - hunspell - Git at Google

 Change 16091682 by jiho@jiho-earthsea-medley-review-fix_segfault-git5 on 2010/06/16 01:13:54

 	Fix buffer allocation problem of hunspell, especially for Korean Affix which
 	doesn't use UTF-8 as its base encoding.

 	PRESUBMIT=passed
 	BUG=2661104
 	R=jayr
 	DELTA=40  (9 added, 25 deleted, 6 changed)
 	OCL=16077686

 Affected files ...

 ... //depot//hunspell_1_2_8/README.google#2 edit
 ... //depot//hunspell_1_2_8/src/hunspell/atypes.hxx#2 edit
 ... //depot//hunspell_1_2_8/src/hunspell/hunspell.cxx#3 edit

 ==== //depot//src/hunspell/atypes.hxx#1 - /google/src/files/16091682/depot//hunspell_1_2_8/src/hunspell/atypes.hxx ====
 --- /google/src/files/13856229/depot//src/hunspell/atypes.hxx	2009-12-08 16:54:13.000000000 -0500
 +++ /google/src/files/16091682/depot//src/hunspell/atypes.hxx	2010-06-16 04:13:54.000000000 -0400
 @@ -19,7 +19,13 @@
  #define SETSIZE         256
  #define CONTSIZE        65536
  #define MAXWORDLEN      100
 -#define MAXWORDUTF8LEN  256
 +// Note(jiho@google.com): The Korean Hunspell dictionary doesn't use UTF-8
 +// directly. It decomposes one single Korean character into three characters,
 +// so one single UTF-8 Korean character (3 bytes) will eventually take 9 bytes.
 +// At least MAXWORDLEN * 3 bytes are required to handle the Korean dictionary.
 +// The Korean dictionary also has a word that is 513 bytes long, so this value
 +// was raised to 550 to cover it. Otherwise, hunspell dies with a segfault.
 +#define MAXWORDUTF8LEN 550

  // affentry options
  #define aeXPRODUCT      (1 << 0)
	Change 16091682 by jiho@jiho-earthsea-medley-review-fix_segfault-git5 on 2010/06/16 01:13:54

	Fix buffer allocation problem of hunspell, especially for Korean Affix which
	doesn't use UTF-8 as its base encoding.

	PRESUBMIT=passed
	BUG=2661104
	R=jayr
	DELTA=40 (9 added, 25 deleted, 6 changed)
	OCL=16077686

	Affected files ...

	... //depot//hunspell_1_2_8/README.google#2 edit
	... //depot//hunspell_1_2_8/src/hunspell/atypes.hxx#2 edit
	... //depot//hunspell_1_2_8/src/hunspell/hunspell.cxx#3 edit

	==== //depot//src/hunspell/atypes.hxx#1 - /google/src/files/16091682/depot//hunspell_1_2_8/src/hunspell/atypes.hxx ====
	--- /google/src/files/13856229/depot//src/hunspell/atypes.hxx 2009-12-08 16:54:13.000000000 -0500
	+++ /google/src/files/16091682/depot//src/hunspell/atypes.hxx 2010-06-16 04:13:54.000000000 -0400
	@@ -19,7 +19,13 @@
	#define SETSIZE 256
	#define CONTSIZE 65536
	#define MAXWORDLEN 100
	-#define MAXWORDUTF8LEN 256
	+// Note(jiho@google.com): The Korean Hunspell dictionary doesn't use UTF-8
	+// directly. It decomposes one single Korean character into three characters,
	+// so one single UTF-8 Korean character (3 bytes) will eventually take 9 bytes.
	+// At least MAXWORDLEN * 3 bytes are required to handle the Korean dictionary.
	+// The Korean dictionary also has a word that is 513 bytes long, so this value
	+// was raised to 550 to cover it. Otherwise, hunspell dies with a segfault.
	+#define MAXWORDUTF8LEN 550

	// affentry options
	#define aeXPRODUCT (1 << 0)