patches/31.overflow.patch - hunspell - Git at Google

 Change 567444076 by sungyc@sungyc:fig-export-icing-153-change-421:6204:citc on 2023/09/21 15:58:36

 	[hunspell][vulnerability] Fix buffer overflow vulnerability

 	## Test Plan
 	```
 	sso_client -location 'https://clusterfuzz.corp.google.com/testcase-detail/download-testcase?id=4833834816307200' > /tmp/testcase-4833834816307200 && \
 	blaze --blazerc=/dev/null test -c opt --config=msan-fuzzer --test_strategy=local --test_sharding_strategy=disabled \
 	  --test_env=ENABLE_BLAZE_TEST_FUZZING=1 --test_arg=-rss_limit_mb=2560 --test_arg=-timeout=90 --test_arg=-runs=100 --test_arg=/tmp/testcase-4833834816307200 \
 	  //third_party/hunspell/fuzzers:dict_fuzzer
 	```

 	## Description
 	This security bug is caused by an inconsistent length after converting back and forth between UTF-8 and UTF-16. The length of `st` may be greater than the original `wbl`, which causes `add_word` to allocate a buffer that is too small to store `st`.

 	Recalculating the length of the C string after conversion will fix the issue.

 	PRESUBMIT=passed
 	BUG=293794756
 	R=tjbarron
 	CC=mghiware
 	APPROVED=tjbarron
 	REQUIRED_REVIEW=1
 	DELTA=11 (9 added, 0 deleted, 2 changed)
 	DELTA_BY_EXTENSION=cxx=11
 	OCL=567441268
 	FIG_CHANGESET=004a2f7bb331b9564a411dec60145f5f92379061
 	FIG_WORKSPACE=sungyc/153:icing
 	MARKDOWN=true

 Affected files ...

 ... //depot//src/hunspell/hashmgr.cxx#12 edit

 ==== //depot//src/hunspell/hashmgr.cxx#11 - /google/src/files/567444076/depot//src/hunspell/hashmgr.cxx ====
 --- /google/src/files/542907937/depot//src/hunspell/hashmgr.cxx	2023-06-23 14:09:48.000000000 -0400
 +++ /google/src/files/567444076/depot//src/hunspell/hashmgr.cxx	2023-09-21 18:58:36.000000000 -0400
 @@ -223,13 +223,22 @@
            if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
            flags2[al] = ONLYUPCASEFLAG;
            if (utf8) {
 -              char st[MAXWORDUTF8LEN];
 +              // reserve one additional space for '\0', due to u16_u8() may set
 +              // '\0' at position = MAXWORDUTF8LEN.
 +              char st[MAXWORDUTF8LEN + 1];
                w_char w[MAXWORDLEN];
                int wlen = u8_u16(w, MAXWORDLEN, word);
                mkallsmall_utf(w, wlen, langnum);
                mkallcap_utf(w, 1, langnum);
                u16_u8(st, MAXWORDUTF8LEN, w, wlen);
 -              return add_word(st,wbl,wcl,flags2,al+1,dp, true);
 +
 +              // The length may be different after converting back and forth
 +              // between utf8 and utf16, so we have to recalculate the length of
 +              // st.
 +              int st_captype;
 +              int st_wbl = strlen(st);
 +              int st_wcl = get_clen_and_captype(st, st_wbl, &st_captype);
 +              return add_word(st,st_wbl,st_wcl,flags2,al+1,dp, true);
             } else {
                 mkallsmall(word, csconv);
                 mkinitcap(word, csconv);
	Change 567444076 by sungyc@sungyc:fig-export-icing-153-change-421:6204:citc on 2023/09/21 15:58:36

	[hunspell][vulnerability] Fix buffer overflow vulnerability

	## Test Plan
	```
	sso_client -location 'https://clusterfuzz.corp.google.com/testcase-detail/download-testcase?id=4833834816307200' > /tmp/testcase-4833834816307200 && \
	blaze --blazerc=/dev/null test -c opt --config=msan-fuzzer --test_strategy=local --test_sharding_strategy=disabled \
	--test_env=ENABLE_BLAZE_TEST_FUZZING=1 --test_arg=-rss_limit_mb=2560 --test_arg=-timeout=90 --test_arg=-runs=100 --test_arg=/tmp/testcase-4833834816307200 \
	//third_party/hunspell/fuzzers:dict_fuzzer
	```

	## Description
	This security bug is caused by an inconsistent length after converting back and forth between UTF-8 and UTF-16. The length of `st` may be greater than the original `wbl`, which causes `add_word` to allocate a buffer that is too small to store `st`.

	Recalculating the length of the C string after conversion will fix the issue.

	PRESUBMIT=passed
	BUG=293794756
	R=tjbarron
	CC=mghiware
	APPROVED=tjbarron
	REQUIRED_REVIEW=1
	DELTA=11 (9 added, 0 deleted, 2 changed)
	DELTA_BY_EXTENSION=cxx=11
	OCL=567441268
	FIG_CHANGESET=004a2f7bb331b9564a411dec60145f5f92379061
	FIG_WORKSPACE=sungyc/153:icing
	MARKDOWN=true

	Affected files ...

	... //depot//src/hunspell/hashmgr.cxx#12 edit

	==== //depot//src/hunspell/hashmgr.cxx#11 - /google/src/files/567444076/depot//src/hunspell/hashmgr.cxx ====
	--- /google/src/files/542907937/depot//src/hunspell/hashmgr.cxx 2023-06-23 14:09:48.000000000 -0400
	+++ /google/src/files/567444076/depot//src/hunspell/hashmgr.cxx 2023-09-21 18:58:36.000000000 -0400
	@@ -223,13 +223,22 @@
	if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
	flags2[al] = ONLYUPCASEFLAG;
	if (utf8) {
	- char st[MAXWORDUTF8LEN];
	+ // reserve one additional space for '\0', due to u16_u8() may set
	+ // '\0' at position = MAXWORDUTF8LEN.
	+ char st[MAXWORDUTF8LEN + 1];
	w_char w[MAXWORDLEN];
	int wlen = u8_u16(w, MAXWORDLEN, word);
	mkallsmall_utf(w, wlen, langnum);
	mkallcap_utf(w, 1, langnum);
	u16_u8(st, MAXWORDUTF8LEN, w, wlen);
	- return add_word(st,wbl,wcl,flags2,al+1,dp, true);
	+
	+ // The length may be different after converting back and forth
	+ // between utf8 and utf16, so we have to recalculate the length of
	+ // st.
	+ int st_captype;
	+ int st_wbl = strlen(st);
	+ int st_wcl = get_clen_and_captype(st, st_wbl, &st_captype);
	+ return add_word(st,st_wbl,st_wcl,flags2,al+1,dp, true);
	} else {
	mkallsmall(word, csconv);
	mkinitcap(word, csconv);