patches/10.mkinit_overflow.patch - hunspell - Git at Google

 Change 160151083 by lbaudoin@lbaudoin:hunspell-fix:6013:citc on 2017/06/26 09:01:35

 	Fix a buffer overflow in hunspell's mkinit* case-conversion functions.
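 	The fix is flawed (the converted output is now limited to the original string's length, so it may be cut short when the case-converted character is longer in UTF-8 than the original one), but:
 	 - It prevents the buffer overflows.
 	 - I think that it works for the dictionaries we use with hunspell.
 	 - The failure mode is getting a bad suggestion or no suggestions, so I think we can live with it.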

 	A real fix would be to use std::string, allocate more space than needed, or allow reallocation.
 	All of these require extensive changes to an outdated version of hunspell (the checked-in version is 1.3; the current version, 1.6, doesn't have these issues). Our plan is to deprecate all uses of hunspell instead of investing the time needed to either upgrade it or fully fix it.

 	PRESUBMIT=passed
 	BUG=28718593
 	R=bgoodman,shine
 	CC=gmail-security+reviews,sfen
 	APPROVED=shine
 	REQUIRED_REVIEW=1
 	DELTA=19 (14 added, 2 deleted, 3 changed)
 	DELTA_BY_EXTENSION=cxx=10
 	OCL=159994634

 Affected files ...

 ... //depot//BUILD#16 edit
 ... //depot//src/hunspell/hunspell.cxx#5 edit

 ==== //depot//src/hunspell/hunspell.cxx#4 - /google/src/files/160151083/depot//src/hunspell/hunspell.cxx ====
 --- /google/src/files/151456944/depot//src/hunspell/hunspell.cxx	2017-03-28 12:13:02.000000000 -0400
 +++ /google/src/files/160151083/depot//src/hunspell/hunspell.cxx	2017-06-26 12:01:35.000000000 -0400
 @@ -1297,7 +1297,9 @@
        unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
        u[0].h = (unsigned char) (i >> 8);
        u[0].l = (unsigned char) (i & 0x00FF);
 -      u16_u8(p, MAXWORDUTF8LEN, u, len);
 +      // TODO: p might not be long enough if the upper case character
 +      // is longer in UTF-8 than the original one.
 +      u16_u8(p, strlen(p), u, len);
    }
  }

 @@ -1309,7 +1311,9 @@
        unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
        u[0].h = (unsigned char) (i >> 8);
        u[0].l = (unsigned char) (i & 0x00FF);
 -      u16_u8(p, MAXWORDUTF8LEN, u, nc);
 +      // TODO: p might not be long enough if the upper case character
 +      // is longer in UTF-8 than the original one.
 +      u16_u8(p, strlen(p), u, nc);
        return strlen(p);
    }
    return nc;
 @@ -1323,7 +1327,10 @@
        unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
        u[0].h = (unsigned char) (i >> 8);
        u[0].l = (unsigned char) (i & 0x00FF);
 -      u16_u8(p, MAXWORDUTF8LEN, u, nc);
 +      auto len = strlen(p);
 +      // TODO: p might not be long enough if the lower case character
 +      // is longer in UTF-8 than the original one.
 +      u16_u8(p, len, u, nc);
        return strlen(p);
    }
    return nc;
	Change 160151083 by lbaudoin@lbaudoin:hunspell-fix:6013:citc on 2017/06/26 09:01:35

	Fix a buffer overflow in hunspell's mkinit* case-conversion functions.
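	The fix is flawed (the converted output is now limited to the original string's length, so it may be cut short when the case-converted character is longer in UTF-8 than the original one), but:
	- It prevents the buffer overflows.
	- I think that it works for the dictionaries we use with hunspell.
	- The failure mode is getting a bad suggestion or no suggestions, so I think we can live with it.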

	A real fix would be to use std::string, allocate more space than needed, or allow reallocation.
	All of these require extensive changes to an outdated version of hunspell (the checked-in version is 1.3; the current version, 1.6, doesn't have these issues). Our plan is to deprecate all uses of hunspell instead of investing the time needed to either upgrade it or fully fix it.

	PRESUBMIT=passed
	BUG=28718593
	R=bgoodman,shine
	CC=gmail-security+reviews,sfen
	APPROVED=shine
	REQUIRED_REVIEW=1
	DELTA=19 (14 added, 2 deleted, 3 changed)
	DELTA_BY_EXTENSION=cxx=10
	OCL=159994634

	Affected files ...

	... //depot//BUILD#16 edit
	... //depot//src/hunspell/hunspell.cxx#5 edit

	==== //depot//src/hunspell/hunspell.cxx#4 - /google/src/files/160151083/depot//src/hunspell/hunspell.cxx ====
	--- /google/src/files/151456944/depot//src/hunspell/hunspell.cxx 2017-03-28 12:13:02.000000000 -0400
	+++ /google/src/files/160151083/depot//src/hunspell/hunspell.cxx 2017-06-26 12:01:35.000000000 -0400
	@@ -1297,7 +1297,9 @@
	unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
	u[0].h = (unsigned char) (i >> 8);
	u[0].l = (unsigned char) (i & 0x00FF);
	- u16_u8(p, MAXWORDUTF8LEN, u, len);
	+ // TODO: p might not be long enough if the upper case character
	+ // is longer in UTF-8 than the original one.
	+ u16_u8(p, strlen(p), u, len);
	}
	}

	@@ -1309,7 +1311,9 @@
	unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
	u[0].h = (unsigned char) (i >> 8);
	u[0].l = (unsigned char) (i & 0x00FF);
	- u16_u8(p, MAXWORDUTF8LEN, u, nc);
	+ // TODO: p might not be long enough if the upper case character
	+ // is longer in UTF-8 than the original one.
	+ u16_u8(p, strlen(p), u, nc);
	return strlen(p);
	}
	return nc;
	@@ -1323,7 +1327,10 @@
	unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
	u[0].h = (unsigned char) (i >> 8);
	u[0].l = (unsigned char) (i & 0x00FF);
	- u16_u8(p, MAXWORDUTF8LEN, u, nc);
	+ auto len = strlen(p);
	+ // TODO: p might not be long enough if the lower case character
	+ // is longer in UTF-8 than the original one.
	+ u16_u8(p, len, u, nc);
	return strlen(p);
	}
	return nc;