| From 6df43f8b17ff16e8b6124dee2bd47cbd05a89e30 Mon Sep 17 00:00:00 2001 |
| From: Anne-Edgar WILKE <wilke.ae@gmail.com> |
| Date: Fri, 28 Aug 2015 00:38:22 +0200 |
| Subject: [PATCH] Fix COMPOUNDHYPHENMIN=1 compound hyphenation |
| |
| FIRST BUG |
| --------- |
| |
| Problem |
| |
| In a compound word, word parts that are exactly two characters long |
| are never hyphenated. |
| |
| Example |
| |
| To reproduce the bug, go to the hyphen-2.8.8 directory and run the |
| following: |
| |
| echo "\ |
| UTF-8 |
| LEFTHYPHENMIN 1 |
| RIGHTHYPHENMIN 1 |
| COMPOUNDLEFTHYPHENMIN 1 |
| COMPOUNDRIGHTHYPHENMIN 1 |
| .post1 |
| NEXTLEVEL |
| e1 |
| a1 |
| " > hyphen.pat |
| |
| ./example hyphen.pat <(echo postea) |
| |
| The output is post=ea, but it should be post=e=a. |
| |
| If you replace postea with posteaque in the command above, you get |
| post=e=a=que, which is correct. Indeed, the component "eaque" is now |
| five characters long, so it is hyphenated. |
| |
| If you replace postea with ea, you get e=a, which is also correct; |
| this is because ea is not a compound word. |
| |
| Solution |
| |
| In the file hyphen.c, line 966, "if (i - begin > 1)" must be replaced |
| with "if (i - begin > 0)". |
| The word part spans the characters from index begin to index i |
| inclusive, so its length is i - begin + 1. To hyphenate word parts of |
| length 2 and above, the check must therefore be i - begin + 1 >= 2, |
| i.e. i - begin > 0. |
| |
| SECOND BUG |
| ---------- |
| |
| Problem |
| |
| In a compound word, word parts are never hyphenated between their |
| second-to-last and last characters. |
| |
| Example |
| |
| To reproduce the bug, run the following: |
| |
| echo "\ |
| UTF-8 |
| LEFTHYPHENMIN 1 |
| RIGHTHYPHENMIN 1 |
| COMPOUNDLEFTHYPHENMIN 1 |
| COMPOUNDRIGHTHYPHENMIN 1 |
| 1que. |
| NEXTLEVEL |
| e1 |
| " > hyphen.pat |
| |
| ./example hyphen.pat <(echo meaque) |
| |
| The output is mea=que, but it should be me=a=que. |
| |
| Again, if you replace meaque with mea, you get me=a, which is correct, |
| because mea is not a compound word. |
| |
| If you replace meaque with eamque, you get e=am=que, as expected; this |
| shows that there is no similar bug between the first and second |
| characters of a word part. |
| |
| Solution |
| |
| In the file hyphen.c, line 983, "for (j = 0; j < i - begin - 1; j++)" |
| must be replaced with "for (j = 0; j < i - begin; j++)". |
| The word part has length i - begin + 1, so there are i - begin |
| possible places for a hyphen. Thus j must take i - begin different |
| values, i.e. run from 0 to i - begin - 1. |
| --- |
| hyphen.c | 4 ++-- |
| 1 file changed, 2 insertions(+), 2 deletions(-) |
| |
| diff --git a/hyphen.c b/hyphen.c |
| index 79dc072..4954dbd 100644 |
| --- a/hyphen.c |
| +++ b/hyphen.c |
| @@ -963,7 +963,7 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, |
| for (i = 0; i < word_size; i++) rep2[i] = NULL; |
| for (i = 0; i < word_size; i++) if |
| (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { |
| - if (i - begin > 1) { |
| + if (i - begin > 0) { |
| int hyph = 0; |
| prep_word[i + 2] = '\0'; |
| /* non-standard hyphenation at compound boundary (Schiffahrt) */ |
| @@ -980,7 +980,7 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, |
| hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph, |
| hyphens2, &rep2, &pos2, &cut2, clhmin, |
| crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend)); |
| - for (j = 0; j < i - begin - 1; j++) { |
| + for (j = 0; j < i - begin; j++) { |
| hyphens[begin + j] = hyphens2[j]; |
| if (rep2[j] && rep && pos && cut) { |
| if (!*rep && !*pos && !*cut) { |