Programs/ctb_native.c - brltty - Git at Google

 /*
  * BRLTTY - A background process providing access to the console screen (when in
  *          text mode) for a blind person using a refreshable braille display.
  *
  * Copyright (C) 1995-2023 by The BRLTTY Developers.
  *
  * BRLTTY comes with ABSOLUTELY NO WARRANTY.
  *
  * This is free software, placed under the terms of the
  * GNU Lesser General Public License, as published by the Free Software
  * Foundation; either version 2.1 of the License, or (at your option) any
  * later version. Please see the file LICENSE-LGPL for details.
  *
  * Web Page: http://brltty.app/
  *
  * This software is maintained by Dave Mielke <dave@mielke.cc>.
  */

 #include "prologue.h"

 #include <string.h>

 #include "log.h"
 #include "ctb_translate.h"
 #include "ttb.h"
 #include "brl_dots.h"
 #include "unicode.h"
 #include "utf8.h"

 #ifdef HAVE_ICU
 #include <unicode/uchar.h>

 typedef struct {
   unsigned int index;
   ULineBreak after;
   ULineBreak before;
   ULineBreak previous;
   ULineBreak indirect;
 } LineBreakOpportunitiesState;

 static void
 prepareLineBreakOpportunitiesState (LineBreakOpportunitiesState *lbo) {
   lbo->index = 0;
   lbo->after = U_LB_SPACE;
   lbo->before = lbo->after;
   lbo->previous = lbo->before;
   lbo->indirect = U_LB_SPACE;
 }

 static void
 findLineBreakOpportunities (
   BrailleContractionData *bcd,
   LineBreakOpportunitiesState *lbo,
   unsigned char *opportunities,
   const wchar_t *characters, unsigned int end
 ) {
   /* UAX #14: Line Breaking Properties
    * http://unicode.org/reports/tr14/
    * Section 6: Line Breaking Algorithm
    *
    * !  Mandatory break at the indicated position
    * ^  No break allowed at the indicated position
    * _  Break allowed at the indicated position
    *
    * H  ideographs
    * h  small kana
    * 9  digits
    */

   while (lbo->index <= end) {
     unsigned char *opportunity = &opportunities[lbo->index];

     lbo->previous = lbo->before;
     lbo->before = lbo->after;
     lbo->after = u_getIntPropertyValue(characters[lbo->index], UCHAR_LINE_BREAK);
     lbo->index += 1;

     /* LB9  Do not break a combining character sequence.
      */
     if (lbo->after == U_LB_COMBINING_MARK) {
       /* LB10: Treat any remaining combining mark as AL.
        */
       if ((lbo->before == U_LB_MANDATORY_BREAK) ||
           (lbo->before == U_LB_CARRIAGE_RETURN) ||
           (lbo->before == U_LB_LINE_FEED) ||
           (lbo->before == U_LB_NEXT_LINE) ||
           (lbo->before == U_LB_SPACE) ||
           (lbo->before == U_LB_ZWSPACE)) {
         lbo->before = U_LB_ALPHABETIC;
       }

       /* treat it as if it has the line breaking class of the base character
        */
       lbo->after = lbo->before;
       *opportunity = 0;
       continue;
     }

     if (lbo->before != U_LB_SPACE) lbo->indirect = lbo->before;

     /* LB2: Never break at the start of text.
      * sot ×
      */
     if (opportunity == opportunities) {
       *opportunity = 0;
       continue;
     }

     /* LB4: Always break after hard line breaks
      * BK !
      */
     if (lbo->before == U_LB_MANDATORY_BREAK) {
       *opportunity = 1;
       continue;
     }

     /* LB5: Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
      * CR ^ LF
      * CR !
      * LF !
      * NL !
      */
     if ((lbo->before == U_LB_CARRIAGE_RETURN) && (lbo->after == U_LB_LINE_FEED)) {
       *opportunity = 0;
       continue;
     }
     if ((lbo->before == U_LB_CARRIAGE_RETURN) ||
         (lbo->before == U_LB_LINE_FEED) ||
         (lbo->before == U_LB_NEXT_LINE)) {
       *opportunity = 1;
       continue;
     }

     /* LB6: Do not break before hard line breaks.
      * ^ ( BK | CR | LF | NL )
      */
     if ((lbo->after == U_LB_MANDATORY_BREAK) ||
         (lbo->after == U_LB_CARRIAGE_RETURN) ||
         (lbo->after == U_LB_LINE_FEED) ||
         (lbo->after == U_LB_NEXT_LINE)) {
       *opportunity = 0;
       continue;
     }

     /* LB7: Do not break before spaces or zero width space.
      * ^ SP
      * ^ ZW
      */
     if ((lbo->after == U_LB_SPACE) || (lbo->after == U_LB_ZWSPACE)) {
       *opportunity = 0;
       continue;
     }

     /* LB8: Break after zero width space.
      * ZW _
      */
     if (lbo->before == U_LB_ZWSPACE) {
       *opportunity = 1;
       continue;
     }

     /* LB11: Do not break before or after Word joiner and related characters.
      * ^ WJ
      * WJ ^
      */
     if ((lbo->before == U_LB_WORD_JOINER) || (lbo->after == U_LB_WORD_JOINER)) {
       *opportunity = 0;
       continue;
     }

     /* LB12: Do not break before or after NBSP and related characters.
      * [^SP] ^ GL
      * GL ^
      */
     if ((lbo->before != U_LB_SPACE) && (lbo->after == U_LB_GLUE)) {
       *opportunity = 0;
       continue;
     }
     if (lbo->before == U_LB_GLUE) {
       *opportunity = 0;
       continue;
     }

     /* LB13: Do not break before ‘]' or ‘!' or ‘;' or ‘/', even after spaces.
      * ^ CL
      * ^ EX
      * ^ IS
      * ^ SY
      */
     if ((lbo->after == U_LB_CLOSE_PUNCTUATION) ||
         (lbo->after == U_LB_EXCLAMATION) ||
         (lbo->after == U_LB_INFIX_NUMERIC) ||
         (lbo->after == U_LB_BREAK_SYMBOLS)) {
       *opportunity = 0;
       continue;
     }

     /* LB14: Do not break after ‘[', even after spaces.
      * OP SP* ^
      */
     if (lbo->indirect == U_LB_OPEN_PUNCTUATION) {
       *opportunity = 0;
       continue;
     }

     /* LB15: Do not break within ‘"[', even with intervening spaces.
      * QU SP* ^ OP
      */
     if ((lbo->indirect == U_LB_QUOTATION) && (lbo->after == U_LB_OPEN_PUNCTUATION)) {
       *opportunity = 0;
       continue;
     }

     /* LB16: Do not break within ‘]h', even with intervening spaces.
      * CL SP* ^ NS
      */
     if ((lbo->indirect == U_LB_CLOSE_PUNCTUATION) && (lbo->after == U_LB_NONSTARTER)) {
       *opportunity = 0;
       continue;
     }

     /* LB17: Do not break within ‘ــ', even with intervening spaces.
      * B2 SP* ^ B2
      */
     if ((lbo->indirect == U_LB_BREAK_BOTH) && (lbo->after == U_LB_BREAK_BOTH)) {
       *opportunity = 0;
       continue;
     }

     /* LB18: Break after spaces.
      * SP _
      */
     if (lbo->before == U_LB_SPACE) {
       *opportunity = 1;
       continue;
     }

     /* LB19: Do not break before or after  quotation marks.
      * ^ QU
      * QU ^
      */
     if ((lbo->before == U_LB_QUOTATION) || (lbo->after == U_LB_QUOTATION)) {
       *opportunity = 0;
       continue;
     }

     /* LB20: Break before and after unresolved.
      * _ CB
      * CB _
      */
     if ((lbo->after == U_LB_CONTINGENT_BREAK) || (lbo->before == U_LB_CONTINGENT_BREAK)) {
       *opportunity = 1;
       continue;
     }

     /* LB21: Do not break before hyphen-minus, other hyphens,
      *       fixed-width spaces, small kana, and other non-starters,
      *       or lbo->after acute accents.
      * ^ BA
      * ^ HY
      * ^ NS
      * BB ^
      */
     if ((lbo->after == U_LB_BREAK_AFTER) ||
         (lbo->after == U_LB_HYPHEN) ||
         (lbo->after == U_LB_NONSTARTER) ||
         (lbo->before == U_LB_BREAK_BEFORE)) {
       *opportunity = 0;
       continue;
     }

     /* LB22: Do not break between two ellipses,
      *       or between letters or numbers and ellipsis.
      * AL ^ IN
      * ID ^ IN
      * IN ^ IN
      * NU ^ IN
      */
     if ((lbo->after == U_LB_INSEPARABLE) &&
         ((lbo->before == U_LB_ALPHABETIC) ||
          (lbo->before == U_LB_IDEOGRAPHIC) ||
          (lbo->before == U_LB_INSEPARABLE) ||
          (lbo->before == U_LB_NUMERIC))) {
       *opportunity = 0;
       continue;
     }

     /* LB23: Do not break within ‘a9', ‘3a', or ‘H%'.
      * ID ^ PO
      * AL ^ NU
      * NU ^ AL
      */
     if (((lbo->before == U_LB_IDEOGRAPHIC) && (lbo->after == U_LB_POSTFIX_NUMERIC)) ||
         ((lbo->before == U_LB_ALPHABETIC) && (lbo->after == U_LB_NUMERIC)) ||
         ((lbo->before == U_LB_NUMERIC) && (lbo->after == U_LB_ALPHABETIC))) {
       *opportunity = 0;
       continue;
     }

     /* LB24: Do not break between prefix and letters or ideographs.
      * PR ^ ID
      * PR ^ AL
      * PO ^ AL
      */
     if (((lbo->before == U_LB_PREFIX_NUMERIC) && (lbo->after == U_LB_IDEOGRAPHIC)) ||
         ((lbo->before == U_LB_PREFIX_NUMERIC) && (lbo->after == U_LB_ALPHABETIC)) ||
         ((lbo->before == U_LB_POSTFIX_NUMERIC) && (lbo->after == U_LB_ALPHABETIC))) {
       *opportunity = 0;
       continue;
     }

     /* LB25:  Do not break between the following pairs of classes relevant to numbers:
      * CL ^ PO
      * CL ^ PR
      * NU ^ PO
      * NU ^ PR
      * PO ^ OP
      * PO ^ NU
      * PR ^ OP
      * PR ^ NU
      * HY ^ NU
      * IS ^ NU
      * NU ^ NU
      * SY ^ NU
      */
     if (((lbo->before == U_LB_CLOSE_PUNCTUATION) && (lbo->after == U_LB_POSTFIX_NUMERIC)) ||
         ((lbo->before == U_LB_CLOSE_PUNCTUATION) && (lbo->after == U_LB_PREFIX_NUMERIC)) ||
         ((lbo->before == U_LB_NUMERIC) && (lbo->after == U_LB_POSTFIX_NUMERIC)) ||
         ((lbo->before == U_LB_NUMERIC) && (lbo->after == U_LB_PREFIX_NUMERIC)) ||
         ((lbo->before == U_LB_POSTFIX_NUMERIC) && (lbo->after == U_LB_OPEN_PUNCTUATION)) ||
         ((lbo->before == U_LB_POSTFIX_NUMERIC) && (lbo->after == U_LB_NUMERIC)) ||
         ((lbo->before == U_LB_PREFIX_NUMERIC) && (lbo->after == U_LB_OPEN_PUNCTUATION)) ||
         ((lbo->before == U_LB_PREFIX_NUMERIC) && (lbo->after == U_LB_NUMERIC)) ||
         ((lbo->before == U_LB_HYPHEN) && (lbo->after == U_LB_NUMERIC)) ||
         ((lbo->before == U_LB_INFIX_NUMERIC) && (lbo->after == U_LB_NUMERIC)) ||
         ((lbo->before == U_LB_NUMERIC) && (lbo->after == U_LB_NUMERIC)) ||
         ((lbo->before == U_LB_BREAK_SYMBOLS) && (lbo->after == U_LB_NUMERIC))) {
       *opportunity = 0;
       continue;
     }

     /* LB26: Do not break a Korean syllable.
      * JL ^ (JL | JV | H2 | H3)
      * (JV | H2) ^ (JV | JT)
      * (JT | H3) ^ JT
      */
     if ((lbo->before == U_LB_JL) &&
         ((lbo->after == U_LB_JL) ||
          (lbo->after == U_LB_JV) ||
          (lbo->after == U_LB_H2) ||
          (lbo->after == U_LB_H3))) {
       *opportunity = 0;
       continue;
     }
     if (((lbo->before == U_LB_JV) || (lbo->before == U_LB_H2)) &&
         ((lbo->after == U_LB_JV) || (lbo->after == U_LB_JT))) {
       *opportunity = 0;
       continue;
     }
     if (((lbo->before == U_LB_JT) || (lbo->before == U_LB_H3)) &&
         (lbo->after == U_LB_JT)) {
       *opportunity = 0;
       continue;
     }

     /* LB27: Treat a Korean Syllable Block the same as ID.
      * (JL | JV | JT | H2 | H3) ^ IN
      * (JL | JV | JT | H2 | H3) ^ PO
      * PR ^ (JL | JV | JT | H2 | H3)
      */
     if (((lbo->before == U_LB_JL) || (lbo->before == U_LB_JV) || (lbo->before == U_LB_JT) ||
          (lbo->before == U_LB_H2) || (lbo->before == U_LB_H3)) &&
         (lbo->after == U_LB_INSEPARABLE)) {
       *opportunity = 0;
       continue;
     }
     if (((lbo->before == U_LB_JL) || (lbo->before == U_LB_JV) || (lbo->before == U_LB_JT) ||
          (lbo->before == U_LB_H2) || (lbo->before == U_LB_H3)) &&
         (lbo->after == U_LB_POSTFIX_NUMERIC)) {
       *opportunity = 0;
       continue;
     }
     if ((lbo->before == U_LB_PREFIX_NUMERIC) &&
         ((lbo->after == U_LB_JL) || (lbo->after == U_LB_JV) || (lbo->after == U_LB_JT) ||
          (lbo->after == U_LB_H2) || (lbo->after == U_LB_H3))) {
       *opportunity = 0;
       continue;
     }

     /* LB28: Do not break between alphabetics.
      * AL ^ AL
      */
     if ((lbo->before == U_LB_ALPHABETIC) && (lbo->after == U_LB_ALPHABETIC)) {
       *opportunity = 0;
       continue;
     }

     /* LB29: Do not break between numeric punctuation and alphabetics.
      * IS ^ AL
      */
     if ((lbo->before == U_LB_INFIX_NUMERIC) && (lbo->after == U_LB_ALPHABETIC)) {
       *opportunity = 0;
       continue;
     }

     /* LB30: Do not break between letters, numbers, or ordinary symbols
      *       and opening or closing punctuation.
      * (AL | NU) ^ OP
      * CL ^ (AL | NU)
      */
     if (((lbo->before == U_LB_ALPHABETIC) || (lbo->before == U_LB_NUMERIC)) &&
         (lbo->after == U_LB_OPEN_PUNCTUATION)) {
       *opportunity = 0;
       continue;
     }
     if ((lbo->before == U_LB_CLOSE_PUNCTUATION) &&
         ((lbo->after == U_LB_ALPHABETIC) || (lbo->after == U_LB_NUMERIC))) {
       *opportunity = 0;
       continue;
     }

     /* Unix options begin with a minus sign. */
     if ((lbo->before == U_LB_HYPHEN) &&
         (lbo->after != U_LB_SPACE) &&
         (lbo->previous == U_LB_SPACE)) {
       *opportunity = 0;
       continue;
     }

     /* LB31: Break everywhere else.
      * ALL _
      * _ ALL
      */
     *opportunity = 1;
   }
 }

 #else /* HAVE_ICU */
 typedef struct {
   unsigned int index;
   int wasSpace;
 } LineBreakOpportunitiesState;

 static void
 prepareLineBreakOpportunitiesState (LineBreakOpportunitiesState *lbo) {
   lbo->index = 0;
   lbo->wasSpace = 0;
 }

 static void
 findLineBreakOpportunities (
   BrailleContractionData *bcd,
   LineBreakOpportunitiesState *lbo,
   unsigned char *opportunities,
   const wchar_t *characters, unsigned int end
 ) {
   while (lbo->index <= end) {
     int isSpace = testCharacter(bcd, characters[lbo->index], CTC_Space);
     opportunities[lbo->index] = lbo->wasSpace && !isSpace;

     lbo->wasSpace = isSpace;
     lbo->index += 1;
   }
 }
 #endif /* HAVE_ICU */

 static int
 isLineBreakOpportunity (
   BrailleContractionData *bcd,
   LineBreakOpportunitiesState *lbo,
   unsigned char *opportunities
 ) {
   unsigned int index = getInputConsumed(bcd);
   if (index == getInputCount(bcd)) return 1;

   findLineBreakOpportunities(bcd, lbo, opportunities, bcd->input.begin, index);
   return opportunities[index];
 }

 static inline ContractionTableHeader *
 getContractionTableHeader (BrailleContractionData *bcd) {
   return bcd->table->data.internal.header.fields;
 }

 static inline const void *
 getContractionTableItem (BrailleContractionData *bcd, ContractionTableOffset offset) {
   return &bcd->table->data.internal.header.bytes[offset];
 }

 static const ContractionTableCharacter *
 getContractionTableCharacter (BrailleContractionData *bcd, wchar_t character) {
   const ContractionTableCharacter *characters = getContractionTableItem(bcd, getContractionTableHeader(bcd)->characters);
   int first = 0;
   int last = getContractionTableHeader(bcd)->characterCount - 1;

   while (first <= last) {
     int current = (first + last) / 2;
     const ContractionTableCharacter *ctc = &characters[current];

     if (ctc->value < character) {
       first = current + 1;
     } else if (ctc->value > character) {
       last = current - 1;
     } else {
       return ctc;
     }
   }

   return NULL;
 }

 static int
 addRule (BrailleContractionData *bcd, ContractionTableRule *rule) {
   ContractionTable *table = bcd->table;

   if (table->rules.count == table->rules.size) {
     size_t newSize = table->rules.size + 10;
     ContractionTableRule **newArray = realloc(table->rules.array, ARRAY_SIZE(newArray, newSize));

     if (!newArray) {
       logMallocError();
       return 0;
     }

     table->rules.array = newArray;
     table->rules.size = newSize;
   }

   table->rules.array[table->rules.count++] = rule;
   return 1;
 }

 static size_t
 makeDecomposedBraille (BrailleContractionData *bcd, wchar_t character, BYTE *cells, size_t size) {
   wchar_t characters[0X10];
   size_t characterCount = decomposeCharacter(character, characters, ARRAY_COUNT(characters));

   if (characterCount > 1) {
     BYTE *from = cells;
     const BYTE *end = from + size;
     unsigned int characterIndex = 1;

     while (1) {
       wchar_t character = characters[characterIndex];
       const CharacterEntry *entry = getCharacterEntry(bcd, character);
       if (!entry) break;
       if (character != entry->value) break;

       const ContractionTableRule *rule = entry->always;
       if (!rule) break;

       unsigned int cellCount = rule->replen;
       if (!cellCount) break;
       if ((end - from) < cellCount) break;
       from = mempcpy(from, &rule->findrep[rule->findlen], cellCount);

       if (!characterIndex) return from - cells;
       if (++characterIndex == characterCount) characterIndex = 0;
     }
   }

   return 0;
 }

 typedef struct {
   BrailleContractionData *bcd;
   CharacterEntry *character;
 } SetAlwaysRuleData;

 static int
 setAlwaysRule (wchar_t character, void *data) {
   SetAlwaysRuleData *sar = data;
   BrailleContractionData *bcd = sar->bcd;

   CharacterEntry *entry = sar->character;
   const ContractionTableCharacter *ctc = getContractionTableCharacter(bcd, character);

   if (ctc) {
     ContractionTableOffset offset = ctc->always;

     if (offset) {
       const ContractionTableRule *rule = getContractionTableItem(bcd, offset);

       if (rule->replen) {
         entry->always = rule;
         return 1;
       }
     }
   }

   if (character == entry->value) {
     BYTE cells[0X100];
     size_t count = makeDecomposedBraille(bcd, character, cells, sizeof(cells));

     {
       unsigned int position;
       findCharacterEntry(bcd, character, &position);

       entry = &bcd->table->characters.array[position];
       sar->character = entry;
     }

     if (count) {
       ContractionTableRule *rule;
       size_t size = sizeof(*rule) + sizeof(character) + count;

       if ((rule = malloc(size))) {
         memset(rule, 0, sizeof(*rule));
         rule->opcode = CTO_Always;

         rule->findrep[0] = character;
         memcpy(&rule->findrep[rule->findlen = 1], cells, (rule->replen = count));

         if (addRule(bcd, rule)) {
           entry->always = rule;
           return 1;
         }

         free(rule);
       }
     }
   }

   return 0;
 }

 static wchar_t
 toLowerCase (BrailleContractionData *bcd, wchar_t character) {
   const CharacterEntry *entry = getCharacterEntry(bcd, character);
   return entry? entry->lowercase: character;
 }

 static const ContractionTableRule *
 getAlwaysRule (BrailleContractionData *bcd, wchar_t character) {
   const CharacterEntry *entry = getCharacterEntry(bcd, toLowerCase(bcd, character));
   return entry? entry->always: NULL;
 }

 static wchar_t
 getBestCharacter (BrailleContractionData *bcd, wchar_t character) {
   const ContractionTableRule *rule = getAlwaysRule(bcd, character);
   return rule? rule->findrep[0]: 0;
 }

 static int
 sameCharacters (BrailleContractionData *bcd, wchar_t character1, wchar_t character2) {
   wchar_t best1 = getBestCharacter(bcd, character1);
   return best1 && (best1 == getBestCharacter(bcd, character2));
 }

 static int
 matchCurrentRule (BrailleContractionData *bcd) {
   const wchar_t *input = bcd->input.current;
   const wchar_t *find = bcd->current.rule->findrep;
   const wchar_t *findEnd = find + bcd->current.length;

   while (find < findEnd) {
     if (toLowerCase(bcd, *input++) != toLowerCase(bcd, *find++)) {
       return 0;
     }
   }

   return 1;
 }

 static void
 setBefore (BrailleContractionData *bcd) {
   bcd->current.before = (bcd->input.current == bcd->input.begin)? WC_C(' '): bcd->input.current[-1];
 }

 static void
 setAfter (BrailleContractionData *bcd, int length) {
   bcd->current.after = (bcd->input.current + length < bcd->input.end)? bcd->input.current[length]: WC_C(' ');
 }

 static int
 isBeginning (BrailleContractionData *bcd) {
   const wchar_t *ptr = bcd->input.current;

   while (ptr > bcd->input.begin) {
     if (!testCharacter(bcd, *--ptr, CTC_Punctuation)) {
       if (!testCharacter(bcd, *ptr, CTC_Space)) return 0;
       break;
     }
   }

   return 1;
 }

 static int
 isEnding (BrailleContractionData *bcd) {
   const wchar_t *ptr = bcd->input.current + bcd->current.length;

   while (ptr < bcd->input.end) {
     if (!testCharacter(bcd, *ptr, CTC_Punctuation)) {
       if (!testCharacter(bcd, *ptr, CTC_Space)) return 0;
       break;
     }

     ptr += 1;
   }

   return 1;
 }

 static void
 setCurrentRule (BrailleContractionData *bcd, const ContractionTableRule *rule) {
   bcd->current.rule = rule;
   bcd->current.opcode = bcd->current.rule->opcode;
   bcd->current.length = bcd->current.rule->findlen;
   setAfter(bcd, bcd->current.length);
 }

 static int
 selectRule (BrailleContractionData *bcd, int length) {
   if (length < 1) return 0;

   int ruleOffset;
   int maximumLength;

   if (length == 1) {
     wchar_t character = toLowerCase(bcd, *bcd->input.current);
     const ContractionTableCharacter *ctc = getContractionTableCharacter(bcd, character);

     if (!ctc) {
       const CharacterEntry *entry = getCharacterEntry(bcd, character);
       if (!entry) return 0;

       const ContractionTableRule *rule = entry->always;
       if (!rule) return 0;

       setCurrentRule(bcd, rule);
       return 1;
     }

     ruleOffset = ctc->rules;
     maximumLength = 1;
   } else {
     const wchar_t characters[] = {
       toLowerCase(bcd, bcd->input.current[0]),
       toLowerCase(bcd, bcd->input.current[1]),
     };

     ruleOffset = getContractionTableHeader(bcd)->rules[CTH(characters)];
     maximumLength = 0;
   }

   while (ruleOffset) {
     setCurrentRule(bcd, getContractionTableItem(bcd, ruleOffset));

     if ((length == 1) ||
         ((bcd->current.length <= length) &&
          matchCurrentRule(bcd))) {
       if (!maximumLength) {
         maximumLength = bcd->current.length;

         if (prefs.capitalizationMode != CTB_CAP_NONE) {
           typedef enum {CS_Any, CS_Lower, CS_UpperSingle, CS_UpperMultiple} CapitalizationState;
 #define STATE(c) (testCharacter(bcd, (c), CTC_UpperCase)? CS_UpperSingle: testCharacter(bcd, (c), CTC_LowerCase)? CS_Lower: CS_Any)

           CapitalizationState current = STATE(bcd->current.before);

           for (int i=0; i<bcd->current.length; i+=1) {
             wchar_t character = bcd->input.current[i];
             CapitalizationState next = STATE(character);

             if (i > 0) {
               if (((current == CS_Lower) && (next == CS_UpperSingle)) ||
                   ((current == CS_UpperMultiple) && (next == CS_Lower))) {
                 maximumLength = i;
                 break;
               }

               if ((prefs.capitalizationMode != CTB_CAP_SIGN) &&
                   (next == CS_UpperSingle)) {
                 maximumLength = i;
                 break;
               }
             }

             if ((prefs.capitalizationMode == CTB_CAP_SIGN) && (current > CS_Lower) && (next == CS_UpperSingle)) {
               current = CS_UpperMultiple;
             } else if (next != CS_Any) {
               current = next;
             } else if (current == CS_Any) {
               current = CS_Lower;
             }
           }

 #undef STATE
         }
       }

       if ((bcd->current.length <= maximumLength) &&
           (!bcd->current.rule->after || testBefore(bcd, bcd->current.rule->after)) &&
           (!bcd->current.rule->before || testAfter(bcd, bcd->current.rule->before))) {
         switch (bcd->current.opcode) {
           case CTO_Always:
           case CTO_Repeatable:
           case CTO_Literal:
           case CTO_Replace:
             return 1;

           case CTO_LargeSign:
           case CTO_LastLargeSign:
             if (!isBeginning(bcd) || !isEnding(bcd)) bcd->current.opcode = CTO_Always;
             return 1;

           case CTO_WholeWord:
             if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                 testAfter(bcd, CTC_Space|CTC_Punctuation))
               return 1;
             break;

           case CTO_Contraction:
             if ((bcd->input.current > bcd->input.begin) && sameCharacters(bcd, bcd->input.current[-1], WC_C('\''))) break;
             if (isBeginning(bcd) && isEnding(bcd)) return 1;
             break;

           case CTO_LowWord:
             if (testBefore(bcd, CTC_Space) && testAfter(bcd, CTC_Space) &&
                 (bcd->previous.opcode != CTO_JoinedWord) &&
                 ((bcd->output.current == bcd->output.begin) || !bcd->output.current[-1]))
               return 1;
             break;

           case CTO_JoinedWord:
             if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                 !sameCharacters(bcd, bcd->current.before, WC_C('-')) &&
                 (bcd->output.current + bcd->current.rule->replen < bcd->output.end)) {
               const wchar_t *end = bcd->input.current + bcd->current.length;
               const wchar_t *ptr = end;

               while (ptr < bcd->input.end) {
                 if (!testCharacter(bcd, *ptr, CTC_Space)) {
                   if (!testCharacter(bcd, *ptr, CTC_Letter)) break;
                   if (ptr == end) break;
                   return 1;
                 }

                 if (ptr++ == bcd->input.cursor) break;
               }
             }
             break;

           case CTO_SuffixableWord:
             if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                 testAfter(bcd, CTC_Space|CTC_Letter|CTC_Punctuation))
               return 1;
             break;

           case CTO_PrefixableWord:
             if (testBefore(bcd, CTC_Space|CTC_Letter|CTC_Punctuation) &&
                 testAfter(bcd, CTC_Space|CTC_Punctuation))
               return 1;
             break;

           case CTO_BegWord:
             if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                 testAfter(bcd, CTC_Letter))
               return 1;
             break;

           case CTO_BegMidWord:
             if (testBefore(bcd, CTC_Letter|CTC_Space|CTC_Punctuation) &&
                 testAfter(bcd, CTC_Letter))
               return 1;
             break;

           case CTO_MidWord:
             if (testBefore(bcd, CTC_Letter) && testAfter(bcd, CTC_Letter))
               return 1;
             break;

           case CTO_MidEndWord:
             if (testBefore(bcd, CTC_Letter) &&
                 testAfter(bcd, CTC_Letter|CTC_Space|CTC_Punctuation))
               return 1;
             break;

           case CTO_EndWord:
             if (testBefore(bcd, CTC_Letter) &&
                 testAfter(bcd, CTC_Space|CTC_Punctuation))
               return 1;
             break;

           case CTO_BegNum:
             if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                 testAfter(bcd, CTC_Digit))
               return 1;
             break;

           case CTO_MidNum:
             if (testBefore(bcd, CTC_Digit) && testAfter(bcd, CTC_Digit))
               return 1;
             break;

           case CTO_EndNum:
             if (testBefore(bcd, CTC_Digit) &&
                 testAfter(bcd, CTC_Space|CTC_Punctuation))
               return 1;
             break;

           case CTO_PrePunc:
             if (testCurrent(bcd, CTC_Punctuation) && isBeginning(bcd) && !isEnding(bcd)) return 1;
             break;

           case CTO_PostPunc:
             if (testCurrent(bcd, CTC_Punctuation) && !isBeginning(bcd) && isEnding(bcd)) return 1;
             break;

           default:
             break;
         }
       }
     }

     ruleOffset = bcd->current.rule->next;
   }

   return 0;
 }

 static int
 putCells (BrailleContractionData *bcd, const BYTE *cells, int count) {
   if (bcd->output.current + count > bcd->output.end) return 0;
   bcd->output.current = mempcpy(bcd->output.current, cells, count);
   return 1;
 }

 static int
 putCell (BrailleContractionData *bcd, BYTE byte) {
   return putCells(bcd, &byte, 1);
 }

 static int
 putReplace (BrailleContractionData *bcd, const ContractionTableRule *rule, wchar_t character) {
   const BYTE *cells = (BYTE *)&rule->findrep[rule->findlen];
   int count = rule->replen;

   if ((prefs.capitalizationMode == CTB_CAP_DOT7) &&
       testCharacter(bcd, character, CTC_UpperCase)) {
     if (!putCell(bcd, *cells++ | BRL_DOT_7)) return 0;
     if (!(count -= 1)) return 1;
   }

   return putCells(bcd, cells, count);
 }

 static int
 putCharacter (BrailleContractionData *bcd, wchar_t character) {
   {
     const ContractionTableRule *rule = getAlwaysRule(bcd, character);
     if (rule) return putReplace(bcd, rule, character);
   }

   if (isBrailleCharacter(character)) {
     return putCell(bcd, (character & UNICODE_CELL_MASK));
   }

   if (textTable) {
     unsigned char dots = convertCharacterToDots(textTable, character);
     return putCell(bcd, dots);
   }

   {
     const wchar_t replacementCharacter = getReplacementCharacter();

     if (replacementCharacter != character) {
       const ContractionTableRule *rule = getAlwaysRule(bcd, replacementCharacter);
       if (rule) return putReplace(bcd, rule, replacementCharacter);
     }
   }

   return putCell(bcd, (BRL_DOT_1 | BRL_DOT_2 | BRL_DOT_3 | BRL_DOT_4 | BRL_DOT_5 | BRL_DOT_6 | BRL_DOT_7 | BRL_DOT_8));
 }

 static int
 putSequence (BrailleContractionData *bcd, ContractionTableOffset offset) {
   const BYTE *sequence = getContractionTableItem(bcd, offset);
   return putCells(bcd, sequence+1, *sequence);
 }

 static void
 clearRemainingOffsets (BrailleContractionData *bcd) {
   const wchar_t *next = bcd->input.current + bcd->current.length;
   while (++bcd->input.current < next) clearOffset(bcd);
 }

 static int
 contractText_native (BrailleContractionData *bcd) {
   bcd->previous.opcode = CTO_None;

   const wchar_t *srcword = NULL;
   const wchar_t *srcjoin = NULL;
   const wchar_t *literal = NULL;

   BYTE *destword = NULL;
   BYTE *destjoin = NULL;
   BYTE *destlast = NULL;

   unsigned char lineBreakOpportunities[getInputCount(bcd) + 1];
   LineBreakOpportunitiesState lbo;
   prepareLineBreakOpportunitiesState(&lbo);

   while (bcd->input.current < bcd->input.end) {
     int wasLiteral = bcd->input.current == literal;

     destlast = bcd->output.current;
     setOffset(bcd);
     setBefore(bcd);

     if (literal)
       if (bcd->input.current >= literal)
         if (testCurrent(bcd, CTC_Space) || testPrevious(bcd, CTC_Space))
           literal = NULL;

     if ((!literal && selectRule(bcd, getInputUnconsumed(bcd))) || selectRule(bcd, 1)) {
       if (!literal &&
           ((bcd->current.opcode == CTO_Literal) ||
            (prefs.expandCurrentWord &&
             (bcd->input.cursor >= bcd->input.current) &&
             (bcd->input.cursor < (bcd->input.current + bcd->current.length))))) {
         literal = bcd->input.current + bcd->current.length;

         if (!testCurrent(bcd, CTC_Space)) {
           if (destjoin) {
             bcd->input.current = srcjoin;
             bcd->output.current = destjoin;
           } else {
             bcd->input.current = bcd->input.begin;
             bcd->output.current = bcd->output.begin;
           }
         }

         continue;
       }

       if (bcd->current.opcode == CTO_Replace) {
         const ContractionTableRule *rule = bcd->current.rule;

         size_t size = rule->replen + 1;
         wchar_t characters[size];
         wchar_t *to = characters;
         const char *from = (const char *)&rule->findrep[rule->findlen];
         convertUtf8ToWchars(&from, &to, size);

         const wchar_t *inputBuffer = characters;
         int inputLength = to - characters;
         int outputLength = bcd->output.end - bcd->output.current;

         contractText(
           bcd->table, NULL,
           inputBuffer, &inputLength,
           bcd->output.current, &outputLength,
           NULL, CTB_NO_CURSOR
         );

         bcd->output.current += outputLength;
         clearRemainingOffsets(bcd);
         continue;
       }

       if (getContractionTableHeader(bcd)->numberSign && (bcd->previous.opcode != CTO_MidNum) &&
           !testBefore(bcd, CTC_Digit) && testCurrent(bcd, CTC_Digit)) {
         if (!putSequence(bcd, getContractionTableHeader(bcd)->numberSign)) break;
       } else if (getContractionTableHeader(bcd)->letterSign && testCurrent(bcd, CTC_Letter)) {
         if ((bcd->current.opcode == CTO_Contraction) ||
             ((bcd->current.opcode != CTO_EndNum) && testBefore(bcd, CTC_Digit)) ||
             (testCurrent(bcd, CTC_Letter) &&
              (bcd->current.opcode == CTO_Always) &&
              (bcd->current.length == 1) &&
              testBefore(bcd, CTC_Space) &&
              (((bcd->input.current + 1) == bcd->input.end) ||
               testNext(bcd, CTC_Space) ||
               (testNext(bcd, CTC_Punctuation) &&
                !sameCharacters(bcd, bcd->input.current[1], WC_C('.')) &&
                !sameCharacters(bcd, bcd->input.current[1], WC_C('\'')))))) {
           if (!putSequence(bcd, getContractionTableHeader(bcd)->letterSign)) break;
         }
       }

       if (prefs.capitalizationMode == CTB_CAP_SIGN) {
         if (testCurrent(bcd, CTC_UpperCase)) {
           if (!testBefore(bcd, CTC_UpperCase)) {
             if (getContractionTableHeader(bcd)->beginCapitalSign &&
                 (bcd->input.current + 1 < bcd->input.end) && testNext(bcd, CTC_UpperCase)) {
               if (!putSequence(bcd, getContractionTableHeader(bcd)->beginCapitalSign)) break;
             } else if (getContractionTableHeader(bcd)->capitalSign) {
               if (!putSequence(bcd, getContractionTableHeader(bcd)->capitalSign)) break;
             }
           }
         } else if (testCurrent(bcd, CTC_LowerCase)) {
           if (getContractionTableHeader(bcd)->endCapitalSign && (bcd->input.current - 2 >= bcd->input.begin) &&
               testPrevious(bcd, CTC_UpperCase) && testRelative(bcd, -2, CTC_UpperCase)) {
             if (!putSequence(bcd, getContractionTableHeader(bcd)->endCapitalSign)) break;
           }
         }
       }

       switch (bcd->current.opcode) {
         case CTO_LargeSign:
         case CTO_LastLargeSign:
           if ((bcd->previous.opcode == CTO_LargeSign) && !wasLiteral) {
             while ((bcd->output.current > bcd->output.begin) && !bcd->output.current[-1]) bcd->output.current -= 1;
             setOffset(bcd);

             {
               BYTE **destptrs[] = {&destword, &destjoin, &destlast, NULL};
               BYTE ***destptr = destptrs;

               while (*destptr) {
                 if (**destptr && (**destptr > bcd->output.current)) **destptr = bcd->output.current;
                 destptr += 1;
               }
             }
           }
           break;

         default:
           break;
       }

       if (bcd->current.rule->replen &&
           !((bcd->current.opcode == CTO_Always) && (bcd->current.length == 1))) {
         if (!putReplace(bcd, bcd->current.rule, *bcd->input.current)) goto done;
         clearRemainingOffsets(bcd);
       } else {
         const wchar_t *srclim = bcd->input.current + bcd->current.length;
         while (1) {
           if (!putCharacter(bcd, *bcd->input.current)) goto done;
           if (++bcd->input.current == srclim) break;
           setOffset(bcd);
         }
       }

       {
         const wchar_t *srcorig = bcd->input.current;
         const wchar_t *srcbeg = NULL;
         BYTE *destbeg = NULL;

         switch (bcd->current.opcode) {
           case CTO_Repeatable: {
             const wchar_t *srclim = bcd->input.end - bcd->current.length;

             srcbeg = bcd->input.current - bcd->current.length;
             destbeg = destlast;

             while ((bcd->input.current <= srclim) && matchCurrentRule(bcd)) {
               clearOffset(bcd);
               clearRemainingOffsets(bcd);
             }

             break;
           }

           case CTO_JoinedWord:
             srcbeg = bcd->input.current;
             destbeg = bcd->output.current;

             while ((bcd->input.current < bcd->input.end) && testCurrent(bcd, CTC_Space)) {
               clearOffset(bcd);
               bcd->input.current += 1;
             }
             break;

           default:
             break;
         }

         if (srcbeg && (bcd->input.cursor >= srcbeg) && (bcd->input.cursor < bcd->input.current)) {
           int repeat = !literal;
           literal = bcd->input.current;

           if (repeat) {
             bcd->input.current = srcbeg;
             bcd->output.current = destbeg;
             continue;
           }

           bcd->input.current = srcorig;
         }
       }
     } else {
       bcd->current.opcode = CTO_Always;
       if (!putCharacter(bcd, *bcd->input.current)) break;
       bcd->input.current += 1;
     }

     if (isLineBreakOpportunity(bcd, &lbo, lineBreakOpportunities)) {
       srcjoin = bcd->input.current;
       destjoin = bcd->output.current;

       if (bcd->current.opcode != CTO_JoinedWord) {
         srcword = bcd->input.current;
         destword = bcd->output.current;
       }
     }

     if ((bcd->output.current == bcd->output.begin) || bcd->output.current[-1]) {
       bcd->previous.opcode = bcd->current.opcode;
     }
   }

 done:
   if (bcd->input.current < bcd->input.end) {
     if (destword && (destword > bcd->output.begin) &&
         (!(testPrevious(bcd, CTC_Space) || testCurrent(bcd, CTC_Space)) ||
          (bcd->previous.opcode == CTO_JoinedWord))) {
       bcd->input.current = srcword;
       bcd->output.current = destword;
     } else if (destlast) {
       bcd->output.current = destlast;
     }
   }

   return 1;
 }

 static void
 finishCharacterEntry_native (BrailleContractionData *bcd, CharacterEntry *entry) {
   wchar_t character = entry->value;

   {
     const ContractionTableCharacter *ctc = getContractionTableCharacter(bcd, character);
     if (ctc) entry->attributes |= ctc->attributes;
   }

   {
     SetAlwaysRuleData sar = {
       .bcd = bcd,
       .character = entry
     };

     int ok = (character == getReplacementCharacter())?
              setAlwaysRule(character, &sar):
              handleBestCharacter(character, setAlwaysRule, &sar);

     if (!ok) entry->always = NULL;
   }
 }

 static const ContractionTableTranslationMethods nativeTranslationMethods = {
   .contractText = contractText_native,
   .finishCharacterEntry = finishCharacterEntry_native
 };

 const ContractionTableTranslationMethods *
 getContractionTableTranslationMethods_native (void) {
   return &nativeTranslationMethods;
 }