// qt-everywhere-src-5.15.1/qtbase/util/unicode/main.cpp - orbit - Git at Google

 /****************************************************************************
 **
 ** Copyright (C) 2019 The Qt Company Ltd.
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the utils of the Qt Toolkit.
 **
 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
 ** Commercial License Usage
 ** Licensees holding valid commercial Qt licenses may use this file in
 ** accordance with the commercial license agreement provided with the
 ** Software or, alternatively, in accordance with the terms contained in
 ** a written agreement between you and The Qt Company. For licensing terms
 ** and conditions see https://www.qt.io/terms-conditions. For further
 ** information use the contact form at https://www.qt.io/contact-us.
 **
 ** GNU General Public License Usage
 ** Alternatively, this file may be used under the terms of the GNU
 ** General Public License version 3 as published by the Free Software
 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
 ** included in the packaging of this file. Please review the following
 ** information to ensure the GNU General Public License requirements will
 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
 **
 ** $QT_END_LICENSE$
 **
 ****************************************************************************/

 #include <qlist.h>
 #include <qhash.h>
 #include <qfile.h>
 #include <qbytearray.h>
 #include <qstring.h>
 #include <qchar.h>
 #include <qvector.h>
 #include <qdebug.h>
 #if 0
 #include <private/qunicodetables_p.h>
 #endif

 // Unicode data version targeted by this tool: once as a plain version string
 // and once as the spelling of the matching QChar::UnicodeVersion enumerator.
 // Both encode the same release (13.0) and should be changed together.
 // NOTE(review): the places that consume these macros are outside this view.
 #define DATA_VERSION_S "13.0"
 #define DATA_VERSION_STR "QChar::Unicode_13_0"


 static QHash<QByteArray, QChar::UnicodeVersion> age_map;

 static void initAgeMap()
 {
     struct AgeMap {
         const QChar::UnicodeVersion version;
         const char *age;
     } ageMap[] = {
         { QChar::Unicode_1_1,   "1.1" },
         { QChar::Unicode_2_0,   "2.0" },
         { QChar::Unicode_2_1_2, "2.1" },
         { QChar::Unicode_3_0,   "3.0" },
         { QChar::Unicode_3_1,   "3.1" },
         { QChar::Unicode_3_2,   "3.2" },
         { QChar::Unicode_4_0,   "4.0" },
         { QChar::Unicode_4_1,   "4.1" },
         { QChar::Unicode_5_0,   "5.0" },
         { QChar::Unicode_5_1,   "5.1" },
         { QChar::Unicode_5_2,   "5.2" },
         { QChar::Unicode_6_0,   "6.0" },
         { QChar::Unicode_6_1,   "6.1" },
         { QChar::Unicode_6_2,   "6.2" },
         { QChar::Unicode_6_3,   "6.3" },
         { QChar::Unicode_7_0,   "7.0" },
         { QChar::Unicode_8_0,   "8.0" },
         { QChar::Unicode_9_0,   "9.0" },
         { QChar::Unicode_10_0,   "10.0" },
         { QChar::Unicode_11_0,   "11.0" },
         { QChar::Unicode_12_0,   "12.0" },
         { QChar::Unicode_12_1,   "12.1" }, // UCD Revision 24
         { QChar::Unicode_13_0,   "13.0" }, // UCD Revision 26
         { QChar::Unicode_Unassigned, 0 }
     };
     AgeMap *d = ageMap;
     while (d->age) {
         age_map.insert(d->age, d->version);
         ++d;
     }
 }

 static QHash<QByteArray, QChar::Category> categoryMap;

 static void initCategoryMap()
 {
     struct Cat {
         QChar::Category cat;
         const char *name;
     } categories[] = {
         { QChar::Mark_NonSpacing,          "Mn" },
         { QChar::Mark_SpacingCombining,    "Mc" },
         { QChar::Mark_Enclosing,           "Me" },

         { QChar::Number_DecimalDigit,      "Nd" },
         { QChar::Number_Letter,            "Nl" },
         { QChar::Number_Other,             "No" },

         { QChar::Separator_Space,          "Zs" },
         { QChar::Separator_Line,           "Zl" },
         { QChar::Separator_Paragraph,      "Zp" },

         { QChar::Other_Control,            "Cc" },
         { QChar::Other_Format,             "Cf" },
         { QChar::Other_Surrogate,          "Cs" },
         { QChar::Other_PrivateUse,         "Co" },
         { QChar::Other_NotAssigned,        "Cn" },

         { QChar::Letter_Uppercase,         "Lu" },
         { QChar::Letter_Lowercase,         "Ll" },
         { QChar::Letter_Titlecase,         "Lt" },
         { QChar::Letter_Modifier,          "Lm" },
         { QChar::Letter_Other,             "Lo" },

         { QChar::Punctuation_Connector,    "Pc" },
         { QChar::Punctuation_Dash,         "Pd" },
         { QChar::Punctuation_Open,         "Ps" },
         { QChar::Punctuation_Close,        "Pe" },
         { QChar::Punctuation_InitialQuote, "Pi" },
         { QChar::Punctuation_FinalQuote,   "Pf" },
         { QChar::Punctuation_Other,        "Po" },

         { QChar::Symbol_Math,              "Sm" },
         { QChar::Symbol_Currency,          "Sc" },
         { QChar::Symbol_Modifier,          "Sk" },
         { QChar::Symbol_Other,             "So" },
         { QChar::Other_NotAssigned, 0 }
     };
     Cat *c = categories;
     while (c->name) {
         categoryMap.insert(c->name, c->cat);
         ++c;
     }
 }


 static QHash<QByteArray, QChar::Decomposition> decompositionMap;

 static void initDecompositionMap()
 {
     struct Dec {
         QChar::Decomposition dec;
         const char *name;
     } decompositions[] = {
         { QChar::Canonical, "<canonical>" },
         { QChar::Font, "<font>" },
         { QChar::NoBreak, "<noBreak>" },
         { QChar::Initial, "<initial>" },
         { QChar::Medial, "<medial>" },
         { QChar::Final, "<final>" },
         { QChar::Isolated, "<isolated>" },
         { QChar::Circle, "<circle>" },
         { QChar::Super, "<super>" },
         { QChar::Sub, "<sub>" },
         { QChar::Vertical, "<vertical>" },
         { QChar::Wide, "<wide>" },
         { QChar::Narrow, "<narrow>" },
         { QChar::Small, "<small>" },
         { QChar::Square, "<square>" },
         { QChar::Compat, "<compat>" },
         { QChar::Fraction, "<fraction>" },
         { QChar::NoDecomposition, 0 }
     };
     Dec *d = decompositions;
     while (d->name) {
         decompositionMap.insert(d->name, d->dec);
         ++d;
     }
 }


 // Bidirectional classes as used inside this tool. Every named value is an
 // alias of the QChar enumerator with the same name, so the numeric values are
 // identical to QChar's. Dir_Unassigned has no initializer and therefore takes
 // the value following DirPDI; it has no QChar counterpart in this list.
 // NOTE(review): presumably it marks code points without a direction yet --
 // confirm at the use sites, which are outside this view.
 enum Direction {
     DirL = QChar::DirL,
     DirR = QChar::DirR,
     DirEN = QChar::DirEN,
     DirES = QChar::DirES,
     DirET = QChar::DirET,
     DirAN = QChar::DirAN,
     DirCS = QChar::DirCS,
     DirB = QChar::DirB,
     DirS = QChar::DirS,
     DirWS = QChar::DirWS,
     DirON = QChar::DirON,
     DirLRE = QChar::DirLRE,
     DirLRO = QChar::DirLRO,
     DirAL = QChar::DirAL,
     DirRLE = QChar::DirRLE,
     DirRLO = QChar::DirRLO,
     DirPDF = QChar::DirPDF,
     DirNSM = QChar::DirNSM,
     DirBN = QChar::DirBN,
     DirLRI = QChar::DirLRI,
     DirRLI = QChar::DirRLI,
     DirFSI = QChar::DirFSI,
     DirPDI = QChar::DirPDI,

     Dir_Unassigned
 };

 static QHash<QByteArray, Direction> directionMap;

 static void initDirectionMap()
 {
     struct Dir {
         Direction dir;
         const char *name;
     } directions[] = {
         { DirL, "L" },
         { DirR, "R" },
         { DirEN, "EN" },
         { DirES, "ES" },
         { DirET, "ET" },
         { DirAN, "AN" },
         { DirCS, "CS" },
         { DirB, "B" },
         { DirS, "S" },
         { DirWS, "WS" },
         { DirON, "ON" },
         { DirLRE, "LRE" },
         { DirLRO, "LRO" },
         { DirAL, "AL" },
         { DirRLE, "RLE" },
         { DirRLO, "RLO" },
         { DirPDF, "PDF" },
         { DirNSM, "NSM" },
         { DirBN, "BN" },
         { DirLRI, "LRI" },
         { DirRLI, "RLI" },
         { DirFSI, "FSI" },
         { DirPDI, "PDI" },
         { Dir_Unassigned, 0 }
     };
     Dir *d = directions;
     while (d->name) {
         directionMap.insert(d->name, d->dir);
         ++d;
     }
 }


 // Joining types known to this tool, numbered from 0 in declaration order.
 // The single-letter names they are looked up by are listed in
 // initJoiningMap(). Joining_Unassigned is a trailing extra value with no
 // name in that table.
 // NOTE(review): presumably a "not set yet" marker -- confirm at the use sites.
 enum JoiningType {
     Joining_None,
     Joining_Causing,
     Joining_Dual,
     Joining_Right,
     Joining_Left,
     Joining_Transparent,

     Joining_Unassigned
 };

 static QHash<QByteArray, JoiningType> joining_map;

 static void initJoiningMap()
 {
     struct JoiningList {
         JoiningType joining;
         const char *name;
     } joinings[] = {
         { Joining_None,        "U" },
         { Joining_Causing,     "C" },
         { Joining_Dual,        "D" },
         { Joining_Right,       "R" },
         { Joining_Left,        "L" },
         { Joining_Transparent, "T" },
         { Joining_Unassigned, 0 }
     };
     JoiningList *d = joinings;
     while (d->name) {
         joining_map.insert(d->name, d->joining);
         ++d;
     }
 }


 // C++ source text of the GraphemeBreakClass enum, held as a string.
 // NOTE(review): presumably written verbatim into the generated tables header;
 // the code that emits it is outside this view.
 // The text lists the same enumerators, in the same order, as the tool's own
 // enum GraphemeBreakClass defined right after it; only the trailing value
 // differs in name (NumGraphemeBreakClasses here, GraphemeBreak_Unassigned
 // there). Any change must be applied to both.
 static const char *grapheme_break_class_string =
     "enum GraphemeBreakClass {\n"
     "    GraphemeBreak_Any,\n"
     "    GraphemeBreak_CR,\n"
     "    GraphemeBreak_LF,\n"
     "    GraphemeBreak_Control,\n"
     "    GraphemeBreak_Extend,\n"
     "    GraphemeBreak_ZWJ,\n"
     "    GraphemeBreak_RegionalIndicator,\n"
     "    GraphemeBreak_Prepend,\n"
     "    GraphemeBreak_SpacingMark,\n"
     "    GraphemeBreak_L,\n"
     "    GraphemeBreak_V,\n"
     "    GraphemeBreak_T,\n"
     "    GraphemeBreak_LV,\n"
     "    GraphemeBreak_LVT,\n"
     "    Graphemebreak_E_Base,\n"
     "    Graphemebreak_E_Modifier,\n"
     "    Graphemebreak_Glue_After_Zwj,\n"
     "    Graphemebreak_E_Base_GAZ,\n"
     "\n"
     "    NumGraphemeBreakClasses\n"
     "};\n\n";

 // Tool-side counterpart of the enum text in grapheme_break_class_string:
 // same enumerators in the same order, so the numeric values agree.
 // GraphemeBreak_Unassigned sits in the position that the enum text names
 // NumGraphemeBreakClasses.
 // NOTE(review): the last four classes spell their prefix "Graphemebreak_"
 // with a lowercase 'b', unlike the others. The enum text uses the same
 // spelling, so a rename would have to touch both.
 enum GraphemeBreakClass {
     GraphemeBreak_Any,
     GraphemeBreak_CR,
     GraphemeBreak_LF,
     GraphemeBreak_Control,
     GraphemeBreak_Extend,
     GraphemeBreak_ZWJ,
     GraphemeBreak_RegionalIndicator,
     GraphemeBreak_Prepend,
     GraphemeBreak_SpacingMark,
     GraphemeBreak_L,
     GraphemeBreak_V,
     GraphemeBreak_T,
     GraphemeBreak_LV,
     GraphemeBreak_LVT,
     Graphemebreak_E_Base,
     Graphemebreak_E_Modifier,
     Graphemebreak_Glue_After_Zwj,
     Graphemebreak_E_Base_GAZ,

     GraphemeBreak_Unassigned
 };

 static QHash<QByteArray, GraphemeBreakClass> grapheme_break_map;

 static void initGraphemeBreak()
 {
     struct GraphemeBreakList {
         GraphemeBreakClass brk;
         const char *name;
     } breaks[] = {
         { GraphemeBreak_Any, "Any" },
         { GraphemeBreak_CR, "CR" },
         { GraphemeBreak_LF, "LF" },
         { GraphemeBreak_Control, "Control" },
         { GraphemeBreak_Extend, "Extend" },
         { GraphemeBreak_ZWJ, "ZWJ" },
         { GraphemeBreak_RegionalIndicator, "Regional_Indicator" },
         { GraphemeBreak_Prepend, "Prepend" },
         { GraphemeBreak_SpacingMark, "SpacingMark" },
         { GraphemeBreak_L, "L" },
         { GraphemeBreak_V, "V" },
         { GraphemeBreak_T, "T" },
         { GraphemeBreak_LV, "LV" },
         { GraphemeBreak_LVT, "LVT" },
         { Graphemebreak_E_Base, "E_Base" },
         { Graphemebreak_E_Modifier, "E_Modifier" },
         { Graphemebreak_Glue_After_Zwj, "Glue_After_Zwj" },
         { Graphemebreak_E_Base_GAZ, "E_Base_GAZ" },
         { GraphemeBreak_Unassigned, 0 }
     };
     GraphemeBreakList *d = breaks;
     while (d->name) {
         grapheme_break_map.insert(d->name, d->brk);
         ++d;
     }
 }


 // C++ source text of the WordBreakClass enum, held as a string.
 // NOTE(review): presumably written verbatim into the generated tables header;
 // the code that emits it is outside this view.
 // The text lists the same enumerators, in the same order, as the tool's own
 // enum WordBreakClass defined right after it; only the trailing value differs
 // in name (NumWordBreakClasses here, WordBreak_Unassigned there). Any change
 // must be applied to both.
 static const char *word_break_class_string =
     "enum WordBreakClass {\n"
     "    WordBreak_Any,\n"
     "    WordBreak_CR,\n"
     "    WordBreak_LF,\n"
     "    WordBreak_Newline,\n"
     "    WordBreak_Extend,\n"
     "    WordBreak_ZWJ,\n"
     "    WordBreak_Format,\n"
     "    WordBreak_RegionalIndicator,\n"
     "    WordBreak_Katakana,\n"
     "    WordBreak_HebrewLetter,\n"
     "    WordBreak_ALetter,\n"
     "    WordBreak_SingleQuote,\n"
     "    WordBreak_DoubleQuote,\n"
     "    WordBreak_MidNumLet,\n"
     "    WordBreak_MidLetter,\n"
     "    WordBreak_MidNum,\n"
     "    WordBreak_Numeric,\n"
     "    WordBreak_ExtendNumLet,\n"
     "    WordBreak_E_Base,\n"
     "    WordBreak_E_Modifier,\n"
     "    WordBreak_Glue_After_Zwj,\n"
     "    WordBreak_E_Base_GAZ,\n"
     "    WordBreak_WSegSpace,\n"
     "\n"
     "    NumWordBreakClasses\n"
     "};\n\n";

 // Tool-side counterpart of the enum text in word_break_class_string: same
 // enumerators in the same order, so the numeric values agree.
 // WordBreak_Unassigned sits in the position that the enum text names
 // NumWordBreakClasses.
 enum WordBreakClass {
     WordBreak_Any,
     WordBreak_CR,
     WordBreak_LF,
     WordBreak_Newline,
     WordBreak_Extend,
     WordBreak_ZWJ,
     WordBreak_Format,
     WordBreak_RegionalIndicator,
     WordBreak_Katakana,
     WordBreak_HebrewLetter,
     WordBreak_ALetter,
     WordBreak_SingleQuote,
     WordBreak_DoubleQuote,
     WordBreak_MidNumLet,
     WordBreak_MidLetter,
     WordBreak_MidNum,
     WordBreak_Numeric,
     WordBreak_ExtendNumLet,
     WordBreak_E_Base,
     WordBreak_E_Modifier,
     WordBreak_Glue_After_Zwj,
     WordBreak_E_Base_GAZ,
     WordBreak_WSegSpace,

     WordBreak_Unassigned
 };

 static QHash<QByteArray, WordBreakClass> word_break_map;

 static void initWordBreak()
 {
     struct WordBreakList {
         WordBreakClass brk;
         const char *name;
     } breaks[] = {
         { WordBreak_Any, "Any" },
         { WordBreak_CR, "CR" },
         { WordBreak_LF, "LF" },
         { WordBreak_Newline, "Newline" },
         { WordBreak_Extend, "Extend" },
         { WordBreak_ZWJ, "ZWJ" },
         { WordBreak_Format, "Format" },
         { WordBreak_RegionalIndicator, "Regional_Indicator" },
         { WordBreak_Katakana, "Katakana" },
         { WordBreak_HebrewLetter, "Hebrew_Letter" },
         { WordBreak_ALetter, "ALetter" },
         { WordBreak_SingleQuote, "Single_Quote" },
         { WordBreak_DoubleQuote, "Double_Quote" },
         { WordBreak_MidNumLet, "MidNumLet" },
         { WordBreak_MidLetter, "MidLetter" },
         { WordBreak_MidNum, "MidNum" },
         { WordBreak_Numeric, "Numeric" },
         { WordBreak_ExtendNumLet, "ExtendNumLet" },
         { WordBreak_E_Base, "E_Base" },
         { WordBreak_E_Modifier, "E_Modifier" },
         { WordBreak_Glue_After_Zwj, "Glue_After_Zwj" },
         { WordBreak_E_Base_GAZ, "E_Base_GAZ" },
         { WordBreak_WSegSpace, "WSegSpace" },
         { WordBreak_Unassigned, 0 }
     };
     WordBreakList *d = breaks;
     while (d->name) {
         word_break_map.insert(d->name, d->brk);
         ++d;
     }
 }


 // C++ source text of the SentenceBreakClass enum, held as a string.
 // NOTE(review): presumably written verbatim into the generated tables header;
 // the code that emits it is outside this view.
 // The text lists the same enumerators, in the same order, as the tool's own
 // enum SentenceBreakClass defined right after it; only the trailing value
 // differs in name (NumSentenceBreakClasses here, SentenceBreak_Unassigned
 // there). Any change must be applied to both.
 static const char *sentence_break_class_string =
     "enum SentenceBreakClass {\n"
     "    SentenceBreak_Any,\n"
     "    SentenceBreak_CR,\n"
     "    SentenceBreak_LF,\n"
     "    SentenceBreak_Sep,\n"
     "    SentenceBreak_Extend,\n"
     "    SentenceBreak_Sp,\n"
     "    SentenceBreak_Lower,\n"
     "    SentenceBreak_Upper,\n"
     "    SentenceBreak_OLetter,\n"
     "    SentenceBreak_Numeric,\n"
     "    SentenceBreak_ATerm,\n"
     "    SentenceBreak_SContinue,\n"
     "    SentenceBreak_STerm,\n"
     "    SentenceBreak_Close,\n"
     "\n"
     "    NumSentenceBreakClasses\n"
     "};\n\n";

 // Tool-side counterpart of the enum text in sentence_break_class_string:
 // same enumerators in the same order, so the numeric values agree.
 // SentenceBreak_Unassigned sits in the position that the enum text names
 // NumSentenceBreakClasses. There is no separate value for "Format";
 // initSentenceBreak() stores that name as SentenceBreak_Extend.
 enum SentenceBreakClass {
     SentenceBreak_Any,
     SentenceBreak_CR,
     SentenceBreak_LF,
     SentenceBreak_Sep,
     SentenceBreak_Extend,
     SentenceBreak_Sp,
     SentenceBreak_Lower,
     SentenceBreak_Upper,
     SentenceBreak_OLetter,
     SentenceBreak_Numeric,
     SentenceBreak_ATerm,
     SentenceBreak_SContinue,
     SentenceBreak_STerm,
     SentenceBreak_Close,

     SentenceBreak_Unassigned
 };

 static QHash<QByteArray, SentenceBreakClass> sentence_break_map;

 static void initSentenceBreak()
 {
     struct SentenceBreakList {
         SentenceBreakClass brk;
         const char *name;
     } breaks[] = {
         { SentenceBreak_Any, "Any" },
         { SentenceBreak_CR, "CR" },
         { SentenceBreak_LF, "LF" },
         { SentenceBreak_Sep, "Sep" },
         { SentenceBreak_Extend, "Extend" },
         { SentenceBreak_Extend, "Format" },
         { SentenceBreak_Sp, "Sp" },
         { SentenceBreak_Lower, "Lower" },
         { SentenceBreak_Upper, "Upper" },
         { SentenceBreak_OLetter, "OLetter" },
         { SentenceBreak_Numeric, "Numeric" },
         { SentenceBreak_ATerm, "ATerm" },
         { SentenceBreak_SContinue, "SContinue" },
         { SentenceBreak_STerm, "STerm" },
         { SentenceBreak_Close, "Close" },
         { SentenceBreak_Unassigned, 0 }
     };
     SentenceBreakList *d = breaks;
     while (d->name) {
         sentence_break_map.insert(d->name, d->brk);
         ++d;
     }
 }


 // C++ source text of the LineBreakClass enum (plus two leading comment
 // lines), held as a string.
 // NOTE(review): presumably written verbatim into the generated tables header;
 // the code that emits it is outside this view.
 // The text lists the same enumerators, in the same order, as the tool's own
 // enum LineBreakClass defined right after it; only the trailing value differs
 // in name (NumLineBreakClasses here, LineBreak_Unassigned there). Any change
 // must be applied to both.
 static const char *line_break_class_string =
     "// see http://www.unicode.org/reports/tr14/tr14-30.html\n"
     "// we don't use the XX and AI classes and map them to AL instead.\n"
     "enum LineBreakClass {\n"
     "    LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,\n"
     "    LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,\n"
     "    LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,\n"
     "    LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,\n"
     "    LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,\n"
     "    LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_RI, LineBreak_CB,\n"
     "    LineBreak_EB, LineBreak_EM, LineBreak_ZWJ,\n"
     "    LineBreak_SA, LineBreak_SG, LineBreak_SP,\n"
     "    LineBreak_CR, LineBreak_LF, LineBreak_BK,\n"
     "\n"
     "    NumLineBreakClasses\n"
     "};\n\n";

 // Tool-side counterpart of the enum text in line_break_class_string: same
 // enumerators in the same order, so the numeric values agree.
 // LineBreak_Unassigned sits in the position that the enum text names
 // NumLineBreakClasses. There are no values for the names NL, CJ, AI and XX;
 // initLineBreak() stores them as BK, NS, AL and AL respectively.
 enum LineBreakClass {
     LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
     LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
     LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
     LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
     LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
     LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_RI, LineBreak_CB,
     LineBreak_EB, LineBreak_EM, LineBreak_ZWJ,
     LineBreak_SA, LineBreak_SG, LineBreak_SP,
     LineBreak_CR, LineBreak_LF, LineBreak_BK,

     LineBreak_Unassigned
 };

 static QHash<QByteArray, LineBreakClass> line_break_map;

 static void initLineBreak()
 {
     // ### Classes XX and AI are left out and mapped to AL for now.
     // ### Class NL is mapped to BK.
     // ### Treating characters of class CJ as class NS will give CSS strict line breaking;
     //     treating them as class ID will give CSS normal breaking.
     struct LineBreakList {
         LineBreakClass brk;
         const char *name;
     } breaks[] = {
         { LineBreak_BK, "BK" },
         { LineBreak_CR, "CR" },
         { LineBreak_LF, "LF" },
         { LineBreak_CM, "CM" },
         { LineBreak_BK, "NL" },
         { LineBreak_SG, "SG" },
         { LineBreak_WJ, "WJ" },
         { LineBreak_ZW, "ZW" },
         { LineBreak_GL, "GL" },
         { LineBreak_SP, "SP" },
         { LineBreak_B2, "B2" },
         { LineBreak_BA, "BA" },
         { LineBreak_BB, "BB" },
         { LineBreak_HY, "HY" },
         { LineBreak_CB, "CB" },
         { LineBreak_NS, "CJ" },
         { LineBreak_CL, "CL" },
         { LineBreak_CP, "CP" },
         { LineBreak_EX, "EX" },
         { LineBreak_IN, "IN" },
         { LineBreak_NS, "NS" },
         { LineBreak_OP, "OP" },
         { LineBreak_QU, "QU" },
         { LineBreak_IS, "IS" },
         { LineBreak_NU, "NU" },
         { LineBreak_PO, "PO" },
         { LineBreak_PR, "PR" },
         { LineBreak_SY, "SY" },
         { LineBreak_AL, "AI" },
         { LineBreak_AL, "AL" },
         { LineBreak_HL, "HL" },
         { LineBreak_H2, "H2" },
         { LineBreak_H3, "H3" },
         { LineBreak_ID, "ID" },
         { LineBreak_JL, "JL" },
         { LineBreak_JV, "JV" },
         { LineBreak_JT, "JT" },
         { LineBreak_RI, "RI" },
         { LineBreak_SA, "SA" },
         { LineBreak_AL, "XX" },
         { LineBreak_EB, "EB" },
         { LineBreak_EM, "EM" },
         { LineBreak_ZWJ, "ZWJ" },
         { LineBreak_Unassigned, 0 }
     };
     LineBreakList *d = breaks;
     while (d->name) {
         line_break_map.insert(d->name, d->brk);
         ++d;
     }
 }


 static QHash<QByteArray, QChar::Script> scriptMap;

 static void initScriptMap()
 {
     struct Scrpt {
         QChar::Script script;
         const char *name;
     } scripts[] = {
         // general
         { QChar::Script_Unknown,                "Unknown" },
         { QChar::Script_Inherited,              "Inherited" },
         { QChar::Script_Common,                 "Common" },
         // pre-4.0
         { QChar::Script_Latin,                  "Latin" },
         { QChar::Script_Greek,                  "Greek" },
         { QChar::Script_Cyrillic,               "Cyrillic" },
         { QChar::Script_Armenian,               "Armenian" },
         { QChar::Script_Hebrew,                 "Hebrew" },
         { QChar::Script_Arabic,                 "Arabic" },
         { QChar::Script_Syriac,                 "Syriac" },
         { QChar::Script_Thaana,                 "Thaana" },
         { QChar::Script_Devanagari,             "Devanagari" },
         { QChar::Script_Bengali,                "Bengali" },
         { QChar::Script_Gurmukhi,               "Gurmukhi" },
         { QChar::Script_Gujarati,               "Gujarati" },
         { QChar::Script_Oriya,                  "Oriya" },
         { QChar::Script_Tamil,                  "Tamil" },
         { QChar::Script_Telugu,                 "Telugu" },
         { QChar::Script_Kannada,                "Kannada" },
         { QChar::Script_Malayalam,              "Malayalam" },
         { QChar::Script_Sinhala,                "Sinhala" },
         { QChar::Script_Thai,                   "Thai" },
         { QChar::Script_Lao,                    "Lao" },
         { QChar::Script_Tibetan,                "Tibetan" },
         { QChar::Script_Myanmar,                "Myanmar" },
         { QChar::Script_Georgian,               "Georgian" },
         { QChar::Script_Hangul,                 "Hangul" },
         { QChar::Script_Ethiopic,               "Ethiopic" },
         { QChar::Script_Cherokee,               "Cherokee" },
         { QChar::Script_CanadianAboriginal,     "CanadianAboriginal" },
         { QChar::Script_Ogham,                  "Ogham" },
         { QChar::Script_Runic,                  "Runic" },
         { QChar::Script_Khmer,                  "Khmer" },
         { QChar::Script_Mongolian,              "Mongolian" },
         { QChar::Script_Hiragana,               "Hiragana" },
         { QChar::Script_Katakana,               "Katakana" },
         { QChar::Script_Bopomofo,               "Bopomofo" },
         { QChar::Script_Han,                    "Han" },
         { QChar::Script_Yi,                     "Yi" },
         { QChar::Script_OldItalic,              "OldItalic" },
         { QChar::Script_Gothic,                 "Gothic" },
         { QChar::Script_Deseret,                "Deseret" },
         { QChar::Script_Tagalog,                "Tagalog" },
         { QChar::Script_Hanunoo,                "Hanunoo" },
         { QChar::Script_Buhid,                  "Buhid" },
         { QChar::Script_Tagbanwa,               "Tagbanwa" },
         { QChar::Script_Coptic,                 "Coptic" },
         // 4.0
         { QChar::Script_Limbu,                  "Limbu" },
         { QChar::Script_TaiLe,                  "TaiLe" },
         { QChar::Script_LinearB,                "LinearB" },
         { QChar::Script_Ugaritic,               "Ugaritic" },
         { QChar::Script_Shavian,                "Shavian" },
         { QChar::Script_Osmanya,                "Osmanya" },
         { QChar::Script_Cypriot,                "Cypriot" },
         { QChar::Script_Braille,                "Braille" },
         // 4.1
         { QChar::Script_Buginese,               "Buginese" },
         { QChar::Script_NewTaiLue,              "NewTaiLue" },
         { QChar::Script_Glagolitic,             "Glagolitic" },
         { QChar::Script_Tifinagh,               "Tifinagh" },
         { QChar::Script_SylotiNagri,            "SylotiNagri" },
         { QChar::Script_OldPersian,             "OldPersian" },
         { QChar::Script_Kharoshthi,             "Kharoshthi" },
         // 5.0
         { QChar::Script_Balinese,               "Balinese" },
         { QChar::Script_Cuneiform,              "Cuneiform" },
         { QChar::Script_Phoenician,             "Phoenician" },
         { QChar::Script_PhagsPa,                "PhagsPa" },
         { QChar::Script_Nko,                    "Nko" },
         // 5.1
         { QChar::Script_Sundanese,              "Sundanese" },
         { QChar::Script_Lepcha,                 "Lepcha" },
         { QChar::Script_OlChiki,                "OlChiki" },
         { QChar::Script_Vai,                    "Vai" },
         { QChar::Script_Saurashtra,             "Saurashtra" },
         { QChar::Script_KayahLi,                "KayahLi" },
         { QChar::Script_Rejang,                 "Rejang" },
         { QChar::Script_Lycian,                 "Lycian" },
         { QChar::Script_Carian,                 "Carian" },
         { QChar::Script_Lydian,                 "Lydian" },
         { QChar::Script_Cham,                   "Cham" },
         // 5.2
         { QChar::Script_TaiTham,                "TaiTham" },
         { QChar::Script_TaiViet,                "TaiViet" },
         { QChar::Script_Avestan,                "Avestan" },
         { QChar::Script_EgyptianHieroglyphs,    "EgyptianHieroglyphs" },
         { QChar::Script_Samaritan,              "Samaritan" },
         { QChar::Script_Lisu,                   "Lisu" },
         { QChar::Script_Bamum,                  "Bamum" },
         { QChar::Script_Javanese,               "Javanese" },
         { QChar::Script_MeeteiMayek,            "MeeteiMayek" },
         { QChar::Script_ImperialAramaic,        "ImperialAramaic" },
         { QChar::Script_OldSouthArabian,        "OldSouthArabian" },
         { QChar::Script_InscriptionalParthian,  "InscriptionalParthian" },
         { QChar::Script_InscriptionalPahlavi,   "InscriptionalPahlavi" },
         { QChar::Script_OldTurkic,              "OldTurkic" },
         { QChar::Script_Kaithi,                 "Kaithi" },
         // 6.0
         { QChar::Script_Batak,                  "Batak" },
         { QChar::Script_Brahmi,                 "Brahmi" },
         { QChar::Script_Mandaic,                "Mandaic" },
         // 6.1
         { QChar::Script_Chakma,                 "Chakma" },
         { QChar::Script_MeroiticCursive,        "MeroiticCursive" },
         { QChar::Script_MeroiticHieroglyphs,    "MeroiticHieroglyphs" },
         { QChar::Script_Miao,                   "Miao" },
         { QChar::Script_Sharada,                "Sharada" },
         { QChar::Script_SoraSompeng,            "SoraSompeng" },
         { QChar::Script_Takri,                  "Takri" },
         // 7.0
         { QChar::Script_CaucasianAlbanian,      "CaucasianAlbanian" },
         { QChar::Script_BassaVah,               "BassaVah" },
         { QChar::Script_Duployan,               "Duployan" },
         { QChar::Script_Elbasan,                "Elbasan" },
         { QChar::Script_Grantha,                "Grantha" },
         { QChar::Script_PahawhHmong,            "PahawhHmong" },
         { QChar::Script_Khojki,                 "Khojki" },
         { QChar::Script_LinearA,                "LinearA" },
         { QChar::Script_Mahajani,               "Mahajani" },
         { QChar::Script_Manichaean,             "Manichaean" },
         { QChar::Script_MendeKikakui,           "MendeKikakui" },
         { QChar::Script_Modi,                   "Modi" },
         { QChar::Script_Mro,                    "Mro" },
         { QChar::Script_OldNorthArabian,        "OldNorthArabian" },
         { QChar::Script_Nabataean,              "Nabataean" },
         { QChar::Script_Palmyrene,              "Palmyrene" },
         { QChar::Script_PauCinHau,              "PauCinHau" },
         { QChar::Script_OldPermic,              "OldPermic" },
         { QChar::Script_PsalterPahlavi,         "PsalterPahlavi" },
         { QChar::Script_Siddham,                "Siddham" },
         { QChar::Script_Khudawadi,              "Khudawadi" },
         { QChar::Script_Tirhuta,                "Tirhuta" },
         { QChar::Script_WarangCiti,             "WarangCiti" },
         // 8.0
         { QChar::Script_Ahom,                   "Ahom" },
         { QChar::Script_AnatolianHieroglyphs,   "AnatolianHieroglyphs" },
         { QChar::Script_Hatran,                 "Hatran" },
         { QChar::Script_Multani,                "Multani" },
         { QChar::Script_OldHungarian,           "OldHungarian" },
         { QChar::Script_SignWriting,            "SignWriting" },
         // 9.0
         { QChar::Script_Adlam,                  "Adlam" },
         { QChar::Script_Bhaiksuki,              "Bhaiksuki" },
         { QChar::Script_Marchen,                "Marchen" },
         { QChar::Script_Newa,                   "Newa" },
         { QChar::Script_Osage,                  "Osage" },
         { QChar::Script_Tangut,                 "Tangut" },
         // 10.0
         { QChar::Script_MasaramGondi,           "MasaramGondi" },
         { QChar::Script_Nushu,                  "Nushu" },
         { QChar::Script_Soyombo,                "Soyombo" },
         { QChar::Script_ZanabazarSquare,        "ZanabazarSquare" },
         // 12.1
         { QChar::Script_Dogra,                  "Dogra" },
         { QChar::Script_GunjalaGondi,           "GunjalaGondi" },
         { QChar::Script_HanifiRohingya,         "HanifiRohingya" },
         { QChar::Script_Makasar,                "Makasar" },
         { QChar::Script_Medefaidrin,            "Medefaidrin" },
         { QChar::Script_OldSogdian,             "OldSogdian" },
         { QChar::Script_Sogdian,                "Sogdian" },
         { QChar::Script_Elymaic,                "Elymaic" },
         { QChar::Script_Nandinagari,            "Nandinagari" },
         { QChar::Script_NyiakengPuachueHmong,   "NyiakengPuachueHmong" },
         { QChar::Script_Wancho,                 "Wancho" },
         // 13.0
         { QChar::Script_Chorasmian,             "Chorasmian" },
         { QChar::Script_DivesAkuru,             "DivesAkuru" },
         { QChar::Script_KhitanSmallScript,      "KhitanSmallScript" },
         { QChar::Script_Yezidi,                 "Yezidi" },

         // unhandled
         { QChar::Script_Unknown,                0 }
     };
     Scrpt *p = scripts;
     while (p->name) {
         scriptMap.insert(p->name, p->script);
         ++p;
     }
 }

 // Keep this one in sync with the code in createPropertyInfo
 //
 // C++ source text, held as a string, that declares enum Case, struct
 // Properties and the two properties() lookup overloads (one taking a uint
 // ucs4, one taking a ushort ucs2).
 // NOTE(review): presumably written verbatim into the generated tables header;
 // the code that emits it is outside this view.
 //
 // The bit-field widths declared in struct Properties add up to 160 bits:
 // 64 before cases[], NumCases (4) * 16 inside cases[], and 32 after it. That
 // is the 20 bytes recorded in SizeOfPropertiesStruct, so a change to any
 // width here needs a matching change to that constant.
 // The zero-width bit-fields under Q_OS_WASM are labelled a packing trick in
 // the text itself; a zero-width bit-field makes the next bit-field start in a
 // new allocation unit.
 static const char *property_string =
     "enum Case {\n"
     "    LowerCase,\n"
     "    UpperCase,\n"
     "    TitleCase,\n"
     "    CaseFold,\n"
     "\n"
     "    NumCases\n"
     "};\n"
     "\n"
     "struct Properties {\n"
     "    ushort category            : 8; /* 5 used */\n"
     "    ushort direction           : 8; /* 5 used */\n"
     "    ushort combiningClass      : 8;\n"
     "    ushort joining             : 3;\n"
     "    signed short digitValue    : 5;\n"
     "    signed short mirrorDiff    : 16;\n"
     "    ushort unicodeVersion      : 8; /* 5 used */\n"
     "    ushort nfQuickCheck        : 8;\n" // could be narrowed
     "#ifdef Q_OS_WASM\n"
     "    unsigned char              : 0; //wasm 64 packing trick\n"
     "#endif\n"
     "    struct {\n"
     "        ushort special    : 1;\n"
     "        signed short diff : 15;\n"
     "    } cases[NumCases];\n"
     "#ifdef Q_OS_WASM\n"
     "    unsigned char              : 0; //wasm 64 packing trick\n"
     "#endif\n"
     "    ushort graphemeBreakClass  : 5; /* 5 used */\n"
     "    ushort wordBreakClass      : 5; /* 5 used */\n"
     "    ushort lineBreakClass      : 6; /* 6 used */\n"
     "    ushort sentenceBreakClass  : 8; /* 4 used */\n"
     "    ushort script              : 8;\n"
     "};\n\n"
     "Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) noexcept;\n"
     "Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2) noexcept;\n"
     "\n";

 // C++ source text, held as a string, declaring the break-class lookups. For
 // each of GraphemeBreakClass, WordBreakClass, SentenceBreakClass and
 // LineBreakClass it contains an exported function taking a uint ucs4 and an
 // inline QChar overload that forwards ch.unicode() to it.
 // NOTE(review): presumably written verbatim into the generated tables header;
 // the code that emits it is outside this view.
 static const char *methods =
     "Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(uint ucs4) noexcept;\n"
     "inline GraphemeBreakClass graphemeBreakClass(QChar ch) noexcept\n"
     "{ return graphemeBreakClass(ch.unicode()); }\n"
     "\n"
     "Q_CORE_EXPORT WordBreakClass QT_FASTCALL wordBreakClass(uint ucs4) noexcept;\n"
     "inline WordBreakClass wordBreakClass(QChar ch) noexcept\n"
     "{ return wordBreakClass(ch.unicode()); }\n"
     "\n"
     "Q_CORE_EXPORT SentenceBreakClass QT_FASTCALL sentenceBreakClass(uint ucs4) noexcept;\n"
     "inline SentenceBreakClass sentenceBreakClass(QChar ch) noexcept\n"
     "{ return sentenceBreakClass(ch.unicode()); }\n"
     "\n"
     "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4) noexcept;\n"
     "inline LineBreakClass lineBreakClass(QChar ch) noexcept\n"
     "{ return lineBreakClass(ch.unicode()); }\n"
     "\n";

 // Expected sizeof(Properties), in bytes, for the struct declared in
 // property_string.
 static const int SizeOfPropertiesStruct = 20;

 // Source text of a compile-time check that pins sizeof(Properties) to the
 // value above.
 static const QByteArray sizeOfPropertiesStructCheck =
         "Q_STATIC_ASSERT(sizeof(Properties) == " + QByteArray::number(SizeOfPropertiesStruct) + ");\n\n";

 // Per-code-point property record assembled from the Unicode data files.
 // Two records are equal only when every field below matches.
 struct PropertyFlags {
     bool operator==(const PropertyFlags &o) const {
         // general character properties
         if (combiningClass != o.combiningClass
             || category != o.category
             || direction != o.direction
             || joining != o.joining
             || age != o.age
             || digitValue != o.digitValue
             || mirrorDiff != o.mirrorDiff)
             return false;

         // case mapping offsets and their "special" markers
         if (lowerCaseDiff != o.lowerCaseDiff
             || upperCaseDiff != o.upperCaseDiff
             || titleCaseDiff != o.titleCaseDiff
             || caseFoldDiff != o.caseFoldDiff
             || lowerCaseSpecial != o.lowerCaseSpecial
             || upperCaseSpecial != o.upperCaseSpecial
             || titleCaseSpecial != o.titleCaseSpecial
             || caseFoldSpecial != o.caseFoldSpecial)
             return false;

         // text segmentation classes, script and normalization quick-check
         if (graphemeBreakClass != o.graphemeBreakClass
             || wordBreakClass != o.wordBreakClass
             || sentenceBreakClass != o.sentenceBreakClass
             || lineBreakClass != o.lineBreakClass
             || script != o.script
             || nfQuickCheck != o.nfQuickCheck)
             return false;

         return true;
     }

     // UnicodeData.txt
     uchar combiningClass : 8;
     QChar::Category category : 5;
     QChar::Direction direction : 5;
     // ArabicShaping.txt
     QChar::JoiningType joining : 3;
     // DerivedAge.txt
     QChar::UnicodeVersion age : 5;
     int digitValue;

     int mirrorDiff : 16;

     // case mapping: either a code point offset or, when the matching
     // *Special flag is set, a position in specialCaseMap
     int lowerCaseDiff;
     int upperCaseDiff;
     int titleCaseDiff;
     int caseFoldDiff;
     bool lowerCaseSpecial;
     bool upperCaseSpecial;
     bool titleCaseSpecial;
     bool caseFoldSpecial;
     GraphemeBreakClass graphemeBreakClass;
     WordBreakClass wordBreakClass;
     SentenceBreakClass sentenceBreakClass;
     LineBreakClass lineBreakClass;
     int script;
     // DerivedNormalizationProps.txt
     uchar nfQuickCheck;
 };


 static QList<int> specialCaseMap;

 static int appendToSpecialCaseMap(const QList<int> &map)
 {
     QList<int> utf16map;
     for (int i = 0; i < map.size(); ++i) {
         uint codepoint = map.at(i);
         // if the condition below doesn't hold anymore we need to modify our special case mapping code
         Q_ASSERT(!QChar::requiresSurrogates(codepoint));
         if (QChar::requiresSurrogates(codepoint)) {
             utf16map << QChar::highSurrogate(codepoint);
             utf16map << QChar::lowSurrogate(codepoint);
         } else {
             utf16map << codepoint;
         }
     }
     int length = utf16map.size();
     utf16map.prepend(length);

     if (specialCaseMap.isEmpty())
         specialCaseMap << 0; // placeholder

     int i = 1;
     while (i < specialCaseMap.size()) {
         int n = specialCaseMap.at(i);
         if (n == length) {
             int j;
             for (j = 1; j <= n; ++j) {
                 if (specialCaseMap.at(i+j) != utf16map.at(j))
                     break;
             }
             if (j > n)
                 return i;
         }
         i += n + 1;
     }

     int pos = specialCaseMap.size();
     specialCaseMap << utf16map;
     return pos;
 }

 // DerivedCoreProperties.txt
 // Returns true when ucs4 falls in one of the hard-coded
 // Default_Ignorable_Code_Point ranges listed below.
 static inline bool isDefaultIgnorable(uint ucs4)
 {
     // Default_Ignorable_Code_Point:
     //  Generated from
     //    Other_Default_Ignorable_Code_Point + Cf + Variation_Selector
     //    - White_Space - FFF9..FFFB (Annotation Characters)
     //    - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible)
     struct Span {
         uint first;
         uint last;
     };
     // inclusive ranges, in ascending order
     static const Span ignorable[] = {
         { 0x00ad, 0x00ad },
         { 0x034f, 0x034f },
         { 0x061c, 0x061c },
         { 0x115f, 0x1160 },
         { 0x17b4, 0x17b5 },
         { 0x180b, 0x180d },
         { 0x180e, 0x180e },
         { 0x200b, 0x200f },
         { 0x202a, 0x202e },
         { 0x2060, 0x206f },
         { 0x3164, 0x3164 },
         { 0xfe00, 0xfe0f },
         { 0xfeff, 0xfeff },
         { 0xffa0, 0xffa0 },
         { 0xfff0, 0xfff8 },
         { 0x1bca0, 0x1bca3 },
         { 0x1d173, 0x1d17a },
         { 0xe0000, 0xe0fff }
     };

     const int spanCount = int(sizeof(ignorable) / sizeof(ignorable[0]));
     for (int i = 0; i < spanCount; ++i) {
         if (ucs4 >= ignorable[i].first && ucs4 <= ignorable[i].last)
             return true;
     }
     return false;
 }

 struct UnicodeData {
     UnicodeData(int codepoint = 0) {
         p.category = QChar::Other_NotAssigned; // Cn
         p.combiningClass = 0;

         p.direction = QChar::DirL;
         // DerivedBidiClass.txt
         // The unassigned code points that default to AL are in the ranges:
         //     [U+0600..U+07BF, U+08A0..U+08FF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFF, U+1EE00..U+1EEFF]
         if ((codepoint >= 0x0600 && codepoint <= 0x07BF)
             || (codepoint >= 0x08A0 && codepoint <= 0x08FF)
             || (codepoint >= 0xFB50 && codepoint <= 0xFDCF)
             || (codepoint >= 0xFDF0 && codepoint <= 0xFDFF)
             || (codepoint >= 0xFE70 && codepoint <= 0xFEFF)
             || (codepoint >= 0x1EE00 && codepoint <= 0x1EEFF)) {
             p.direction = QChar::DirAL;
         }
         // The unassigned code points that default to R are in the ranges:
         //     [U+0590..U+05FF, U+07C0..U+089F, U+FB1D..U+FB4F, U+10800..U+10FFF, U+1E800..U+1EDFF, U+1EF00..U+1EFFF]
         else if ((codepoint >= 0x0590 && codepoint <= 0x05FF)
             || (codepoint >= 0x07C0 && codepoint <= 0x089F)
             || (codepoint >= 0xFB1D && codepoint <= 0xFB4F)
             || (codepoint >= 0x10800 && codepoint <= 0x10FFF)
             || (codepoint >= 0x1E800 && codepoint <= 0x1EDFF)
             || (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
             p.direction = QChar::DirR;
         }
         // The unassigned code points that default to ET are in the range:
         //     [U+20A0..U+20CF]
         else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
             p.direction = QChar::DirET;
         }
         // The unassigned code points that default to BN have one of the following properties:
         //     Default_Ignorable_Code_Point
         //     Noncharacter_Code_Point
         else if (QChar::isNonCharacter(codepoint) || isDefaultIgnorable(codepoint)) {
             p.direction = QChar::DirBN;
         }

         p.lineBreakClass = LineBreak_AL; // XX -> AL
         // LineBreak.txt
         // The unassigned code points that default to "ID" include ranges in the following blocks:
         //     [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2B820..U+2CEAF, U+2F800..U+2FA1F]
         // and any other reserved code points on
         //     [U+20000..U+2FFFD, U+30000..U+3FFFD]
         if ((codepoint >= 0x3400 && codepoint <= 0x4DBF)
             || (codepoint >= 0x4E00 && codepoint <= 0x9FFF)
             || (codepoint >= 0xF900 && codepoint <= 0xFAFF)
             || (codepoint >= 0x20000 && codepoint <= 0x2A6DF)
             || (codepoint >= 0x2A700 && codepoint <= 0x2B73F)
             || (codepoint >= 0x2B740 && codepoint <= 0x2B81F)
             || (codepoint >= 0x2B820 && codepoint <= 0x2CEAF)
             || (codepoint >= 0x2F800 && codepoint <= 0x2FA1F)
             || (codepoint >= 0x20000 && codepoint <= 0x2FFFD)
             || (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
             p.lineBreakClass = LineBreak_ID;
         }
         // The unassigned code points that default to "PR" comprise a range in the following block:
         //     [U+20A0..U+20CF]
         else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
             p.lineBreakClass = LineBreak_PR;
         }

         mirroredChar = 0;
         decompositionType = QChar::NoDecomposition;
         p.joining = QChar::Joining_None;
         p.age = QChar::Unicode_Unassigned;
         p.mirrorDiff = 0;
         p.digitValue = -1;
         p.lowerCaseDiff = 0;
         p.upperCaseDiff = 0;
         p.titleCaseDiff = 0;
         p.caseFoldDiff = 0;
         p.lowerCaseSpecial = 0;
         p.upperCaseSpecial = 0;
         p.titleCaseSpecial = 0;
         p.caseFoldSpecial = 0;
         p.graphemeBreakClass = GraphemeBreak_Any;
         p.wordBreakClass = WordBreak_Any;
         p.sentenceBreakClass = SentenceBreak_Any;
         p.script = QChar::Script_Unknown;
         p.nfQuickCheck = 0;
         propertyIndex = -1;
         excludedComposition = false;
     }

     static UnicodeData &valueRef(int codepoint);

     PropertyFlags p;

     // from UnicodeData.txt
     QChar::Decomposition decompositionType;
     QList<int> decomposition;

     QList<int> specialFolding;

     // from BidiMirroring.txt
     int mirroredChar;

     // DerivedNormalizationProps.txt
     bool excludedComposition;

     // computed position of unicode property set
     int propertyIndex;
 };

 // One record per code point, indexed by code point value. Filled lazily by
 // UnicodeData::valueRef().
 static QList<UnicodeData> unicodeData;

 /*
     Returns a mutable reference to the record for \a codepoint.

     The first call populates unicodeData with a default-constructed record for
     every code point from 0 to QChar::LastValidCodePoint. \a codepoint must lie
     in that range.
 */
 UnicodeData &UnicodeData::valueRef(int codepoint)
 {
     static bool initialized = false;
     if (!initialized) {
         unicodeData.reserve(QChar::LastValidCodePoint + 1);
         for (int uc = 0; uc <= QChar::LastValidCodePoint; ++uc)
             unicodeData.append(UnicodeData(uc));
         initialized = true;
     }

     // Guard both ends: a negative value would index before the table start.
     Q_ASSERT(codepoint >= 0 && codepoint <= QChar::LastValidCodePoint);
     return unicodeData[codepoint];
 }


 // Statistics gathered while reading the data files.
 // decompositionLength: decomposition size -> number of UnicodeData.txt
 // entries having a decomposition of that size.
 static QHash<int, int> decompositionLength;
 // highest code point that has a decomposition in UnicodeData.txt
 static int highestComposedCharacter = 0;
 // number of recorded compositions and the highest first component seen
 static int numLigatures = 0;
 static int highestLigature = 0;

 // A canonical two-part composition: u1 followed by u2 composes to ligature.
 struct Ligature {
     int u1;
     int u2;
     int ligature;
 };
 // we need them sorted by the first component for fast lookup
 bool operator < (const Ligature &l1, const Ligature &l2)
 { return l1.u1 < l2.u1; }

 // second component (u2) -> all compositions ending in that component
 static QHash<int, QList<Ligature> > ligatureHashes;

 // combining class -> number of UnicodeData.txt entries using it
 static QHash<int, int> combiningClassUsage;

 // largest absolute simple case-mapping offsets seen in UnicodeData.txt
 static int maxLowerCaseDiff = 0;
 static int maxUpperCaseDiff = 0;
 static int maxTitleCaseDiff = 0;

 static void readUnicodeData()
 {
     qDebug("Reading UnicodeData.txt");

     enum UniDataFields {
         UD_Value,
         UD_Name,
         UD_Category,
         UD_CombiningClass,
         UD_BidiCategory,
         UD_Decomposition,
         UD_DecimalDigitValue,
         UD_DigitValue,
         UD_NumericValue,
         UD_Mirrored,
         UD_OldName,
         UD_Comment,
         UD_UpperCase,
         UD_LowerCase,
         UD_TitleCase
     };

     QFile f("data/UnicodeData.txt");
     if (!f.exists())
         qFatal("Couldn't find UnicodeData.txt");

     f.open(QFile::ReadOnly);

     while (!f.atEnd()) {
         QByteArray line;
         line.resize(1024);
         int len = f.readLine(line.data(), 1024);
         line.truncate(len-1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         if (line.isEmpty())
             continue;

         QList<QByteArray> properties = line.split(';');
         bool ok;
         int codepoint = properties[UD_Value].toInt(&ok, 16);
         Q_ASSERT(ok);
         Q_ASSERT(codepoint <= QChar::LastValidCodePoint);
         int lastCodepoint = codepoint;

         QByteArray name = properties[UD_Name];
         if (name.startsWith('<') && name.contains("First")) {
             QByteArray nextLine;
             nextLine.resize(1024);
             f.readLine(nextLine.data(), 1024);
             QList<QByteArray> properties = nextLine.split(';');
             Q_ASSERT(properties[UD_Name].startsWith('<') && properties[UD_Name].contains("Last"));
             lastCodepoint = properties[UD_Value].toInt(&ok, 16);
             Q_ASSERT(ok);
             Q_ASSERT(lastCodepoint <= QChar::LastValidCodePoint);
         }

         UnicodeData &data = UnicodeData::valueRef(codepoint);
         data.p.category = categoryMap.value(properties[UD_Category], QChar::Other_NotAssigned);
         data.p.combiningClass = properties[UD_CombiningClass].toInt();
         if (!combiningClassUsage.contains(data.p.combiningClass))
             combiningClassUsage[data.p.combiningClass] = 1;
         else
             ++combiningClassUsage[data.p.combiningClass];

         Direction dir = directionMap.value(properties[UD_BidiCategory], Dir_Unassigned);
         if (dir == Dir_Unassigned)
             qFatal("unhandled direction value: %s", properties[UD_BidiCategory].constData());
         data.p.direction = QChar::Direction(dir);

         if (!properties[UD_UpperCase].isEmpty()) {
             int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
             Q_ASSERT(ok);
             int diff = upperCase - codepoint;
             // if the conditions below doesn't hold anymore we need to modify our upper casing code
             Q_ASSERT(QChar::requiresSurrogates(codepoint) == QChar::requiresSurrogates(upperCase));
             if (QChar::requiresSurrogates(codepoint)) {
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
                 Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase));
             }
             if (qAbs(diff) >= (1<<13)) {
                 qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << "); map it for special case";
                 data.p.upperCaseSpecial = true;
                 data.p.upperCaseDiff = appendToSpecialCaseMap(QList<int>() << upperCase);
             } else {
                 data.p.upperCaseDiff = diff;
                 maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
             }
         }
         if (!properties[UD_LowerCase].isEmpty()) {
             int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
             Q_ASSERT(ok);
             int diff = lowerCase - codepoint;
             // if the conditions below doesn't hold anymore we need to modify our lower casing code
             Q_ASSERT(QChar::requiresSurrogates(codepoint) == QChar::requiresSurrogates(lowerCase));
             if (QChar::requiresSurrogates(codepoint)) {
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase));
                 Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase));
             }
             if (qAbs(diff) >= (1<<13)) {
                 qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << "); map it for special case";
                 data.p.lowerCaseSpecial = true;
                 data.p.lowerCaseDiff = appendToSpecialCaseMap(QList<int>() << lowerCase);
             } else {
                 data.p.lowerCaseDiff = diff;
                 maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
             }
         }
         // we want toTitleCase to map to ToUpper in case we don't have any titlecase.
         if (properties[UD_TitleCase].isEmpty())
             properties[UD_TitleCase] = properties[UD_UpperCase];
         if (!properties[UD_TitleCase].isEmpty()) {
             int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
             Q_ASSERT(ok);
             int diff = titleCase - codepoint;
             // if the conditions below doesn't hold anymore we need to modify our title casing code
             Q_ASSERT(QChar::requiresSurrogates(codepoint) == QChar::requiresSurrogates(titleCase));
             if (QChar::requiresSurrogates(codepoint)) {
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
                 Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase));
             }
             if (qAbs(diff) >= (1<<13)) {
                 qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << "); map it for special case";
                 data.p.titleCaseSpecial = true;
                 data.p.titleCaseDiff = appendToSpecialCaseMap(QList<int>() << titleCase);
             } else {
                 data.p.titleCaseDiff = diff;
                 maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
             }
         }

         if (!properties[UD_DigitValue].isEmpty())
             data.p.digitValue = properties[UD_DigitValue].toInt();

         // decompositition
         QByteArray decomposition = properties[UD_Decomposition];
         if (!decomposition.isEmpty()) {
             highestComposedCharacter = qMax(highestComposedCharacter, codepoint);
             QList<QByteArray> d = decomposition.split(' ');
             if (d[0].contains('<')) {
                 data.decompositionType = decompositionMap.value(d[0], QChar::NoDecomposition);
                 if (data.decompositionType == QChar::NoDecomposition)
                     qFatal("unhandled decomposition type: %s", d[0].constData());
                 d.takeFirst();
             } else {
                 data.decompositionType = QChar::Canonical;
             }
             for (int i = 0; i < d.size(); ++i) {
                 data.decomposition.append(d[i].toInt(&ok, 16));
                 Q_ASSERT(ok);
             }
             ++decompositionLength[data.decomposition.size()];
         }

         for (int i = codepoint; i <= lastCodepoint; ++i)
             unicodeData[i] = data;
     }
 }

 // largest absolute mirror offset stored while reading BidiMirroring.txt
 static int maxMirroredDiff = 0;

 /*
     Parses data/BidiMirroring.txt ("code; mirrored code" pairs) and stores the
     mirrored character and its offset from the code point in each record.
     Aborts via qFatal() if the file is missing or cannot be opened.
 */
 static void readBidiMirroring()
 {
     qDebug("Reading BidiMirroring.txt");

     QFile f("data/BidiMirroring.txt");
     if (!f.exists())
         qFatal("Couldn't find BidiMirroring.txt");
     // an unopened file reports atEnd() at once and would be skipped silently
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open BidiMirroring.txt");

     while (!f.atEnd()) {
         // read the whole line whatever its length; drop only the terminator
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         // strip blanks before the emptiness test so that a line holding only
         // spaces is skipped rather than parsed
         line.replace(" ", "");

         if (line.isEmpty())
             continue;

         QList<QByteArray> pair = line.split(';');
         Q_ASSERT(pair.size() == 2);

         bool ok;
         int codepoint = pair[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int mirror = pair[1].toInt(&ok, 16);
         Q_ASSERT(ok);

         UnicodeData &d = UnicodeData::valueRef(codepoint);
         d.mirroredChar = mirror;
         d.p.mirrorDiff = d.mirroredChar - codepoint;
         maxMirroredDiff = qMax(maxMirroredDiff, qAbs(d.p.mirrorDiff));
     }
 }

 /*
     Assigns the joining type of every code point.

     Defaults are applied first (see below), then data/ArabicShaping.txt is
     parsed: each entry has four ';'-separated fields, of which field 0 is the
     code point and field 2 the joining type. Aborts via qFatal() if the file is
     missing, cannot be opened, or uses an unknown joining type.
 */
 static void readArabicShaping()
 {
     qDebug("Reading ArabicShaping.txt");

     // Initialize defaults:
     // Code points that are not explicitly listed in ArabicShaping.txt are either of joining type T or U:
     // - Those that not explicitly listed that are of General Category Mn, Me, or Cf have joining type T.
     // - All others not explicitly listed have joining type U.
     for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
         UnicodeData &d = UnicodeData::valueRef(codepoint);
         if (d.p.joining == QChar::Joining_None) {
             if (d.p.category == QChar::Mark_NonSpacing || d.p.category == QChar::Mark_Enclosing || d.p.category == QChar::Other_Format)
                 d.p.joining = QChar::Joining_Transparent;
         }
     }

     QFile f("data/ArabicShaping.txt");
     if (!f.exists())
         qFatal("Couldn't find ArabicShaping.txt");
     // an unopened file reports atEnd() at once and would be skipped silently
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open ArabicShaping.txt");

     while (!f.atEnd()) {
         // read the whole line whatever its length; drop only the terminator
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         line = line.trimmed();

         if (line.isEmpty())
             continue;

         QList<QByteArray> l = line.split(';');
         Q_ASSERT(l.size() == 4);

         bool ok;
         int codepoint = l[0].toInt(&ok, 16);
         Q_ASSERT(ok);

         UnicodeData &d = UnicodeData::valueRef(codepoint);
         JoiningType joining = joining_map.value(l[2].trimmed(), Joining_Unassigned);
         switch (joining) {
         case Joining_Unassigned:
             qFatal("%x: unassigned or unhandled joining type: %s", codepoint, l[2].constData());
             break;
         case Joining_Transparent:
             // an explicit T is only expected for these general categories
             switch (d.p.category) {
             case QChar::Mark_Enclosing:
             case QChar::Mark_NonSpacing:
             case QChar::Letter_Modifier:
             case QChar::Other_Format:
                 break;
             default:
                 qFatal("%x: joining type '%s' was met (category: %d); "
                        "the current implementation needs to be revised!",
                        codepoint, l[2].constData(), d.p.category);
             }
             Q_FALLTHROUGH();
         default:
             d.p.joining = QChar::JoiningType(joining);
             break;
         }
     }
 }

 /*
     Parses data/DerivedAge.txt ("code[..code]; version" entries) and records
     the Unicode version of every listed code point. Aborts via qFatal() if the
     file is missing, cannot be opened, or names a version absent from age_map.
 */
 static void readDerivedAge()
 {
     qDebug("Reading DerivedAge.txt");

     QFile f("data/DerivedAge.txt");
     if (!f.exists())
         qFatal("Couldn't find DerivedAge.txt");
     // an unopened file reports atEnd() at once and would be skipped silently
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open DerivedAge.txt");

     while (!f.atEnd()) {
         // read the whole line whatever its length; drop only the terminator
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         line.replace(" ", "");

         if (line.isEmpty())
             continue;

         QList<QByteArray> l = line.split(';');
         Q_ASSERT(l.size() == 2);

         // "XXXX..YYYY" becomes "XXXX.YYYY" so one split yields both ends
         QByteArray codes = l[0];
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');

         bool ok;
         int from = cl[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int to = from;
         if (cl.size() == 2) {
             to = cl[1].toInt(&ok, 16);
             Q_ASSERT(ok);
         }

         QChar::UnicodeVersion age = age_map.value(l[1].trimmed(), QChar::Unicode_Unassigned);
         //qDebug() << hex << from << ".." << to << ba << age;
         if (age == QChar::Unicode_Unassigned)
             qFatal("unassigned or unhandled age value: %s", l[1].constData());

         for (int codepoint = from; codepoint <= to; ++codepoint) {
             UnicodeData &d = UnicodeData::valueRef(codepoint);
             d.p.age = age;
         }
     }
 }

 /*
     Parses data/DerivedNormalizationProps.txt.

     Only the Full_Composition_Exclusion and NF*_QC properties are used: the
     former marks records as excluded from composition, the latter are packed
     two bits per normalization form into nfQuickCheck. Afterwards every
     non-excluded canonical two-part decomposition is registered in
     ligatureHashes. Aborts via qFatal() if the file is missing or cannot be
     opened.
 */
 static void readDerivedNormalizationProps()
 {
     qDebug("Reading DerivedNormalizationProps.txt");

     QFile f("data/DerivedNormalizationProps.txt");
     if (!f.exists())
         qFatal("Couldn't find DerivedNormalizationProps.txt");
     // an unopened file reports atEnd() at once and would be skipped silently
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open DerivedNormalizationProps.txt");

     while (!f.atEnd()) {
         // read the whole line whatever its length; drop only the terminator
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);

         if (line.trimmed().isEmpty())
             continue;

         QList<QByteArray> l = line.split(';');
         Q_ASSERT(l.size() >= 2);

         QByteArray propName = l[1].trimmed();
         if (propName != "Full_Composition_Exclusion" &&
             propName != "NFD_QC" && propName != "NFC_QC" &&
             propName != "NFKD_QC" && propName != "NFKC_QC") {
             // ###
             continue;
         }

         // "XXXX..YYYY" becomes "XXXX.YYYY" so one split yields both ends
         QByteArray codes = l[0].trimmed();
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');

         bool ok;
         int from = cl[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int to = from;
         if (cl.size() == 2) {
             to = cl[1].toInt(&ok, 16);
             Q_ASSERT(ok);
         }

         for (int codepoint = from; codepoint <= to; ++codepoint) {
             UnicodeData &d = UnicodeData::valueRef(codepoint);
             if (propName == "Full_Composition_Exclusion") {
                 d.excludedComposition = true;
             } else {
                 // the shift below relies on these enumerator values
                 Q_STATIC_ASSERT(QString::NormalizationForm_D == 0);
                 Q_STATIC_ASSERT(QString::NormalizationForm_C == 1);
                 Q_STATIC_ASSERT(QString::NormalizationForm_KD == 2);
                 Q_STATIC_ASSERT(QString::NormalizationForm_KC == 3);

                 QString::NormalizationForm form;
                 if (propName == "NFD_QC")
                     form = QString::NormalizationForm_D;
                 else if (propName == "NFC_QC")
                     form = QString::NormalizationForm_C;
                 else if (propName == "NFKD_QC")
                     form = QString::NormalizationForm_KD;
                 else// if (propName == "NFKC_QC")
                     form = QString::NormalizationForm_KC;

                 Q_ASSERT(l.size() == 3);
                 l[2] = l[2].trimmed();

                 enum { NFQC_YES = 0, NFQC_NO = 1, NFQC_MAYBE = 3 };
                 uchar ynm = (l[2] == "N" ? NFQC_NO : l[2] == "M" ? NFQC_MAYBE : NFQC_YES);
                 if (ynm == NFQC_MAYBE) {
                     // if this changes, we need to revise the normalizationQuickCheckHelper() implementation
                     Q_ASSERT(form == QString::NormalizationForm_C || form == QString::NormalizationForm_KC);
                 }
                 d.p.nfQuickCheck |= (ynm << (form << 1)); // 2 bits per NF
             }
         }
     }

     // collect the compositions: canonical, two parts, not excluded
     for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
         UnicodeData &d = UnicodeData::valueRef(codepoint);
         if (!d.excludedComposition
             && d.decompositionType == QChar::Canonical
             && d.decomposition.size() > 1) {
             Q_ASSERT(d.decomposition.size() == 2);

             int part1 = d.decomposition.at(0);
             int part2 = d.decomposition.at(1);

             // all non-starters are listed in DerivedNormalizationProps.txt
             // and already excluded from composition
             Q_ASSERT(UnicodeData::valueRef(part1).p.combiningClass == 0);

             ++numLigatures;
             highestLigature = qMax(highestLigature, part1);
             Ligature l = { part1, part2, codepoint };
             ligatureHashes[part2].append(l);
         }
     }
 }


 // One entry parsed from NormalizationCorrections.txt.
 struct NormalizationCorrection {
     uint codepoint; // affected code point (field 0 of the entry)
     uint mapped;    // field 1 of the entry; emitted as "old_mapping"
     int version;    // QChar::UnicodeVersion value derived from field 3
 };

 /*
     Parses data/NormalizationCorrections.txt and returns C++ source text that
     declares the NormalizationCorrection struct, the
     uc_normalization_corrections table with one row per entry, and two enums
     holding the entry count and the highest version seen.

     Aborts via qFatal() if the file is missing, cannot be opened, or carries a
     version other than 3.2.0 or 4.0.0.
 */
 static QByteArray createNormalizationCorrections()
 {
     qDebug("Reading NormalizationCorrections.txt");

     QFile f("data/NormalizationCorrections.txt");
     if (!f.exists())
         qFatal("Couldn't find NormalizationCorrections.txt");
     // an unopened file reports atEnd() at once and would yield an empty table
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open NormalizationCorrections.txt");

     QByteArray out;

     out += "struct NormalizationCorrection {\n"
            "    uint ucs4;\n"
            "    uint old_mapping;\n"
            "    int version;\n"
            "};\n\n"

            "static const NormalizationCorrection uc_normalization_corrections[] = {\n";

     int maxVersion = 0;
     int numCorrections = 0;
     while (!f.atEnd()) {
         // read the whole line whatever its length; drop only the terminator
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         line.replace(" ", "");

         if (line.isEmpty())
             continue;

         Q_ASSERT(!line.contains(".."));

         QList<QByteArray> fields = line.split(';');
         Q_ASSERT(fields.size() == 4);

         NormalizationCorrection c = { 0, 0, 0 };
         bool ok;
         c.codepoint = fields.at(0).toUInt(&ok, 16);
         Q_ASSERT(ok);
         c.mapped = fields.at(1).toUInt(&ok, 16);
         Q_ASSERT(ok);
         if (fields.at(3) == "3.2.0")
             c.version = QChar::Unicode_3_2;
         else if (fields.at(3) == "4.0.0")
             c.version = QChar::Unicode_4_0;
         else
             qFatal("unknown unicode version in NormalizationCorrections.txt");

         // stay in QByteArray throughout; no implicit QString round trip
         out += "    { 0x" + QByteArray::number(c.codepoint, 16) + ", 0x" + QByteArray::number(c.mapped, 16)
              + ", " + QByteArray::number(c.version) + " },\n";
         ++numCorrections;
         maxVersion = qMax(c.version, maxVersion);
     }
     // drop the separator after the last row
     if (out.endsWith(",\n"))
         out.chop(2);

     out += "\n};\n\n"

            "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n"
            "enum { NormalizationCorrectionsVersionMax = " + QByteArray::number(maxVersion) + " };\n\n";

     return out;
 }

 /*
     Parses data/LineBreak.txt ("code[..code]; class" entries) and stores the
     line break class of every listed code point. Aborts via qFatal() if the
     file is missing, cannot be opened, or names a class absent from
     line_break_map.
 */
 static void readLineBreak()
 {
     qDebug("Reading LineBreak.txt");

     QFile f("data/LineBreak.txt");
     if (!f.exists())
         qFatal("Couldn't find LineBreak.txt");
     // an unopened file reports atEnd() at once and would be skipped silently
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open LineBreak.txt");

     while (!f.atEnd()) {
         // read the whole line whatever its length; drop only the terminator
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         line.replace(" ", "");

         if (line.isEmpty())
             continue;

         QList<QByteArray> l = line.split(';');
         Q_ASSERT(l.size() == 2);

         // "XXXX..YYYY" becomes "XXXX.YYYY" so one split yields both ends
         QByteArray codes = l[0];
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');

         bool ok;
         int from = cl[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int to = from;
         if (cl.size() == 2) {
             to = cl[1].toInt(&ok, 16);
             Q_ASSERT(ok);
         }

         LineBreakClass lb = line_break_map.value(l[1], LineBreak_Unassigned);
         if (lb == LineBreak_Unassigned)
             qFatal("unassigned line break class: %s", l[1].constData());

         for (int codepoint = from; codepoint <= to; ++codepoint) {
             UnicodeData &d = UnicodeData::valueRef(codepoint);
             d.p.lineBreakClass = lb;
         }
     }
 }

 static void readSpecialCasing()
 {
     qDebug("Reading SpecialCasing.txt");

     QFile f("data/SpecialCasing.txt");
     if (!f.exists())
         qFatal("Couldn't find SpecialCasing.txt");

     f.open(QFile::ReadOnly);

     while (!f.atEnd()) {
         QByteArray line;
         line.resize(1024);
         int len = f.readLine(line.data(), 1024);
         line.resize(len-1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);

         if (line.isEmpty())
             continue;

         QList<QByteArray> l = line.split(';');

         QByteArray condition = l.size() < 5 ? QByteArray() : l[4].trimmed();
         if (!condition.isEmpty())
             // #####
             continue;

         bool ok;
         int codepoint = l[0].trimmed().toInt(&ok, 16);
         Q_ASSERT(ok);

         // if the condition below doesn't hold anymore we need to modify our
         // lower/upper/title casing code and case folding code
         Q_ASSERT(!QChar::requiresSurrogates(codepoint));

 //         qDebug() << "codepoint" << hex << codepoint;
 //         qDebug() << line;

         QList<QByteArray> lower = l[1].trimmed().split(' ');
         QList<int> lowerMap;
         for (int i = 0; i < lower.size(); ++i) {
             bool ok;
             lowerMap.append(lower.at(i).toInt(&ok, 16));
             Q_ASSERT(ok);
         }

         QList<QByteArray> title = l[2].trimmed().split(' ');
         QList<int> titleMap;
         for (int i = 0; i < title.size(); ++i) {
             bool ok;
             titleMap.append(title.at(i).toInt(&ok, 16));
             Q_ASSERT(ok);
         }

         QList<QByteArray> upper = l[3].trimmed().split(' ');
         QList<int> upperMap;
         for (int i = 0; i < upper.size(); ++i) {
             bool ok;
             upperMap.append(upper.at(i).toInt(&ok, 16));
             Q_ASSERT(ok);
         }


         UnicodeData &ud = UnicodeData::valueRef(codepoint);
         Q_ASSERT(lowerMap.size() > 1 || lowerMap.at(0) == codepoint + ud.p.lowerCaseDiff);
         Q_ASSERT(titleMap.size() > 1 || titleMap.at(0) == codepoint + ud.p.titleCaseDiff);
         Q_ASSERT(upperMap.size() > 1 || upperMap.at(0) == codepoint + ud.p.upperCaseDiff);

         if (lowerMap.size() > 1) {
             ud.p.lowerCaseSpecial = true;
             ud.p.lowerCaseDiff = appendToSpecialCaseMap(lowerMap);
         }
         if (titleMap.size() > 1) {
             ud.p.titleCaseSpecial = true;
             ud.p.titleCaseDiff = appendToSpecialCaseMap(titleMap);
         }
         if (upperMap.size() > 1) {
             ud.p.upperCaseSpecial = true;
             ud.p.upperCaseDiff = appendToSpecialCaseMap(upperMap);
         }
     }
 }

 // Largest absolute simple case folding offset stored by readCaseFolding();
 // createPropertyInfo() asserts that it stays below 1<<13.
 static int maxCaseFoldDiff = 0;

 static void readCaseFolding()
 {
     qDebug("Reading CaseFolding.txt");

     QFile f("data/CaseFolding.txt");
     if (!f.exists())
         qFatal("Couldn't find CaseFolding.txt");

     f.open(QFile::ReadOnly);

     while (!f.atEnd()) {
         QByteArray line;
         line.resize(1024);
         int len = f.readLine(line.data(), 1024);
         line.resize(len-1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);

         if (line.isEmpty())
             continue;

         QList<QByteArray> l = line.split(';');

         bool ok;
         int codepoint = l[0].trimmed().toInt(&ok, 16);
         Q_ASSERT(ok);


         l[1] = l[1].trimmed();
         if (l[1] == "F" || l[1] == "T")
             continue;

 //         qDebug() << "codepoint" << hex << codepoint;
 //         qDebug() << line;
         QList<QByteArray> fold = l[2].trimmed().split(' ');
         QList<int> foldMap;
         for (int i = 0; i < fold.size(); ++i) {
             bool ok;
             foldMap.append(fold.at(i).toInt(&ok, 16));
             Q_ASSERT(ok);
         }

         UnicodeData &ud = UnicodeData::valueRef(codepoint);
         if (foldMap.size() == 1) {
             int caseFolded = foldMap.at(0);
             int diff = caseFolded - codepoint;
             // if the conditions below doesn't hold anymore we need to modify our case folding code
             Q_ASSERT(QChar::requiresSurrogates(codepoint) == QChar::requiresSurrogates(caseFolded));
             if (QChar::requiresSurrogates(codepoint)) {
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
                 Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
             }
             if (qAbs(diff) >= (1<<13)) {
                 qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << "); map it for special case";
                 ud.p.caseFoldSpecial = true;
                 ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap);
             } else {
                 ud.p.caseFoldDiff = diff;
                 maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
             }
         } else {
             qFatal("we currently don't support full case foldings");
 //             qDebug() << "special" << hex << foldMap;
             ud.p.caseFoldSpecial = true;
             ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap);
         }
     }
 }

 // Reads data/GraphemeBreakProperty.txt and stores the grapheme break class of
 // every listed code point (or code point range) in its UnicodeData.
 //
 // Once comments and spaces are stripped, a data line consists of a code point
 // or a "first..last" range, a ';' and a class name that is looked up in
 // grapheme_break_map; an unknown class name is fatal.
 static void readGraphemeBreak()
 {
     qDebug("Reading GraphemeBreakProperty.txt");

     QFile f("data/GraphemeBreakProperty.txt");
     if (!f.exists())
         qFatal("Couldn't find GraphemeBreakProperty.txt");

     // QFile::atEnd() reports true for a file that is not open, so a failed
     // open() would otherwise be indistinguishable from an empty data file.
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open GraphemeBreakProperty.txt");

     while (!f.atEnd()) {
         // Read the complete line and strip only the line terminator, so long
         // lines and an unterminated final line are kept intact.
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         line.replace(" ", "");

         if (line.isEmpty())
             continue;

         // Checked unconditionally: l[1] is read below even in builds where
         // Q_ASSERT compiles to nothing.
         QList<QByteArray> l = line.split(';');
         if (l.size() != 2)
             qFatal("Malformed line in GraphemeBreakProperty.txt: %s", line.constData());

         // "first..last" becomes "first.last" so one split yields both ends
         QByteArray codes = l[0];
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');

         bool ok;
         int from = cl[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int to = from;
         if (cl.size() == 2) {
             to = cl[1].toInt(&ok, 16);
             Q_ASSERT(ok);
         }

         GraphemeBreakClass brk = grapheme_break_map.value(l[1], GraphemeBreak_Unassigned);
         if (brk == GraphemeBreak_Unassigned)
             qFatal("unassigned grapheme break class: %s", l[1].constData());

         for (int codepoint = from; codepoint <= to; ++codepoint) {
             UnicodeData &ud = UnicodeData::valueRef(codepoint);
             ud.p.graphemeBreakClass = brk;
         }
     }
 }

 static void readWordBreak()
 {
     qDebug("Reading WordBreakProperty.txt");

     QFile f("data/WordBreakProperty.txt");
     if (!f.exists())
         qFatal("Couldn't find WordBreakProperty.txt");

     f.open(QFile::ReadOnly);

     while (!f.atEnd()) {
         QByteArray line;
         line.resize(1024);
         int len = f.readLine(line.data(), 1024);
         line.resize(len-1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         line.replace(" ", "");

         if (line.isEmpty())
             continue;

         QList<QByteArray> l = line.split(';');
         Q_ASSERT(l.size() == 2);

         QByteArray codes = l[0];
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');

         bool ok;
         int from = cl[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int to = from;
         if (cl.size() == 2) {
             to = cl[1].toInt(&ok, 16);
             Q_ASSERT(ok);
         }

         WordBreakClass brk = word_break_map.value(l[1], WordBreak_Unassigned);
         if (brk == WordBreak_Unassigned)
             qFatal("unassigned word break class: %s", l[1].constData());

         for (int codepoint = from; codepoint <= to; ++codepoint) {
             // ### [
             // as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet
             // which caused "hi.there" to be treated like if it were just a single word;
             // until we have a tailoring mechanism, retain the old behavior by remapping those characters here.
             if (codepoint == 0x002E) // FULL STOP
                 brk = WordBreak_MidNum;
             else if (codepoint == 0x003A) // COLON
                 brk = WordBreak_Any;
             // ] ###
             UnicodeData &ud = UnicodeData::valueRef(codepoint);
             ud.p.wordBreakClass = brk;
         }
     }
 }

 // Reads data/SentenceBreakProperty.txt and stores the sentence break class of
 // every listed code point (or code point range) in its UnicodeData.
 //
 // Once comments and spaces are stripped, a data line consists of a code point
 // or a "first..last" range, a ';' and a class name that is looked up in
 // sentence_break_map; an unknown class name is fatal.
 static void readSentenceBreak()
 {
     qDebug("Reading SentenceBreakProperty.txt");

     QFile f("data/SentenceBreakProperty.txt");
     if (!f.exists())
         qFatal("Couldn't find SentenceBreakProperty.txt");

     // QFile::atEnd() reports true for a file that is not open, so a failed
     // open() would otherwise be indistinguishable from an empty data file.
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open SentenceBreakProperty.txt");

     while (!f.atEnd()) {
         // Read the complete line and strip only the line terminator, so long
         // lines and an unterminated final line are kept intact.
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);
         line.replace(" ", "");

         if (line.isEmpty())
             continue;

         // Checked unconditionally: l[1] is read below even in builds where
         // Q_ASSERT compiles to nothing.
         QList<QByteArray> l = line.split(';');
         if (l.size() != 2)
             qFatal("Malformed line in SentenceBreakProperty.txt: %s", line.constData());

         // "first..last" becomes "first.last" so one split yields both ends
         QByteArray codes = l[0];
         codes.replace("..", ".");
         QList<QByteArray> cl = codes.split('.');

         bool ok;
         int from = cl[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int to = from;
         if (cl.size() == 2) {
             to = cl[1].toInt(&ok, 16);
             Q_ASSERT(ok);
         }

         SentenceBreakClass brk = sentence_break_map.value(l[1], SentenceBreak_Unassigned);
         if (brk == SentenceBreak_Unassigned)
             qFatal("unassigned sentence break class: %s", l[1].constData());

         for (int codepoint = from; codepoint <= to; ++codepoint) {
             UnicodeData &ud = UnicodeData::valueRef(codepoint);
             ud.p.sentenceBreakClass = brk;
         }
     }
 }

 #if 0
 // this piece of code does full case folding and comparison. We currently
 // don't use it, since this gives lots of issues with things as case insensitive
 // search and replace.
 // Writes the case folded form of ch to out as a zero-terminated sequence.
 // Without the caseFoldSpecial flag the result is the single unit
 // ch + caseFoldDiff; with it, caseFoldDiff is an offset into specialCaseMap
 // where a length is followed by that many units to copy.
 static inline void foldCase(uint ch, ushort *out)
 {
     const QUnicodeTables::Properties *props = qGetProp(ch);
     if (props->caseFoldSpecial) {
         const ushort *src = specialCaseMap + props->caseFoldDiff;
         const int count = *src++;
         for (int i = 0; i < count; ++i)
             *out++ = *src++;
     } else {
         *out++ = ch + props->caseFoldDiff;
     }
     // terminator; callers scan the buffer until they hit it
     *out = 0;
 }

 static int ucstricmp(const ushort *a, const ushort *ae, const ushort *b, const ushort *be)
 {
     if (a == b)
         return 0;
     if (a == 0)
         return 1;
     if (b == 0)
         return -1;

     while (a != ae && b != be) {
         const QUnicodeTables::Properties *pa = qGetProp(*a);
         const QUnicodeTables::Properties *pb = qGetProp(*b);
         if (pa->caseFoldSpecial | pb->caseFoldSpecial)
             goto special;
             int diff = (int)(*a + pa->caseFoldDiff) - (int)(*b + pb->caseFoldDiff);
         if ((diff))
             return diff;
         ++a;
         ++b;
         }
     }
     if (a == ae) {
         if (b == be)
             return 0;
         return -1;
     }
     return 1;
 special:
     ushort abuf[SPECIAL_CASE_MAX_LEN + 1];
     ushort bbuf[SPECIAL_CASE_MAX_LEN + 1];
     abuf[0] = bbuf[0] = 0;
     ushort *ap = abuf;
     ushort *bp = bbuf;
     while (1) {
         if (!*ap) {
             if (a == ae) {
                 if (!*bp && b == be)
                     return 0;
                 return -1;
             }
             foldCase(*(a++), abuf);
             ap = abuf;
         }
         if (!*bp) {
             if (b == be)
                 return 1;
             foldCase(*(b++), bbuf);
             bp = bbuf;
         }
         if (*ap != *bp)
             return (int)*ap - (int)*bp;
         ++ap;
         ++bp;
     }
 }


 // Compares the range [a, ae) with the zero-terminated string b after case
 // folding.
 //
 // Returns 0 when both fold to the same sequence, a negative value when a
 // orders before b and a positive value otherwise. A null a yields 1 and a
 // null b yields -1.
 //
 // The first loop handles units whose folding is a plain offset. As soon as a
 // unit on either side has the caseFoldSpecial flag, the comparison continues
 // at "special", where every unit is expanded through foldCase() into a small
 // zero-terminated buffer and the buffers are compared unit by unit.
 static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b)
 {
     if (a == 0)
         return 1;
     if (b == 0)
         return -1;

     while (a != ae && *b) {
         const QUnicodeTables::Properties *pa = qGetProp(*a);
         const QUnicodeTables::Properties *pb = qGetProp((ushort)*b);
         if (pa->caseFoldSpecial | pb->caseFoldSpecial)
             goto special;
         int diff = (int)(*a + pa->caseFoldDiff) - (int)(*b + pb->caseFoldDiff);
         if ((diff))
             return diff;
         ++a;
         ++b;
     }
     // one side is exhausted: the shorter string orders first
     if (a == ae) {
         if (!*b)
             return 0;
         return -1;
     }
     return 1;

 special:
     ushort abuf[SPECIAL_CASE_MAX_LEN + 1];
     ushort bbuf[SPECIAL_CASE_MAX_LEN + 1];
     abuf[0] = bbuf[0] = 0;
     ushort *ap = abuf;
     ushort *bp = bbuf;
     while (1) {
         // refill a's buffer once its folded units are used up
         if (!*ap) {
             if (a == ae) {
                 if (!*bp && !*b)
                     return 0;
                 return -1;
             }
             foldCase(*(a++), abuf);
             ap = abuf;
         }
         // refill b's buffer once its folded units are used up
         if (!*bp) {
             if (!*b)
                 return 1;
             foldCase(*(b++), bbuf);
             bp = bbuf;
         }
         if (*ap != *bp)
             return (int)*ap - (int)*bp;
         ++ap;
         ++bp;
     }
 }
 #endif

 #if 0
 // Distinct block names in order of first appearance in Blocks.txt; a block
 // is identified by its position in this list.
 static QList<QByteArray> blockNames;
 // One code point range read from Blocks.txt.
 struct BlockInfo
 {
     // position of the block's name in blockNames
     int blockIndex;
     // first code point of the range
     int firstCodePoint;
     // last code point of the range (equals firstCodePoint when the entry
     // names a single code point)
     int lastCodePoint;
 };
 // Every range appended by readBlocks(), in file order.
 static QList<BlockInfo> blockInfoList;

 // Reads data/Blocks.txt and appends one BlockInfo per data line to
 // blockInfoList, registering block names it has not seen before in
 // blockNames.
 //
 // Once comments and spaces are stripped, a data line consists of a code point
 // or a "first..last" range, a ';' and the block name.
 static void readBlocks()
 {
     qDebug("Reading Blocks.txt");

     QFile f("data/Blocks.txt");
     if (!f.exists())
         qFatal("Couldn't find Blocks.txt");

     // QFile::atEnd() reports true for a file that is not open, so a failed
     // open() would otherwise be indistinguishable from an empty data file.
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open Blocks.txt");

     while (!f.atEnd()) {
         // Strip only the line terminator so that an unterminated final line
         // keeps its last character.
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);

         line.replace(" ", "");

         if (line.isEmpty())
             continue;

         // Checked unconditionally so that release builds fail as well.
         int semicolon = line.indexOf(';');
         if (semicolon < 0)
             qFatal("Malformed line in Blocks.txt: %s", line.constData());
         QByteArray codePoints = line.left(semicolon);
         QByteArray blockName = line.mid(semicolon + 1);

         int blockIndex = blockNames.indexOf(blockName);
         if (blockIndex == -1) {
             blockIndex = blockNames.size();
             blockNames.append(blockName);
         }

         // "first..last" becomes "first.last" so one split yields both ends
         codePoints.replace("..", ".");
         QList<QByteArray> cl = codePoints.split('.');

         bool ok;
         int first = cl[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int last = first;
         if (cl.size() == 2) {
             last = cl[1].toInt(&ok, 16);
             Q_ASSERT(ok);
         }

         BlockInfo blockInfo = { blockIndex, first, last };
         blockInfoList.append(blockInfo);
     }
 }
 #endif

 // Reads data/Scripts.txt and stores the script of every listed code point (or
 // code point range) in its UnicodeData.
 //
 // Once comments, spaces and underscores are stripped, a data line consists of
 // a code point or a "first..last" range, a ';' and a script name that is
 // looked up in scriptMap; a name missing from scriptMap is fatal.
 static void readScripts()
 {
     qDebug("Reading Scripts.txt");

     QFile f("data/Scripts.txt");
     if (!f.exists())
         qFatal("Couldn't find Scripts.txt");

     // QFile::atEnd() reports true for a file that is not open, so a failed
     // open() would otherwise be indistinguishable from an empty data file.
     if (!f.open(QFile::ReadOnly))
         qFatal("Couldn't open Scripts.txt");

     while (!f.atEnd()) {
         // Strip only the line terminator so that an unterminated final line
         // keeps its last character.
         QByteArray line = f.readLine();
         if (line.endsWith('\n'))
             line.chop(1);

         int comment = line.indexOf('#');
         if (comment >= 0)
             line = line.left(comment);

         line.replace(" ", "");
         line.replace("_", "");

         if (line.isEmpty())
             continue;

         // Checked unconditionally so that release builds fail as well.
         int semicolon = line.indexOf(';');
         if (semicolon < 0)
             qFatal("Malformed line in Scripts.txt: %s", line.constData());
         QByteArray codePoints = line.left(semicolon);
         QByteArray scriptName = line.mid(semicolon + 1);

         // "first..last" becomes "first.last" so one split yields both ends
         codePoints.replace("..", ".");
         QList<QByteArray> cl = codePoints.split('.');

         bool ok;
         int first = cl[0].toInt(&ok, 16);
         Q_ASSERT(ok);
         int last = first;
         if (cl.size() == 2) {
             last = cl[1].toInt(&ok, 16);
             Q_ASSERT(ok);
         }

         if (!scriptMap.contains(scriptName))
             qFatal("Unhandled script property value: %s", scriptName.constData());
         QChar::Script script = scriptMap.value(scriptName, QChar::Script_Unknown);

         for (int codepoint = first; codepoint <= last; ++codepoint) {
             UnicodeData &ud = UnicodeData::valueRef(codepoint);
             ud.p.script = script;
         }
     }
 }

 #if 0
 static void dump(int from, int to)
 {
     for (int i = from; i <= to; ++i) {
         UnicodeData &d = UnicodeData::valueRef(i);
         qDebug("0x%04x: cat=%d combining=%d dir=%d case=%x mirror=%x joining=%d age=%d",
                i, d.p.category, d.p.combiningClass, d.p.direction, d.otherCase, d.mirroredChar, d.p.joining, d.p.age);
         if (d.decompositionType != QChar::NoDecomposition) {
             qDebug("    decomposition: type=%d, length=%d, first=%x", d.decompositionType, d.decomposition.size(),
                    d.decomposition[0]);
         }
     }
     qDebug(" ");
 }
 #endif

 // Distinct PropertyFlags values in order of first appearance, filled by
 // computeUniqueProperties(); UnicodeData::propertyIndex is a position in
 // this list.
 static QList<PropertyFlags> uniqueProperties;

 static void computeUniqueProperties()
 {
     qDebug("computeUniqueProperties:");
     for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
         UnicodeData &d = UnicodeData::valueRef(codepoint);
         int index = uniqueProperties.indexOf(d.p);
         if (index == -1) {
             index = uniqueProperties.size();
             uniqueProperties.append(d.p);
         }
         d.propertyIndex = index;
     }
     qDebug("    %d unique unicode properties found", uniqueProperties.size());
 }

 // One block of per-code-point values used while building a lookup table.
 struct UniqueBlock {
     // offset assigned to the block when it is added to a list of unique
     // blocks; -1 until then
     int index;
     // the values of the block, one per code point
     QVector<int> values;

     UniqueBlock()
         : index(-1)
     {
     }

     // Blocks are equal when their values match; index is ignored so that a
     // fresh block can be looked up among the blocks that are already stored.
     bool operator==(const UniqueBlock &rhs) const
     {
         return values == rhs.values;
     }
 };

 // Generates the source text for the property lookup tables and returns it.
 //
 // The output consists of
 //  - uc_property_trie: a block map (one entry per block of code points)
 //    followed by the data of all distinct blocks, where every data value is
 //    a UnicodeData::propertyIndex;
 //  - the GET_PROP_INDEX / GET_PROP_INDEX_UCS2 lookup macros;
 //  - uc_properties: one initializer per entry of uniqueProperties;
 //  - the qGetProp(), properties() and *BreakClass() accessor functions.
 //
 // Code points below BMP_END are grouped in blocks of BMP_BLOCKSIZE, the rest
 // up to SMP_END in blocks of SMP_BLOCKSIZE. Identical blocks are stored once.
 static QByteArray createPropertyInfo()
 {
     qDebug("createPropertyInfo:");

     // we reserve one bit more than in the assert below for the sign
     Q_ASSERT(maxMirroredDiff < (1<<12));
     Q_ASSERT(maxLowerCaseDiff < (1<<13));
     Q_ASSERT(maxUpperCaseDiff < (1<<13));
     Q_ASSERT(maxTitleCaseDiff < (1<<13));
     Q_ASSERT(maxCaseFoldDiff < (1<<13));

     // NOTE: BMP_END is 0x11000, i.e. the small-block part extends past 0xffff
     const int BMP_BLOCKSIZE = 32;
     const int BMP_SHIFT = 5;
     const int BMP_END = 0x11000;
     const int SMP_END = 0x110000;
     const int SMP_BLOCKSIZE = 256;
     const int SMP_SHIFT = 8;

     // uniqueBlocks: distinct blocks in order of first appearance
     // blockMap:     for every block, the data offset of its unique block
     // used:         number of data values occupied by the unique blocks so far
     QList<UniqueBlock> uniqueBlocks;
     QVector<int> blockMap;
     int used = 0;

     // small blocks for [0, BMP_END)
     for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) {
         UniqueBlock b;
         b.values.reserve(BMP_BLOCKSIZE);
         for (int i = 0; i < BMP_BLOCKSIZE; ++i) {
             int uc = block*BMP_BLOCKSIZE + i;
             UnicodeData &d = UnicodeData::valueRef(uc);
             b.values.append(d.propertyIndex);
         }
         int index = uniqueBlocks.indexOf(b);
         if (index == -1) {
             index = uniqueBlocks.size();
             b.index = used;
             used += BMP_BLOCKSIZE;
             uniqueBlocks.append(b);
         }
         blockMap.append(uniqueBlocks.at(index).index);
     }
     int bmp_blocks = uniqueBlocks.size();

     // large blocks for [BMP_END, SMP_END)
     for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) {
         UniqueBlock b;
         b.values.reserve(SMP_BLOCKSIZE);
         for (int i = 0; i < SMP_BLOCKSIZE; ++i) {
             int uc = block*SMP_BLOCKSIZE + i;
             UnicodeData &d = UnicodeData::valueRef(uc);
             b.values.append(d.propertyIndex);
         }
         int index = uniqueBlocks.indexOf(b);
         if (index == -1) {
             index = uniqueBlocks.size();
             b.index = used;
             used += SMP_BLOCKSIZE;
             uniqueBlocks.append(b);
         }
         blockMap.append(uniqueBlocks.at(index).index);
     }
     int smp_blocks = uniqueBlocks.size() - bmp_blocks;

     // size statistics, for the log only
     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*sizeof(unsigned short);
     int bmp_trie = BMP_END/BMP_BLOCKSIZE*sizeof(unsigned short);
     int bmp_mem = bmp_block_data + bmp_trie;
     qDebug("    %d unique blocks in BMP.", bmp_blocks);
     qDebug("        block data uses: %d bytes", bmp_block_data);
     qDebug("        trie data uses : %d bytes", bmp_trie);

     int smp_block_data = smp_blocks*SMP_BLOCKSIZE*sizeof(unsigned short);
     int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*sizeof(unsigned short);
     int smp_mem = smp_block_data + smp_trie;
     qDebug("    %d unique blocks in SMP.", smp_blocks);
     qDebug("        block data uses: %d bytes", smp_block_data);
     qDebug("        trie data uses : %d bytes", smp_trie);

     int prop_data = uniqueProperties.size() * SizeOfPropertiesStruct;
     qDebug("\n        properties data uses : %d bytes", prop_data);
     qDebug("    memory usage: %d bytes", bmp_mem + smp_mem + prop_data);

     // the map entries are written as unsigned short; this checks the value
     // written for the last map entry
     Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));

     QByteArray out;

     out += "static const unsigned short uc_property_trie[] = {\n";
     // first write the map
     // (the block data follows the map in the same array, hence every entry
     // is written as data offset + blockMap.size())
     out += "    // [0x0..0x" + QByteArray::number(BMP_END, 16) + ")";
     for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
         if (!(i % 8)) {
             if (out.endsWith(' '))
                 out.chop(1);
             if (!((i*BMP_BLOCKSIZE) % 0x1000))
                 out += "\n";
             out += "\n    ";
         }
         out += QByteArray::number(blockMap.at(i) + blockMap.size());
         out += ", ";
     }
     if (out.endsWith(' '))
         out.chop(1);
     out += "\n\n    // [0x" + QByteArray::number(BMP_END, 16) + "..0x" + QByteArray::number(SMP_END, 16) + ")\n";
     for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
         if (!(i % 8)) {
             if (out.endsWith(' '))
                 out.chop(1);
             if (!(i % (0x10000/SMP_BLOCKSIZE)))
                 out += "\n";
             out += "\n    ";
         }
         out += QByteArray::number(blockMap.at(i) + blockMap.size());
         out += ", ";
     }
     if (out.endsWith(' '))
         out.chop(1);
     out += "\n";
     // write the data
     for (int i = 0; i < uniqueBlocks.size(); ++i) {
         if (out.endsWith(' '))
             out.chop(1);
         out += "\n";
         const UniqueBlock &b = uniqueBlocks.at(i);
         for (int j = 0; j < b.values.size(); ++j) {
             if (!(j % 8)) {
                 if (out.endsWith(' '))
                     out.chop(1);
                 out += "\n    ";
             }
             out += QByteArray::number(b.values.at(j));
             out += ", ";
         }
     }
     // drop the separator after the very last value
     if (out.endsWith(", "))
         out.chop(2);
     out += "\n};\n\n";

     // lookup macros: the first access reads the map entry of the code
     // point's block, the second one reads the value inside that block
     out += "#define GET_PROP_INDEX(ucs4) \\\n"
            "       (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n"
            "        ? (uc_property_trie[uc_property_trie[ucs4>>" + QByteArray::number(BMP_SHIFT) +
            "] + (ucs4 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n"
            "        : (uc_property_trie[uc_property_trie[((ucs4 - 0x" + QByteArray::number(BMP_END, 16) +
            ")>>" + QByteArray::number(SMP_SHIFT) + ") + 0x" + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]"
            " + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")]))\n\n"
            "#define GET_PROP_INDEX_UCS2(ucs2) \\\n"
            "       (uc_property_trie[uc_property_trie[ucs2>>" + QByteArray::number(BMP_SHIFT) +
            "] + (ucs2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")])\n\n";

     out += "static const Properties uc_properties[] = {";
     // keep in sync with the property declaration
     for (int i = 0; i < uniqueProperties.size(); ++i) {
         const PropertyFlags &p = uniqueProperties.at(i);
         out += "\n    { ";
 //     "        ushort category            : 8; /* 5 used */\n"
         out += QByteArray::number( p.category );
         out += ", ";
 //     "        ushort direction           : 8; /* 5 used */\n"
         out += QByteArray::number( p.direction );
         out += ", ";
 //     "        ushort combiningClass      : 8;\n"
         out += QByteArray::number( p.combiningClass );
         out += ", ";
 //     "        ushort joining             : 3;\n"
         out += QByteArray::number( p.joining );
         out += ", ";
 //     "        signed short digitValue    : 5;\n"
         out += QByteArray::number( p.digitValue );
         out += ", ";
 //     "        signed short mirrorDiff    : 16;\n"
         out += QByteArray::number( p.mirrorDiff );
         out += ", ";
 //     "        ushort unicodeVersion      : 8; /* 5 used */\n"
         out += QByteArray::number( p.age );
         out += ", ";
 //     "        ushort nfQuickCheck        : 8;\n"
         out += QByteArray::number( p.nfQuickCheck );
         out += ", ";
 //     "        struct {\n"
 //     "            ushort special    : 1;\n"
 //     "            signed short diff : 15;\n"
 //     "        } cases[NumCases];\n"
         // written in the order lower, upper, title, fold
         out += " { {";
         out += QByteArray::number( p.lowerCaseSpecial );
         out += ", ";
         out += QByteArray::number( p.lowerCaseDiff );
         out += "}, {";
         out += QByteArray::number( p.upperCaseSpecial );
         out += ", ";
         out += QByteArray::number( p.upperCaseDiff );
         out += "}, {";
         out += QByteArray::number( p.titleCaseSpecial );
         out += ", ";
         out += QByteArray::number( p.titleCaseDiff );
         out += "}, {";
         out += QByteArray::number( p.caseFoldSpecial );
         out += ", ";
         out += QByteArray::number( p.caseFoldDiff );
         out += "} }, ";
 //     "        ushort graphemeBreakClass  : 5; /* 5 used */\n"
 //     "        ushort wordBreakClass      : 5; /* 5 used */\n"
 //     "        ushort lineBreakClass      : 6; /* 6 used */\n"
         out += QByteArray::number( p.graphemeBreakClass );
         out += ", ";
         out += QByteArray::number( p.wordBreakClass );
         out += ", ";
         out += QByteArray::number( p.lineBreakClass );
         out += ", ";
 //     "        ushort sentenceBreakClass  : 8; /* 4 used */\n"
         out += QByteArray::number( p.sentenceBreakClass );
         out += ", ";
 //     "        ushort script              : 8;\n"
         out += QByteArray::number( p.script );
         out += " },";
     }
     // drop the separator after the last initializer
     if (out.endsWith(','))
         out.chop(1);
     out += "\n};\n\n";


     // accessor functions, emitted verbatim
     out += "Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(uint ucs4) noexcept\n"
            "{\n"
            "    return uc_properties + GET_PROP_INDEX(ucs4);\n"
            "}\n"
            "\n"
            "Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(ushort ucs2) noexcept\n"
            "{\n"
            "    return uc_properties + GET_PROP_INDEX_UCS2(ucs2);\n"
            "}\n"
            "\n"
            "Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) noexcept\n"
            "{\n"
            "    return qGetProp(ucs4);\n"
            "}\n"
            "\n"
            "Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2) noexcept\n"
            "{\n"
            "    return qGetProp(ucs2);\n"
            "}\n\n";

     out += "Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(uint ucs4) noexcept\n"
            "{\n"
            "    return static_cast<GraphemeBreakClass>(qGetProp(ucs4)->graphemeBreakClass);\n"
            "}\n"
            "\n"
            "Q_CORE_EXPORT WordBreakClass QT_FASTCALL wordBreakClass(uint ucs4) noexcept\n"
            "{\n"
            "    return static_cast<WordBreakClass>(qGetProp(ucs4)->wordBreakClass);\n"
            "}\n"
            "\n"
            "Q_CORE_EXPORT SentenceBreakClass QT_FASTCALL sentenceBreakClass(uint ucs4) noexcept\n"
            "{\n"
            "    return static_cast<SentenceBreakClass>(qGetProp(ucs4)->sentenceBreakClass);\n"
            "}\n"
            "\n"
            "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4) noexcept\n"
            "{\n"
            "    return static_cast<LineBreakClass>(qGetProp(ucs4)->lineBreakClass);\n"
            "}\n"
            "\n";

     return out;
 }

 // Generates the source text of the specialCaseMap array and returns it.
 //
 // Position 0 of the map is emitted as a fixed "0x0" placeholder. From
 // position 1 on, every entry is a length n followed by n values; each entry
 // is written on its own line, in hexadecimal.
 static QByteArray createSpecialCaseMap()
 {
     qDebug("createSpecialCaseMap:");

     QByteArray out;

     out += "static const unsigned short specialCaseMap[] = {\n"
            "    0x0, // placeholder";
     int i = 1;
     while (i < specialCaseMap.size()) {
         out += "\n   ";
         int n = specialCaseMap.at(i);
         // j == 0 writes the length itself, the rest writes the n values
         for (int j = 0; j <= n; ++j) {
             out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i+j), 16);
             out += ",";
         }
         i += n + 1;
     }
     // Drop the separator after the last value. Without any entry there is no
     // such separator, and the text must then be left untouched.
     if (out.endsWith(','))
         out.chop(1);
     out += "\n};\n\n";

     // sizeof yields a size_t; convert so that the argument matches "%d"
     const int memoryUsage = int(specialCaseMap.size() * sizeof(unsigned short));
     qDebug("    memory usage: %d bytes", memoryUsage);

     return out;
 }


 static QByteArray createCompositionInfo()
 {
     qDebug("createCompositionInfo: highestComposedCharacter=0x%x", highestComposedCharacter);

     const int BMP_BLOCKSIZE = 16;
     const int BMP_SHIFT = 4;
     const int BMP_END = 0x3400; // start of Han
     const int SMP_END = 0x30000;
     const int SMP_BLOCKSIZE = 256;
     const int SMP_SHIFT = 8;

     if (SMP_END <= highestComposedCharacter)
         qFatal("end of table smaller than highest composed character 0x%x", highestComposedCharacter);

     QVector<unsigned short> decompositions;
     int tableIndex = 0;

     QList<UniqueBlock> uniqueBlocks;
     QVector<int> blockMap;
     int used = 0;

     for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) {
         UniqueBlock b;
         b.values.reserve(BMP_BLOCKSIZE);
         for (int i = 0; i < BMP_BLOCKSIZE; ++i) {
             int uc = block*BMP_BLOCKSIZE + i;
             UnicodeData &d = UnicodeData::valueRef(uc);
             if (!d.decomposition.isEmpty()) {
                 int utf16Length = 0;
                 decompositions.append(0);
                 for (int j = 0; j < d.decomposition.size(); ++j) {
                     int code = d.decomposition.at(j);
                     if (QChar::requiresSurrogates(code)) {
                         // save as surrogate pair
                         decompositions.append(QChar::highSurrogate(code));
                         decompositions.append(QChar::lowSurrogate(code));
                         utf16Length += 2;
                     } else {
                         decompositions.append(code);
                         utf16Length++;
                     }
                 }
                 decompositions[tableIndex] = d.decompositionType + (utf16Length<<8);
                 b.values.append(tableIndex);
                 tableIndex += utf16Length + 1;
             } else {
                 b.values.append(0xffff);
             }
         }
         int index = uniqueBlocks.indexOf(b);
         if (index == -1) {
             index = uniqueBlocks.size();
             b.index = used;
             used += BMP_BLOCKSIZE;
             uniqueBlocks.append(b);
         }
         blockMap.append(uniqueBlocks.at(index).index);
     }
     int bmp_blocks = uniqueBlocks.size();

     for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) {
         UniqueBlock b;
         b.values.reserve(SMP_BLOCKSIZE);
         for (int i = 0; i < SMP_BLOCKSIZE; ++i) {
             int uc = block*SMP_BLOCKSIZE + i;
             UnicodeData &d = UnicodeData::valueRef(uc);
             if (!d.decomposition.isEmpty()) {
                 int utf16Length = 0;
                 decompositions.append(0);
                 for (int j = 0; j < d.decomposition.size(); ++j) {
                     int code = d.decomposition.at(j);
                     if (QChar::requiresSurrogates(code)) {
                         // save as surrogate pair
                         decompositions.append(QChar::highSurrogate(code));
                         decompositions.append(QChar::lowSurrogate(code));
                         utf16Length += 2;
                     } else {
                         decompositions.append(code);
                         utf16Length++;
                     }
                 }
                 decompositions[tableIndex] = d.decompositionType + (utf16Length<<8);
                 b.values.append(tableIndex);
                 tableIndex += utf16Length + 1;
             } else {
                 b.values.append(0xffff);
             }
         }
         int index = uniqueBlocks.indexOf(b);
         if (index == -1) {
             index = uniqueBlocks.size();
             b.index = used;
             used += SMP_BLOCKSIZE;
             uniqueBlocks.append(b);
         }
         blockMap.append(uniqueBlocks.at(index).index);
     }
     int smp_blocks = uniqueBlocks.size() - bmp_blocks;

     // if the condition below doesn't hold anymore we need to modify our decomposition code
     Q_ASSERT(tableIndex < 0xffff);

     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*sizeof(unsigned short);
     int bmp_trie = BMP_END/BMP_BLOCKSIZE*sizeof(unsigned short);
     int bmp_mem = bmp_block_data + bmp_trie;
     qDebug("    %d unique blocks in BMP.", bmp_blocks);
     qDebug("        block data uses: %d bytes", bmp_block_data);
     qDebug("        trie data uses : %d bytes", bmp_trie);

     int smp_block_data = smp_blocks*SMP_BLOCKSIZE*sizeof(unsigned short);
     int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*sizeof(unsigned short);
     int smp_mem = smp_block_data + smp_trie;
     qDebug("    %d unique blocks in SMP.", smp_blocks);
     qDebug("        block data uses: %d bytes", smp_block_data);
     qDebug("        trie data uses : %d bytes", smp_trie);

     int decomposition_data = decompositions.size() * 2;
     qDebug("\n        decomposition data uses : %d bytes", decomposition_data);
     qDebug("    memory usage: %d bytes", bmp_mem + smp_mem + decomposition_data);

     Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));

     QByteArray out;

     out += "static const unsigned short uc_decomposition_trie[] = {\n";
     // first write the map
     out += "    // 0 - 0x" + QByteArray::number(BMP_END, 16);
     for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
         if (!(i % 8)) {
             if (out.endsWith(' '))
                 out.chop(1);
             if (!((i*BMP_BLOCKSIZE) % 0x1000))
                 out += "\n";
             out += "\n    ";
         }
         out += QByteArray::number(blockMap.at(i) + blockMap.size());
         out += ", ";
     }
     if (out.endsWith(' '))
         out.chop(1);
     out += "\n\n    // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";
     for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
         if (!(i % 8)) {
             if (out.endsWith(' '))
                 out.chop(1);
             if (!(i % (0x10000/SMP_BLOCKSIZE)))
                 out += "\n";
             out += "\n    ";
         }
         out += QByteArray::number(blockMap.at(i) + blockMap.size());
         out += ", ";
     }
     if (out.endsWith(' '))
         out.chop(1);
     out += "\n";
     // write the data
     for (int i = 0; i < uniqueBlocks.size(); ++i) {
         if (out.endsWith(' '))
             out.chop(1);
         out += "\n";
         const UniqueBlock &b = uniqueBlocks.at(i);
         for (int j = 0; j < b.values.size(); ++j) {
             if (!(j % 8)) {
                 if (out.endsWith(' '))
                     out.chop(1);
                 out += "\n    ";
             }
             out += "0x" + QByteArray::number(b.values.at(j), 16);
             out += ", ";
         }
     }
     if (out.endsWith(' '))
         out.chop(2);
     out += "\n};\n\n";

     out += "#define GET_DECOMPOSITION_INDEX(ucs4) \\\n"
            "       (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n"
            "        ? (uc_decomposition_trie[uc_decomposition_trie[ucs4>>" + QByteArray::number(BMP_SHIFT) +
            "] + (ucs4 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n"
            "        : (ucs4 < 0x" + QByteArray::number(SMP_END, 16) + " \\\n"
            "           ? uc_decomposition_trie[uc_decomposition_trie[((ucs4 - 0x" + QByteArray::number(BMP_END, 16) +
            ")>>" + QByteArray::number(SMP_SHIFT) + ") + 0x" + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]"
            " + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")] \\\n"
            "           : 0xffff))\n\n";

     out += "static const unsigned short uc_decomposition_map[] = {";
     for (int i = 0; i < decompositions.size(); ++i) {
         if (!(i % 8)) {
             if (out.endsWith(' '))
                 out.chop(1);
             out += "\n    ";
         }
         out += "0x" + QByteArray::number(decompositions.at(i), 16);
         out += ", ";
     }
     if (out.endsWith(' '))
         out.chop(2);
     out += "\n};\n\n";

     return out;
 }

 // Generates the source text for the ligature lookup tables and returns it.
 //
 // Three pieces are emitted, in this order:
 //   - uc_ligature_trie: a two-stage table. It starts with one entry per block
 //     (the offset of that block's data inside the same array), followed by
 //     the data of every unique block. A data entry is either an index into
 //     uc_ligature_map or 0xffff when the code point has no ligatures.
 //   - GET_LIGATURE_INDEX(ucs4): the macro that walks the trie.
 //   - uc_ligature_map: for every code point that has ligatures, the number of
 //     records followed by the sorted (u1, ligature) records. Records are
 //     stored as single UTF-16 units in the first range and as surrogate pairs
 //     in the second range.
 //
 // The code point space is split into a fine-grained range [0, BMP_END) using
 // blocks of BMP_BLOCKSIZE entries and a coarse range [BMP_END, SMP_END) using
 // blocks of SMP_BLOCKSIZE entries. Note that BMP_END is below 0x10000; the
 // assertions in the two fill loops check that every code point with ligatures
 // falls on the expected side of the surrogate boundary.
 static QByteArray createLigatureInfo()
 {
     qDebug("createLigatureInfo: numLigatures=%d, highestLigature=0x%x", numLigatures, highestLigature);

     // Sanity-check every stored ligature list. ligatureHashes is looked up by
     // code point further down, so the lists have to be visited through the
     // container's iterators: indexing with 0..size()-1 would only inspect the
     // lists that happen to belong to the first size() code points.
     for (auto it = ligatureHashes.constBegin(), end = ligatureHashes.constEnd(); it != end; ++it) {
         const QList<Ligature> &l = *it;
         for (int j = 0; j < l.size(); ++j) {
             // if the condition below doesn't hold anymore we need to modify our ligatureHelper code
             Q_ASSERT(QChar::requiresSurrogates(l.at(j).u2) == QChar::requiresSurrogates(l.at(j).ligature) &&
                      QChar::requiresSurrogates(l.at(j).u2) == QChar::requiresSurrogates(l.at(j).u1));
         }
     }

     const int BMP_BLOCKSIZE = 32;
     const int BMP_SHIFT = 5;
     const int BMP_END = 0x3100;
     const int SMP_END = 0x12000;
     const int SMP_BLOCKSIZE = 256;
     const int SMP_SHIFT = 8;

     // The generated macro answers 0xffff for anything at or above SMP_END, so
     // no code point with ligatures may lie beyond the table.
     if (SMP_END <= highestLigature)
         qFatal("end of table smaller than highest ligature character 0x%x", highestLigature);

     QList<unsigned short> ligatures; // contents of uc_ligature_map
     int tableIndex = 0;              // index in 'ligatures' where the next record group starts

     QList<UniqueBlock> uniqueBlocks; // de-duplicated trie blocks, in emission order
     QVector<int> blockMap;           // per block: offset of its data among the unique blocks
     int used = 0;                    // number of data entries occupied by uniqueBlocks so far

     // First range: [0, BMP_END) in blocks of BMP_BLOCKSIZE code points.
     for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) {
         UniqueBlock b;
         b.values.reserve(BMP_BLOCKSIZE);
         for (int i = 0; i < BMP_BLOCKSIZE; ++i) {
             int uc = block*BMP_BLOCKSIZE + i;
             QList<Ligature> l = ligatureHashes.value(uc);
             if (!l.isEmpty()) {
                 Q_ASSERT(!QChar::requiresSurrogates(uc));
                 std::sort(l.begin(), l.end()); // needed for bsearch in ligatureHelper code

                 // record group: count, then (u1, ligature) as single UTF-16 units
                 ligatures.append(l.size());
                 for (int j = 0; j < l.size(); ++j) {
                     ligatures.append(l.at(j).u1);
                     ligatures.append(l.at(j).ligature);
                 }
                 b.values.append(tableIndex);
                 tableIndex += 2*l.size() + 1;
             } else {
                 b.values.append(0xffff);
             }
         }
         // share the data of identical blocks
         int index = uniqueBlocks.indexOf(b);
         if (index == -1) {
             index = uniqueBlocks.size();
             b.index = used;
             used += BMP_BLOCKSIZE;
             uniqueBlocks.append(b);
         }
         blockMap.append(uniqueBlocks.at(index).index);
     }
     int bmp_blocks = uniqueBlocks.size();

     // Second range: [BMP_END, SMP_END) in blocks of SMP_BLOCKSIZE code points.
     for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) {
         UniqueBlock b;
         b.values.reserve(SMP_BLOCKSIZE);
         for (int i = 0; i < SMP_BLOCKSIZE; ++i) {
             int uc = block*SMP_BLOCKSIZE + i;
             QList<Ligature> l = ligatureHashes.value(uc);
             if (!l.isEmpty()) {
                 Q_ASSERT(QChar::requiresSurrogates(uc));
                 std::sort(l.begin(), l.end()); // needed for bsearch in ligatureHelper code

                 // record group: count, then (u1, ligature) as surrogate pairs
                 ligatures.append(l.size());
                 for (int j = 0; j < l.size(); ++j) {
                     ligatures.append(QChar::highSurrogate(l.at(j).u1));
                     ligatures.append(QChar::lowSurrogate(l.at(j).u1));
                     ligatures.append(QChar::highSurrogate(l.at(j).ligature));
                     ligatures.append(QChar::lowSurrogate(l.at(j).ligature));
                 }
                 b.values.append(tableIndex);
                 tableIndex += 4*l.size() + 1;
             } else {
                 b.values.append(0xffff);
             }
         }
         // share the data of identical blocks
         int index = uniqueBlocks.indexOf(b);
         if (index == -1) {
             index = uniqueBlocks.size();
             b.index = used;
             used += SMP_BLOCKSIZE;
             uniqueBlocks.append(b);
         }
         blockMap.append(uniqueBlocks.at(index).index);
     }
     int smp_blocks = uniqueBlocks.size() - bmp_blocks;

     // if the condition below doesn't hold anymore we need to modify our composition code
     Q_ASSERT(tableIndex < 0xffff);

     // Report the size of the generated data.
     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*sizeof(unsigned short);
     int bmp_trie = BMP_END/BMP_BLOCKSIZE*sizeof(unsigned short);
     int bmp_mem = bmp_block_data + bmp_trie;
     qDebug("    %d unique blocks in BMP.", bmp_blocks);
     qDebug("        block data uses: %d bytes", bmp_block_data);
     qDebug("        trie data uses : %d bytes", bmp_trie);

     int smp_block_data = smp_blocks*SMP_BLOCKSIZE*sizeof(unsigned short);
     int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*sizeof(unsigned short);
     int smp_mem = smp_block_data + smp_trie;
     qDebug("    %d unique blocks in SMP.", smp_blocks);
     qDebug("        block data uses: %d bytes", smp_block_data);
     qDebug("        trie data uses : %d bytes", smp_trie);

     int ligature_data = ligatures.size() * 2;
     qDebug("\n        ligature data uses : %d bytes", ligature_data);
     qDebug("    memory usage: %d bytes", bmp_mem + smp_mem + ligature_data);

     // The trie entries are emitted as unsigned short, so the largest block
     // offset (shifted by the size of the map that precedes the data) must fit.
     Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));

     QByteArray out;

     out += "static const unsigned short uc_ligature_trie[] = {\n";
     // first write the map
     out += "    // 0 - 0x" + QByteArray::number(BMP_END, 16);
     for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
         if (!(i % 8)) {
             if (out.endsWith(' '))
                 out.chop(1);
             if (!((i*BMP_BLOCKSIZE) % 0x1000))
                 out += "\n";
             out += "\n    ";
         }
         // offsets are relative to the start of the array, hence the map size is added
         out += QByteArray::number(blockMap.at(i) + blockMap.size());
         out += ", ";
     }
     if (out.endsWith(' '))
         out.chop(1);
     out += "\n\n    // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";
     for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
         if (!(i % 8)) {
             if (out.endsWith(' '))
                 out.chop(1);
             if (!(i % (0x10000/SMP_BLOCKSIZE)))
                 out += "\n";
             out += "\n    ";
         }
         out += QByteArray::number(blockMap.at(i) + blockMap.size());
         out += ", ";
     }
     if (out.endsWith(' '))
         out.chop(1);
     out += "\n";
     // write the data
     for (int i = 0; i < uniqueBlocks.size(); ++i) {
         if (out.endsWith(' '))
             out.chop(1);
         out += "\n";
         const UniqueBlock &b = uniqueBlocks.at(i);
         for (int j = 0; j < b.values.size(); ++j) {
             if (!(j % 8)) {
                 if (out.endsWith(' '))
                     out.chop(1);
                 out += "\n    ";
             }
             out += "0x" + QByteArray::number(b.values.at(j), 16);
             out += ", ";
         }
     }
     // drop the trailing ", " of the last entry
     if (out.endsWith(' '))
         out.chop(2);
     out += "\n};\n\n";

     out += "#define GET_LIGATURE_INDEX(ucs4) \\\n"
            "       (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n"
            "        ? (uc_ligature_trie[uc_ligature_trie[ucs4>>" + QByteArray::number(BMP_SHIFT) +
            "] + (ucs4 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n"
            "        : (ucs4 < 0x" + QByteArray::number(SMP_END, 16) + " \\\n"
            "           ? uc_ligature_trie[uc_ligature_trie[((ucs4 - 0x" + QByteArray::number(BMP_END, 16) +
            ")>>" + QByteArray::number(SMP_SHIFT) + ") + 0x" + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]"
            " + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")] \\\n"
            "           : 0xffff))\n\n";

     out += "static const unsigned short uc_ligature_map[] = {";
     for (int i = 0; i < ligatures.size(); ++i) {
         if (!(i % 8)) {
             if (out.endsWith(' '))
                 out.chop(1);
             out += "\n    ";
         }
         out += "0x" + QByteArray::number(ligatures.at(i), 16);
         out += ", ";
     }
     // drop the trailing ", " of the last entry
     if (out.endsWith(' '))
         out.chop(2);
     out += "\n};\n\n";

     return out;
 }

 // Returns the source text declaring the CasingInfo bit-field struct
 // (16-bit codePoint, 8-bit flags, 8-bit offset), followed by a blank line.
 QByteArray createCasingInfo()
 {
     static const char casingInfoDeclaration[] =
         "struct CasingInfo {\n"
         "    uint codePoint : 16;\n"
         "    uint flags : 8;\n"
         "    uint offset : 8;\n"
         "};\n\n";

     return QByteArray(casingInfoDeclaration);
 }


 int main(int, char **)
 {
     initAgeMap();
     initCategoryMap();
     initDecompositionMap();
     initDirectionMap();
     initJoiningMap();
     initGraphemeBreak();
     initWordBreak();
     initSentenceBreak();
     initLineBreak();
     initScriptMap();

     readUnicodeData();
     readBidiMirroring();
     readArabicShaping();
     readDerivedAge();
     readDerivedNormalizationProps();
     readSpecialCasing();
     readCaseFolding();
     // readBlocks();
     readScripts();
     readGraphemeBreak();
     readWordBreak();
     readSentenceBreak();
     readLineBreak();

     computeUniqueProperties();
     QByteArray properties = createPropertyInfo();
     QByteArray specialCases = createSpecialCaseMap();
     QByteArray compositions = createCompositionInfo();
     QByteArray ligatures = createLigatureInfo();
     QByteArray normalizationCorrections = createNormalizationCorrections();

     QByteArray header =
         "/****************************************************************************\n"
         "**\n"
         "** Copyright (C) 2020 The Qt Company Ltd.\n"
         "** Contact: https://www.qt.io/licensing/\n"
         "**\n"
         "** This file is part of the QtCore module of the Qt Toolkit.\n"
         "**\n"
         "** $QT_BEGIN_LICENSE:LGPL$\n"
         "** Commercial License Usage\n"
         "** Licensees holding valid commercial Qt licenses may use this file in\n"
         "** accordance with the commercial license agreement provided with the\n"
         "** Software or, alternatively, in accordance with the terms contained in\n"
         "** a written agreement between you and The Qt Company. For licensing terms\n"
         "** and conditions see https://www.qt.io/terms-conditions. For further\n"
         "** information use the contact form at https://www.qt.io/contact-us.\n"
         "**\n"
         "** GNU Lesser General Public License Usage\n"
         "** Alternatively, this file may be used under the terms of the GNU Lesser\n"
         "** General Public License version 3 as published by the Free Software\n"
         "** Foundation and appearing in the file LICENSE.LGPL3 included in the\n"
         "** packaging of this file. Please review the following information to\n"
         "** ensure the GNU Lesser General Public License version 3 requirements\n"
         "** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.\n"
         "**\n"
         "** GNU General Public License Usage\n"
         "** Alternatively, this file may be used under the terms of the GNU\n"
         "** General Public License version 2.0 or (at your option) the GNU General\n"
         "** Public license version 3 or any later version approved by the KDE Free\n"
         "** Qt Foundation. The licenses are as published by the Free Software\n"
         "** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3\n"
         "** included in the packaging of this file. Please review the following\n"
         "** information to ensure the GNU General Public License requirements will\n"
         "** be met: https://www.gnu.org/licenses/gpl-2.0.html and\n"
         "** https://www.gnu.org/licenses/gpl-3.0.html.\n"
         "**\n"
         "** $QT_END_LICENSE$\n"
         "**\n"
         "****************************************************************************/\n\n";

     QByteArray note =
         "/* This file is autogenerated from the Unicode " DATA_VERSION_S " database. Do not edit */\n\n";

     QByteArray warning =
         "//\n"
         "//  W A R N I N G\n"
         "//  -------------\n"
         "//\n"
         "// This file is not part of the Qt API.  It exists for the convenience\n"
         "// of internal files.  This header file may change from version to version\n"
         "// without notice, or even be removed.\n"
         "//\n"
         "// We mean it.\n"
         "//\n\n";

     QFile f("../../src/corelib/text/qunicodetables.cpp");
     f.open(QFile::WriteOnly|QFile::Truncate);
     f.write(header);
     f.write(note);
     f.write("#include \"qunicodetables_p.h\"\n\n");
     f.write("QT_BEGIN_NAMESPACE\n\n");
     f.write("namespace QUnicodeTables {\n\n");
     f.write(properties);
     f.write("\n");
     f.write(specialCases);
     f.write("\n");
     f.write(compositions);
     f.write(ligatures);
     f.write("\n");
     f.write(normalizationCorrections);
     f.write("} // namespace QUnicodeTables\n\n");
     f.write("using namespace QUnicodeTables;\n\n");
     f.write("QT_END_NAMESPACE\n");
     f.close();

     f.setFileName("../../src/corelib/text/qunicodetables_p.h");
     f.open(QFile::WriteOnly | QFile::Truncate);
     f.write(header);
     f.write(note);
     f.write(warning);
     f.write("#ifndef QUNICODETABLES_P_H\n"
             "#define QUNICODETABLES_P_H\n\n"
             "#include <QtCore/private/qglobal_p.h>\n\n"
             "#include <QtCore/qchar.h>\n\n"
             "QT_BEGIN_NAMESPACE\n\n");
     f.write("#define UNICODE_DATA_VERSION " DATA_VERSION_STR "\n\n");
     f.write("namespace QUnicodeTables {\n\n");
     f.write(property_string);
     f.write(sizeOfPropertiesStructCheck);
     f.write(grapheme_break_class_string);
     f.write(word_break_class_string);
     f.write(sentence_break_class_string);
     f.write(line_break_class_string);
     f.write(methods);
     f.write("} // namespace QUnicodeTables\n\n"
             "QT_END_NAMESPACE\n\n"
             "#endif // QUNICODETABLES_P_H\n");
     f.close();

     qDebug() << "maxMirroredDiff  = " << hex << maxMirroredDiff;
     qDebug() << "maxLowerCaseDiff = " << hex << maxLowerCaseDiff;
     qDebug() << "maxUpperCaseDiff = " << hex << maxUpperCaseDiff;
     qDebug() << "maxTitleCaseDiff = " << hex << maxTitleCaseDiff;
     qDebug() << "maxCaseFoldDiff  = " << hex << maxCaseFoldDiff;
 #if 0
 //     dump(0, 0x7f);
 //     dump(0x620, 0x640);
 //     dump(0x10000, 0x10020);
 //     dump(0x10800, 0x10820);

     qDebug("decompositionLength used:");
     int totalcompositions = 0;
     int sum = 0;
     for (int i = 1; i < 20; ++i) {
         qDebug("    length %d used %d times", i, decompositionLength.value(i, 0));
         totalcompositions += i*decompositionLength.value(i, 0);
         sum += decompositionLength.value(i, 0);
     }
     qDebug("    len decomposition map %d, average length %f, num composed chars %d",
            totalcompositions, (float)totalcompositions/(float)sum, sum);
     qDebug("highest composed character %x", highestComposedCharacter);
     qDebug("num ligatures = %d highest=%x, maxLength=%d", numLigatures, highestLigature, longestLigature);

     qBubbleSort(ligatures);
     for (int i = 0; i < ligatures.size(); ++i)
         qDebug("%s", ligatures.at(i).data());

 //     qDebug("combiningClass usage:");
 //     int numClasses = 0;
 //     for (int i = 0; i < 255; ++i) {
 //         int num = combiningClassUsage.value(i, 0);
 //         if (num) {
 //             ++numClasses;
 //             qDebug("    combiningClass %d used %d times", i, num);
 //         }
 //     }
 //     qDebug("total of %d combining classes used", numClasses);

 #endif
 }