| /**************************************************************************** |
| ** |
| ** Copyright (C) 2019 The Qt Company Ltd. |
| ** Contact: https://www.qt.io/licensing/ |
| ** |
| ** This file is part of the QtCore module of the Qt Toolkit. |
| ** |
| ** $QT_BEGIN_LICENSE:LGPL$ |
| ** Commercial License Usage |
| ** Licensees holding valid commercial Qt licenses may use this file in |
| ** accordance with the commercial license agreement provided with the |
| ** Software or, alternatively, in accordance with the terms contained in |
| ** a written agreement between you and The Qt Company. For licensing terms |
| ** and conditions see https://www.qt.io/terms-conditions. For further |
| ** information use the contact form at https://www.qt.io/contact-us. |
| ** |
| ** GNU Lesser General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU Lesser |
| ** General Public License version 3 as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| ** packaging of this file. Please review the following information to |
| ** ensure the GNU Lesser General Public License version 3 requirements |
| ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| ** |
| ** GNU General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU |
| ** General Public License version 2.0 or (at your option) the GNU General |
| ** Public license version 3 or any later version approved by the KDE Free |
| ** Qt Foundation. The licenses are as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| ** included in the packaging of this file. Please review the following |
| ** information to ensure the GNU General Public License requirements will |
| ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| ** https://www.gnu.org/licenses/gpl-3.0.html. |
| ** |
| ** $QT_END_LICENSE$ |
| ** |
| ****************************************************************************/ |
| |
| // Don't define it while compiling this module, or USERS of Qt will |
| // not be able to link. |
| #ifdef QT_NO_CAST_FROM_ASCII |
| # undef QT_NO_CAST_FROM_ASCII |
| #endif |
| #ifdef QT_NO_CAST_TO_ASCII |
| # undef QT_NO_CAST_TO_ASCII |
| #endif |
| #include "qchar.h" |
| |
| #include "qdatastream.h" |
| |
| #include "qunicodetables_p.h" |
| #include "qunicodetables.cpp" |
| |
| #include <algorithm> |
| |
| QT_BEGIN_NAMESPACE |
| |
// Maps an enumerator value (such as a QChar::Category) to a single-bit int
// mask, so that several values can be OR-ed together and tested with one AND.
#define FLAG(bit) (1 << (bit))
| |
| /*! |
| \class QLatin1Char |
| \inmodule QtCore |
| \reentrant |
| \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character. |
| |
| \ingroup string-processing |
| |
| This class is only useful to construct a QChar from an 8-bit character. |
| |
| \sa QChar, QLatin1String, QString |
| */ |
| |
| /*! |
| \fn const char QLatin1Char::toLatin1() const |
| |
| Converts a Latin-1 character to an 8-bit ASCII representation of the character. |
| */ |
| |
| /*! |
| \fn const ushort QLatin1Char::unicode() const |
| |
| Converts a Latin-1 character to a 16-bit-encoded Unicode representation |
| of the character. |
| */ |
| |
| /*! |
| \fn QLatin1Char::QLatin1Char(char c) |
| |
| Constructs a Latin-1 character for \a c. This constructor should be |
| used when the encoding of the input character is known to be Latin-1. |
| */ |
| |
| /*! |
| \class QChar |
| \inmodule QtCore |
| \brief The QChar class provides a 16-bit Unicode character. |
| |
| \ingroup string-processing |
| \reentrant |
| |
| In Qt, Unicode characters are 16-bit entities without any markup |
| or structure. This class represents such an entity. It is |
| lightweight, so it can be used everywhere. Most compilers treat |
| it like an \c{unsigned short}. |
| |
| QChar provides a full complement of testing/classification |
| functions, converting to and from other formats, converting from |
| composed to decomposed Unicode, and trying to compare and |
| case-convert if you ask it to. |
| |
| The classification functions include functions like those in the |
| standard C++ header \<cctype\> (formerly \<ctype.h\>), but |
| operating on the full range of Unicode characters, not just for the ASCII |
| range. They all return true if the character is a certain type of character; |
| otherwise they return false. These classification functions are |
| isNull() (returns \c true if the character is '\\0'), isPrint() |
| (true if the character is any sort of printable character, |
| including whitespace), isPunct() (any sort of punctuation), |
| isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any |
| sort of numeric character, not just 0-9), isLetterOrNumber(), and |
| isDigit() (decimal digits). All of these are wrappers around |
| category(), which returns the Unicode-defined category of each |
| character. Some of these also calculate the derived properties |
| (for example isSpace() returns \c true if the character is of category |
| Separator_* or an exceptional code point from Other_Control category). |
| |
| QChar also provides direction(), which indicates the "natural" |
| writing direction of this character. The joiningType() function |
| indicates how the character joins with its neighbors (needed |
| mostly for Arabic or Syriac) and finally hasMirrored(), which indicates |
| whether the character needs to be mirrored when it is printed in |
| its "unnatural" writing direction. |
| |
| Composed Unicode characters (like \a ring) can be converted to |
| decomposed Unicode ("a" followed by "ring above") by using decomposition(). |
| |
| In Unicode, comparison is not necessarily possible and case |
| conversion is very difficult at best. Unicode, covering the |
| "entire" world, also includes most of the world's case and |
| sorting problems. operator==() and friends will do comparison |
| based purely on the numeric Unicode value (code point) of the |
| characters, and toUpper() and toLower() will do case changes when |
| the character has a well-defined uppercase/lowercase equivalent. |
| For locale-dependent comparisons, use QString::localeAwareCompare(). |
| |
| The conversion functions include unicode() (to a scalar), |
| toLatin1() (to scalar, but converts all non-Latin-1 characters to |
| 0), row() (gives the Unicode row), cell() (gives the Unicode |
| cell), digitValue() (gives the integer value of any of the |
| numerous digit characters), and a host of constructors. |
| |
| QChar provides constructors and cast operators that make it easy |
| to convert to and from traditional 8-bit \c{char}s. If you |
| defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as |
| explained in the QString documentation, you will need to |
| explicitly call fromLatin1(), or use QLatin1Char, |
| to construct a QChar from an 8-bit \c char, and you will need to |
| call toLatin1() to get the 8-bit value back. |
| |
| For more information see |
| \l{http://www.unicode.org/ucd/}{"About the Unicode Character Database"}. |
| |
| \sa Unicode, QString, QLatin1Char |
| */ |
| |
| /*! |
| \enum QChar::UnicodeVersion |
| |
| Specifies which version of the \l{http://www.unicode.org/}{Unicode standard} |
| introduced a certain character. |
| |
| \value Unicode_1_1 Version 1.1 |
| \value Unicode_2_0 Version 2.0 |
| \value Unicode_2_1_2 Version 2.1.2 |
| \value Unicode_3_0 Version 3.0 |
| \value Unicode_3_1 Version 3.1 |
| \value Unicode_3_2 Version 3.2 |
| \value Unicode_4_0 Version 4.0 |
| \value Unicode_4_1 Version 4.1 |
| \value Unicode_5_0 Version 5.0 |
| \value Unicode_5_1 Version 5.1 |
| \value Unicode_5_2 Version 5.2 |
| \value Unicode_6_0 Version 6.0 |
| \value Unicode_6_1 Version 6.1 |
| \value Unicode_6_2 Version 6.2 |
| \value Unicode_6_3 Version 6.3 Since Qt 5.3 |
| \value Unicode_7_0 Version 7.0 Since Qt 5.5 |
| \value Unicode_8_0 Version 8.0 Since Qt 5.6 |
| \value Unicode_9_0 Version 9.0 Since Qt 5.11 |
| \value Unicode_10_0 Version 10.0 Since Qt 5.11 |
| \value Unicode_11_0 Version 11.0 Since Qt 5.15 |
| \value Unicode_12_0 Version 12.0 Since Qt 5.15 |
| \value Unicode_12_1 Version 12.1 Since Qt 5.15 |
| \value Unicode_13_0 Version 13.0 Since Qt 5.15 |
| \value Unicode_Unassigned The value is not assigned to any character |
| in version 13.0 of Unicode. |
| |
| \sa unicodeVersion(), currentUnicodeVersion() |
| */ |
| |
| /*! |
| \enum QChar::Category |
| |
| This enum maps the Unicode character categories. |
| |
| The following categories are normative in Unicode: |
| |
| \value Mark_NonSpacing Unicode class name Mn |
| |
| \value Mark_SpacingCombining Unicode class name Mc |
| |
| \value Mark_Enclosing Unicode class name Me |
| |
| \value Number_DecimalDigit Unicode class name Nd |
| |
| \value Number_Letter Unicode class name Nl |
| |
| \value Number_Other Unicode class name No |
| |
| \value Separator_Space Unicode class name Zs |
| |
| \value Separator_Line Unicode class name Zl |
| |
| \value Separator_Paragraph Unicode class name Zp |
| |
| \value Other_Control Unicode class name Cc |
| |
| \value Other_Format Unicode class name Cf |
| |
| \value Other_Surrogate Unicode class name Cs |
| |
| \value Other_PrivateUse Unicode class name Co |
| |
| \value Other_NotAssigned Unicode class name Cn |
| |
| |
| The following categories are informative in Unicode: |
| |
| \value Letter_Uppercase Unicode class name Lu |
| |
| \value Letter_Lowercase Unicode class name Ll |
| |
| \value Letter_Titlecase Unicode class name Lt |
| |
| \value Letter_Modifier Unicode class name Lm |
| |
| \value Letter_Other Unicode class name Lo |
| |
| \value Punctuation_Connector Unicode class name Pc |
| |
| \value Punctuation_Dash Unicode class name Pd |
| |
| \value Punctuation_Open Unicode class name Ps |
| |
| \value Punctuation_Close Unicode class name Pe |
| |
| \value Punctuation_InitialQuote Unicode class name Pi |
| |
| \value Punctuation_FinalQuote Unicode class name Pf |
| |
| \value Punctuation_Other Unicode class name Po |
| |
| \value Symbol_Math Unicode class name Sm |
| |
| \value Symbol_Currency Unicode class name Sc |
| |
| \value Symbol_Modifier Unicode class name Sk |
| |
| \value Symbol_Other Unicode class name So |
| |
| \sa category() |
| */ |
| |
| /*! |
| \enum QChar::Script |
| \since 5.1 |
| |
| This enum type defines the Unicode script property values. |
| |
| For details about the Unicode script property values see |
| \l{http://www.unicode.org/reports/tr24/}{Unicode Standard Annex #24}. |
| |
| In order to conform to C/C++ naming conventions "Script_" is prepended |
| to the codes used in the Unicode Standard. |
| |
| \value Script_Unknown For unassigned, private-use, noncharacter, and surrogate code points. |
| \value Script_Inherited For characters that may be used with multiple scripts |
| and that inherit their script from the preceding characters. |
| These include nonspacing marks, enclosing marks, |
| and zero width joiner/non-joiner characters. |
| \value Script_Common For characters that may be used with multiple scripts |
| and that do not inherit their script from the preceding characters. |
| |
| \value Script_Adlam Since Qt 5.11 |
| \value Script_Ahom Since Qt 5.6 |
| \value Script_AnatolianHieroglyphs Since Qt 5.6 |
| \value Script_Arabic |
| \value Script_Armenian |
| \value Script_Avestan |
| \value Script_Balinese |
| \value Script_Bamum |
| \value Script_BassaVah Since Qt 5.5 |
| \value Script_Batak |
| \value Script_Bengali |
| \value Script_Bhaiksuki Since Qt 5.11 |
| \value Script_Bopomofo |
| \value Script_Brahmi |
| \value Script_Braille |
| \value Script_Buginese |
| \value Script_Buhid |
| \value Script_CanadianAboriginal |
| \value Script_Carian |
| \value Script_CaucasianAlbanian Since Qt 5.5 |
| \value Script_Chakma |
| \value Script_Cham |
| \value Script_Cherokee |
| \value Script_Chorasmian Since Qt 5.15 |
| \value Script_Coptic |
| \value Script_Cuneiform |
| \value Script_Cypriot |
| \value Script_Cyrillic |
| \value Script_Deseret |
| \value Script_Devanagari |
| \value Script_DivesAkuru Since Qt 5.15 |
| \value Script_Dogra Since Qt 5.15 |
| \value Script_Duployan Since Qt 5.5 |
| \value Script_EgyptianHieroglyphs |
| \value Script_Elbasan Since Qt 5.5 |
| \value Script_Elymaic Since Qt 5.15 |
| \value Script_Ethiopic |
| \value Script_Georgian |
| \value Script_Glagolitic |
| \value Script_Gothic |
| \value Script_Grantha Since Qt 5.5 |
| \value Script_Greek |
| \value Script_Gujarati |
| \value Script_GunjalaGondi Since Qt 5.15 |
| \value Script_Gurmukhi |
| \value Script_Han |
| \value Script_Hangul |
| \value Script_HanifiRohingya Since Qt 5.15 |
| \value Script_Hanunoo |
| \value Script_Hatran Since Qt 5.6 |
| \value Script_Hebrew |
| \value Script_Hiragana |
| \value Script_ImperialAramaic |
| \value Script_InscriptionalPahlavi |
| \value Script_InscriptionalParthian |
| \value Script_Javanese |
| \value Script_Kaithi |
| \value Script_Kannada |
| \value Script_Katakana |
| \value Script_KayahLi |
| \value Script_Kharoshthi |
| \value Script_KhitanSmallScript Since Qt 5.15 |
| \value Script_Khmer |
| \value Script_Khojki Since Qt 5.5 |
| \value Script_Khudawadi Since Qt 5.5 |
| \value Script_Lao |
| \value Script_Latin |
| \value Script_Lepcha |
| \value Script_Limbu |
| \value Script_LinearA Since Qt 5.5 |
| \value Script_LinearB |
| \value Script_Lisu |
| \value Script_Lycian |
| \value Script_Lydian |
| \value Script_Mahajani Since Qt 5.5 |
| \value Script_Makasar Since Qt 5.15 |
| \value Script_Malayalam |
| \value Script_Mandaic |
| \value Script_Manichaean Since Qt 5.5 |
| \value Script_Marchen Since Qt 5.11 |
| \value Script_MasaramGondi Since Qt 5.11 |
| \value Script_Medefaidrin Since Qt 5.15 |
| \value Script_MeeteiMayek |
| \value Script_MendeKikakui Since Qt 5.5 |
| \value Script_MeroiticCursive |
| \value Script_MeroiticHieroglyphs |
| \value Script_Miao |
| \value Script_Modi Since Qt 5.5 |
| \value Script_Mongolian |
| \value Script_Mro Since Qt 5.5 |
| \value Script_Multani Since Qt 5.6 |
| \value Script_Myanmar |
| \value Script_Nabataean Since Qt 5.5 |
| \value Script_Nandinagari Since Qt 5.15 |
| \value Script_Newa Since Qt 5.11 |
| \value Script_NewTaiLue |
| \value Script_Nko |
| \value Script_Nushu Since Qt 5.11 |
| \value Script_NyiakengPuachueHmong Since Qt 5.15 |
| \value Script_Ogham |
| \value Script_OlChiki |
| \value Script_OldHungarian Since Qt 5.6 |
| \value Script_OldItalic |
| \value Script_OldNorthArabian Since Qt 5.5 |
| \value Script_OldPermic Since Qt 5.5 |
| \value Script_OldPersian |
| \value Script_OldSogdian Since Qt 5.15 |
| \value Script_OldSouthArabian |
| \value Script_OldTurkic |
| \value Script_Oriya |
| \value Script_Osage Since Qt 5.11 |
| \value Script_Osmanya |
| \value Script_PahawhHmong Since Qt 5.5 |
| \value Script_Palmyrene Since Qt 5.5 |
| \value Script_PauCinHau Since Qt 5.5 |
| \value Script_PhagsPa |
| \value Script_Phoenician |
| \value Script_PsalterPahlavi Since Qt 5.5 |
| \value Script_Rejang |
| \value Script_Runic |
| \value Script_Samaritan |
| \value Script_Saurashtra |
| \value Script_Sharada |
| \value Script_Shavian |
| \value Script_Siddham Since Qt 5.5 |
| \value Script_SignWriting Since Qt 5.6 |
| \value Script_Sinhala |
| \value Script_Sogdian Since Qt 5.15 |
| \value Script_SoraSompeng |
| \value Script_Soyombo Since Qt 5.11 |
| \value Script_Sundanese |
| \value Script_SylotiNagri |
| \value Script_Syriac |
| \value Script_Tagalog |
| \value Script_Tagbanwa |
| \value Script_TaiLe |
| \value Script_TaiTham |
| \value Script_TaiViet |
| \value Script_Takri |
| \value Script_Tamil |
| \value Script_Tangut Since Qt 5.11 |
| \value Script_Telugu |
| \value Script_Thaana |
| \value Script_Thai |
| \value Script_Tibetan |
| \value Script_Tifinagh |
| \value Script_Tirhuta Since Qt 5.5 |
| \value Script_Ugaritic |
| \value Script_Vai |
| \value Script_Wancho Since Qt 5.15 |
| \value Script_WarangCiti Since Qt 5.5 |
| \value Script_Yezidi Since Qt 5.15 |
| \value Script_Yi |
| \value Script_ZanabazarSquare Since Qt 5.11 |
| |
| \omitvalue ScriptCount |
| |
| \sa script() |
| */ |
| |
| /*! |
| \enum QChar::Direction |
| |
| This enum type defines the Unicode direction attributes. See the |
| \l{http://www.unicode.org/reports/tr9/tr9-35.html#Table_Bidirectional_Character_Types}{Unicode Standard} for a description |
| of the values. |
| |
| In order to conform to C/C++ naming conventions "Dir" is prepended |
| to the codes used in the Unicode Standard. |
| |
| \value DirAL |
| \value DirAN |
| \value DirB |
| \value DirBN |
| \value DirCS |
| \value DirEN |
| \value DirES |
| \value DirET |
| \value DirFSI Since Qt 5.3 |
| \value DirL |
| \value DirLRE |
| \value DirLRI Since Qt 5.3 |
| \value DirLRO |
| \value DirNSM |
| \value DirON |
| \value DirPDF |
| \value DirPDI Since Qt 5.3 |
| \value DirR |
| \value DirRLE |
| \value DirRLI Since Qt 5.3 |
| \value DirRLO |
| \value DirS |
| \value DirWS |
| |
| \sa direction() |
| */ |
| |
| /*! |
| \enum QChar::Decomposition |
| |
| This enum type defines the Unicode decomposition attributes. See |
| the \l{http://www.unicode.org/}{Unicode Standard} for a |
| description of the values. |
| |
| \value NoDecomposition |
| \value Canonical |
| \value Circle |
| \value Compat |
| \value Final |
| \value Font |
| \value Fraction |
| \value Initial |
| \value Isolated |
| \value Medial |
| \value Narrow |
| \value NoBreak |
| \value Small |
| \value Square |
| \value Sub |
| \value Super |
| \value Vertical |
| \value Wide |
| |
| \sa decomposition() |
| */ |
| |
| /*! |
| \enum QChar::JoiningType |
| \since 5.3 |
| |
| This enum type defines the Unicode joining type attributes. See the |
| \l{http://www.unicode.org/}{Unicode Standard} for a description of the values. |
| |
| In order to conform to C/C++ naming conventions "Joining_" is prepended |
| to the codes used in the Unicode Standard. |
| |
| \value Joining_None |
| \value Joining_Causing |
| \value Joining_Dual |
| \value Joining_Right |
| \value Joining_Left |
| \value Joining_Transparent |
| |
| \sa joiningType() |
| */ |
| |
| #if QT_DEPRECATED_SINCE(5, 3) |
| /*! |
| \enum QChar::Joining |
| \deprecated in 5.3, use JoiningType instead. |
| |
| This enum type defines the Unicode joining attributes. See the |
| \l{http://www.unicode.org/}{Unicode Standard} for a description |
| of the values. |
| |
| \value Center |
| \value Dual |
| \value OtherJoining |
| \value Right |
| |
| \sa joining() |
| */ |
| #endif |
| |
| /*! |
| \enum QChar::CombiningClass |
| |
| \internal |
| |
| This enum type defines names for some of the Unicode combining |
| classes. See the \l{http://www.unicode.org/}{Unicode Standard} |
| for a description of the values. |
| |
| \value Combining_Above |
| \value Combining_AboveAttached |
| \value Combining_AboveLeft |
| \value Combining_AboveLeftAttached |
| \value Combining_AboveRight |
| \value Combining_AboveRightAttached |
| \value Combining_Below |
| \value Combining_BelowAttached |
| \value Combining_BelowLeft |
| \value Combining_BelowLeftAttached |
| \value Combining_BelowRight |
| \value Combining_BelowRightAttached |
| \value Combining_DoubleAbove |
| \value Combining_DoubleBelow |
| \value Combining_IotaSubscript |
| \value Combining_Left |
| \value Combining_LeftAttached |
| \value Combining_Right |
| \value Combining_RightAttached |
| */ |
| |
| /*! |
| \enum QChar::SpecialCharacter |
| |
| \value Null A QChar with this value isNull(). |
| \value Tabulation Character tabulation. |
| \value LineFeed |
| \value FormFeed |
| \value CarriageReturn |
| \value Space |
| \value Nbsp Non-breaking space. |
| \value SoftHyphen |
| \value ReplacementCharacter The character shown when a font has no glyph |
| for a certain codepoint. A special question mark character is often |
| used. Codecs use this codepoint when input data cannot be |
| represented in Unicode. |
| \value ObjectReplacementCharacter Used to represent an object such as an |
| image when such objects cannot be presented. |
| \value ByteOrderMark |
| \value ByteOrderSwapped |
| \value ParagraphSeparator |
| \value LineSeparator |
| \value LastValidCodePoint |
| */ |
| |
| /*! |
| \fn void QChar::setCell(uchar cell) |
| \internal |
| */ |
| |
| /*! |
| \fn void QChar::setRow(uchar row) |
| \internal |
| */ |
| |
| /*! |
| \fn QChar::QChar() |
| |
| Constructs a null QChar ('\\0'). |
| |
| \sa isNull() |
| */ |
| |
| /*! |
| \fn QChar::QChar(QLatin1Char ch) |
| |
| Constructs a QChar corresponding to ASCII/Latin-1 character \a ch. |
| */ |
| |
| /*! |
| \fn QChar::QChar(SpecialCharacter ch) |
| |
| Constructs a QChar for the predefined character value \a ch. |
| */ |
| |
| /*! |
| \fn QChar::QChar(char16_t ch) |
| \since 5.10 |
| |
| Constructs a QChar corresponding to the UTF-16 character \a ch. |
| */ |
| |
| /*! |
| \fn QChar::QChar(wchar_t ch) |
| \since 5.10 |
| |
| Constructs a QChar corresponding to the wide character \a ch. |
| |
| \note This constructor is only available on Windows. |
| */ |
| |
| /*! |
| \fn QChar::QChar(char ch) |
| |
| Constructs a QChar corresponding to ASCII/Latin-1 character \a ch. |
| |
| \note This constructor is not available when \c QT_NO_CAST_FROM_ASCII |
| is defined. |
| |
| \sa QT_NO_CAST_FROM_ASCII |
| */ |
| |
| /*! |
| \fn QChar::QChar(uchar ch) |
| |
| Constructs a QChar corresponding to ASCII/Latin-1 character \a ch. |
| |
| \note This constructor is not available when \c QT_NO_CAST_FROM_ASCII |
| or \c QT_RESTRICTED_CAST_FROM_ASCII is defined. |
| |
| \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII |
| */ |
| |
| /*! |
| \fn QChar::QChar(uchar cell, uchar row) |
| |
| Constructs a QChar for Unicode cell \a cell in row \a row. |
| |
| \sa cell(), row() |
| */ |
| |
| /*! |
| \fn QChar::QChar(ushort code) |
| |
| Constructs a QChar for the character with Unicode code point \a code. |
| */ |
| |
| /*! |
| \fn QChar::QChar(short code) |
| |
| Constructs a QChar for the character with Unicode code point \a code. |
| */ |
| |
| /*! |
| \fn QChar::QChar(uint code) |
| |
| Constructs a QChar for the character with Unicode code point \a code. |
| */ |
| |
| /*! |
| \fn QChar::QChar(int code) |
| |
| Constructs a QChar for the character with Unicode code point \a code. |
| */ |
| |
| /*! |
| \fn bool QChar::isNull() const |
| |
| Returns \c true if the character is the Unicode character 0x0000 |
| ('\\0'); otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn uchar QChar::cell() const |
| |
| Returns the cell (least significant byte) of the Unicode character. |
| |
| \sa row() |
| */ |
| |
| /*! |
| \fn uchar QChar::row() const |
| |
| Returns the row (most significant byte) of the Unicode character. |
| |
| \sa cell() |
| */ |
| |
| /*! |
| \fn bool QChar::isPrint() const |
| |
| Returns \c true if the character is a printable character; otherwise |
| returns \c false. This is any character not of category Other_*. |
| |
| Note that this gives no indication of whether the character is |
| available in a particular font. |
| */ |
| |
| /*! |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a printable character; otherwise returns \c false. |
| This is any character not of category Other_*. |
| |
| Note that this gives no indication of whether the character is |
| available in a particular font. |
| */ |
| bool QChar::isPrint(uint ucs4) noexcept |
| { |
| if (ucs4 > LastValidCodePoint) |
| return false; |
| const int test = FLAG(Other_Control) | |
| FLAG(Other_Format) | |
| FLAG(Other_Surrogate) | |
| FLAG(Other_PrivateUse) | |
| FLAG(Other_NotAssigned); |
| return !(FLAG(qGetProp(ucs4)->category) & test); |
| } |
| |
| /*! |
| \fn bool QChar::isSpace() const |
| |
| Returns \c true if the character is a separator character |
| (Separator_* categories or certain code points from Other_Control category); |
| otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn bool QChar::isSpace(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a separator character (Separator_* categories or certain code points |
| from Other_Control category); otherwise returns \c false. |
| */ |
| |
| /*! |
| \internal |
| */ |
| bool QT_FASTCALL QChar::isSpace_helper(uint ucs4) noexcept |
| { |
| if (ucs4 > LastValidCodePoint) |
| return false; |
| const int test = FLAG(Separator_Space) | |
| FLAG(Separator_Line) | |
| FLAG(Separator_Paragraph); |
| return FLAG(qGetProp(ucs4)->category) & test; |
| } |
| |
| /*! |
| \fn bool QChar::isMark() const |
| |
| Returns \c true if the character is a mark (Mark_* categories); |
| otherwise returns \c false. |
| |
| See QChar::Category for more information regarding marks. |
| */ |
| |
| /*! |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a mark (Mark_* categories); otherwise returns \c false. |
| */ |
| bool QChar::isMark(uint ucs4) noexcept |
| { |
| if (ucs4 > LastValidCodePoint) |
| return false; |
| const int test = FLAG(Mark_NonSpacing) | |
| FLAG(Mark_SpacingCombining) | |
| FLAG(Mark_Enclosing); |
| return FLAG(qGetProp(ucs4)->category) & test; |
| } |
| |
| /*! |
| \fn bool QChar::isPunct() const |
| |
| Returns \c true if the character is a punctuation mark (Punctuation_* |
| categories); otherwise returns \c false. |
| */ |
| |
| /*! |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a punctuation mark (Punctuation_* categories); otherwise returns \c false. |
| */ |
| bool QChar::isPunct(uint ucs4) noexcept |
| { |
| if (ucs4 > LastValidCodePoint) |
| return false; |
| const int test = FLAG(Punctuation_Connector) | |
| FLAG(Punctuation_Dash) | |
| FLAG(Punctuation_Open) | |
| FLAG(Punctuation_Close) | |
| FLAG(Punctuation_InitialQuote) | |
| FLAG(Punctuation_FinalQuote) | |
| FLAG(Punctuation_Other); |
| return FLAG(qGetProp(ucs4)->category) & test; |
| } |
| |
| /*! |
| \fn bool QChar::isSymbol() const |
| |
| Returns \c true if the character is a symbol (Symbol_* categories); |
| otherwise returns \c false. |
| */ |
| |
| /*! |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a symbol (Symbol_* categories); otherwise returns \c false. |
| */ |
| bool QChar::isSymbol(uint ucs4) noexcept |
| { |
| if (ucs4 > LastValidCodePoint) |
| return false; |
| const int test = FLAG(Symbol_Math) | |
| FLAG(Symbol_Currency) | |
| FLAG(Symbol_Modifier) | |
| FLAG(Symbol_Other); |
| return FLAG(qGetProp(ucs4)->category) & test; |
| } |
| |
| /*! |
| \fn bool QChar::isLetter() const |
| |
| Returns \c true if the character is a letter (Letter_* categories); |
| otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn bool QChar::isLetter(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a letter (Letter_* categories); otherwise returns \c false. |
| */ |
| |
| /*! |
| \internal |
| */ |
| bool QT_FASTCALL QChar::isLetter_helper(uint ucs4) noexcept |
| { |
| if (ucs4 > LastValidCodePoint) |
| return false; |
| const int test = FLAG(Letter_Uppercase) | |
| FLAG(Letter_Lowercase) | |
| FLAG(Letter_Titlecase) | |
| FLAG(Letter_Modifier) | |
| FLAG(Letter_Other); |
| return FLAG(qGetProp(ucs4)->category) & test; |
| } |
| |
| /*! |
| \fn bool QChar::isNumber() const |
| |
| Returns \c true if the character is a number (Number_* categories, |
| not just 0-9); otherwise returns \c false. |
| |
| \sa isDigit() |
| */ |
| |
| /*! |
| \fn bool QChar::isNumber(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a number (Number_* categories, not just 0-9); otherwise returns \c false. |
| |
| \sa isDigit() |
| */ |
| |
| /*! |
| \internal |
| */ |
| bool QT_FASTCALL QChar::isNumber_helper(uint ucs4) noexcept |
| { |
| if (ucs4 > LastValidCodePoint) |
| return false; |
| const int test = FLAG(Number_DecimalDigit) | |
| FLAG(Number_Letter) | |
| FLAG(Number_Other); |
| return FLAG(qGetProp(ucs4)->category) & test; |
| } |
| |
| /*! |
| \fn bool QChar::isLetterOrNumber() const |
| |
| Returns \c true if the character is a letter or number (Letter_* or |
| Number_* categories); otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn bool QChar::isLetterOrNumber(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a letter or number (Letter_* or Number_* categories); otherwise returns \c false. |
| */ |
| |
| /*! |
| \internal |
| */ |
| bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) noexcept |
| { |
| if (ucs4 > LastValidCodePoint) |
| return false; |
| const int test = FLAG(Letter_Uppercase) | |
| FLAG(Letter_Lowercase) | |
| FLAG(Letter_Titlecase) | |
| FLAG(Letter_Modifier) | |
| FLAG(Letter_Other) | |
| FLAG(Number_DecimalDigit) | |
| FLAG(Number_Letter) | |
| FLAG(Number_Other); |
| return FLAG(qGetProp(ucs4)->category) & test; |
| } |
| |
| /*! |
| \fn bool QChar::isDigit() const |
| |
| Returns \c true if the character is a decimal digit |
| (Number_DecimalDigit); otherwise returns \c false. |
| |
| \sa isNumber() |
| */ |
| |
| /*! |
| \fn bool QChar::isDigit(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 is |
| a decimal digit (Number_DecimalDigit); otherwise returns \c false. |
| |
| \sa isNumber() |
| */ |
| |
| /*! |
| \fn bool QChar::isNonCharacter() const |
| \since 5.0 |
| |
| Returns \c true if the QChar is a non-character; false otherwise. |
| |
| Unicode has a certain number of code points that are classified |
| as "non-characters:" that is, they can be used for internal purposes |
| in applications but cannot be used for text interchange. |
| Those are the last two entries of each Unicode Plane ([0xfffe..0xffff], |
| [0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef]. |
| */ |
| |
| /*! |
| \fn bool QChar::isHighSurrogate() const |
| |
| Returns \c true if the QChar is the high part of a UTF16 surrogate |
| (for example if its code point is in range [0xd800..0xdbff]); false otherwise. |
| */ |
| |
| /*! |
| \fn bool QChar::isLowSurrogate() const |
| |
| Returns \c true if the QChar is the low part of a UTF16 surrogate |
| (for example if its code point is in range [0xdc00..0xdfff]); false otherwise. |
| */ |
| |
| /*! |
| \fn bool QChar::isSurrogate() const |
| \since 5.0 |
| |
| Returns \c true if the QChar contains a code point that is in either |
| the high or the low part of the UTF-16 surrogate range |
| (for example if its code point is in range [0xd800..0xdfff]); false otherwise. |
| */ |
| |
| /*! |
| \fn static bool QChar::isNonCharacter(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
| is a non-character; false otherwise. |
| |
| Unicode has a certain number of code points that are classified |
| as "non-characters:" that is, they can be used for internal purposes |
| in applications but cannot be used for text interchange. |
| Those are the last two entries of each Unicode Plane ([0xfffe..0xffff], |
| [0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef]. |
| */ |
| |
| /*! |
| \fn static bool QChar::isHighSurrogate(uint ucs4) |
| \overload |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
| is the high part of a UTF16 surrogate |
| (for example if its code point is in range [0xd800..0xdbff]); false otherwise. |
| */ |
| |
| /*! |
| \fn static bool QChar::isLowSurrogate(uint ucs4) |
| \overload |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
| is the low part of a UTF16 surrogate |
| (for example if its code point is in range [0xdc00..0xdfff]); false otherwise. |
| */ |
| |
| /*! |
| \fn static bool QChar::isSurrogate(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
| contains a code point that is in either the high or the low part of the |
| UTF-16 surrogate range (that is, if its code point is in range [0xd800..0xdfff]); |
| false otherwise. |
| */ |
| |
| /*! |
| \fn static bool QChar::requiresSurrogates(uint ucs4) |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
| can be split into the high and low parts of a UTF-16 surrogate pair |
| (that is, if its code point is greater than or equal to 0x10000); |
| false otherwise. |
| */ |
| |
| /*! |
| \fn static uint QChar::surrogateToUcs4(ushort high, ushort low) |
| |
| Converts a UTF-16 surrogate pair with the given \a high and \a low values |
| to its UCS-4-encoded code point. |
| */ |
| |
| /*! |
| \fn static uint QChar::surrogateToUcs4(QChar high, QChar low) |
| \overload |
| |
| Converts a UTF-16 surrogate pair (\a high, \a low) to its UCS-4-encoded code point. |
| */ |
| |
| /*! |
| \fn static ushort QChar::highSurrogate(uint ucs4) |
| |
| Returns the high surrogate part of a UCS-4-encoded code point. |
| The returned result is undefined if \a ucs4 is smaller than 0x10000. |
| */ |
| |
| /*! |
| \fn static ushort QChar::lowSurrogate(uint ucs4) |
| |
| Returns the low surrogate part of a UCS-4-encoded code point. |
| The returned result is undefined if \a ucs4 is smaller than 0x10000. |
| */ |
| |
| /*! |
| \fn int QChar::digitValue() const |
| |
| Returns the numeric value of the digit, or -1 if the character is not a digit. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the numeric value of the digit specified by the UCS-4-encoded |
|     character \a ucs4. Returns -1 if the character is not a digit, or if |
|     \a ucs4 lies beyond the last valid code point. |
| */ |
| int QChar::digitValue(uint ucs4) noexcept |
| { |
|     // values past LastValidCodePoint never reach the property lookup |
|     return ucs4 > LastValidCodePoint ? -1 : qGetProp(ucs4)->digitValue; |
| } |
| |
| /*! |
| \fn QChar::Category QChar::category() const |
| |
| Returns the character's category. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the category of the UCS-4-encoded character specified by \a ucs4. |
|     Values beyond the last valid code point yield QChar::Other_NotAssigned. |
| */ |
| QChar::Category QChar::category(uint ucs4) noexcept |
| { |
|     if (ucs4 <= LastValidCodePoint) |
|         return static_cast<QChar::Category>(qGetProp(ucs4)->category); |
|     return QChar::Other_NotAssigned; |
| } |
| |
| /*! |
| \fn QChar::Direction QChar::direction() const |
| |
| Returns the character's direction. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the direction of the UCS-4-encoded character specified by \a ucs4. |
|     Values beyond the last valid code point yield QChar::DirL. |
| */ |
| QChar::Direction QChar::direction(uint ucs4) noexcept |
| { |
|     return ucs4 > LastValidCodePoint |
|             ? QChar::DirL |
|             : static_cast<QChar::Direction>(qGetProp(ucs4)->direction); |
| } |
| |
| /*! |
| \fn QChar::JoiningType QChar::joiningType() const |
| \since 5.3 |
| |
| Returns information about the joining type attributes of the character |
| (needed for certain languages such as Arabic or Syriac). |
| */ |
| |
| /*! |
|     \overload |
|     \since 5.3 |
| |
|     Returns information about the joining type attributes of the UCS-4-encoded |
|     character specified by \a ucs4 (needed for certain languages such as |
|     Arabic or Syriac). Values beyond the last valid code point yield |
|     QChar::Joining_None. |
| */ |
| QChar::JoiningType QChar::joiningType(uint ucs4) noexcept |
| { |
|     if (ucs4 <= LastValidCodePoint) |
|         return static_cast<QChar::JoiningType>(qGetProp(ucs4)->joining); |
|     return QChar::Joining_None; |
| } |
| |
| #if QT_DEPRECATED_SINCE(5, 3) |
| /*! |
| \fn QChar::Joining QChar::joining() const |
| \deprecated in 5.3, use joiningType() instead. |
| |
| Returns information about the joining properties of the character |
| (needed for certain languages such as Arabic). |
| */ |
| |
| /*! |
|     \overload |
|     \deprecated in 5.3, use joiningType() instead. |
| |
|     Returns information about the joining properties of the UCS-4-encoded |
|     character specified by \a ucs4 (needed for certain languages such as Arabic). |
| */ |
| QChar::Joining QChar::joining(uint ucs4) noexcept |
| { |
|     if (ucs4 <= LastValidCodePoint) { |
|         // map the joining type stored in the property table onto the legacy enum |
|         const auto storedType = qGetProp(ucs4)->joining; |
|         if (storedType == QChar::Joining_Causing) |
|             return QChar::Center; |
|         if (storedType == QChar::Joining_Dual) |
|             return QChar::Dual; |
|         if (storedType == QChar::Joining_Right) |
|             return QChar::Right; |
|     } |
|     // invalid code points and every other joining type |
|     return QChar::OtherJoining; |
| } |
| #endif |
| |
| /*! |
| \fn bool QChar::hasMirrored() const |
| |
| Returns \c true if the character should be reversed if the text |
| direction is reversed; otherwise returns \c false. |
| |
| A bit faster equivalent of (ch.mirroredChar() != ch). |
| |
| \sa mirroredChar() |
| */ |
| |
| /*! |
|     \overload |
|     \since 5.0 |
| |
|     Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
|     should be reversed if the text direction is reversed; otherwise returns |
|     \c false. |
| |
|     A bit faster equivalent of (QChar::mirroredChar(ucs4) != ucs4). |
| |
|     \sa mirroredChar() |
| */ |
| bool QChar::hasMirrored(uint ucs4) noexcept |
| { |
|     // a character is mirrored exactly when its stored mirror offset is non-zero |
|     return ucs4 <= LastValidCodePoint && qGetProp(ucs4)->mirrorDiff != 0; |
| } |
| |
| /*! |
| \fn bool QChar::isLower() const |
| |
| Returns \c true if the character is a lowercase letter, that is, if |
| category() is Letter_Lowercase. |
| |
| \sa isUpper(), toLower(), toUpper() |
| */ |
| |
| /*! |
| \fn static bool QChar::isLower(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
| is a lowercase letter, that is, if its category() is Letter_Lowercase. |
| |
| \sa isUpper(), toLower(), toUpper() |
| */ |
| |
| /*! |
| \fn bool QChar::isUpper() const |
| |
| Returns \c true if the character is an uppercase letter, that is, if |
| category() is Letter_Uppercase. |
| |
| \sa isLower(), toUpper(), toLower() |
| */ |
| |
| /*! |
| \fn static bool QChar::isUpper(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
| is an uppercase letter, that is, if its category() is Letter_Uppercase. |
| |
| \sa isLower(), toUpper(), toLower() |
| */ |
| |
| /*! |
| \fn bool QChar::isTitleCase() const |
| |
| Returns \c true if the character is a titlecase letter, that is, if |
| category() is Letter_Titlecase. |
| |
| \sa isLower(), toUpper(), toLower(), toTitleCase() |
| */ |
| |
| /*! |
| \fn static bool QChar::isTitleCase(uint ucs4) |
| \overload |
| \since 5.0 |
| |
| Returns \c true if the UCS-4-encoded character specified by \a ucs4 |
| is a titlecase letter, that is, if its category() is Letter_Titlecase. |
| |
| \sa isLower(), toUpper(), toLower(), toTitleCase() |
| */ |
| /*! |
| \fn QChar QChar::mirroredChar() const |
| |
| Returns the mirrored character if this character is a mirrored |
| character; otherwise returns the character itself. |
| |
| \sa hasMirrored() |
| */ |
| |
| /*! |
|     \overload |
|     Returns the mirrored character if the UCS-4-encoded character specified |
|     by \a ucs4 is a mirrored character; otherwise returns the character itself. |
|     Values beyond the last valid code point are returned unchanged. |
| |
|     \sa hasMirrored() |
| */ |
| uint QChar::mirroredChar(uint ucs4) noexcept |
| { |
|     // the table stores the signed distance to the mirrored code point |
|     if (ucs4 <= LastValidCodePoint) |
|         ucs4 += qGetProp(ucs4)->mirrorDiff; |
|     return ucs4; |
| } |
| |
| |
| // Constants for algorithmic Hangul (de)composition, see UAX #15. |
| enum { |
|     // sizes of the L, V and T index ranges |
|     Hangul_LCount = 19, |
|     Hangul_VCount = 21, |
|     Hangul_TCount = 28, |
|     Hangul_NCount = Hangul_VCount * Hangul_TCount, // 588 |
|     Hangul_SCount = Hangul_LCount * Hangul_NCount, // 11172 |
| |
|     // code point at index 0 of each range; decompositionHelper() treats |
|     // a T value equal to Hangul_TBase as "no T part" |
|     Hangul_SBase = 0xAC00, |
|     Hangul_LBase = 0x1100, |
|     Hangul_VBase = 0x1161, |
|     Hangul_TBase = 0x11A7 |
| }; |
| |
| // Looks up the decomposition of ucs4. On return *length holds the number of |
| // UTF-16 code units and *tag the decomposition tag; the result points at the |
| // code units, or is nullptr (with *length == 0 and *tag == NoDecomposition) |
| // when there is none. Hangul syllables are decomposed arithmetically into |
| // buffer, which therefore must provide room for 3 code units. |
| static const unsigned short * QT_FASTCALL decompositionHelper |
|     (uint ucs4, int *length, int *tag, unsigned short *buffer) |
| { |
|     // unsigned wrap-around makes this a single range test for [SBase, SBase + SCount) |
|     const uint syllableIndex = ucs4 - Hangul_SBase; |
|     if (syllableIndex < uint(Hangul_SCount)) { |
|         // compute Hangul syllable decomposition as per UAX #15 |
|         const uint trailingIndex = syllableIndex % Hangul_TCount; |
|         buffer[0] = Hangul_LBase + syllableIndex / Hangul_NCount; // L |
|         buffer[1] = Hangul_VBase + (syllableIndex % Hangul_NCount) / Hangul_TCount; // V |
|         buffer[2] = Hangul_TBase + trailingIndex; // T |
|         *tag = QChar::Canonical; |
|         // trailing index 0 means the syllable has no T part |
|         *length = trailingIndex == 0 ? 2 : 3; |
|         return buffer; |
|     } |
| |
|     const unsigned short mapIndex = GET_DECOMPOSITION_INDEX(ucs4); |
|     if (mapIndex == 0xffff) { |
|         *tag = QChar::NoDecomposition; |
|         *length = 0; |
|         return nullptr; |
|     } |
| |
|     // the first map entry packs the tag (low byte) and the length (high byte) |
|     const unsigned short *entry = uc_decomposition_map + mapIndex; |
|     const unsigned short header = *entry; |
|     *tag = header & 0xff; |
|     *length = header >> 8; |
|     return entry + 1; |
| } |
| |
| /*! |
|     Decomposes a character into its constituent parts. Returns an empty string |
|     if no decomposition exists. |
| */ |
| QString QChar::decomposition() const |
| { |
|     // delegate to the static overload with this character's code unit |
|     const uint codePoint = ucs; |
|     return QChar::decomposition(codePoint); |
| } |
| |
| /*! |
|     \overload |
|     Decomposes the UCS-4-encoded character specified by \a ucs4 into its |
|     constituent parts. Returns an empty string if no decomposition exists. |
| */ |
| QString QChar::decomposition(uint ucs4) |
| { |
|     unsigned short hangulBuffer[3]; // storage the helper uses for Hangul syllables |
|     int unitCount; |
|     int unusedTag; |
|     const unsigned short *units = decompositionHelper(ucs4, &unitCount, &unusedTag, hangulBuffer); |
|     const QChar *chars = reinterpret_cast<const QChar *>(units); |
|     return QString(chars, unitCount); |
| } |
| |
| /*! |
| \fn QChar::Decomposition QChar::decompositionTag() const |
| |
| Returns the tag defining the decomposition of the character. Returns |
| QChar::NoDecomposition if no decomposition exists. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the tag defining the decomposition of the UCS-4-encoded character |
|     specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition |
|     exists. |
| */ |
| QChar::Decomposition QChar::decompositionTag(uint ucs4) noexcept |
| { |
|     // Hangul syllables are not in the table; they always decompose canonically |
|     const bool isHangulSyllable = ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount; |
|     if (isHangulSyllable) |
|         return QChar::Canonical; |
| |
|     const unsigned short mapIndex = GET_DECOMPOSITION_INDEX(ucs4); |
|     // the tag is the low byte of the first map entry |
|     return mapIndex == 0xffff |
|             ? QChar::NoDecomposition |
|             : static_cast<QChar::Decomposition>(uc_decomposition_map[mapIndex] & 0xff); |
| } |
| |
| /*! |
| \fn unsigned char QChar::combiningClass() const |
| |
| Returns the combining class for the character as defined in the |
| Unicode standard. This is mainly useful as a positioning hint for |
| marks attached to a base character. |
| |
| The Qt text rendering engine uses this information to correctly |
| position non-spacing marks around a base character. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the combining class for the UCS-4-encoded character specified by |
|     \a ucs4, as defined in the Unicode standard. Values beyond the last valid |
|     code point yield 0. |
| */ |
| unsigned char QChar::combiningClass(uint ucs4) noexcept |
| { |
|     if (ucs4 <= LastValidCodePoint) |
|         return static_cast<unsigned char>(qGetProp(ucs4)->combiningClass); |
|     return 0; |
| } |
| |
| /*! |
| \fn QChar::Script QChar::script() const |
| \since 5.1 |
| |
| Returns the Unicode script property value for this character. |
| */ |
| |
| /*! |
|     \overload |
|     \since 5.1 |
| |
|     Returns the Unicode script property value for the character specified in |
|     its UCS-4-encoded form as \a ucs4. Values beyond the last valid code point |
|     yield QChar::Script_Unknown. |
| */ |
| QChar::Script QChar::script(uint ucs4) noexcept |
| { |
|     return ucs4 > LastValidCodePoint |
|             ? QChar::Script_Unknown |
|             : static_cast<QChar::Script>(qGetProp(ucs4)->script); |
| } |
| |
| /*! |
| \fn QChar::UnicodeVersion QChar::unicodeVersion() const |
| |
| Returns the Unicode version that introduced this character. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the Unicode version that introduced the character specified in |
|     its UCS-4-encoded form as \a ucs4. Values beyond the last valid code point |
|     yield QChar::Unicode_Unassigned. |
| */ |
| QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4) noexcept |
| { |
|     if (ucs4 <= LastValidCodePoint) |
|         return static_cast<QChar::UnicodeVersion>(qGetProp(ucs4)->unicodeVersion); |
|     return QChar::Unicode_Unassigned; |
| } |
| |
| /*! |
|     Returns the most recent Unicode version supported, which is the version |
|     of the Unicode data tables compiled into this build. |
| */ |
| QChar::UnicodeVersion QChar::currentUnicodeVersion() noexcept |
| { |
|     return static_cast<QChar::UnicodeVersion>(UNICODE_DATA_VERSION); |
| } |
| |
| |
| // Applies the case mapping selected by which to uc. Ordinary mappings are |
| // stored as an offset added to the code point. Special mappings live in |
| // specialCaseMap as a length followed by that many code units; only entries |
| // of length 1 are applied here, otherwise uc is returned unchanged. |
| template <typename T> |
| Q_DECL_CONST_FUNCTION static inline T convertCase_helper(T uc, QUnicodeTables::Case which) noexcept |
| { |
|     const auto mapping = qGetProp(uc)->cases[which]; |
| |
|     if (!Q_UNLIKELY(mapping.special)) |
|         return uc + mapping.diff; |
| |
|     // so far, there are no special cases beyond BMP (guaranteed by the qunicodetables generator) |
|     const ushort *entry = specialCaseMap + mapping.diff; |
|     return entry[0] == 1 ? entry[1] : uc; |
| } |
| |
| /*! |
| \fn QChar QChar::toLower() const |
| |
| Returns the lowercase equivalent if the character is uppercase or titlecase; |
| otherwise returns the character itself. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the lowercase equivalent of the UCS-4-encoded character specified |
|     by \a ucs4 if the character is uppercase or titlecase; otherwise returns |
|     the character itself. Values beyond the last valid code point are returned |
|     unchanged. |
| */ |
| uint QChar::toLower(uint ucs4) noexcept |
| { |
|     return ucs4 > LastValidCodePoint |
|             ? ucs4 |
|             : convertCase_helper(ucs4, QUnicodeTables::LowerCase); |
| } |
| |
| /*! |
| \fn QChar QChar::toUpper() const |
| |
| Returns the uppercase equivalent if the character is lowercase or titlecase; |
| otherwise returns the character itself. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the uppercase equivalent of the UCS-4-encoded character specified |
|     by \a ucs4 if the character is lowercase or titlecase; otherwise returns |
|     the character itself. Values beyond the last valid code point are returned |
|     unchanged. |
| */ |
| uint QChar::toUpper(uint ucs4) noexcept |
| { |
|     if (ucs4 <= LastValidCodePoint) |
|         return convertCase_helper(ucs4, QUnicodeTables::UpperCase); |
|     return ucs4; |
| } |
| |
| /*! |
| \fn QChar QChar::toTitleCase() const |
| |
| Returns the title case equivalent if the character is lowercase or uppercase; |
| otherwise returns the character itself. |
| */ |
| |
| /*! |
|     \overload |
|     Returns the title case equivalent of the UCS-4-encoded character specified |
|     by \a ucs4 if the character is lowercase or uppercase; otherwise returns |
|     the character itself. Values beyond the last valid code point are returned |
|     unchanged. |
| */ |
| uint QChar::toTitleCase(uint ucs4) noexcept |
| { |
|     return ucs4 > LastValidCodePoint |
|             ? ucs4 |
|             : convertCase_helper(ucs4, QUnicodeTables::TitleCase); |
| } |
| |
| // Case-folds the code point ending at *ch. If *ch is a low surrogate that is |
| // preceded (not before start) by a high surrogate, the pair is folded as one |
| // code point; otherwise the single code unit is folded. |
| static inline uint foldCase(const ushort *ch, const ushort *start) |
| { |
|     const ushort unit = *ch; |
|     // ch[-1] is only read once ch > start has been established |
|     const bool completesPair = QChar::isLowSurrogate(unit) && ch > start |
|             && QChar::isHighSurrogate(ch[-1]); |
|     const uint ucs4 = completesPair ? QChar::surrogateToUcs4(ch[-1], unit) : uint(unit); |
|     return convertCase_helper(ucs4, QUnicodeTables::CaseFold); |
| } |
| |
| // Streaming variant: last carries the previously seen code unit between calls. |
| // If ch is a low surrogate and last was a high surrogate, the pair is folded |
| // as one code point. last is always updated to ch. |
| static inline uint foldCase(uint ch, uint &last) noexcept |
| { |
|     const uint previous = last; |
|     last = ch; |
|     const bool completesPair = QChar::isLowSurrogate(ch) && QChar::isHighSurrogate(previous); |
|     const uint ucs4 = completesPair ? QChar::surrogateToUcs4(previous, ch) : ch; |
|     return convertCase_helper(ucs4, QUnicodeTables::CaseFold); |
| } |
| |
| // Case-folds a single UTF-16 code unit. |
| static inline ushort foldCase(ushort ch) noexcept |
| { |
|     return convertCase_helper<ushort>(ch, QUnicodeTables::CaseFold); |
| } |
| |
| // Case-folds a QChar by folding its UTF-16 code unit. |
| static inline QChar foldCase(QChar ch) noexcept |
| { |
|     const ushort folded = foldCase(ch.unicode()); |
|     return QChar(folded); |
| } |
| |
| /*! |
| \fn QChar QChar::toCaseFolded() const |
| |
| Returns the case folded equivalent of the character. |
| For most Unicode characters this is the same as toLower(). |
| */ |
| |
| /*! |
|     \overload |
|     Returns the case folded equivalent of the UCS-4-encoded character specified |
|     by \a ucs4. For most Unicode characters this is the same as toLower(). |
|     Values beyond the last valid code point are returned unchanged. |
| */ |
| uint QChar::toCaseFolded(uint ucs4) noexcept |
| { |
|     if (ucs4 <= LastValidCodePoint) |
|         return convertCase_helper(ucs4, QUnicodeTables::CaseFold); |
|     return ucs4; |
| } |
| |
| /*! |
| \fn char QChar::toLatin1() const |
| |
| Returns the Latin-1 character equivalent to the QChar, or 0. This |
| is mainly useful for non-internationalized software. |
| |
| \note It is not possible to distinguish a non-Latin-1 character from a Latin-1 0 |
| (NUL) character. Prefer to use unicode(), which does not have this ambiguity. |
| |
| \sa unicode() |
| */ |
| |
| /*! |
| \fn QChar QChar::fromLatin1(char c) |
| |
| Converts the Latin-1 character \a c to its equivalent QChar. This |
| is mainly useful for non-internationalized software. |
| |
| An alternative is to use QLatin1Char. |
| |
| \sa toLatin1(), unicode() |
| */ |
| |
| /*! |
| \fn char QChar::toAscii() const |
| \deprecated |
| |
| Returns the Latin-1 character value of the QChar, or 0 if the character is not |
| representable. |
| |
| The main purpose of this function is to preserve ASCII characters used |
| in C strings. This is mainly useful for developers of non-internationalized |
| software. |
| |
| \note It is not possible to distinguish a non-Latin 1 character from an ASCII 0 |
| (NUL) character. Prefer to use unicode(), which does not have this ambiguity. |
| |
| \note This function does not check whether the character value is inside |
| the valid range of US-ASCII. |
| |
| \sa toLatin1(), unicode() |
| */ |
| |
| /*! |
| \fn QChar QChar::fromAscii(char c) |
| \deprecated |
| |
| Converts the ASCII character \a c to its equivalent QChar. This |
| is mainly useful for non-internationalized software. |
| |
| An alternative is to use QLatin1Char. |
| |
| \sa fromLatin1(), unicode() |
| */ |
| |
| #ifndef QT_NO_DATASTREAM |
| /*! |
|     \relates QChar |
| |
|     Writes the char \a chr to the stream \a out, as the 16-bit value |
|     returned by QChar::unicode(). |
| |
|     \sa {Serializing Qt Data Types} |
| */ |
| QDataStream &operator<<(QDataStream &out, QChar chr) |
| { |
|     return out << quint16(chr.unicode()); |
| } |
| |
| /*! |
|     \relates QChar |
| |
|     Reads a char, stored as a 16-bit value, from the stream \a in into |
|     char \a chr. |
| |
|     \sa {Serializing Qt Data Types} |
| */ |
| QDataStream &operator>>(QDataStream &in, QChar &chr) |
| { |
|     quint16 codeUnit; |
|     in >> codeUnit; |
|     chr = QChar(codeUnit); |
|     return in; |
| } |
| #endif // QT_NO_DATASTREAM |
| |
| /*! |
| \fn ushort & QChar::unicode() |
| |
| Returns a reference to the numeric Unicode value of the QChar. |
| */ |
| |
| /*! |
| \fn ushort QChar::unicode() const |
| |
| Returns the numeric Unicode value of the QChar. |
| */ |
| |
| /***************************************************************************** |
| Documentation of QChar related functions |
| *****************************************************************************/ |
| |
| /*! |
| \fn bool operator==(QChar c1, QChar c2) |
| |
| \relates QChar |
| |
| Returns \c true if \a c1 and \a c2 are the same Unicode character; |
| otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn int operator!=(QChar c1, QChar c2) |
| |
| \relates QChar |
| |
| Returns \c true if \a c1 and \a c2 are not the same Unicode |
| character; otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn int operator<=(QChar c1, QChar c2) |
| |
| \relates QChar |
| |
| Returns \c true if the numeric Unicode value of \a c1 is less than |
| or equal to that of \a c2; otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn int operator>=(QChar c1, QChar c2) |
| |
| \relates QChar |
| |
| Returns \c true if the numeric Unicode value of \a c1 is greater than |
| or equal to that of \a c2; otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn int operator<(QChar c1, QChar c2) |
| |
| \relates QChar |
| |
| Returns \c true if the numeric Unicode value of \a c1 is less than |
| that of \a c2; otherwise returns \c false. |
| */ |
| |
| /*! |
| \fn int operator>(QChar c1, QChar c2) |
| |
| \relates QChar |
| |
| Returns \c true if the numeric Unicode value of \a c1 is greater than |
| that of \a c2; otherwise returns \c false. |
| */ |
| |
| |
| // --------------------------------------------------------------------------- |
| |
| |
| // Replaces, in place, every code point of *str at or after index from by its |
| // decomposition. Only canonical decompositions are applied when canonical is |
| // true. Code points introduced after Unicode version are left untouched. |
| // The string is scanned backwards; after a replacement the scan resumes at the |
| // end of the inserted text, so decompositions are themselves decomposed. |
| static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from) |
| { |
|     QString &s = *str; |
| |
|     // Same guard as composeHelper(): with an out-of-range start the backwards |
|     // scan below would never meet its stop position and run off the buffer. |
|     if (from < 0 || from >= s.length()) |
|         return; |
| |
|     int length; |
|     int tag; |
|     unsigned short buffer[3]; // scratch space decompositionHelper() needs for Hangul |
| |
|     const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data()); |
|     const unsigned short *uc = utf16 + s.length(); |
|     while (uc > utf16 + from) { |
|         uint ucs4 = *(--uc); |
|         // Join a low surrogate with the high surrogate before it, but never |
|         // step in front of the start position. |
|         if (QChar(ucs4).isLowSurrogate() && uc > utf16 + from) { |
|             ushort high = *(uc - 1); |
|             if (QChar(high).isHighSurrogate()) { |
|                 --uc; |
|                 ucs4 = QChar::surrogateToUcs4(high, ucs4); |
|             } |
|         } |
| |
|         if (QChar::unicodeVersion(ucs4) > version) |
|             continue; |
| |
|         const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); |
|         if (!d || (canonical && tag != QChar::Canonical)) |
|             continue; |
| |
|         int pos = uc - utf16; |
|         s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length); |
|         // since the replace invalidates the pointers and we do decomposition recursive |
|         utf16 = reinterpret_cast<unsigned short *>(s.data()); |
|         uc = utf16 + pos + length; |
|     } |
| } |
| |
| |
| // Ligature table entry for BMP characters. ligatureHelper() looks entries up |
| // by u1 and returns u2 on a match. |
| struct UCS2Pair { |
|     ushort u1; |
|     ushort u2; |
| }; |
| |
| // Orderings on u1 only, in all three operand combinations, as used by the |
| // std::lower_bound() search in ligatureHelper(). |
| inline bool operator<(const UCS2Pair &lhs, const UCS2Pair &rhs) |
| { |
|     return lhs.u1 < rhs.u1; |
| } |
| inline bool operator<(ushort key, const UCS2Pair &entry) |
| { |
|     return key < entry.u1; |
| } |
| inline bool operator<(const UCS2Pair &entry, ushort key) |
| { |
|     return entry.u1 < key; |
| } |
| |
| // Ligature table entry for characters outside the BMP: p1 and p2 each hold a |
| // surrogate pair (u1 = high, u2 = low). ligatureHelper() looks entries up by |
| // the code point of p1 and returns the code point of p2 on a match. |
| struct UCS2SurrogatePair { |
|     UCS2Pair p1; |
|     UCS2Pair p2; |
| }; |
| |
| // Orderings on the code point encoded by p1, in all three operand combinations. |
| inline bool operator<(const UCS2SurrogatePair &lhs, const UCS2SurrogatePair &rhs) |
| { |
|     const uint lhsKey = QChar::surrogateToUcs4(lhs.p1.u1, lhs.p1.u2); |
|     const uint rhsKey = QChar::surrogateToUcs4(rhs.p1.u1, rhs.p1.u2); |
|     return lhsKey < rhsKey; |
| } |
| inline bool operator<(uint u1, const UCS2SurrogatePair &entry) |
| { |
|     const uint entryKey = QChar::surrogateToUcs4(entry.p1.u1, entry.p1.u2); |
|     return u1 < entryKey; |
| } |
| inline bool operator<(const UCS2SurrogatePair &entry, uint u1) |
| { |
|     const uint entryKey = QChar::surrogateToUcs4(entry.p1.u1, entry.p1.u2); |
|     return entryKey < u1; |
| } |
| |
| // Returns the code point that u1 followed by u2 composes to, or 0 if the two |
| // do not compose. Hangul syllables are composed arithmetically; everything |
| // else is looked up in uc_ligature_map, keyed by u2 and then searched for u1. |
| static uint inline ligatureHelper(uint u1, uint u2) |
| { |
|     if (u1 >= Hangul_LBase && u1 < Hangul_SBase + Hangul_SCount) { |
|         // compute Hangul syllable composition as per UAX #15 |
|         // hangul L-V pair |
|         const uint LIndex = u1 - Hangul_LBase; |
|         if (LIndex < Hangul_LCount) { |
|             const uint VIndex = u2 - Hangul_VBase; |
|             if (VIndex < Hangul_VCount) |
|                 return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount; |
|         } |
|         // hangul LV-T pair |
|         const uint SIndex = u1 - Hangul_SBase; |
|         if (SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) { |
|             const uint TIndex = u2 - Hangul_TBase; |
|             // Valid trailing indexes are 1 .. TCount - 1. Index 0 (Hangul_TBase |
|             // itself) means "no T part" and must not be swallowed into the |
|             // syllable; index TCount would yield the next LV syllable instead. |
|             if (TIndex > 0 && TIndex < Hangul_TCount) |
|                 return u1 + TIndex; |
|         } |
|     } |
| |
|     const unsigned short index = GET_LIGATURE_INDEX(u2); |
|     if (index == 0xffff) |
|         return 0; |
|     // the list for u2 starts with its entry count |
|     const unsigned short *ligatures = uc_ligature_map+index; |
|     ushort length = *ligatures++; |
|     if (QChar::requiresSurrogates(u1)) { |
|         // entries are surrogate-pair records; binary search on the first code point |
|         const UCS2SurrogatePair *data = reinterpret_cast<const UCS2SurrogatePair *>(ligatures); |
|         const UCS2SurrogatePair *r = std::lower_bound(data, data + length, u1); |
|         if (r != data + length && QChar::surrogateToUcs4(r->p1.u1, r->p1.u2) == u1) |
|             return QChar::surrogateToUcs4(r->p2.u1, r->p2.u2); |
|     } else { |
|         // entries are BMP records; binary search on the first code unit |
|         const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures); |
|         const UCS2Pair *r = std::lower_bound(data, data + length, ushort(u1)); |
|         if (r != data + length && r->u1 == ushort(u1)) |
|             return r->u2; |
|     } |
| |
|     return 0; |
| } |
| |
| // Composes, in place, the code points of *str starting at index from: a |
| // character is merged into the most recent starter (combining class 0) when |
| // ligatureHelper() knows a composition for the pair and the character either |
| // directly follows the starter or has a higher combining class than the |
| // character before it. Code points introduced after Unicode version reset the |
| // starter and are never composed. Does nothing if from is negative or fewer |
| // than two code units follow it. |
| static void composeHelper(QString *str, QChar::UnicodeVersion version, int from) |
| { |
|     QString &s = *str; |
| |
|     if (from < 0 || s.length() - from < 2) |
|         return; |
| |
|     uint stcode = 0; // starter code point |
|     int starter = -1; // starter position |
|     int next = -1; // to prevent i == next |
|     int lastCombining = 255; // to prevent combining > lastCombining |
| |
|     int pos = from; |
|     while (pos < s.length()) { |
|         int i = pos; // first code unit of the current code point |
|         uint uc = s.at(pos).unicode(); |
|         if (QChar(uc).isHighSurrogate() && pos < s.length()-1) { |
|             ushort low = s.at(pos+1).unicode(); |
|             if (QChar(low).isLowSurrogate()) { |
|                 uc = QChar::surrogateToUcs4(uc, low); |
|                 ++pos; // pos now sits on the low surrogate |
|             } |
|         } |
| |
|         const QUnicodeTables::Properties *p = qGetProp(uc); |
|         if (p->unicodeVersion > version) { |
|             starter = -1; |
|             next = -1; // to prevent i == next |
|             lastCombining = 255; // to prevent combining > lastCombining |
|             ++pos; |
|             continue; |
|         } |
| |
|         int combining = p->combiningClass; |
|         if ((i == next || combining > lastCombining) && starter >= from) { |
|             // allowed to form ligature with S |
|             uint ligature = ligatureHelper(stcode, uc); |
|             if (ligature) { |
|                 stcode = ligature; |
|                 QChar *d = s.data(); |
|                 // ligatureHelper() never changes planes |
|                 if (QChar::requiresSurrogates(ligature)) { |
|                     d[starter] = QChar(QChar::highSurrogate(ligature)); |
|                     d[starter + 1] = QChar(QChar::lowSurrogate(ligature)); |
|                     s.remove(i, 2); |
|                 } else { |
|                     d[starter] = QChar(ligature); |
|                     s.remove(i, 1); |
|                 } |
|                 // The code unit that followed the removed character is now at i. |
|                 // Resume there: for a surrogate pair pos had already advanced to |
|                 // i + 1, which would skip that code unit. |
|                 pos = i; |
|                 continue; |
|             } |
|         } |
|         if (combining == 0) { |
|             starter = i; |
|             stcode = uc; |
|             next = pos + 1; |
|         } |
|         lastCombining = combining; |
| |
|         ++pos; |
|     } |
| } |
| |
| |
| // Reorders, in place, the code points of *str starting at index from: whenever |
| // a code point with non-zero combining class directly follows one with a larger |
| // combining class, the two are exchanged and the scan steps back one code point |
| // to re-check the moved character against its new predecessor. Code points |
| // introduced after Unicode version are treated as combining class 0. |
| static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from) |
| { |
| QString &s = *str; |
| // index of the last code unit |
| const int l = s.length()-1; |
| |
| // u1/c1: code point at pos and its combining class; u2/c2: the code point after it |
| uint u1, u2; |
| ushort c1, c2; |
| |
| int pos = from; |
| while (pos < l) { |
| // p2 indexes the code point following u1 |
| int p2 = pos+1; |
| u1 = s.at(pos).unicode(); |
| if (QChar(u1).isHighSurrogate()) { |
| ushort low = s.at(p2).unicode(); |
| if (QChar(low).isLowSurrogate()) { |
| u1 = QChar::surrogateToUcs4(u1, low); |
| // the pair ends the string: there is no u2 to compare with |
| if (p2 >= l) |
| break; |
| ++p2; |
| } |
| } |
| // 0 doubles as "not looked up yet"; see the lazy lookup below |
| c1 = 0; |
| |
| // re-entered from the else branch below with u1/c1 carried over from the previous u2/c2 |
| advance: |
| u2 = s.at(p2).unicode(); |
| if (QChar(u2).isHighSurrogate() && p2 < l) { |
| ushort low = s.at(p2+1).unicode(); |
| if (QChar(low).isLowSurrogate()) { |
| u2 = QChar::surrogateToUcs4(u2, low); |
| // p2 now indexes the last code unit of u2 |
| ++p2; |
| } |
| } |
| |
| c2 = 0; |
| { |
| const QUnicodeTables::Properties *p = qGetProp(u2); |
| if (p->unicodeVersion <= version) |
| c2 = p->combiningClass; |
| } |
| // u2 has class 0: nothing is reordered across it, resume right after u2 |
| if (c2 == 0) { |
| pos = p2+1; |
| continue; |
| } |
| |
| // c1 is only looked up once u2 turned out to have a non-zero class |
| if (c1 == 0) { |
| const QUnicodeTables::Properties *p = qGetProp(u1); |
| if (p->unicodeVersion <= version) |
| c1 = p->combiningClass; |
| } |
| |
| if (c1 > c2) { |
| QChar *uc = s.data(); |
| int p = pos; |
| // exchange characters |
| // (u2 is written first, then u1, each as one or two code units) |
| if (!QChar::requiresSurrogates(u2)) { |
| uc[p++] = QChar(u2); |
| } else { |
| uc[p++] = QChar(QChar::highSurrogate(u2)); |
| uc[p++] = QChar(QChar::lowSurrogate(u2)); |
| } |
| if (!QChar::requiresSurrogates(u1)) { |
| uc[p++] = QChar(u1); |
| } else { |
| uc[p++] = QChar(QChar::highSurrogate(u1)); |
| uc[p++] = QChar(QChar::lowSurrogate(u1)); |
| } |
| // step back one code point (two code units if it lands on a low surrogate) |
| if (pos > 0) |
| --pos; |
| if (pos > 0 && s.at(pos).isLowSurrogate()) |
| --pos; |
| } else { |
| // already in order: move pos past u1 |
| ++pos; |
| if (QChar::requiresSurrogates(u1)) |
| ++pos; |
| |
| // u2 becomes the new u1; p2 is moved to the code point after it |
| u1 = u2; |
| c1 = c2; // != 0 |
| p2 = pos + 1; |
| if (QChar::requiresSurrogates(u1)) |
| ++p2; |
| if (p2 > l) |
| break; |
| |
| goto advance; |
| } |
| } |
| } |
| |
| // Returns true if the text of *str from index from onwards is already in the |
| // Normalization Form given by mode; false otherwise. |
| // While scanning, *lastStable is set to the position of the last stable code |
| // point seen (ASCII, an unpaired high surrogate, or combining class 0). |
| static bool normalizationQuickCheckHelper(QString *str, QString::NormalizationForm mode, int from, int *lastStable) |
| { |
|     // the quick-check bits are extracted with a shift of (mode << 1) below, |
|     // which relies on these exact enumerator values |
|     Q_STATIC_ASSERT(QString::NormalizationForm_D == 0); |
|     Q_STATIC_ASSERT(QString::NormalizationForm_C == 1); |
|     Q_STATIC_ASSERT(QString::NormalizationForm_KD == 2); |
|     Q_STATIC_ASSERT(QString::NormalizationForm_KC == 3); |
| |
|     enum { NFQC_YES = 0, NFQC_NO = 1, NFQC_MAYBE = 3 }; |
| |
|     const ushort *units = reinterpret_cast<const ushort *>(str->constData()); |
|     const int fullLength = str->length(); |
| |
|     // Leave trailing high surrogates out of the scan, so that reading the unit |
|     // after a high surrogate inside the loop needs no bounds check. |
|     int end = fullLength; |
|     while (end > from && QChar::isHighSurrogate(units[end - 1])) |
|         --end; |
| |
|     uchar previousClass = 0; |
|     for (int index = from; index < end; ) { |
|         const int start = index; |
|         uint ucs4 = units[index++]; |
| |
|         // ASCII characters are stable code points; a high surrogate that is |
|         // not followed by a low surrogate is treated like one as well |
|         bool stable = ucs4 < 0x80; |
|         if (!stable && QChar::isHighSurrogate(ucs4)) { |
|             const ushort low = units[index]; |
|             if (QChar::isLowSurrogate(low)) { |
|                 ucs4 = QChar::surrogateToUcs4(ucs4, low); |
|                 ++index; |
|             } else { |
|                 stable = true; |
|             } |
|         } |
|         if (stable) { |
|             previousClass = 0; |
|             *lastStable = start; |
|             continue; |
|         } |
| |
|         const QUnicodeTables::Properties *prop = qGetProp(ucs4); |
|         const uchar combining = prop->combiningClass; |
| |
|         // a non-zero class smaller than its predecessor's is out of order |
|         if (combining > 0 && combining < previousClass) |
|             return false; |
| |
|         // two quick-check bits per normalization form |
|         const uchar check = (prop->nfQuickCheck >> (mode << 1)) & 0x03; |
|         if (check != NFQC_YES) |
|             return false; // ### can we quick check NFQC_MAYBE ? |
| |
|         previousClass = combining; |
|         if (combining == 0) |
|             *lastStable = start; |
|     } |
| |
|     if (end != fullLength) // high surrogates were left out at the end of the text |
|         *lastStable = fullLength - 1; |
| |
|     return true; |
| } |
| |
| QT_END_NAMESPACE |