// qt-everywhere-src-5.15.1/qttools/src/qdoc/tokenizer.cpp - orbit - Git at Google

 /****************************************************************************
 **
 ** Copyright (C) 2019 The Qt Company Ltd.
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the tools applications of the Qt Toolkit.
 **
 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
 ** Commercial License Usage
 ** Licensees holding valid commercial Qt licenses may use this file in
 ** accordance with the commercial license agreement provided with the
 ** Software or, alternatively, in accordance with the terms contained in
 ** a written agreement between you and The Qt Company. For licensing terms
 ** and conditions see https://www.qt.io/terms-conditions. For further
 ** information use the contact form at https://www.qt.io/contact-us.
 **
 ** GNU General Public License Usage
 ** Alternatively, this file may be used under the terms of the GNU
 ** General Public License version 3 as published by the Free Software
 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
 ** included in the packaging of this file. Please review the following
 ** information to ensure the GNU General Public License requirements will
 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
 **
 ** $QT_END_LICENSE$
 **
 ****************************************************************************/

 #include "tokenizer.h"

 #include "config.h"
 #include "generator.h"

 #include <QtCore/qfile.h>
 #include <QtCore/qhash.h>
 #include <QtCore/qregexp.h>
 #include <QtCore/qstring.h>
 #include <QtCore/qtextcodec.h>

 #include <ctype.h>
 #include <string.h>

 QT_BEGIN_NAMESPACE

 #define LANGUAGE_CPP "Cpp"

 /* qmake ignore Q_OBJECT */

 /*
   Spellings of the keywords recognized by the tokenizer.

   getToken() turns the entry at index i into the token code
   Tok_FirstKeyword + i, and initialize() inserts the first
   (Tok_LastKeyword - Tok_FirstKeyword + 1) entries into the keyword
   hash table. The order and the number of entries must therefore be
   kept in sync with tokenizer.h.
 */
 static const char *kwords[] = { "char",
                                 "class",
                                 "const",
                                 "double",
                                 "enum",
                                 "explicit",
                                 "friend",
                                 "inline",
                                 "int",
                                 "long",
                                 "namespace",
                                 "operator",
                                 "private",
                                 "protected",
                                 "public",
                                 "short",
                                 "signals",
                                 "signed",
                                 "slots",
                                 "static",
                                 "struct",
                                 "template",
                                 "typedef",
                                 "typename",
                                 "union",
                                 "unsigned",
                                 "using",
                                 "virtual",
                                 "void",
                                 "volatile",
                                 "__int64",
                                 "default",
                                 "delete",
                                 "final",
                                 "override",
                                 "Q_OBJECT",
                                 "Q_OVERRIDE",
                                 "Q_PROPERTY",
                                 "Q_PRIVATE_PROPERTY",
                                 "Q_DECLARE_SEQUENTIAL_ITERATOR",
                                 "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR",
                                 "Q_DECLARE_ASSOCIATIVE_ITERATOR",
                                 "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR",
                                 "Q_DECLARE_FLAGS",
                                 "Q_SIGNALS",
                                 "Q_SLOTS",
                                 "QT_COMPAT",
                                 "QT_COMPAT_CONSTRUCTOR",
                                 "QT_DEPRECATED",
                                 "QT_MOC_COMPAT",
                                 "QT_MODULE",
                                 "QT3_SUPPORT",
                                 "QT3_SUPPORT_CONSTRUCTOR",
                                 "QT3_MOC_SUPPORT",
                                 "QDOC_PROPERTY",
                                 "QPrivateSignal" };

 /*
   Keyword hash table. A slot holds 0 when it is empty, -1 when it was
   claimed by an ignored token or directive, and otherwise the 1-based
   index of a keyword in kwords[]. Collisions are resolved by moving on
   to the next slot (wrapping around at the end of the table).
 */
 static const int KwordHashTableSize = 4096;
 static int kwordHashTable[KwordHashTableSize];

 /*
   Names that the tokenizer drops from the token stream. The value is
   true for a directive (its parenthesized argument list is dropped as
   well) and false for a plain token.
 */
 static QHash<QByteArray, bool> *ignoredTokensAndDirectives = nullptr;

 // Removed from preprocessor conditions before they are evaluated.
 static QRegExp *comment = nullptr;
 // Matched against the text of a #define to capture the version string.
 static QRegExp *versionX = nullptr;
 // Recognizes the "defined(X)" / "defined X" form inside a condition.
 static QRegExp *definedX = nullptr;

 // Alternation of all symbols that count as defined.
 static QRegExp *defines = nullptr;
 // Alternation of all conditions that evaluate to false.
 static QRegExp *falsehoods = nullptr;

 #ifndef QT_NO_TEXTCODEC
 // Codec used by lexeme() and previousLexeme() to convert to Unicode.
 static QTextCodec *sourceCodec = nullptr;
 #endif

 /*
   This function is a perfect hash function for the 37 keywords of C99
   (with a hash table size of 512). It should perform well on our
   Qt-enhanced C++ subset.
 */
 static int hashKword(const char *s, int len)
 {
     return (((uchar)s[0]) + (((uchar)s[2]) << 5) + (((uchar)s[len - 1]) << 3)) % KwordHashTableSize;
 }

 /*
   Records \a number for the keyword \a s in kwordHashTable. Probing
   starts at the home slot computed by hashKword() and moves forward,
   wrapping around at the end of the table, until an empty (zero) slot
   is found.
 */
 static void insertKwordIntoHash(const char *s, int number)
 {
     int slot = hashKword(s, int(strlen(s)));
     while (kwordHashTable[slot] != 0)
         slot = (slot + 1) % KwordHashTableSize;
     kwordHashTable[slot] = number;
 }

 /*
   Creates a tokenizer for the complete contents of the file \a in and
   positions it at the first character. \a loc is the location that the
   token positions are counted from.
 */
 Tokenizer::Tokenizer(const Location &loc, QFile &in) : yyIn(in.readAll())
 {
     init();
     yyPos = 0;
     start(loc);
 }

 /*
   Creates a tokenizer for the in-memory source text \a in and
   positions it at the first character. \a loc is the location that the
   token positions are counted from.
 */
 Tokenizer::Tokenizer(const Location &loc, const QByteArray &in) : yyIn(in)
 {
     // The read position must be reset before start() fetches the first character.
     yyPos = 0;
     init();
     start(loc);
 }

 /*
   Releases the two lexeme buffers that init() allocated.
 */
 Tokenizer::~Tokenizer()
 {
     delete[] yyLexBuf2;
     delete[] yyLexBuf1;
 }

 /*
   Scans the input starting at the current character yyCh and returns
   the code of the next token, or Tok_Eoi once the input is exhausted.

   The two lexeme buffers are swapped on entry, so the buffer that held
   the previous token becomes yyPrevLex and the other one is reused as
   yyLex. yyTokLoc is set to the location where the scan of the current
   token began.

   The following input produces no token; the scan simply continues:
   whitespace, '//' comments, C-style comments that are neither doc
   comments nor located inside parentheses, ignored tokens and
   directives, the keyword 'typename', empty character literals,
   backslash escapes outside literals, and unrecognized characters.

   NOTE(review): getChar() is defined outside this file; the code below
   presumably relies on it to append the consumed character to yyLex and
   to return EOF at the end of the input - confirm in tokenizer.h.
 */
 int Tokenizer::getToken()
 {
     // Swap the lexeme buffers: the last token's text stays available as yyPrevLex.
     char *t = yyPrevLex;
     yyPrevLex = yyLex;
     yyLex = t;

     while (yyCh != EOF) {
         yyTokLoc = yyCurLoc;
         yyLexLen = 0;

         if (isspace(yyCh)) {
             // Whitespace: skip the whole run, then restart the scan.
             do {
                 yyCh = getChar();
             } while (isspace(yyCh));
         } else if (isalpha(yyCh) || yyCh == '_') {
             // Identifier or keyword: consume [A-Za-z0-9_]*.
             do {
                 yyCh = getChar();
             } while (isalnum(yyCh) || yyCh == '_');

             // Probe the keyword table, starting at the identifier's home slot.
             int k = hashKword(yyLex, int(yyLexLen));
             for (;;) {
                 int i = kwordHashTable[k];
                 if (i == 0) {
                     // Empty slot: the identifier is not a keyword.
                     return Tok_Ident;
                 } else if (i == -1) {
                     // Slot claimed by some ignored token or directive; the
                     // hash lookup tells whether it is this identifier.
                     if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) {
                         if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive
                             // Drop the directive's argument list as well:
                             // consume up to and including the ')' that is
                             // met while parenDepth is at most 1.
                             int parenDepth = 0;
                             while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) {
                                 if (yyCh == '(')
                                     ++parenDepth;
                                 else if (yyCh == ')')
                                     --parenDepth;
                                 yyCh = getChar();
                             }
                             if (yyCh == ')')
                                 yyCh = getChar();
                         }
                         // No token is produced; continue with the outer loop.
                         break;
                     }
                 } else if (strcmp(yyLex, kwords[i - 1]) == 0) {
                     // Keyword found. 'typename' is dropped from the token stream.
                     int ret = (int)Tok_FirstKeyword + i - 1;
                     if (ret != Tok_typename)
                         return ret;
                     break;
                 }

                 // Collision: try the next slot, wrapping around at the end.
                 if (++k == KwordHashTableSize)
                     k = 0;
             }
         } else if (isdigit(yyCh)) {
             // Number: a loose scan that accepts alphanumerics, '.', '+' and '-'.
             do {
                 yyCh = getChar();
             } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' || yyCh == '-');
             return Tok_Number;
         } else {
             switch (yyCh) {
             case '!':
             case '%':
                 yyCh = getChar();
                 if (yyCh == '=')
                     yyCh = getChar();
                 return Tok_SomeOperator;
             case '"':
                 yyCh = getChar();

                 // Consume the literal's body; a backslash protects the next character.
                 while (yyCh != EOF && yyCh != '"') {
                     if (yyCh == '\\')
                         yyCh = getChar();
                     yyCh = getChar();
                 }
                 // Step past the closing quote.
                 yyCh = getChar();

                 // NOTE(review): the test is made after the closing quote was
                 // consumed, so a properly terminated literal that is the very
                 // last thing in the input also ends up in the warning branch.
                 if (yyCh == EOF)
                     yyTokLoc.warning(tr("Unterminated C++ string literal"),
                                      tr("Maybe you forgot '/*!' at the beginning of the file?"));
                 else
                     return Tok_String;
                 break;
             case '#':
                 // Preprocessor directive: handled separately, which also
                 // delivers the token that follows the directive.
                 return getTokenAfterPreprocessor();
             case '&':
                 yyCh = getChar();
                 /*
                   Removed check for '&&', only interpret '&=' as an operator.
                   '&&' is also used for an rvalue reference. QTBUG-32675
                  */
                 if (yyCh == '=') {
                     yyCh = getChar();
                     return Tok_SomeOperator;
                 } else {
                     return Tok_Ampersand;
                 }
             case '\'':
                 yyCh = getChar();
                 /*
                   Allow empty character literal. QTBUG-25775
                  */
                 if (yyCh == '\'') {
                     yyCh = getChar();
                     break;
                 }
                 if (yyCh == '\\')
                     yyCh = getChar();
                 do {
                     yyCh = getChar();
                 } while (yyCh != EOF && yyCh != '\'');

                 if (yyCh == EOF) {
                     yyTokLoc.warning(tr("Unterminated C++ character literal"));
                 } else {
                     // A character literal is reported as a number token.
                     yyCh = getChar();
                     return Tok_Number;
                 }
                 break;
             case '(':
                 yyCh = getChar();
                 // Nesting depths are tracked only outside skipped #if blocks.
                 if (yyNumPreprocessorSkipping == 0)
                     yyParenDepth++;
                 if (isspace(yyCh)) {
                     do {
                         yyCh = getChar();
                     } while (isspace(yyCh));
                     // Cut the lexeme back to one character, discarding the
                     // whitespace that was skipped after the parenthesis.
                     yyLexLen = 1;
                     yyLex[1] = '\0';
                 }
                 if (yyCh == '*') {
                     yyCh = getChar();
                     return Tok_LeftParenAster;
                 }
                 return Tok_LeftParen;
             case ')':
                 yyCh = getChar();
                 if (yyNumPreprocessorSkipping == 0)
                     yyParenDepth--;
                 return Tok_RightParen;
             case '*':
                 yyCh = getChar();
                 if (yyCh == '=') {
                     yyCh = getChar();
                     return Tok_SomeOperator;
                 } else {
                     return Tok_Aster;
                 }
             case '^':
                 yyCh = getChar();
                 if (yyCh == '=') {
                     yyCh = getChar();
                     return Tok_SomeOperator;
                 } else {
                     return Tok_Caret;
                 }
             case '+':
                 yyCh = getChar();
                 if (yyCh == '+' || yyCh == '=')
                     yyCh = getChar();
                 return Tok_SomeOperator;
             case ',':
                 yyCh = getChar();
                 return Tok_Comma;
             case '-':
                 // Covers '-', '--', '-=', '->' and '->*'.
                 yyCh = getChar();
                 if (yyCh == '-' || yyCh == '=') {
                     yyCh = getChar();
                 } else if (yyCh == '>') {
                     yyCh = getChar();
                     if (yyCh == '*')
                         yyCh = getChar();
                 }
                 return Tok_SomeOperator;
             case '.':
                 // Covers '.', '.*', a run of dots (ellipsis) and numbers like '.5'.
                 yyCh = getChar();
                 if (yyCh == '*') {
                     yyCh = getChar();
                 } else if (yyCh == '.') {
                     do {
                         yyCh = getChar();
                     } while (yyCh == '.');
                     return Tok_Ellipsis;
                 } else if (isdigit(yyCh)) {
                     do {
                         yyCh = getChar();
                     } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' || yyCh == '-');
                     return Tok_Number;
                 }
                 return Tok_SomeOperator;
             case '/':
                 yyCh = getChar();
                 if (yyCh == '/') {
                     // Line comment: skip to the end of the line, no token.
                     do {
                         yyCh = getChar();
                     } while (yyCh != EOF && yyCh != '\n');
                 } else if (yyCh == '*') {
                     bool metDoc = false; // empty doc is no doc
                     bool metSlashAsterBang = false;
                     bool metAster = false;
                     bool metAsterSlash = false;

                     yyCh = getChar();
                     if (yyCh == '!')
                         metSlashAsterBang = true;

                     // Scan for the closing asterisk-slash pair. metDoc is
                     // set by any printable character other than '*' and
                     // other than the '/' that closes the comment.
                     while (!metAsterSlash) {
                         if (yyCh == EOF) {
                             yyTokLoc.warning(tr("Unterminated C++ comment"));
                             break;
                         } else {
                             if (yyCh == '*') {
                                 metAster = true;
                             } else if (metAster && yyCh == '/') {
                                 metAsterSlash = true;
                             } else {
                                 metAster = false;
                                 if (isgraph(yyCh))
                                     metDoc = true;
                             }
                         }
                         yyCh = getChar();
                     }
                     // A non-empty comment opened with '!' is a doc comment.
                     // Any other comment is reported only inside parentheses;
                     // elsewhere it produces no token.
                     if (metSlashAsterBang && metDoc)
                         return Tok_Doc;
                     else if (yyParenDepth > 0)
                         return Tok_Comment;
                 } else {
                     if (yyCh == '=')
                         yyCh = getChar();
                     return Tok_SomeOperator;
                 }
                 break;
             case ':':
                 yyCh = getChar();
                 if (yyCh == ':') {
                     // '::' (scope resolution)
                     yyCh = getChar();
                     return Tok_Gulbrandsen;
                 } else {
                     return Tok_Colon;
                 }
             case ';':
                 yyCh = getChar();
                 return Tok_Semicolon;
             case '<':
                 // '<<', '<<=' and '<=' are operators; a lone '<' is an angle bracket.
                 yyCh = getChar();
                 if (yyCh == '<') {
                     yyCh = getChar();
                     if (yyCh == '=')
                         yyCh = getChar();
                     return Tok_SomeOperator;
                 } else if (yyCh == '=') {
                     yyCh = getChar();
                     return Tok_SomeOperator;
                 } else {
                     return Tok_LeftAngle;
                 }
             case '=':
                 yyCh = getChar();
                 if (yyCh == '=') {
                     yyCh = getChar();
                     return Tok_SomeOperator;
                 } else {
                     return Tok_Equal;
                 }
             case '>':
                 // '>>', '>>=' and '>=' are operators; a lone '>' is an angle bracket.
                 yyCh = getChar();
                 if (yyCh == '>') {
                     yyCh = getChar();
                     if (yyCh == '=')
                         yyCh = getChar();
                     return Tok_SomeOperator;
                 } else if (yyCh == '=') {
                     yyCh = getChar();
                     return Tok_SomeOperator;
                 } else {
                     return Tok_RightAngle;
                 }
             case '?':
                 yyCh = getChar();
                 return Tok_SomeOperator;
             case '[':
                 yyCh = getChar();
                 if (yyNumPreprocessorSkipping == 0)
                     yyBracketDepth++;
                 return Tok_LeftBracket;
             case '\\':
                 yyCh = getChar();
                 yyCh = getChar(); // skip one character
                 break;
             case ']':
                 yyCh = getChar();
                 if (yyNumPreprocessorSkipping == 0)
                     yyBracketDepth--;
                 return Tok_RightBracket;
             case '{':
                 yyCh = getChar();
                 if (yyNumPreprocessorSkipping == 0)
                     yyBraceDepth++;
                 return Tok_LeftBrace;
             case '}':
                 yyCh = getChar();
                 if (yyNumPreprocessorSkipping == 0)
                     yyBraceDepth--;
                 return Tok_RightBrace;
             case '|':
                 yyCh = getChar();
                 if (yyCh == '|' || yyCh == '=')
                     yyCh = getChar();
                 return Tok_SomeOperator;
             case '~':
                 yyCh = getChar();
                 return Tok_Tilde;
             case '@':
                 yyCh = getChar();
                 return Tok_At;
             default:
                 // ### We should really prevent qdoc from looking at snippet files rather than
                 // ### suppress warnings when reading them.
                 // Unknown character: warn (except in skipped blocks and in
                 // .qdoc/.js files), then drop it and keep scanning.
                 if (yyNumPreprocessorSkipping == 0
                     && !(yyTokLoc.fileName().endsWith(".qdoc")
                          || yyTokLoc.fileName().endsWith(".js"))) {
                     yyTokLoc.warning(
                             tr("Hostile character 0x%1 in C++ source").arg((uchar)yyCh, 1, 16));
                 }
                 yyCh = getChar();
             }
         }
     }

     // init() pushes one entry onto the skipping stack, so more than one
     // entry at the end of the input means an #if was never closed.
     if (yyPreprocessorSkipping.count() > 1) {
         yyTokLoc.warning(tr("Expected #endif before end of file"));
         // clear it out or we get an infinite loop!
         while (!yyPreprocessorSkipping.isEmpty()) {
             popSkipping();
         }
     }

     // Give the end-of-input token a readable lexeme.
     strcpy(yyLex, "end-of-input");
     yyLexLen = strlen(yyLex);
     return Tok_Eoi;
 }

 void Tokenizer::initialize()
 {
     Config &config = Config::instance();
     QString versionSym = config.getString(CONFIG_VERSIONSYM);

     QString sourceEncoding = config.getString(CONFIG_SOURCEENCODING);
     if (sourceEncoding.isEmpty())
         sourceEncoding = QLatin1String("ISO-8859-1");
 #ifndef QT_NO_TEXTCODEC
     sourceCodec = QTextCodec::codecForName(sourceEncoding.toLocal8Bit());
 #endif

     comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)");
     comment->setMinimal(true);
     versionX = new QRegExp("$cannot possibly match^");
     if (!versionSym.isEmpty())
         versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym)
                              + ")[ \t]+\"([^\"]*)\"[ \t]*");
     definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)?");

     QStringList d = config.getStringList(CONFIG_DEFINES);
     d += "qdoc";
     defines = new QRegExp(d.join('|'));
     falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join('|'));

     /*
       The keyword hash table is always cleared before any words are inserted.
      */
     memset(kwordHashTable, 0, sizeof(kwordHashTable));
     for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++)
         insertKwordIntoHash(kwords[i], i + 1);

     ignoredTokensAndDirectives = new QHash<QByteArray, bool>;

     const QStringList tokens =
             config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS);
     for (const auto &token : tokens) {
         const QByteArray tb = token.toLatin1();
         ignoredTokensAndDirectives->insert(tb, false);
         insertKwordIntoHash(tb.data(), -1);
     }

     const QStringList directives =
             config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNOREDIRECTIVES);
     for (const auto &directive : directives) {
         const QByteArray db = directive.toLatin1();
         ignoredTokensAndDirectives->insert(db, true);
         insertKwordIntoHash(db.data(), -1);
     }
 }

 /*!
   The heap allocated variables are freed here. The keyword
   hash table is not cleared here, but it is cleared in the
   initialize() function, before any keywords are inserted.
  */
 void Tokenizer::terminate()
 {
     delete comment;
     comment = nullptr;
     delete versionX;
     versionX = nullptr;
     delete definedX;
     definedX = nullptr;
     delete defines;
     defines = nullptr;
     delete falsehoods;
     falsehoods = nullptr;
     delete ignoredTokensAndDirectives;
     ignoredTokensAndDirectives = nullptr;
 }

 /*
   Puts a freshly constructed tokenizer into its initial state: it
   allocates the two lexeme buffers, makes both hold an empty string,
   and resets the preprocessor and nesting bookkeeping.
 */
 void Tokenizer::init()
 {
     // Lexeme buffers; yyLex and yyPrevLex are swapped by getToken().
     yyLexBuf1 = new char[static_cast<int>(yyLexBufSize)];
     yyLexBuf2 = new char[static_cast<int>(yyLexBufSize)];
     yyLex = yyLexBuf2;
     yyPrevLex = yyLexBuf1;
     yyLex[0] = '\0';
     yyPrevLex[0] = '\0';
     yyLexLen = 0;

     // Scanner state.
     yyCh = '\0';
     parsingMacro = false;
     yyBracketDepth = 0;
     yyParenDepth = 0;
     yyBraceDepth = 0;

     // The bottom entry of the skipping stack: nothing is skipped initially.
     yyNumPreprocessorSkipping = 0;
     yyPreprocessorSkipping.push(false);
 }

 /*
   Prepares the tokenizer for scanning from \a loc: both locations are
   reset, both lexemes are set to a placeholder text, the nesting depths
   are cleared, and the first character of the input is fetched.
 */
 void Tokenizer::start(const Location &loc)
 {
     yyBracketDepth = 0;
     yyParenDepth = 0;
     yyBraceDepth = 0;

     yyCurLoc = loc;
     yyCurLoc.start();
     yyTokLoc = loc;

     strcpy(yyLex, "beginning-of-input");
     strcpy(yyPrevLex, "beginning-of-input");
     yyLexLen = strlen(yyLex);

     // yyCh is cleared first, so that getChar() runs with a NUL as the current character.
     yyCh = '\0';
     yyCh = getChar();
 }

 /*
   Returns the next token, if # was met.  This function interprets the
   preprocessor directive, skips over any #ifdef'd out tokens, and returns the
   token after all of that.
 */
 int Tokenizer::getTokenAfterPreprocessor()
 {
     yyCh = getChar();
     while (isspace(yyCh) && yyCh != '\n')
         yyCh = getChar();

     /*
       #directive condition
     */
     QString directive;
     QString condition;

     while (isalpha(yyCh)) {
         directive += QChar(yyCh);
         yyCh = getChar();
     }
     if (!directive.isEmpty()) {
         while (yyCh != EOF && yyCh != '\n') {
             if (yyCh == '\\') {
                 yyCh = getChar();
                 if (yyCh == '\r')
                     yyCh = getChar();
             }
             condition += yyCh;
             yyCh = getChar();
         }
         condition.remove(*comment);
         condition = condition.simplified();

         /*
           The #if, #ifdef, #ifndef, #elif, #else, and #endif
           directives have an effect on the skipping stack.  For
           instance, if the code processed so far is

               #if 1
               #if 0
               #if 1
               // ...
               #else

           the skipping stack contains, from bottom to top, false true
           true (assuming 0 is false and 1 is true).  If at least one
           entry of the stack is true, the tokens are skipped.

           This mechanism is simple yet hard to understand.
         */
         if (directive[0] == QChar('i')) {
             if (directive == QString("if"))
                 pushSkipping(!isTrue(condition));
             else if (directive == QString("ifdef"))
                 pushSkipping(!defines->exactMatch(condition));
             else if (directive == QString("ifndef"))
                 pushSkipping(defines->exactMatch(condition));
         } else if (directive[0] == QChar('e')) {
             if (directive == QString("elif")) {
                 bool old = popSkipping();
                 if (old)
                     pushSkipping(!isTrue(condition));
                 else
                     pushSkipping(true);
             } else if (directive == QString("else")) {
                 pushSkipping(!popSkipping());
             } else if (directive == QString("endif")) {
                 popSkipping();
             }
         } else if (directive == QString("define")) {
             if (versionX->exactMatch(condition))
                 yyVersion = versionX->cap(1);
         }
     }

     int tok;
     do {
         /*
           We set yyLex now, and after getToken() this will be
           yyPrevLex. This way, we skip over the preprocessor
           directive.
         */
         qstrcpy(yyLex, yyPrevLex);

         /*
           If getToken() meets another #, it will call
           getTokenAfterPreprocessor() once again, which could in turn
           call getToken() again, etc. Unless there are 10,000 or so
           preprocessor directives in a row, this shouldn't overflow
           the stack.
         */
         tok = getToken();
     } while (yyNumPreprocessorSkipping > 0 && tok != Tok_Eoi);
     return tok;
 }

 /*
   Opens a new conditional block on the skipping stack. \a skip tells
   whether the tokens of that block are to be skipped; the number of
   skipping entries is kept up to date alongside the stack.
 */
 void Tokenizer::pushSkipping(bool skip)
 {
     if (skip)
         ++yyNumPreprocessorSkipping;
     yyPreprocessorSkipping.push(skip);
 }

 /*
   Closes the innermost conditional block and returns its skipping
   value. If the stack is already empty, a warning is issued and true
   is returned.
 */
 bool Tokenizer::popSkipping()
 {
     if (!yyPreprocessorSkipping.isEmpty()) {
         const bool wasSkipping = yyPreprocessorSkipping.pop();
         if (wasSkipping)
             --yyNumPreprocessorSkipping;
         return wasSkipping;
     }

     yyTokLoc.warning(tr("Unexpected #elif, #else or #endif"));
     return true;
 }

 /*
   Returns \c true if the condition evaluates as true, otherwise false.  The
   condition is represented by a string.  Unsophisticated parsing techniques are
   used.  The preprocessing method could be named StriNg-Oriented PreProcessing,
   as SNOBOL stands for StriNg-Oriented symBOlic Language.
 */
 bool Tokenizer::isTrue(const QString &condition)
 {
     int firstOr = -1;
     int firstAnd = -1;
     int parenDepth = 0;

     /*
       Find the first logical operator at top level, but be careful
       about precedence. Examples:

           X || Y          // the or
           X || Y || Z     // the leftmost or
           X || Y && Z     // the or
           X && Y || Z     // the or
           (X || Y) && Z   // the and
     */
     for (int i = 0; i < condition.length() - 1; i++) {
         QChar ch = condition[i];
         if (ch == QChar('(')) {
             parenDepth++;
         } else if (ch == QChar(')')) {
             parenDepth--;
         } else if (parenDepth == 0) {
             if (condition[i + 1] == ch) {
                 if (ch == QChar('|')) {
                     firstOr = i;
                     break;
                 } else if (ch == QChar('&')) {
                     if (firstAnd == -1)
                         firstAnd = i;
                 }
             }
         }
     }
     if (firstOr != -1)
         return isTrue(condition.left(firstOr)) || isTrue(condition.mid(firstOr + 2));
     if (firstAnd != -1)
         return isTrue(condition.left(firstAnd)) && isTrue(condition.mid(firstAnd + 2));

     QString t = condition.simplified();
     if (t.isEmpty())
         return true;

     if (t[0] == QChar('!'))
         return !isTrue(t.mid(1));
     if (t[0] == QChar('(') && t.endsWith(QChar(')')))
         return isTrue(t.mid(1, t.length() - 2));

     if (definedX->exactMatch(t))
         return defines->exactMatch(definedX->cap(1));
     else
         return !falsehoods->exactMatch(t);
 }

 /*
   Returns the text of the current token as a Unicode string, decoded
   with the configured source codec.

   If no codec is available (initialize() was not called, or it could
   not find a codec for the configured encoding name), the text is
   decoded as ISO-8859-1, the encoding initialize() uses by default,
   instead of dereferencing a null codec.
 */
 QString Tokenizer::lexeme() const
 {
 #ifndef QT_NO_TEXTCODEC
     if (sourceCodec != nullptr)
         return sourceCodec->toUnicode(yyLex);
     return QString::fromLatin1(yyLex);
 #else
     return QString::fromUtf8(yyLex);
 #endif
 }

 /*
   Returns the text of the previous token as a Unicode string, decoded
   with the configured source codec.

   If no codec is available (initialize() was not called, or it could
   not find a codec for the configured encoding name), the text is
   decoded as ISO-8859-1, the encoding initialize() uses by default,
   instead of dereferencing a null codec.
 */
 QString Tokenizer::previousLexeme() const
 {
 #ifndef QT_NO_TEXTCODEC
     if (sourceCodec != nullptr)
         return sourceCodec->toUnicode(yyPrevLex);
     return QString::fromLatin1(yyPrevLex);
 #else
     return QString::fromUtf8(yyPrevLex);
 #endif
 }

 QT_END_NAMESPACE
	/****************************************************************************
	**
	** Copyright (C) 2019 The Qt Company Ltd.
	** Contact: https://www.qt.io/licensing/
	**
	** This file is part of the tools applications of the Qt Toolkit.
	**
	** $QT_BEGIN_LICENSE:GPL-EXCEPT$
	** Commercial License Usage
	** Licensees holding valid commercial Qt licenses may use this file in
	** accordance with the commercial license agreement provided with the
	** Software or, alternatively, in accordance with the terms contained in
	** a written agreement between you and The Qt Company. For licensing terms
	** and conditions see https://www.qt.io/terms-conditions. For further
	** information use the contact form at https://www.qt.io/contact-us.
	**
	** GNU General Public License Usage
	** Alternatively, this file may be used under the terms of the GNU
	** General Public License version 3 as published by the Free Software
	** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
	** included in the packaging of this file. Please review the following
	** information to ensure the GNU General Public License requirements will
	** be met: https://www.gnu.org/licenses/gpl-3.0.html.
	**
	** $QT_END_LICENSE$
	**
	****************************************************************************/

	#include "tokenizer.h"

	#include "config.h"
	#include "generator.h"

	#include <QtCore/qfile.h>
	#include <QtCore/qhash.h>
	#include <QtCore/qregexp.h>
	#include <QtCore/qstring.h>
	#include <QtCore/qtextcodec.h>

	#include <ctype.h>
	#include <string.h>

	QT_BEGIN_NAMESPACE

	#define LANGUAGE_CPP "Cpp"

	/* qmake ignore Q_OBJECT */

	/*
	Spellings of the keywords recognized by the tokenizer.

	getToken() turns the entry at index i into the token code
	Tok_FirstKeyword + i, so the order and the number of entries must
	be kept in sync with tokenizer.h.
	*/
	static const char *kwords[] = { "char",
	"class",
	"const",
	"double",
	"enum",
	"explicit",
	"friend",
	"inline",
	"int",
	"long",
	"namespace",
	"operator",
	"private",
	"protected",
	"public",
	"short",
	"signals",
	"signed",
	"slots",
	"static",
	"struct",
	"template",
	"typedef",
	"typename",
	"union",
	"unsigned",
	"using",
	"virtual",
	"void",
	"volatile",
	"__int64",
	"default",
	"delete",
	"final",
	"override",
	"Q_OBJECT",
	"Q_OVERRIDE",
	"Q_PROPERTY",
	"Q_PRIVATE_PROPERTY",
	"Q_DECLARE_SEQUENTIAL_ITERATOR",
	"Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR",
	"Q_DECLARE_ASSOCIATIVE_ITERATOR",
	"Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR",
	"Q_DECLARE_FLAGS",
	"Q_SIGNALS",
	"Q_SLOTS",
	"QT_COMPAT",
	"QT_COMPAT_CONSTRUCTOR",
	"QT_DEPRECATED",
	"QT_MOC_COMPAT",
	"QT_MODULE",
	"QT3_SUPPORT",
	"QT3_SUPPORT_CONSTRUCTOR",
	"QT3_MOC_SUPPORT",
	"QDOC_PROPERTY",
	"QPrivateSignal" };

	// Keyword hash table: 0 marks an empty slot, any other value is the
	// number that insertKwordIntoHash() stored for a keyword.
	static const int KwordHashTableSize = 4096;
	static int kwordHashTable[KwordHashTableSize];

	// Names that getToken() looks up when it finds -1 in the keyword table;
	// a true value marks the name as a directive.
	static QHash<QByteArray, bool> *ignoredTokensAndDirectives = nullptr;

	// Regular expressions used by the tokenizer; all start out unset.
	static QRegExp *comment = nullptr;
	static QRegExp *versionX = nullptr;
	static QRegExp *definedX = nullptr;

	static QRegExp *defines = nullptr;
	static QRegExp *falsehoods = nullptr;

	#ifndef QT_NO_TEXTCODEC
	// Text codec for the source files; starts out unset.
	static QTextCodec *sourceCodec = nullptr;
	#endif

	/*
	This function is a perfect hash function for the 37 keywords of C99
	(with a hash table size of 512). It should perform well on our
	Qt-enhanced C++ subset.
	*/
	static int hashKword(const char *s, int len)
	{
	return (((uchar)s[0]) + (((uchar)s[2]) << 5) + (((uchar)s[len - 1]) << 3)) % KwordHashTableSize;
	}

	/*
	Records \a number for the keyword \a s in kwordHashTable. Probing
	starts at the home slot computed by hashKword() and moves forward,
	wrapping around at the end of the table, until an empty (zero) slot
	is found.
	*/
	static void insertKwordIntoHash(const char *s, int number)
	{
	    int slot = hashKword(s, int(strlen(s)));
	    while (kwordHashTable[slot] != 0)
	        slot = (slot + 1) % KwordHashTableSize;
	    kwordHashTable[slot] = number;
	}

	/*
	Creates a tokenizer for the complete contents of the file \a in and
	hands \a loc to start().
	*/
	Tokenizer::Tokenizer(const Location &loc, QFile &in) : yyIn(in.readAll())
	{
	    init();
	    yyPos = 0;
	    start(loc);
	}

	/*
	Creates a tokenizer for the in-memory source text \a in and hands
	\a loc to start().
	*/
	Tokenizer::Tokenizer(const Location &loc, const QByteArray &in) : yyIn(in)
	{
	    // The read position is reset before start() is called.
	    yyPos = 0;
	    init();
	    start(loc);
	}

	/*
	Releases the two lexeme buffers owned by the tokenizer.
	*/
	Tokenizer::~Tokenizer()
	{
	    delete[] yyLexBuf2;
	    delete[] yyLexBuf1;
	}

	int Tokenizer::getToken()
	{
	char *t = yyPrevLex;
	yyPrevLex = yyLex;
	yyLex = t;

	while (yyCh != EOF) {
	yyTokLoc = yyCurLoc;
	yyLexLen = 0;

	if (isspace(yyCh)) {
	do {
	yyCh = getChar();
	} while (isspace(yyCh));
	} else if (isalpha(yyCh) \|\| yyCh == '_') {
	do {
	yyCh = getChar();
	} while (isalnum(yyCh) \|\| yyCh == '_');

	int k = hashKword(yyLex, int(yyLexLen));
	for (;;) {
	int i = kwordHashTable[k];
	if (i == 0) {
	return Tok_Ident;
	} else if (i == -1) {
	if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) {
	if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive
	int parenDepth = 0;
	while (yyCh != EOF && (yyCh != ')' \|\| parenDepth > 1)) {
	if (yyCh == '(')
	++parenDepth;
	else if (yyCh == ')')
	--parenDepth;
	yyCh = getChar();
	}
	if (yyCh == ')')
	yyCh = getChar();
	}
	break;
	}
	} else if (strcmp(yyLex, kwords[i - 1]) == 0) {
	int ret = (int)Tok_FirstKeyword + i - 1;
	if (ret != Tok_typename)
	return ret;
	break;
	}

	if (++k == KwordHashTableSize)
	k = 0;
	}
	} else if (isdigit(yyCh)) {
	do {
	yyCh = getChar();
	} while (isalnum(yyCh) \|\| yyCh == '.' \|\| yyCh == '+' \|\| yyCh == '-');
	return Tok_Number;
	} else {
	switch (yyCh) {
	case '!':
	case '%':
	yyCh = getChar();
	if (yyCh == '=')
	yyCh = getChar();
	return Tok_SomeOperator;
	case '"':
	yyCh = getChar();

	while (yyCh != EOF && yyCh != '"') {
	if (yyCh == '\\')
	yyCh = getChar();
	yyCh = getChar();
	}
	yyCh = getChar();

	if (yyCh == EOF)
	yyTokLoc.warning(tr("Unterminated C++ string literal"),
	tr("Maybe you forgot '/*!' at the beginning of the file?"));
	else
	return Tok_String;
	break;
	case '#':
	return getTokenAfterPreprocessor();
	case '&':
	yyCh = getChar();
	/*
	Removed check for '&&', only interpret '&=' as an operator.
	'&&' is also used for an rvalue reference. QTBUG-32675
	*/
	if (yyCh == '=') {
	yyCh = getChar();
	return Tok_SomeOperator;
	} else {
	return Tok_Ampersand;
	}
	case '\'':
	yyCh = getChar();
	/*
	Allow empty character literal. QTBUG-25775
	*/
	if (yyCh == '\'') {
	yyCh = getChar();
	break;
	}
	if (yyCh == '\\')
	yyCh = getChar();
	do {
	yyCh = getChar();
	} while (yyCh != EOF && yyCh != '\'');

	if (yyCh == EOF) {
	yyTokLoc.warning(tr("Unterminated C++ character literal"));
	} else {
	yyCh = getChar();
	return Tok_Number;
	}
	break;
	case '(':
	yyCh = getChar();
	if (yyNumPreprocessorSkipping == 0)
	yyParenDepth++;
	if (isspace(yyCh)) {
	do {
	yyCh = getChar();
	} while (isspace(yyCh));
	yyLexLen = 1;
	yyLex[1] = '\0';
	}
	if (yyCh == '*') {
	yyCh = getChar();
	return Tok_LeftParenAster;
	}
	return Tok_LeftParen;
	case ')':
	yyCh = getChar();
	if (yyNumPreprocessorSkipping == 0)
	yyParenDepth--;
	return Tok_RightParen;
	case '*':
	yyCh = getChar();
	if (yyCh == '=') {
	yyCh = getChar();
	return Tok_SomeOperator;
	} else {
	return Tok_Aster;
	}
	case '^':
	yyCh = getChar();
	if (yyCh == '=') {
	yyCh = getChar();
	return Tok_SomeOperator;
	} else {
	return Tok_Caret;
	}
	case '+':
	yyCh = getChar();
	if (yyCh == '+' \|\| yyCh == '=')
	yyCh = getChar();
	return Tok_SomeOperator;
	case ',':
	yyCh = getChar();
	return Tok_Comma;
	case '-':
	yyCh = getChar();
	if (yyCh == '-' \|\| yyCh == '=') {
	yyCh = getChar();
	} else if (yyCh == '>') {
	yyCh = getChar();
	if (yyCh == '*')
	yyCh = getChar();
	}
	return Tok_SomeOperator;
	case '.':
	yyCh = getChar();
	if (yyCh == '*') {
	yyCh = getChar();
	} else if (yyCh == '.') {
	do {
	yyCh = getChar();
	} while (yyCh == '.');
	return Tok_Ellipsis;
	} else if (isdigit(yyCh)) {
	do {
	yyCh = getChar();
	} while (isalnum(yyCh) \|\| yyCh == '.' \|\| yyCh == '+' \|\| yyCh == '-');
	return Tok_Number;
	}
	return Tok_SomeOperator;
	case '/':
	yyCh = getChar();
	if (yyCh == '/') {
	do {
	yyCh = getChar();
	} while (yyCh != EOF && yyCh != '\n');
	} else if (yyCh == '*') {
	bool metDoc = false; // empty doc is no doc
	bool metSlashAsterBang = false;
	bool metAster = false;
	bool metAsterSlash = false;

	yyCh = getChar();
	if (yyCh == '!')
	metSlashAsterBang = true;

	while (!metAsterSlash) {
	if (yyCh == EOF) {
	yyTokLoc.warning(tr("Unterminated C++ comment"));
	break;
	} else {
	if (yyCh == '*') {
	metAster = true;
	} else if (metAster && yyCh == '/') {
	metAsterSlash = true;
	} else {
	metAster = false;
	if (isgraph(yyCh))
	metDoc = true;
	}
	}
	yyCh = getChar();
	}
	if (metSlashAsterBang && metDoc)
	return Tok_Doc;
	else if (yyParenDepth > 0)
	return Tok_Comment;
	} else {
	if (yyCh == '=')
	yyCh = getChar();
	return Tok_SomeOperator;
	}
	break;
	case ':':
	yyCh = getChar();
	if (yyCh == ':') {
	yyCh = getChar();
	return Tok_Gulbrandsen;
	} else {
	return Tok_Colon;
	}
	case ';':
	yyCh = getChar();
	return Tok_Semicolon;
	case '<':
	yyCh = getChar();
	if (yyCh == '<') {
	yyCh = getChar();
	if (yyCh == '=')
	yyCh = getChar();
	return Tok_SomeOperator;
	} else if (yyCh == '=') {
	yyCh = getChar();
	return Tok_SomeOperator;
	} else {
	return Tok_LeftAngle;
	}
	case '=':
	yyCh = getChar();
	if (yyCh == '=') {
	yyCh = getChar();
	return Tok_SomeOperator;
	} else {
	return Tok_Equal;
	}
	case '>':
	yyCh = getChar();
	if (yyCh == '>') {
	yyCh = getChar();
	if (yyCh == '=')
	yyCh = getChar();
	return Tok_SomeOperator;
	} else if (yyCh == '=') {
	yyCh = getChar();
	return Tok_SomeOperator;
	} else {
	return Tok_RightAngle;
	}
	case '?':
	yyCh = getChar();
	return Tok_SomeOperator;
	case '[':
	yyCh = getChar();
	if (yyNumPreprocessorSkipping == 0)
	yyBracketDepth++;
	return Tok_LeftBracket;
	case '\\':
	yyCh = getChar();
	yyCh = getChar(); // skip one character
	break;
	case ']':
	yyCh = getChar();
	if (yyNumPreprocessorSkipping == 0)
	yyBracketDepth--;
	return Tok_RightBracket;
	case '{':
	yyCh = getChar();
	if (yyNumPreprocessorSkipping == 0)
	yyBraceDepth++;
	return Tok_LeftBrace;
	case '}':
	yyCh = getChar();
	if (yyNumPreprocessorSkipping == 0)
	yyBraceDepth--;
	return Tok_RightBrace;
	case '\|':
	yyCh = getChar();
	if (yyCh == '\|' \|\| yyCh == '=')
	yyCh = getChar();
	return Tok_SomeOperator;
	case '~':
	yyCh = getChar();
	return Tok_Tilde;
	case '@':
	yyCh = getChar();
	return Tok_At;
	default:
	// ### We should really prevent qdoc from looking at snippet files rather than
	// ### suppress warnings when reading them.
	if (yyNumPreprocessorSkipping == 0
	&& !(yyTokLoc.fileName().endsWith(".qdoc")
	\|\| yyTokLoc.fileName().endsWith(".js"))) {
	yyTokLoc.warning(
	tr("Hostile character 0x%1 in C++ source").arg((uchar)yyCh, 1, 16));
	}
	yyCh = getChar();
	}
	}
	}

	if (yyPreprocessorSkipping.count() > 1) {
	yyTokLoc.warning(tr("Expected #endif before end of file"));
	// clear it out or we get an infinite loop!
	while (!yyPreprocessorSkipping.isEmpty()) {
	popSkipping();
	}
	}

	strcpy(yyLex, "end-of-input");
	yyLexLen = strlen(yyLex);
	return Tok_Eoi;
	}

	/*
	    Builds the file-level state shared by all tokenizers from the
	    configuration: the source codec, the regular expressions used for
	    preprocessor handling, the keyword hash table and the set of
	    ignored tokens and directives.

	    The objects created here are released by terminate().
	    NOTE(review): the pointers are overwritten without deleting earlier
	    objects, so calling this twice without terminate() in between
	    would leak; confirm that callers pair the two calls.
	*/
	void Tokenizer::initialize()
	{
	    Config &config = Config::instance();
	    QString versionSym = config.getString(CONFIG_VERSIONSYM);

	    QString sourceEncoding = config.getString(CONFIG_SOURCEENCODING);
	    if (sourceEncoding.isEmpty())
	        sourceEncoding = QLatin1String("ISO-8859-1");
	#ifndef QT_NO_TEXTCODEC
	    sourceCodec = QTextCodec::codecForName(sourceEncoding.toLocal8Bit());
	#endif

	    // Matches, non-greedily, a block comment, a line comment ending in a newline, or
	    // a line comment running to the end of the text. The '*' quantifiers are required:
	    // without them the pattern cannot match a comment of arbitrary length.
	    comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)");
	    comment->setMinimal(true);

	    // Matches the text of a #define of the version symbol and captures the quoted
	    // value. It stays unmatchable when no version symbol is configured. The capture
	    // group accepts a value of any length and trailing blanks are optional, because
	    // the text is tested with exactMatch() after it has been simplified.
	    versionX = new QRegExp("$cannot possibly match^");
	    if (!versionSym.isEmpty())
	        versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym)
	                             + ")[ \t]+\"([^\"]*)\"[ \t]*");
	    definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)?");

	    // Both lists become plain alternations; "qdoc" always counts as defined.
	    QStringList d = config.getStringList(CONFIG_DEFINES);
	    d += "qdoc";
	    defines = new QRegExp(d.join('|'));
	    falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join('|'));

	    /*
	      The keyword hash table is always cleared before any words are inserted.
	     */
	    memset(kwordHashTable, 0, sizeof(kwordHashTable));
	    for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++)
	        insertKwordIntoHash(kwords[i], i + 1);

	    ignoredTokensAndDirectives = new QHash<QByteArray, bool>;

	    // Ignored tokens and directives share the keyword table; both are stored as -1
	    // and told apart through the bool kept in ignoredTokensAndDirectives.
	    const QStringList tokens =
	            config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS);
	    for (const auto &token : tokens) {
	        const QByteArray tb = token.toLatin1();
	        ignoredTokensAndDirectives->insert(tb, false);
	        insertKwordIntoHash(tb.data(), -1);
	    }

	    const QStringList directives =
	            config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNOREDIRECTIVES);
	    for (const auto &directive : directives) {
	        const QByteArray db = directive.toLatin1();
	        ignoredTokensAndDirectives->insert(db, true);
	        insertKwordIntoHash(db.data(), -1);
	    }
	}

	/*!
	  Releases the heap objects referenced by the file-level pointers
	  and resets those pointers to null. The keyword hash table is left
	  untouched here; initialize() clears it before inserting any
	  keywords.
	 */
	void Tokenizer::terminate()
	{
	    // Destroy the objects first ...
	    delete ignoredTokensAndDirectives;
	    delete falsehoods;
	    delete defines;
	    delete definedX;
	    delete versionX;
	    delete comment;

	    // ... then make sure no pointer is left dangling.
	    ignoredTokensAndDirectives = nullptr;
	    falsehoods = nullptr;
	    defines = nullptr;
	    definedX = nullptr;
	    versionX = nullptr;
	    comment = nullptr;
	}

	/*
	    Puts a freshly constructed tokenizer into its initial state:
	    allocates the two lexeme buffers, empties both lexemes, seeds the
	    preprocessor skipping stack with a single "not skipping" entry and
	    zeroes the nesting counters.
	*/
	void Tokenizer::init()
	{
	    // Two buffers of yyLexBufSize bytes, one per lexeme; getToken() swaps their roles.
	    yyLexBuf1 = new char[(int)yyLexBufSize];
	    yyLexBuf2 = new char[(int)yyLexBufSize];

	    yyLex = yyLexBuf2;
	    yyLex[0] = '\0';
	    yyLexLen = 0;

	    yyPrevLex = yyLexBuf1;
	    yyPrevLex[0] = '\0';

	    // Preprocessor state: one base entry on the stack, nothing being skipped.
	    yyNumPreprocessorSkipping = 0;
	    yyPreprocessorSkipping.push(false);

	    // Scanner state.
	    yyBracketDepth = 0;
	    yyParenDepth = 0;
	    yyBraceDepth = 0;
	    parsingMacro = false;
	    yyCh = '\0';
	}

	/*
	    Begins tokenizing at location \a loc: resets the nesting counters,
	    sets both lexemes to "beginning-of-input" and reads the first
	    character of the input.
	*/
	void Tokenizer::start(const Location &loc)
	{
	    yyBracketDepth = 0;
	    yyParenDepth = 0;
	    yyBraceDepth = 0;

	    yyTokLoc = loc;
	    yyCurLoc = loc;
	    yyCurLoc.start();

	    strcpy(yyLex, "beginning-of-input");
	    strcpy(yyPrevLex, "beginning-of-input");
	    yyLexLen = strlen(yyLex);

	    // Reset the current character before fetching the first real one. getChar() is
	    // defined outside this file section and may read yyCh, so the reset is kept.
	    yyCh = '\0';
	    yyCh = getChar();
	}

	/*
	Returns the next token, if # was met. This function interprets the
	preprocessor directive, skips over any #ifdef'd out tokens, and returns the
	token after all of that.
	*/
	int Tokenizer::getTokenAfterPreprocessor()
	{
	yyCh = getChar();
	while (isspace(yyCh) && yyCh != '\n')
	yyCh = getChar();

	/*
	#directive condition
	*/
	QString directive;
	QString condition;

	while (isalpha(yyCh)) {
	directive += QChar(yyCh);
	yyCh = getChar();
	}
	if (!directive.isEmpty()) {
	while (yyCh != EOF && yyCh != '\n') {
	if (yyCh == '\\') {
	yyCh = getChar();
	if (yyCh == '\r')
	yyCh = getChar();
	}
	condition += yyCh;
	yyCh = getChar();
	}
	condition.remove(*comment);
	condition = condition.simplified();

	/*
	The #if, #ifdef, #ifndef, #elif, #else, and #endif
	directives have an effect on the skipping stack. For
	instance, if the code processed so far is

	#if 1
	#if 0
	#if 1
	// ...
	#else

	the skipping stack contains, from bottom to top, false true
	true (assuming 0 is false and 1 is true). If at least one
	entry of the stack is true, the tokens are skipped.

	This mechanism is simple yet hard to understand.
	*/
	if (directive[0] == QChar('i')) {
	if (directive == QString("if"))
	pushSkipping(!isTrue(condition));
	else if (directive == QString("ifdef"))
	pushSkipping(!defines->exactMatch(condition));
	else if (directive == QString("ifndef"))
	pushSkipping(defines->exactMatch(condition));
	} else if (directive[0] == QChar('e')) {
	if (directive == QString("elif")) {
	bool old = popSkipping();
	if (old)
	pushSkipping(!isTrue(condition));
	else
	pushSkipping(true);
	} else if (directive == QString("else")) {
	pushSkipping(!popSkipping());
	} else if (directive == QString("endif")) {
	popSkipping();
	}
	} else if (directive == QString("define")) {
	if (versionX->exactMatch(condition))
	yyVersion = versionX->cap(1);
	}
	}

	int tok;
	do {
	/*
	We set yyLex now, and after getToken() this will be
	yyPrevLex. This way, we skip over the preprocessor
	directive.
	*/
	qstrcpy(yyLex, yyPrevLex);

	/*
	If getToken() meets another #, it will call
	getTokenAfterPreprocessor() once again, which could in turn
	call getToken() again, etc. Unless there are 10,000 or so
	preprocessor directives in a row, this shouldn't overflow
	the stack.
	*/
	tok = getToken();
	} while (yyNumPreprocessorSkipping > 0 && tok != Tok_Eoi);
	return tok;
	}

	/*
	    Enters a new conditional block: pushes \a skip onto the skipping
	    stack and, when \a skip is true, increases the count of entries
	    that request skipping.
	*/
	void Tokenizer::pushSkipping(bool skip)
	{
	    if (skip)
	        ++yyNumPreprocessorSkipping;
	    yyPreprocessorSkipping.push(skip);
	}

	/*
	    Leaves the innermost conditional block: removes the top entry of
	    the skipping stack and returns it, adjusting the count of skipping
	    entries. If the stack is already empty, a warning is issued and
	    true is returned.
	*/
	bool Tokenizer::popSkipping()
	{
	    if (!yyPreprocessorSkipping.isEmpty()) {
	        const bool wasSkipping = yyPreprocessorSkipping.pop();
	        if (wasSkipping)
	            --yyNumPreprocessorSkipping;
	        return wasSkipping;
	    }

	    yyTokLoc.warning(tr("Unexpected #elif, #else or #endif"));
	    return true;
	}

	/*
	    Returns \c true if the preprocessor condition \a condition
	    evaluates as true, otherwise false. The condition is a string and
	    is taken apart with unsophisticated, purely textual techniques
	    (StriNg-Oriented PreProcessing, one might say, as SNOBOL stands for
	    StriNg-Oriented symBOlic Language):

	    \list
	    \li a top-level logical or, or failing that a top-level logical
	        and, splits the condition into two recursively evaluated parts;
	    \li an empty condition is true;
	    \li a leading '!' negates the rest;
	    \li one pair of enclosing parentheses is stripped;
	    \li "defined(X)" is true if X matches the configured defines;
	    \li anything else is true unless it matches the configured
	        falsehoods.
	    \endlist

	    The logical-or operator and the '|' character literal used to be
	    written with backslashes in front of the pipes, which is not valid
	    C++; they are spelled plainly here.
	*/
	bool Tokenizer::isTrue(const QString &condition)
	{
	    int firstOr = -1;
	    int firstAnd = -1;
	    int parenDepth = 0;

	    /*
	      Find the first logical operator at top level, but be careful
	      about precedence. Examples:

	          X || Y          // the or
	          X || Y || Z     // the leftmost or
	          X || Y && Z     // the or
	          X && Y || Z     // the or
	          (X || Y) && Z   // the and
	     */
	    for (int i = 0; i < condition.length() - 1; i++) {
	        QChar ch = condition[i];
	        if (ch == QChar('(')) {
	            parenDepth++;
	        } else if (ch == QChar(')')) {
	            parenDepth--;
	        } else if (parenDepth == 0) {
	            // A doubled character outside parentheses is an operator candidate.
	            if (condition[i + 1] == ch) {
	                if (ch == QChar('|')) {
	                    // An or binds weakest, so the first one decides the split.
	                    firstOr = i;
	                    break;
	                } else if (ch == QChar('&')) {
	                    if (firstAnd == -1)
	                        firstAnd = i;
	                }
	            }
	        }
	    }
	    if (firstOr != -1)
	        return isTrue(condition.left(firstOr)) || isTrue(condition.mid(firstOr + 2));
	    if (firstAnd != -1)
	        return isTrue(condition.left(firstAnd)) && isTrue(condition.mid(firstAnd + 2));

	    QString t = condition.simplified();
	    if (t.isEmpty())
	        return true;

	    if (t[0] == QChar('!'))
	        return !isTrue(t.mid(1));
	    if (t[0] == QChar('(') && t.endsWith(QChar(')')))
	        return isTrue(t.mid(1, t.length() - 2));

	    if (definedX->exactMatch(t))
	        return defines->exactMatch(definedX->cap(1));
	    else
	        return !falsehoods->exactMatch(t);
	}

	/*
	    Returns the text of the current lexeme as a QString.

	    The bytes are decoded with the configured source codec. When no
	    codec is available, they are decoded as UTF-8 instead of
	    dereferencing a null pointer. That is the case in builds without
	    text codec support, before initialize() has run, or when
	    QTextCodec::codecForName() did not recognize the configured
	    encoding name.
	*/
	QString Tokenizer::lexeme() const
	{
	#ifndef QT_NO_TEXTCODEC
	    if (sourceCodec != nullptr)
	        return sourceCodec->toUnicode(yyLex);
	#endif
	    return QString::fromUtf8(yyLex);
	}

	/*
	    Returns the text of the previous lexeme as a QString.

	    The bytes are decoded with the configured source codec. When no
	    codec is available, they are decoded as UTF-8 instead of
	    dereferencing a null pointer. That is the case in builds without
	    text codec support, before initialize() has run, or when
	    QTextCodec::codecForName() did not recognize the configured
	    encoding name.
	*/
	QString Tokenizer::previousLexeme() const
	{
	#ifndef QT_NO_TEXTCODEC
	    if (sourceCodec != nullptr)
	        return sourceCodec->toUnicode(yyPrevLex);
	#endif
	    return QString::fromUtf8(yyPrevLex);
	}

	QT_END_NAMESPACE