qt-everywhere-src-5.15.1/qtbase/util/lexgen/re2nfa.cpp - orbit - Git at Google

 /****************************************************************************
 **
 ** Copyright (C) 2016 The Qt Company Ltd.
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the utils of the Qt Toolkit.
 **
 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
 ** Commercial License Usage
 ** Licensees holding valid commercial Qt licenses may use this file in
 ** accordance with the commercial license agreement provided with the
 ** Software or, alternatively, in accordance with the terms contained in
 ** a written agreement between you and The Qt Company. For licensing terms
 ** and conditions see https://www.qt.io/terms-conditions. For further
 ** information use the contact form at https://www.qt.io/contact-us.
 **
 ** GNU General Public License Usage
 ** Alternatively, this file may be used under the terms of the GNU
 ** General Public License version 3 as published by the Free Software
 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
 ** included in the packaging of this file. Please review the following
 ** information to ensure the GNU General Public License requirements will
 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
 **
 ** $QT_END_LICENSE$
 **
 ****************************************************************************/

 #include "re2nfa.h"
 #include "tokenizer.cpp"

 RE2NFA::RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs)
     : macros(macros), index(0), errorColumn(-1), maxInputSet(maxInputSet), caseSensitivity(cs)
 {
 }

 NFA RE2NFA::parse(const QString &expression, int *errCol)
 {
     tokenize(expression);

     if (symbols.isEmpty())
         return NFA();

     index = 0;

     NFA result = parseExpr();
     if (result.isEmpty()) {
         if (errCol)
             *errCol = errorColumn;
     }
     return result;
 }

 NFA RE2NFA::parseExpr()
 {
     NFA value = parseBranch();
     while (test(TOK_OR)) {
         NFA rhs = parseBranch();
         value = NFA::createAlternatingNFA(value, rhs);
     }
     return value;
 }

 NFA RE2NFA::parseBranch()
 {
     NFA value = parsePiece();
     if (!hasNext())
         return value;
     NFA next;
     do {
         next = parsePiece();
         if (!next.isEmpty())
             value = NFA::createConcatenatingNFA(value, next);
     } while (!next.isEmpty() && hasNext());
     return value;
 }

 NFA RE2NFA::parsePiece()
 {
     NFA atom = parseAtom();
     if (atom.isEmpty() || !hasNext())
         return atom;
     return parseMaybeQuantifier(atom);
 }

 NFA RE2NFA::parseAtom()
 {
     // ####
     switch (next()) {
         case TOK_STRING:
             return createCharNFA();
         case TOK_LPAREN: {
             NFA subExpr = parseExpr();
             next(TOK_RPAREN);
             return subExpr;
         }
         case TOK_LBRACE: {
             QString macroName = lexemUntil(TOK_RBRACE);
             QMap<QString, NFA>::ConstIterator macro = macros.find(macroName);
             if (macro == macros.end()) {
                 qWarning("Unknown macro '%s' - probably used before defined", qPrintable(macroName));
                 return NFA();
             }
             return *macro;
         }
         case TOK_LBRACKET: {
             NFA set = parseSet();
             next(TOK_RBRACKET);
             return set;
         }
         case TOK_SEQUENCE:
             return parseSet2();
         case TOK_DOT:
             return NFA::createSetNFA(maxInputSet);
         default:
             prev();
             return NFA();
     }
 }

 NFA RE2NFA::parseMaybeQuantifier(const NFA &nfa)
 {
     // ####
     switch (next()) {
         case TOK_STAR:
             return NFA::createOptionalNFA(nfa);
         case TOK_QUESTION:
             return NFA::createZeroOrOneNFA(nfa);
         case TOK_PLUS:
             return NFA::createConcatenatingNFA(nfa, NFA::createOptionalNFA(nfa));
         case TOK_LBRACE: {
               const int rewind = index - 1;

               QString lexemBeforeComma;
               QString lexemAfterComma;
               bool seenComma = false;
               forever {
                   if (test(TOK_COMMA)) {
                       if (seenComma) {
                           errorColumn = symbol().column;
                           return NFA();
                       }
                       seenComma = true;
                   } else if (test(TOK_RBRACE)) {
                       break;
                   } else {
                       next(TOK_STRING);
                       if (seenComma)
                           lexemAfterComma += symbol().lexem;
                       else
                           lexemBeforeComma += symbol().lexem;
                   }
               }
               bool isNumber = false;
               int min = lexemBeforeComma.toInt(&isNumber);
               if (!isNumber) {
                   index = rewind;
                   return nfa;
               }
               int max = min;
               if (seenComma) {
                   max = lexemAfterComma.toInt(&isNumber);
                   if (!isNumber) {
                       errorColumn = symbol().column;
                       return NFA();
                   }
               }
               return NFA::applyQuantity(nfa, min, max);
         }
         default:
             prev();
             return nfa;
     }
 }

 NFA RE2NFA::parseSet()
 {
     QSet<InputType> set;
     bool negate = false;

     next(TOK_STRING);

     do {
         Q_ASSERT(symbol().lexem.length() == 1);
         // ###
         QChar ch = symbol().lexem.at(0);
         if (set.isEmpty() && ch == QLatin1Char('^')) {
             negate = true;
             continue;
         }

         // look ahead for ranges like a-z
         bool rangeFound = false;
         if (test(TOK_STRING)) {
             if (symbol().lexem.length() == 1
                 && symbol().lexem.at(0) == QLatin1Char('-')) {
                 next(TOK_STRING);
                 Q_ASSERT(symbol().lexem.length() == 1);
                 QChar last = symbol().lexem.at(0);

                 if (ch.unicode() > last.unicode())
                     qSwap(ch, last);

                 for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
                     if (caseSensitivity == Qt::CaseInsensitive) {
                         set.insert(QChar(i).toLower().unicode());
                     } else {
                         set.insert(i);
                     }
                 }

                 rangeFound = true;
             } else {
                 prev();
             }
         }

         if (!rangeFound) {
             if (caseSensitivity == Qt::CaseInsensitive) {
                 set.insert(ch.toLower().unicode());
             } else {
                 set.insert(ch.unicode());
             }
         }
     } while (test(TOK_STRING));

     if (negate) {
         QSet<InputType> negatedSet = maxInputSet;
         negatedSet.subtract(set);
         set = negatedSet;
     }

     return NFA::createSetNFA(set);
 }

 NFA RE2NFA::parseSet2()
 {
     QSet<InputType> set;
     bool negate = false;

     QString str = symbol().lexem;
     // strip off brackets
     str.chop(1);
     str.remove(0, 1);

     int i = 0;
     while (i < str.length()) {
         // ###
         QChar ch = str.at(i++);
         if (set.isEmpty() && ch == QLatin1Char('^')) {
             negate = true;
             continue;
         }

         // look ahead for ranges like a-z
         bool rangeFound = false;
         if (i < str.length() - 1 && str.at(i) == QLatin1Char('-')) {
             ++i;
             QChar last = str.at(i++);

             if (ch.unicode() > last.unicode())
                 qSwap(ch, last);

             for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
                 if (caseSensitivity == Qt::CaseInsensitive) {
                     set.insert(QChar(i).toLower().unicode());
                 } else {
                     set.insert(i);
                 }
             }

             rangeFound = true;
         }

         if (!rangeFound) {
             if (caseSensitivity == Qt::CaseInsensitive) {
                 set.insert(ch.toLower().unicode());
             } else {
                 set.insert(ch.unicode());
             }
         }
     }

     if (negate) {
         QSet<InputType> negatedSet = maxInputSet;
         negatedSet.subtract(set);
         set = negatedSet;
     }

     return NFA::createSetNFA(set);
 }
 NFA RE2NFA::createCharNFA()
 {
     NFA nfa;
     // ####
     if (caseSensitivity == Qt::CaseInsensitive) {
         nfa = NFA::createStringNFA(symbol().lexem.toLower().toLatin1());
     } else {
         nfa = NFA::createStringNFA(symbol().lexem.toLatin1());
     }
     return nfa;
 }

 static inline int skipQuote(const QString &str, int pos)
 {
     while (pos < str.length()
            && str.at(pos) != QLatin1Char('"')) {
         if (str.at(pos) == QLatin1Char('\\')) {
             ++pos;
             if (pos >= str.length())
                 break;
         }
         ++pos;
     }
     if (pos < str.length())
         ++pos;
     return pos;
 }

 #if 0
 static const char*tokStr(Token t)
 {
     switch (t) {
         case TOK_INVALID: return "TOK_INVALID";
         case TOK_STRING: return "TOK_STRING";
         case TOK_LBRACE: return "TOK_LBRACE";
         case TOK_RBRACE: return "TOK_RBRACE";
         case TOK_LBRACKET: return "TOK_LBRACKET";
         case TOK_RBRACKET: return "TOK_RBRACKET";
         case TOK_LPAREN: return "TOK_LPAREN";
         case TOK_RPAREN: return "TOK_RPAREN";
         case TOK_COMMA: return "TOK_COMMA";
         case TOK_STAR: return "TOK_STAR";
         case TOK_OR: return "TOK_OR";
         case TOK_QUESTION: return "TOK_QUESTION";
         case TOK_DOT: return "TOK_DOT";
         case TOK_PLUS: return "TOK_PLUS";
         case TOK_SEQUENCE: return "TOK_SEQUENCE";
         case TOK_QUOTED_STRING: return "TOK_QUOTED_STRING";
     }
     return "";
 }
 #endif

 void RE2NFA::tokenize(const QString &input)
 {
     symbols.clear();
 #if 1
     RegExpTokenizer tokenizer(input);
     Symbol sym;
     int tok = tokenizer.lex();
     while (tok != -1) {
         Symbol sym;
         sym.token = static_cast<Token>(tok);
         sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);

         if (sym.token == TOK_QUOTED_STRING) {
             sym.lexem.chop(1);
             sym.lexem.remove(0, 1);
             sym.token = TOK_STRING;
         }

         if (sym.token == TOK_STRING || sym.token == TOK_SEQUENCE) {
             for (int i = 0; i < sym.lexem.length(); ++i) {
                 if (sym.lexem.at(i) == '\\') {
                     if (i >= sym.lexem.length() - 1)
                         break;
                     QChar ch = sym.lexem.at(i + 1);
                     if (ch == QLatin1Char('n')) {
                         ch = '\n';
                     } else if (ch == QLatin1Char('r')) {
                         ch = '\r';
                     } else if (ch == QLatin1Char('t')) {
                         ch = '\t';
                     } else if (ch == QLatin1Char('f')) {
                         ch = '\f';
                     }
                     sym.lexem.replace(i, 2, ch);
                 }
             }
         }

         /*
         if (sym.token == TOK_SEQUENCE) {
             Symbol s;
             s.token = TOK_LBRACKET;
             s.lexem = "[";
             symbols.append(s);

             for (int i = 1; i < sym.lexem.length() - 1; ++i) {
                 s.token = TOK_STRING;
                 s.lexem = sym.lexem.at(i);
                 symbols.append(s);
             }

             s.token = TOK_RBRACKET;
             s.lexem = "]";
             symbols.append(s);

             tok = tokenizer.lex();
             continue;
         }
         */

         symbols.append(sym);
         tok = tokenizer.lex();
     }
 #else
     int pos = 0;
     bool insideSet = false;
     while (pos < input.length()) {
         QChar ch = input.at(pos);

         Symbol sym;
         sym.column = pos;
         sym.token = TOK_INVALID;
         sym.lexem = QString(ch);
         switch (ch.toLatin1()) {
             case '"': {
                 if (insideSet) {
                     sym.token = TOK_STRING;
                     sym.lexem = QString(ch);
                     symbols += sym;
                     ++pos;
                     continue;
                 }
                 if (pos + 1 >= input.length())
                     return;
                 int quoteEnd = skipQuote(input, pos + 1);
                 sym.token = TOK_STRING;
                 sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);
                 symbols += sym;
                 pos = quoteEnd;
                 continue;
             }
             case '{':
                 sym.token = (insideSet ? TOK_STRING : TOK_LBRACE);
                 break;
             case '}':
                 sym.token = (insideSet ? TOK_STRING : TOK_RBRACE);
                 break;
             case '[':
                 insideSet = true;
                 sym.token = TOK_LBRACKET;
                 break;
             case ']':
                 insideSet = false;
                 sym.token = TOK_RBRACKET;
                 break;
             case '(':
                 sym.token = (insideSet ? TOK_STRING : TOK_LPAREN);
                 break;
             case ')':
                 sym.token = (insideSet ? TOK_STRING : TOK_RPAREN);
                 break;
             case ',':
                 sym.token = (insideSet ? TOK_STRING : TOK_COMMA);
                 break;
             case '*':
                 sym.token = (insideSet ? TOK_STRING : TOK_STAR);
                 break;
             case '|':
                 sym.token = (insideSet ? TOK_STRING : TOK_OR);
                 break;
             case '?':
                 sym.token = (insideSet ? TOK_STRING : TOK_QUESTION);
                 break;
             case '.':
                 sym.token = (insideSet ? TOK_STRING : TOK_DOT);
                 break;
             case '+':
                 sym.token = (insideSet ? TOK_STRING : TOK_PLUS);
                 break;
             case '\\':
                 ++pos;
                 if (pos >= input.length())
                     return;
                 ch = input.at(pos);
                 if (ch == QLatin1Char('n')) {
                     ch = '\n';
                 } else if (ch == QLatin1Char('r')) {
                     ch = '\r';
                 } else if (ch == QLatin1Char('t')) {
                     ch = '\t';
                 } else if (ch == QLatin1Char('f')) {
                     ch = '\f';
                 }
                 // fall through
             default:
                 sym.token = TOK_STRING;
                 sym.lexem = QString(ch);
                 symbols += sym;
                 ++pos;
                 continue;
         }
         symbols += sym;
         ++pos;
     }
 #endif
 #if 0
     foreach (Symbol s, symbols) {
         qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;
     }
 #endif
 }

 bool RE2NFA::next(Token t)
 {
     if (hasNext() && next() == t)
         return true;
     errorColumn = symbol().column;
     Q_ASSERT(false);
     return false;
 }

 bool RE2NFA::test(Token t)
 {
     if (index >= symbols.count())
         return false;
     if (symbols.at(index).token == t) {
         ++index;
         return true;
     }
     return false;
 }

 QString RE2NFA::lexemUntil(Token t)
 {
     QString lexem;
     while (hasNext() && next() != t)
         lexem += symbol().lexem;
     return lexem;
 }
	/****************************************************************************
	**
	** Copyright (C) 2016 The Qt Company Ltd.
	** Contact: https://www.qt.io/licensing/
	**
	** This file is part of the utils of the Qt Toolkit.
	**
	** $QT_BEGIN_LICENSE:GPL-EXCEPT$
	** Commercial License Usage
	** Licensees holding valid commercial Qt licenses may use this file in
	** accordance with the commercial license agreement provided with the
	** Software or, alternatively, in accordance with the terms contained in
	** a written agreement between you and The Qt Company. For licensing terms
	** and conditions see https://www.qt.io/terms-conditions. For further
	** information use the contact form at https://www.qt.io/contact-us.
	**
	** GNU General Public License Usage
	** Alternatively, this file may be used under the terms of the GNU
	** General Public License version 3 as published by the Free Software
	** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
	** included in the packaging of this file. Please review the following
	** information to ensure the GNU General Public License requirements will
	** be met: https://www.gnu.org/licenses/gpl-3.0.html.
	**
	** $QT_END_LICENSE$
	**
	****************************************************************************/

	#include "re2nfa.h"
	#include "tokenizer.cpp"

	RE2NFA::RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs)
	: macros(macros), index(0), errorColumn(-1), maxInputSet(maxInputSet), caseSensitivity(cs)
	{
	}

	NFA RE2NFA::parse(const QString &expression, int *errCol)
	{
	tokenize(expression);

	if (symbols.isEmpty())
	return NFA();

	index = 0;

	NFA result = parseExpr();
	if (result.isEmpty()) {
	if (errCol)
	*errCol = errorColumn;
	}
	return result;
	}

	NFA RE2NFA::parseExpr()
	{
	NFA value = parseBranch();
	while (test(TOK_OR)) {
	NFA rhs = parseBranch();
	value = NFA::createAlternatingNFA(value, rhs);
	}
	return value;
	}

	NFA RE2NFA::parseBranch()
	{
	NFA value = parsePiece();
	if (!hasNext())
	return value;
	NFA next;
	do {
	next = parsePiece();
	if (!next.isEmpty())
	value = NFA::createConcatenatingNFA(value, next);
	} while (!next.isEmpty() && hasNext());
	return value;
	}

	NFA RE2NFA::parsePiece()
	{
	NFA atom = parseAtom();
	if (atom.isEmpty() \|\| !hasNext())
	return atom;
	return parseMaybeQuantifier(atom);
	}

	NFA RE2NFA::parseAtom()
	{
	// ####
	switch (next()) {
	case TOK_STRING:
	return createCharNFA();
	case TOK_LPAREN: {
	NFA subExpr = parseExpr();
	next(TOK_RPAREN);
	return subExpr;
	}
	case TOK_LBRACE: {
	QString macroName = lexemUntil(TOK_RBRACE);
	QMap<QString, NFA>::ConstIterator macro = macros.find(macroName);
	if (macro == macros.end()) {
	qWarning("Unknown macro '%s' - probably used before defined", qPrintable(macroName));
	return NFA();
	}
	return *macro;
	}
	case TOK_LBRACKET: {
	NFA set = parseSet();
	next(TOK_RBRACKET);
	return set;
	}
	case TOK_SEQUENCE:
	return parseSet2();
	case TOK_DOT:
	return NFA::createSetNFA(maxInputSet);
	default:
	prev();
	return NFA();
	}
	}

	NFA RE2NFA::parseMaybeQuantifier(const NFA &nfa)
	{
	// ####
	switch (next()) {
	case TOK_STAR:
	return NFA::createOptionalNFA(nfa);
	case TOK_QUESTION:
	return NFA::createZeroOrOneNFA(nfa);
	case TOK_PLUS:
	return NFA::createConcatenatingNFA(nfa, NFA::createOptionalNFA(nfa));
	case TOK_LBRACE: {
	const int rewind = index - 1;

	QString lexemBeforeComma;
	QString lexemAfterComma;
	bool seenComma = false;
	forever {
	if (test(TOK_COMMA)) {
	if (seenComma) {
	errorColumn = symbol().column;
	return NFA();
	}
	seenComma = true;
	} else if (test(TOK_RBRACE)) {
	break;
	} else {
	next(TOK_STRING);
	if (seenComma)
	lexemAfterComma += symbol().lexem;
	else
	lexemBeforeComma += symbol().lexem;
	}
	}
	bool isNumber = false;
	int min = lexemBeforeComma.toInt(&isNumber);
	if (!isNumber) {
	index = rewind;
	return nfa;
	}
	int max = min;
	if (seenComma) {
	max = lexemAfterComma.toInt(&isNumber);
	if (!isNumber) {
	errorColumn = symbol().column;
	return NFA();
	}
	}
	return NFA::applyQuantity(nfa, min, max);
	}
	default:
	prev();
	return nfa;
	}
	}

	NFA RE2NFA::parseSet()
	{
	QSet<InputType> set;
	bool negate = false;

	next(TOK_STRING);

	do {
	Q_ASSERT(symbol().lexem.length() == 1);
	// ###
	QChar ch = symbol().lexem.at(0);
	if (set.isEmpty() && ch == QLatin1Char('^')) {
	negate = true;
	continue;
	}

	// look ahead for ranges like a-z
	bool rangeFound = false;
	if (test(TOK_STRING)) {
	if (symbol().lexem.length() == 1
	&& symbol().lexem.at(0) == QLatin1Char('-')) {
	next(TOK_STRING);
	Q_ASSERT(symbol().lexem.length() == 1);
	QChar last = symbol().lexem.at(0);

	if (ch.unicode() > last.unicode())
	qSwap(ch, last);

	for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
	if (caseSensitivity == Qt::CaseInsensitive) {
	set.insert(QChar(i).toLower().unicode());
	} else {
	set.insert(i);
	}
	}

	rangeFound = true;
	} else {
	prev();
	}
	}

	if (!rangeFound) {
	if (caseSensitivity == Qt::CaseInsensitive) {
	set.insert(ch.toLower().unicode());
	} else {
	set.insert(ch.unicode());
	}
	}
	} while (test(TOK_STRING));

	if (negate) {
	QSet<InputType> negatedSet = maxInputSet;
	negatedSet.subtract(set);
	set = negatedSet;
	}

	return NFA::createSetNFA(set);
	}

	NFA RE2NFA::parseSet2()
	{
	QSet<InputType> set;
	bool negate = false;

	QString str = symbol().lexem;
	// strip off brackets
	str.chop(1);
	str.remove(0, 1);

	int i = 0;
	while (i < str.length()) {
	// ###
	QChar ch = str.at(i++);
	if (set.isEmpty() && ch == QLatin1Char('^')) {
	negate = true;
	continue;
	}

	// look ahead for ranges like a-z
	bool rangeFound = false;
	if (i < str.length() - 1 && str.at(i) == QLatin1Char('-')) {
	++i;
	QChar last = str.at(i++);

	if (ch.unicode() > last.unicode())
	qSwap(ch, last);

	for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
	if (caseSensitivity == Qt::CaseInsensitive) {
	set.insert(QChar(i).toLower().unicode());
	} else {
	set.insert(i);
	}
	}

	rangeFound = true;
	}

	if (!rangeFound) {
	if (caseSensitivity == Qt::CaseInsensitive) {
	set.insert(ch.toLower().unicode());
	} else {
	set.insert(ch.unicode());
	}
	}
	}

	if (negate) {
	QSet<InputType> negatedSet = maxInputSet;
	negatedSet.subtract(set);
	set = negatedSet;
	}

	return NFA::createSetNFA(set);
	}
	NFA RE2NFA::createCharNFA()
	{
	NFA nfa;
	// ####
	if (caseSensitivity == Qt::CaseInsensitive) {
	nfa = NFA::createStringNFA(symbol().lexem.toLower().toLatin1());
	} else {
	nfa = NFA::createStringNFA(symbol().lexem.toLatin1());
	}
	return nfa;
	}

	static inline int skipQuote(const QString &str, int pos)
	{
	while (pos < str.length()
	&& str.at(pos) != QLatin1Char('"')) {
	if (str.at(pos) == QLatin1Char('\\')) {
	++pos;
	if (pos >= str.length())
	break;
	}
	++pos;
	}
	if (pos < str.length())
	++pos;
	return pos;
	}

	#if 0
	static const char*tokStr(Token t)
	{
	switch (t) {
	case TOK_INVALID: return "TOK_INVALID";
	case TOK_STRING: return "TOK_STRING";
	case TOK_LBRACE: return "TOK_LBRACE";
	case TOK_RBRACE: return "TOK_RBRACE";
	case TOK_LBRACKET: return "TOK_LBRACKET";
	case TOK_RBRACKET: return "TOK_RBRACKET";
	case TOK_LPAREN: return "TOK_LPAREN";
	case TOK_RPAREN: return "TOK_RPAREN";
	case TOK_COMMA: return "TOK_COMMA";
	case TOK_STAR: return "TOK_STAR";
	case TOK_OR: return "TOK_OR";
	case TOK_QUESTION: return "TOK_QUESTION";
	case TOK_DOT: return "TOK_DOT";
	case TOK_PLUS: return "TOK_PLUS";
	case TOK_SEQUENCE: return "TOK_SEQUENCE";
	case TOK_QUOTED_STRING: return "TOK_QUOTED_STRING";
	}
	return "";
	}
	#endif

	void RE2NFA::tokenize(const QString &input)
	{
	symbols.clear();
	#if 1
	RegExpTokenizer tokenizer(input);
	Symbol sym;
	int tok = tokenizer.lex();
	while (tok != -1) {
	Symbol sym;
	sym.token = static_cast<Token>(tok);
	sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);

	if (sym.token == TOK_QUOTED_STRING) {
	sym.lexem.chop(1);
	sym.lexem.remove(0, 1);
	sym.token = TOK_STRING;
	}

	if (sym.token == TOK_STRING \|\| sym.token == TOK_SEQUENCE) {
	for (int i = 0; i < sym.lexem.length(); ++i) {
	if (sym.lexem.at(i) == '\\') {
	if (i >= sym.lexem.length() - 1)
	break;
	QChar ch = sym.lexem.at(i + 1);
	if (ch == QLatin1Char('n')) {
	ch = '\n';
	} else if (ch == QLatin1Char('r')) {
	ch = '\r';
	} else if (ch == QLatin1Char('t')) {
	ch = '\t';
	} else if (ch == QLatin1Char('f')) {
	ch = '\f';
	}
	sym.lexem.replace(i, 2, ch);
	}
	}
	}

	/*
	if (sym.token == TOK_SEQUENCE) {
	Symbol s;
	s.token = TOK_LBRACKET;
	s.lexem = "[";
	symbols.append(s);

	for (int i = 1; i < sym.lexem.length() - 1; ++i) {
	s.token = TOK_STRING;
	s.lexem = sym.lexem.at(i);
	symbols.append(s);
	}

	s.token = TOK_RBRACKET;
	s.lexem = "]";
	symbols.append(s);

	tok = tokenizer.lex();
	continue;
	}
	*/

	symbols.append(sym);
	tok = tokenizer.lex();
	}
	#else
	int pos = 0;
	bool insideSet = false;
	while (pos < input.length()) {
	QChar ch = input.at(pos);

	Symbol sym;
	sym.column = pos;
	sym.token = TOK_INVALID;
	sym.lexem = QString(ch);
	switch (ch.toLatin1()) {
	case '"': {
	if (insideSet) {
	sym.token = TOK_STRING;
	sym.lexem = QString(ch);
	symbols += sym;
	++pos;
	continue;
	}
	if (pos + 1 >= input.length())
	return;
	int quoteEnd = skipQuote(input, pos + 1);
	sym.token = TOK_STRING;
	sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);
	symbols += sym;
	pos = quoteEnd;
	continue;
	}
	case '{':
	sym.token = (insideSet ? TOK_STRING : TOK_LBRACE);
	break;
	case '}':
	sym.token = (insideSet ? TOK_STRING : TOK_RBRACE);
	break;
	case '[':
	insideSet = true;
	sym.token = TOK_LBRACKET;
	break;
	case ']':
	insideSet = false;
	sym.token = TOK_RBRACKET;
	break;
	case '(':
	sym.token = (insideSet ? TOK_STRING : TOK_LPAREN);
	break;
	case ')':
	sym.token = (insideSet ? TOK_STRING : TOK_RPAREN);
	break;
	case ',':
	sym.token = (insideSet ? TOK_STRING : TOK_COMMA);
	break;
	case '*':
	sym.token = (insideSet ? TOK_STRING : TOK_STAR);
	break;
	case '\|':
	sym.token = (insideSet ? TOK_STRING : TOK_OR);
	break;
	case '?':
	sym.token = (insideSet ? TOK_STRING : TOK_QUESTION);
	break;
	case '.':
	sym.token = (insideSet ? TOK_STRING : TOK_DOT);
	break;
	case '+':
	sym.token = (insideSet ? TOK_STRING : TOK_PLUS);
	break;
	case '\\':
	++pos;
	if (pos >= input.length())
	return;
	ch = input.at(pos);
	if (ch == QLatin1Char('n')) {
	ch = '\n';
	} else if (ch == QLatin1Char('r')) {
	ch = '\r';
	} else if (ch == QLatin1Char('t')) {
	ch = '\t';
	} else if (ch == QLatin1Char('f')) {
	ch = '\f';
	}
	// fall through
	default:
	sym.token = TOK_STRING;
	sym.lexem = QString(ch);
	symbols += sym;
	++pos;
	continue;
	}
	symbols += sym;
	++pos;
	}
	#endif
	#if 0
	foreach (Symbol s, symbols) {
	qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;
	}
	#endif
	}

	bool RE2NFA::next(Token t)
	{
	if (hasNext() && next() == t)
	return true;
	errorColumn = symbol().column;
	Q_ASSERT(false);
	return false;
	}

	bool RE2NFA::test(Token t)
	{
	if (index >= symbols.count())
	return false;
	if (symbols.at(index).token == t) {
	++index;
	return true;
	}
	return false;
	}

	QString RE2NFA::lexemUntil(Token t)
	{
	QString lexem;
	while (hasNext() && next() != t)
	lexem += symbol().lexem;
	return lexem;
	}