1 /****************************************************************************
3 ** Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
6 ** This file is part of the QtQml module of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
40 ****************************************************************************/
42 #include "qqmljslexer_p.h"
43 #include "qqmljsengine_p.h"
44 #include "qqmljsmemorypool_p.h"
46 #include <QtCore/QCoreApplication>
47 #include <QtCore/QVarLengthArray>
48 #include <QtCore/QDebug>
// Declaration of the string-to-double helper that the numeric-literal code below calls
// (scanToken() and scanNumber()). It is tagged Q_CORE_EXPORT and not defined in this file.
51 Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok);
// From here on the QQmlJS names (e.g. Lexer) are written without the namespace prefix.
54 using namespace QQmlJS;
56 static int regExpFlagFromChar(const QChar &ch)
58 switch (ch.unicode()) {
59 case 'g': return Lexer::RegExp_Global;
60 case 'i': return Lexer::RegExp_IgnoreCase;
61 case 'm': return Lexer::RegExp_Multiline;
// Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its value 0..15.
// The argument is not validated: callers are expected to have checked it (e.g. with
// Lexer::isHexDigit()); anything that is neither a decimal digit nor a lower-case hex
// letter is treated as an upper-case hex letter.
static unsigned char convertHex(ushort c)
{
    if (c >= '0' && c <= '9')
        return (c - '0');
    else if (c >= 'a' && c <= 'f')
        return (c - 'a' + 10);
    else
        return (c - 'A' + 10);
}
76 static QChar convertHex(QChar c1, QChar c2)
78 return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
81 static QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4)
83 return QChar((convertHex(c3.unicode()) << 4) + convertHex(c4.unicode()),
84 (convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
// Constructs a lexer with cleared scanning state and hands it to the engine through
// Engine::setLexer().
87 Lexer::Lexer(Engine *engine)
// The current character starts out as a newline, the same value setCode() assigns.
93 , _char(QLatin1Char('\n'))
95 , _currentLineNumber(0)
97 , _parenthesesState(IgnoreParentheses)
98 , _parenthesesCount(0)
104 , _validTokenText(false)
105 , _prohibitAutomaticSemicolon(false)
106 , _restrictedKeyword(false)
108 , _followsClosingBrace(false)
113 engine->setLexer(this);
// Const query for the lexer's QML mode (compare the qmlMode argument of setCode()).
// NOTE(review): presumably returns the _qmlMode member that scanToken() passes to classify() - confirm.
116 bool Lexer::qmlMode() const
// Const query for the source text being scanned.
// NOTE(review): presumably returns the _code member referenced in scanToken() - confirm.
121 QString Lexer::code() const
// (Re)initializes the lexer for a new piece of source text: forwards the text to the
// engine, points all scanning pointers at its first character and resets the error,
// parenthesis-tracking and automatic-semicolon state.
//   code    - the source text to scan
//   lineno  - value assigned to the current line counter
//   qmlMode - QML vs. plain JavaScript mode
126 void Lexer::setCode(const QString &code, int lineno, bool qmlMode)
129 _engine->setCode(code);
// Pre-size the scratch buffer used for token text that has to be rebuilt (escapes etc.).
134 _tokenText.reserve(1024);
135 _errorMessage.clear();
136 _tokenSpell = QStringRef();
// Every source pointer starts at the first character of the new text.
138 _codePtr = code.unicode();
139 _lastLinePtr = _codePtr;
140 _tokenLinePtr = _codePtr;
141 _tokenStartPtr = _codePtr;
// Same initial "current character" as in the constructor.
143 _char = QLatin1Char('\n');
144 _errorCode = NoError;
146 _currentLineNumber = lineno;
150 _parenthesesState = IgnoreParentheses;
151 _parenthesesCount = 0;
159 _validTokenText = false;
160 _prohibitAutomaticSemicolon = false;
161 _restrictedKeyword = false;
163 _followsClosingBrace = false;
// Per-character step of the lexer. The part shown here does the line bookkeeping:
// when the current character is a newline, the start of the new line is remembered
// and the line counter is incremented.
167 void Lexer::scanChar()
171 if (_char == QLatin1Char('\n')) {
172 _lastLinePtr = _codePtr; // points to the first character after the newline
173 ++_currentLineNumber;
// Fetches the next token via scanToken(), records its length and updates the state that
// drives automatic semicolon insertion (restricted keyword, closing brace, parentheses).
179 const int previousTokenKind = _tokenKind;
181 _tokenSpell = QStringRef();
182 _tokenKind = scanToken();
// _codePtr is one past the current character (see _tokenStartPtr = _codePtr - 1 in
// scanToken()), hence the extra -1.
183 _tokenLength = _codePtr - _tokenStartPtr - 1;
186 _restrictedKeyword = false;
187 _followsClosingBrace = (previousTokenKind == T_RBRACE);
// Token-specific adjustments of the parentheses state and the restricted-keyword flag.
190 switch (_tokenKind) {
201 _parenthesesState = CountParentheses;
202 _parenthesesCount = 0;
206 _parenthesesState = BalancedParentheses;
213 _restrictedKeyword = true;
217 // update the parentheses state
218 switch (_parenthesesState) {
219 case IgnoreParentheses:
222 case CountParentheses:
// While counting, the state becomes BalancedParentheses once the count is back at zero.
223 if (_tokenKind == T_RPAREN) {
225 if (_parenthesesCount == 0)
226 _parenthesesState = BalancedParentheses;
227 } else if (_tokenKind == T_LPAREN) {
232 case BalancedParentheses:
// The balanced state only lasts for one token.
233 _parenthesesState = IgnoreParentheses;
240 bool Lexer::isUnicodeEscapeSequence(const QChar *chars)
242 if (isHexDigit(chars[0]) && isHexDigit(chars[1]) && isHexDigit(chars[2]) && isHexDigit(chars[3]))
// Decodes a unicode escape whose 'u' is the current character: when the next four
// characters are hex digits, the 'u' and the digits are read and combined into one
// character by convertUnicode().
// NOTE(review): how *ok is set, and what is returned when the check fails, is not
// shown in the lines below - confirm.
248 QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
250 if (_char == QLatin1Char('u') && isUnicodeEscapeSequence(&_codePtr[0])) {
251 scanChar(); // skip u
// The four hex digits, most significant first.
253 const QChar c1 = _char;
256 const QChar c2 = _char;
259 const QChar c3 = _char;
262 const QChar c4 = _char;
268 return convertUnicode(c1, c2, c3, c4);
// Core tokenizer: skips whitespace and comments, then recognizes one token starting at
// the current character and returns its kind (a T_* value). Along the way it records
// the token's start pointer and line, its spelling (_tokenSpell / _tokenText), numeric
// value (_tokenValue) and, on failure, _errorCode / _errorMessage.
275 int Lexer::scanToken()
// A token parked earlier in _stackToken is picked up before any new input is scanned.
277 if (_stackToken != -1) {
278 int tk = _stackToken;
286 _validTokenText = false;
287 _tokenLinePtr = _lastLinePtr;
// Skip whitespace. A newline moves the token line pointer and, right after a
// restricted keyword, leads to automatic semicolon insertion.
289 while (_char.isSpace()) {
290 if (_char == QLatin1Char('\n')) {
291 _tokenLinePtr = _codePtr;
293 if (_restrictedKeyword) {
294 // automatic semicolon insertion
295 _tokenLine = _currentLineNumber;
296 _tokenStartPtr = _codePtr - 1; // ### TODO: insert it before the optional \r sequence.
300 syncProhibitAutomaticSemicolon();
// Remember where the token starts; _codePtr is already one past the current character.
307 _tokenStartPtr = _codePtr - 1;
308 _tokenLine = _currentLineNumber;
// ch is the token's first character; the checks on _char below look at what follows it.
313 const QChar ch = _char;
316 switch (ch.unicode()) {
317 case '~': return T_TILDE;
318 case '}': return T_RBRACE;
321 if (_char == QLatin1Char('|')) {
324 } else if (_char == QLatin1Char('=')) {
330 case '{': return T_LBRACE;
333 if (_char == QLatin1Char('=')) {
339 case ']': return T_RBRACKET;
340 case '[': return T_LBRACKET;
341 case '?': return T_QUESTION;
344 if (_char == QLatin1Char('>')) {
346 if (_char == QLatin1Char('>')) {
348 if (_char == QLatin1Char('=')) {
350 return T_GT_GT_GT_EQ;
353 } else if (_char == QLatin1Char('=')) {
358 } else if (_char == QLatin1Char('=')) {
365 if (_char == QLatin1Char('=')) {
367 if (_char == QLatin1Char('=')) {
376 if (_char == QLatin1Char('=')) {
379 } else if (_char == QLatin1Char('<')) {
381 if (_char == QLatin1Char('=')) {
389 case ';': return T_SEMICOLON;
390 case ':': return T_COLON;
// A following '*' opens a block comment: scan to the closing "*/" and report the
// comment body (without the 2+2 delimiter characters) to the engine.
393 if (_char == QLatin1Char('*')) {
395 while (!_char.isNull()) {
396 if (_char == QLatin1Char('*')) {
398 if (_char == QLatin1Char('/')) {
402 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 4,
403 tokenStartLine(), tokenStartColumn() + 2);
// A following '/' opens a line comment that runs up to the newline or the end of input.
412 } else if (_char == QLatin1Char('/')) {
413 while (!_char.isNull() && _char != QLatin1Char('\n')) {
417 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2,
418 tokenStartLine(), tokenStartColumn() + 2);
421 } if (_char == QLatin1Char('=')) {
// A '.' directly followed by a digit starts a fractional numeric literal; its characters
// are collected in a byte buffer and converted with qstrtod().
428 if (_char.isDigit()) {
429 QVarLengthArray<char,32> chars;
431 chars.append(ch.unicode()); // append the `.'
433 while (_char.isDigit()) {
434 chars.append(_char.unicode());
// An exponent is only accepted when 'e'/'E' is followed by a digit or by a sign and a digit.
438 if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
439 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
440 _codePtr[1].isDigit())) {
442 chars.append(_char.unicode());
443 scanChar(); // consume `e'
445 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
446 chars.append(_char.unicode());
447 scanChar(); // consume the sign
450 while (_char.isDigit()) {
451 chars.append(_char.unicode());
459 const char *begin = chars.constData();
463 _tokenValue = qstrtod(begin, &end, &ok);
// The conversion must have consumed everything but the buffer's last element
// (presumably a terminating NUL appended above - confirm).
465 if (end - begin != chars.size() - 1) {
466 _errorCode = IllegalExponentIndicator;
467 _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number");
471 return T_NUMERIC_LITERAL;
476 if (_char == QLatin1Char('=')) {
479 } else if (_char == QLatin1Char('-')) {
// "--" at the start of a new line: the operator is parked in _stackToken (see the check
// at the top of this function) unless automatic semicolons are prohibited here.
482 if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
483 _stackToken = T_MINUS_MINUS;
487 return T_MINUS_MINUS;
491 case ',': return T_COMMA;
494 if (_char == QLatin1Char('=')) {
497 } else if (_char == QLatin1Char('+')) {
// Same treatment for "++" as for "--" above.
500 if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
501 _stackToken = T_PLUS_PLUS;
510 if (_char == QLatin1Char('=')) {
516 case ')': return T_RPAREN;
517 case '(': return T_LPAREN;
520 if (_char == QLatin1Char('=')) {
523 } else if (_char == QLatin1Char('&')) {
530 if (_char == QLatin1Char('=')) {
532 return T_REMAINDER_EQ;
537 if (_char == QLatin1Char('=')) {
539 if (_char == QLatin1Char('=')) {
// String literal; ch is the opening quote character.
549 const QChar quote = ch;
550 bool multilineStringLiteral = false;
552 const QChar *startCode = _codePtr;
// Fast path: if the closing quote is reached before any newline or backslash, the
// spelling is taken straight from the source text via midRef().
555 while (!_char.isNull()) {
556 if (_char == QLatin1Char('\n') || _char == QLatin1Char('\\')) {
558 } else if (_char == quote) {
559 _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode);
562 return T_STRING_LITERAL;
// Slow path: copy what was scanned so far into _tokenText, then continue character by
// character, decoding escape sequences and noting embedded newlines.
568 _validTokenText = true;
569 _tokenText.resize(0);
571 while (startCode != _codePtr - 1)
572 _tokenText += *startCode++;
574 while (! _char.isNull()) {
575 if (_char == QLatin1Char('\n')) {
576 multilineStringLiteral = true;
579 } else if (_char == quote) {
583 _tokenSpell = _engine->newStringRef(_tokenText);
585 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
586 } else if (_char == QLatin1Char('\\')) {
// u receives the character the escape sequence stands for.
592 switch (_char.unicode()) {
593 // unicode escape sequence
595 u = decodeUnicodeEscapeCharacter(&ok);
600 // hex escape sequence
603 if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
606 const QChar c1 = _char;
609 const QChar c2 = _char;
612 u = convertHex(c1, c2);
618 // single character escape sequence
619 case '\\': u = QLatin1Char('\\'); scanChar(); break;
620 case '\'': u = QLatin1Char('\''); scanChar(); break;
621 case '\"': u = QLatin1Char('\"'); scanChar(); break;
622 case 'b': u = QLatin1Char('\b'); scanChar(); break;
623 case 'f': u = QLatin1Char('\f'); scanChar(); break;
624 case 'n': u = QLatin1Char('\n'); scanChar(); break;
625 case 'r': u = QLatin1Char('\r'); scanChar(); break;
626 case 't': u = QLatin1Char('\t'); scanChar(); break;
627 case 'v': u = QLatin1Char('\v'); scanChar(); break;
630 if (! _codePtr[1].isDigit()) {
632 u = QLatin1Char('\0');
634 // ### parse deprecated octal escape sequence ?
// Backslash followed by a line break: a run of carriage returns is skipped first.
640 while (_char == QLatin1Char('\r'))
643 if (_char == QLatin1Char('\n')) {
647 u = QLatin1Char('\n');
658 // non escape character
// The scanning loop ended without finding the closing quote.
670 _errorCode = UnclosedStringLiteral;
671 _errorMessage = QCoreApplication::translate("QQmlParser", "Unclosed string at end of line");
// Numeric literals are delegated to scanNumber(), which receives the first character.
684 return scanNumber(ch);
// Identifier or keyword: starts with a letter, '$', '_' or a backslash-u escape.
687 if (ch.isLetter() || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) {
688 bool identifierWithEscapeChars = false;
689 if (ch == QLatin1Char('\\')) {
690 identifierWithEscapeChars = true;
691 _tokenText.resize(0);
693 _tokenText += decodeUnicodeEscapeCharacter(&ok);
694 _validTokenText = true;
696 _errorCode = IllegalUnicodeEscapeSequence;
697 _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
702 if (_char.isLetterOrNumber() || _char == QLatin1Char('$') || _char == QLatin1Char('_')) {
703 if (identifierWithEscapeChars)
707 } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
// First escape inside this identifier: seed _tokenText with the characters scanned so far.
708 if (! identifierWithEscapeChars) {
709 identifierWithEscapeChars = true;
710 _tokenText.resize(0);
711 _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
712 _validTokenText = true;
715 scanChar(); // skip '\\'
717 _tokenText += decodeUnicodeEscapeCharacter(&ok);
719 _errorCode = IllegalUnicodeEscapeSequence;
720 _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
724 _tokenLength = _codePtr - _tokenStartPtr - 1;
// Only identifiers written without escapes are run through classify(); the others
// keep the kind T_IDENTIFIER.
726 int kind = T_IDENTIFIER;
728 if (! identifierWithEscapeChars)
729 kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
// The spelling comes from the decoded text when escapes were present, otherwise
// directly from the source.
732 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
733 _tokenSpell = _engine->newStringRef(_tokenText);
735 _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
// Scans a numeric literal whose first character ch has already been read (the current
// character _char is the one after it). Stores the value in _tokenValue and returns
// T_NUMERIC_LITERAL; if qstrtod() does not consume the whole collected text,
// _errorCode is set to IllegalExponentIndicator and _errorMessage is filled in.
749 int Lexer::scanNumber(QChar ch)
// Fast path for decimal integers that do not start with '0': accumulate the digits
// directly, unless the digits are followed by '.', 'e' or 'E'.
751 if (ch != QLatin1Char('0')) {
752 double integer = ch.unicode() - '0';
755 const QChar *code = _codePtr;
756 while (n.isDigit()) {
757 integer = integer * 10 + (n.unicode() - '0');
761 if (n != QLatin1Char('.') && n != QLatin1Char('e') && n != QLatin1Char('E')) {
762 if (code != _codePtr) {
766 _tokenValue = integer;
767 return T_NUMERIC_LITERAL;
// General path: collect the literal's characters in a byte buffer.
771 QVarLengthArray<char,32> chars;
772 chars.append(ch.unicode());
774 if (ch == QLatin1Char('0') && (_char == QLatin1Char('x') || _char == QLatin1Char('X'))) {
775 // parse hex integer literal
777 chars.append(_char.unicode());
778 scanChar(); // consume `x'
780 while (isHexDigit(_char)) {
781 chars.append(_char.unicode());
785 _tokenValue = integerFromString(chars.constData(), chars.size(), 16);
786 return T_NUMERIC_LITERAL;
789 // decimal integer literal
790 while (_char.isDigit()) {
791 chars.append(_char.unicode());
792 scanChar(); // consume the digit
// Optional fraction, optionally followed by an exponent.
795 if (_char == QLatin1Char('.')) {
796 chars.append(_char.unicode());
797 scanChar(); // consume `.'
799 while (_char.isDigit()) {
800 chars.append(_char.unicode());
// An exponent is only accepted when 'e'/'E' is followed by a digit or by a sign and a digit.
804 if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
805 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
806 _codePtr[1].isDigit())) {
808 chars.append(_char.unicode());
809 scanChar(); // consume `e'
811 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
812 chars.append(_char.unicode());
813 scanChar(); // consume the sign
816 while (_char.isDigit()) {
817 chars.append(_char.unicode());
// Exponent directly after the integer part (no fraction); same acceptance rule as above.
822 } else if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
823 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
824 _codePtr[1].isDigit())) {
826 chars.append(_char.unicode());
827 scanChar(); // consume `e'
829 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
830 chars.append(_char.unicode());
831 scanChar(); // consume the sign
834 while (_char.isDigit()) {
835 chars.append(_char.unicode());
841 if (chars.length() == 1) {
842 // if we ended up with a single digit, then it was a '0'
844 return T_NUMERIC_LITERAL;
849 const char *begin = chars.constData();
853 _tokenValue = qstrtod(begin, &end, &ok);
// The conversion must have consumed everything but the buffer's last element
// (presumably a terminating NUL appended above - confirm).
855 if (end - begin != chars.size() - 1) {
856 _errorCode = IllegalExponentIndicator;
857 _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number");
861 return T_NUMERIC_LITERAL;
// Scans a regular expression literal into _tokenText, folding trailing flag characters
// into _patternFlags. Problems (unterminated literal, backslash sequence or character
// class; invalid flag) are described in _errorMessage.
//   prefix - EqualPrefix makes the collected pattern text start with '='
864 bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
866 _tokenText.resize(0);
867 _validTokenText = true;
870 if (prefix == EqualPrefix)
871 _tokenText += QLatin1Char('=');
874 switch (_char.unicode()) {
876 case '\n': case '\r': // line terminator
877 _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression literal");
// Flags: every identifier character here must map to a known flag.
885 while (isIdentLetter(_char)) {
886 int flag = regExpFlagFromChar(_char);
888 _errorMessage = QCoreApplication::translate("QQmlParser", "Invalid regular expression flag '%0'")
892 _patternFlags |= flag;
896 _tokenLength = _codePtr - _tokenStartPtr - 1;
900 // regular expression backslash sequence
904 if (_char.isNull() || isLineTerminator()) {
905 _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence");
914 // regular expression class
// Scan the class up to ']' or until the end of the line / input.
918 while (! _char.isNull() && ! isLineTerminator()) {
919 if (_char == QLatin1Char(']'))
921 else if (_char == QLatin1Char('\\')) {
922 // regular expression backslash sequence
926 if (_char.isNull() || isLineTerminator()) {
927 _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence");
// The class scan stopped on something other than the closing bracket.
939 if (_char != QLatin1Char(']')) {
940 _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression class");
945 scanChar(); // skip ]
957 bool Lexer::isLineTerminator() const
959 return (_char == QLatin1Char('\n') || _char == QLatin1Char('\r'));
962 bool Lexer::isIdentLetter(QChar ch)
964 // ASCII-biased, since all reserved words are ASCII, aand hence the
965 // bulk of content to be parsed.
966 if ((ch >= QLatin1Char('a') && ch <= QLatin1Char('z'))
967 || (ch >= QLatin1Char('A') && ch <= QLatin1Char('Z'))
968 || ch == QLatin1Char('$')
969 || ch == QLatin1Char('_'))
971 if (ch.unicode() < 128)
973 return ch.isLetterOrNumber();
976 bool Lexer::isDecimalDigit(ushort c)
978 return (c >= '0' && c <= '9');
981 bool Lexer::isHexDigit(QChar c)
983 return ((c >= QLatin1Char('0') && c <= QLatin1Char('9'))
984 || (c >= QLatin1Char('a') && c <= QLatin1Char('f'))
985 || (c >= QLatin1Char('A') && c <= QLatin1Char('F')));
988 bool Lexer::isOctalDigit(ushort c)
990 return (c >= '0' && c <= '7');
993 int Lexer::tokenEndLine() const
995 return _currentLineNumber;
998 int Lexer::tokenEndColumn() const
1000 return _codePtr - _lastLinePtr;
1003 QString Lexer::tokenText() const
1005 if (_validTokenText)
1008 if (_tokenKind == T_STRING_LITERAL)
1009 return QString(_tokenStartPtr + 1, _tokenLength - 2);
1011 return QString(_tokenStartPtr, _tokenLength);
// Const query for the lexer's error state.
// NOTE(review): presumably returns the _errorCode member that setCode() resets to NoError - confirm.
1014 Lexer::Error Lexer::errorCode() const
1019 QString Lexer::errorMessage() const
1021 return _errorMessage;
1024 void Lexer::syncProhibitAutomaticSemicolon()
1026 if (_parenthesesState == BalancedParentheses) {
1027 // we have seen something like "if (foo)", which means we should
1028 // never insert an automatic semicolon at this point, since it would
1029 // then be expanded into an empty statement (ECMA-262 7.9.1)
1030 _prohibitAutomaticSemicolon = true;
1031 _parenthesesState = IgnoreParentheses;
1033 _prohibitAutomaticSemicolon = false;
// Const query related to line terminators seen by the scanner.
// NOTE(review): presumably returns the _terminator flag consulted in scanToken() - confirm.
1037 bool Lexer::prevTerminator() const
1042 bool Lexer::followsClosingBrace() const
1044 return _followsClosingBrace;
// Tells whether an automatic semicolon may be inserted before the given token: allowed
// before a closing brace, at the end of input, or when the previous token was a closing brace.
// NOTE(review): the embedded numbering skips a line between the EOF_SYMBOL and
// _followsClosingBrace terms, so one more condition may belong here - confirm.
1047 bool Lexer::canInsertAutomaticSemicolon(int token) const
1049 return token == T_RBRACE
1050 || token == EOF_SYMBOL
1052 || _followsClosingBrace;
// Scans the leading ".pragma library" and ".import ..." directives of a JavaScript file
// and reports each one to the given Directives object. Returns false as soon as a
// directive is malformed or spans more than one line.
1055 bool Lexer::scanDirectives(Directives *directives)
1058 // the directives are a Javascript-only extension.
1062 lex(); // fetch the first token
// Every directive starts with a '.' token.
1064 if (_tokenKind != T_DOT)
1068 lex(); // skip T_DOT
// Remembered so the single-line rule can be checked once the directive has been read.
1070 const int lineNumber = tokenStartLine();
1072 if (! (_tokenKind == T_IDENTIFIER || _tokenKind == T_RESERVED_WORD))
1073 return false; // expected a valid QML/JS directive
1075 const QString directiveName = tokenText();
1077 if (! (directiveName == QLatin1String("pragma") ||
1078 directiveName == QLatin1String("import")))
1079 return false; // not a valid directive name
1081 // it must be a pragma or an import directive.
1082 if (directiveName == QLatin1String("pragma")) {
1084 if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library")))
1085 return false; // expected `library
1087 // we found a .pragma library directive
1088 directives->pragmaLibrary();
1091 Q_ASSERT(directiveName == QLatin1String("import"));
1092 lex(); // skip .import
1096 bool fileImport = false; // file or uri import
1098 if (_tokenKind == T_STRING_LITERAL) {
1099 // .import T_STRING_LITERAL as T_IDENTIFIER
1102 pathOrUri = tokenText();
1104 } else if (_tokenKind == T_IDENTIFIER) {
1105 // .import T_IDENTIFIER (. T_IDENTIFIER)* T_NUMERIC_LITERAL as T_IDENTIFIER
1107 pathOrUri = tokenText();
1109 lex(); // skip the first T_IDENTIFIER
// Append the dotted components of the module URI.
1110 for (; _tokenKind == T_DOT; lex()) {
1111 if (lex() != T_IDENTIFIER)
1114 pathOrUri += QLatin1Char('.');
1115 pathOrUri += tokenText();
1118 if (_tokenKind != T_NUMERIC_LITERAL)
1119 return false; // expected the module version number
1121 version = tokenText();
1125 // recognize the mandatory `as' followed by the module name
1127 if (! (lex() == T_RESERVED_WORD && tokenText() == QLatin1String("as")))
1128 return false; // expected `as'
1130 if (lex() != T_IDENTIFIER)
1131 return false; // expected module name
1133 const QString module = tokenText();
// Report the import: file imports carry no version, module imports do.
1136 directives->importFile(pathOrUri, module);
1138 directives->importModule(pathOrUri, version, module);
1141 if (tokenStartLine() != lineNumber)
1142 return false; // the directives cannot span over multiple lines
1144 // fetch the first token after the .pragma/.import directive
// Keep going while the next token starts another directive.
1146 } while (_tokenKind == T_DOT);
1151 #include "qqmljskeywords_p.h"