1 /****************************************************************************
3 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/
6 ** This file is part of the QtDeclarative module of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** GNU Lesser General Public License Usage
10 ** This file may be used under the terms of the GNU Lesser General Public
11 ** License version 2.1 as published by the Free Software Foundation and
12 ** appearing in the file LICENSE.LGPL included in the packaging of this
13 ** file. Please review the following information to ensure the GNU Lesser
14 ** General Public License version 2.1 requirements will be met:
15 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
17 ** In addition, as a special exception, Nokia gives you certain additional
18 ** rights. These rights are described in the Nokia Qt LGPL Exception
19 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
21 ** GNU General Public License Usage
22 ** Alternatively, this file may be used under the terms of the GNU General
23 ** Public License version 3.0 as published by the Free Software Foundation
24 ** and appearing in the file LICENSE.GPL included in the packaging of this
25 ** file. Please review the following information to ensure the GNU General
26 ** Public License version 3.0 requirements will be met:
27 ** http://www.gnu.org/copyleft/gpl.html.
30 ** Alternatively, this file may be used in accordance with the terms and
31 ** conditions contained in a signed written agreement between you and Nokia.
40 ****************************************************************************/
42 #include "qdeclarativejslexer_p.h"
43 #include "qdeclarativejsengine_p.h"
44 #include "qdeclarativejsmemorypool_p.h"
46 #include <QtCore/QCoreApplication>
47 #include <QtCore/QVarLengthArray>
48 #include <QtCore/QDebug>
51 Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok);
54 using namespace QDeclarativeJS;
// Maps a regular expression flag character to the matching Lexer flag value:
// 'g' -> Lexer::RegExp_Global, 'i' -> Lexer::RegExp_IgnoreCase,
// 'm' -> Lexer::RegExp_Multiline.
// NOTE(review): the value returned for any other character is not shown here;
// scanRegExp() reports "Invalid regular expression flag" for unrecognised
// flags, so confirm which sentinel it tests for.
56 static int regExpFlagFromChar(const QChar &ch)
58 switch (ch.unicode()) {
59 case 'g': return Lexer::RegExp_Global;
60 case 'i': return Lexer::RegExp_IgnoreCase;
61 case 'm': return Lexer::RegExp_Multiline;
// Converts one hexadecimal digit, given as a UTF-16 code unit, to its numeric
// value in the range 0..15.
//
// '0'..'9' map to 0..9 and 'a'..'f' map to 10..15. Every other input is
// treated as an upper-case digit, so 'A'..'F' map to 10..15. No validation is
// performed here: the callers in this file check the character with
// isHexDigit() before converting it.
static unsigned char convertHex(ushort c)
{
    if (c >= '0' && c <= '9')
        return static_cast<unsigned char>(c - '0');
    else if (c >= 'a' && c <= 'f')
        return static_cast<unsigned char>(c - 'a' + 10);
    else
        return static_cast<unsigned char>(c - 'A' + 10);
}
// Combines two hexadecimal digit characters into one QChar: c1 supplies the
// high nibble (shifted left by 4) and c2 the low nibble.
76 static QChar convertHex(QChar c1, QChar c2)
78 return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
// Builds a QChar from the four hexadecimal digits of a \uXXXX escape.
// The byte formed from c3/c4 is passed as the first QChar constructor argument
// and the byte formed from c1/c2 as the second, i.e. c1c2 is the high byte and
// c3c4 the low byte of the resulting code unit.
81 static QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4)
83 return QChar((convertHex(c3.unicode()) << 4) + convertHex(c4.unicode()),
84 (convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
// Constructs a lexer bound to the given engine.
// The members shown start in a neutral state: the current character is a
// newline, the line number is 0, parentheses tracking is off and all
// automatic-semicolon related flags are cleared.
// NOTE(review): only part of the initializer list is shown here.
87 Lexer::Lexer(Engine *engine)
93 , _char(QLatin1Char('\n'))
95 , _currentLineNumber(0)
97 , _parenthesesState(IgnoreParentheses)
98 , _parenthesesCount(0)
104 , _validTokenText(false)
105 , _prohibitAutomaticSemicolon(false)
106 , _restrictedKeyword(false)
108 , _followsClosingBrace(false)
// register this lexer with the engine it was created for
113 engine->setLexer(this);
116 bool Lexer::qmlMode() const
121 QString Lexer::code() const
// Installs new source text and resets the scanning state.
//   code    - the text to tokenize; it is also handed to the engine
//   lineno  - the value the current line counter starts from
//   qmlMode - NOTE(review): its use is not shown here; presumably stored for
//             keyword classification (scanToken passes _qmlMode to classify())
126 void Lexer::setCode(const QString &code, int lineno, bool qmlMode)
129 _engine->setCode(code);
134 _tokenText.reserve(1024);
135 _errorMessage.clear();
136 _tokenSpell = QStringRef();
// all scanning pointers start at the first character of the new text
138 _codePtr = code.unicode();
139 _lastLinePtr = _codePtr;
140 _tokenLinePtr = _codePtr;
141 _tokenStartPtr = _codePtr;
// same initial character the constructor uses
143 _char = QLatin1Char('\n');
144 _errorCode = NoError;
146 _currentLineNumber = lineno;
150 _parenthesesState = IgnoreParentheses;
151 _parenthesesCount = 0;
159 _validTokenText = false;
160 _prohibitAutomaticSemicolon = false;
161 _restrictedKeyword = false;
163 _followsClosingBrace = false;
// Moves the lexer to the next input character and keeps the line bookkeeping
// up to date: when the current character is a newline, the start of the new
// line is recorded and the line counter is incremented.
// NOTE(review): the statement that actually advances _char is not shown here.
167 void Lexer::scanChar()
171 if (_char == QLatin1Char('\n')) {
172 _lastLinePtr = _codePtr; // points to the first character after the newline
173 ++_currentLineNumber;
179 const int previousTokenKind = _tokenKind;
181 _tokenSpell = QStringRef();
182 _tokenKind = scanToken();
183 _tokenLength = _codePtr - _tokenStartPtr - 1;
186 _restrictedKeyword = false;
187 _followsClosingBrace = (previousTokenKind == T_RBRACE);
190 switch (_tokenKind) {
201 _parenthesesState = CountParentheses;
202 _parenthesesCount = 0;
206 _parenthesesState = BalancedParentheses;
213 _restrictedKeyword = true;
217 // update the parentheses state
218 switch (_parenthesesState) {
219 case IgnoreParentheses:
222 case CountParentheses:
223 if (_tokenKind == T_RPAREN) {
225 if (_parenthesesCount == 0)
226 _parenthesesState = BalancedParentheses;
227 } else if (_tokenKind == T_LPAREN) {
232 case BalancedParentheses:
233 _parenthesesState = IgnoreParentheses;
// Tests whether chars[0]..chars[3] are all hexadecimal digits, i.e. whether
// they can form the XXXX part of a \uXXXX escape. The caller must pass a
// pointer with at least four readable characters.
// NOTE(review): the return statements are not shown here.
240 bool Lexer::isUnicodeEscapeSequence(const QChar *chars)
242 if (isHexDigit(chars[0]) && isHexDigit(chars[1]) && isHexDigit(chars[2]) && isHexDigit(chars[3]))
// Decodes a uXXXX escape at the current position (the backslash has already
// been consumed by the caller) and returns the character it denotes.
// The escape is only accepted when the current character is 'u' and the next
// four characters are hexadecimal digits; the four digits are then read one
// by one and combined with convertUnicode().
// NOTE(review): how *ok is set and what is returned when the escape is
// malformed is not shown here.
248 QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
250 if (_char == QLatin1Char('u') && isUnicodeEscapeSequence(&_codePtr[0])) {
251 scanChar(); // skip u
253 const QChar c1 = _char;
256 const QChar c2 = _char;
259 const QChar c3 = _char;
262 const QChar c4 = _char;
268 return convertUnicode(c1, c2, c3, c4);
// Scans the next token from the input and returns its T_* kind.
// Side effects visible below: updates the token start/line bookkeeping,
// _tokenValue for numeric literals, _tokenSpell/_tokenText for strings and
// identifiers, and _errorCode/_errorMessage when a literal is malformed.
// Comments are reported to the engine through addComment().
// NOTE(review): many statements of this function are not shown here; the
// added comments describe only what the visible lines establish.
275 int Lexer::scanToken()
// a token previously parked in _stackToken (see the ++ / -- handling) is picked up first
277 if (_stackToken != -1) {
278 int tk = _stackToken;
286 _validTokenText = false;
287 _tokenLinePtr = _lastLinePtr;
// skip whitespace; a newline moves the token's line start
289 while (_char.isSpace()) {
290 if (_char == QLatin1Char('\n')) {
291 _tokenLinePtr = _codePtr;
293 if (_restrictedKeyword) {
294 // automatic semicolon insertion
295 _tokenLine = _currentLineNumber;
296 _tokenStartPtr = _codePtr - 1; // ### TODO: insert it before the optional \r sequence.
300 syncProhibitAutomaticSemicolon();
// remember where the token starts (presumably _codePtr is already one past _char)
307 _tokenStartPtr = _codePtr - 1;
308 _tokenLine = _currentLineNumber;
313 const QChar ch = _char;
// dispatch on the first character of the token
316 switch (ch.unicode()) {
317 case '~': return T_TILDE;
318 case '}': return T_RBRACE;
321 if (_char == QLatin1Char('|')) {
324 } else if (_char == QLatin1Char('=')) {
330 case '{': return T_LBRACE;
333 if (_char == QLatin1Char('=')) {
339 case ']': return T_RBRACKET;
340 case '[': return T_LBRACKET;
341 case '?': return T_QUESTION;
344 if (_char == QLatin1Char('>')) {
346 if (_char == QLatin1Char('>')) {
348 if (_char == QLatin1Char('=')) {
350 return T_GT_GT_GT_EQ;
353 } else if (_char == QLatin1Char('=')) {
358 } else if (_char == QLatin1Char('=')) {
365 if (_char == QLatin1Char('=')) {
367 if (_char == QLatin1Char('=')) {
376 if (_char == QLatin1Char('=')) {
379 } else if (_char == QLatin1Char('<')) {
381 if (_char == QLatin1Char('=')) {
389 case ';': return T_SEMICOLON;
390 case ':': return T_COLON;
// '*' here opens a block comment: scan until the closing "*/"
393 if (_char == QLatin1Char('*')) {
395 while (!_char.isNull()) {
396 if (_char == QLatin1Char('*')) {
398 if (_char == QLatin1Char('/')) {
// report the comment to the engine; the +2 / -4 presumably exclude the two-character delimiters
402 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 4,
403 tokenStartLine(), tokenStartColumn() + 2);
// a second '/' opens a line comment that runs to the end of the line
412 } else if (_char == QLatin1Char('/')) {
413 while (!_char.isNull() && _char != QLatin1Char('\n')) {
417 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2,
418 tokenStartLine(), tokenStartColumn() + 2);
421 } if (_char == QLatin1Char('=')) {
// a digit right after the first character starts a fractional numeric literal
428 if (_char.isDigit()) {
429 QVarLengthArray<char,32> chars;
431 chars.append(ch.unicode()); // append the `.'
433 while (_char.isDigit()) {
434 chars.append(_char.unicode());
// an exponent is only consumed when it is followed by a digit or a signed digit
438 if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
439 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
440 _codePtr[1].isDigit())) {
442 chars.append(_char.unicode());
443 scanChar(); // consume `e'
445 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
446 chars.append(_char.unicode());
447 scanChar(); // consume the sign
450 while (_char.isDigit()) {
451 chars.append(_char.unicode());
// convert the collected characters with qstrtod
459 const char *begin = chars.constData();
463 _tokenValue = qstrtod(begin, &end, &ok);
// the parse must stop one element before the end of chars (presumably at a terminator appended above)
465 if (end - begin != chars.size() - 1) {
466 _errorCode = IllegalExponentIndicator;
467 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal syntax for exponential number");
471 return T_NUMERIC_LITERAL;
476 if (_char == QLatin1Char('=')) {
479 } else if (_char == QLatin1Char('-')) {
// park the operator in _stackToken so that a later scanToken() call delivers it
482 if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
483 _stackToken = T_MINUS_MINUS;
487 return T_MINUS_MINUS;
491 case ',': return T_COMMA;
494 if (_char == QLatin1Char('=')) {
497 } else if (_char == QLatin1Char('+')) {
// same parking scheme as for "--"
500 if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
501 _stackToken = T_PLUS_PLUS;
510 if (_char == QLatin1Char('=')) {
516 case ')': return T_RPAREN;
517 case '(': return T_LPAREN;
520 if (_char == QLatin1Char('=')) {
523 } else if (_char == QLatin1Char('&')) {
530 if (_char == QLatin1Char('=')) {
532 return T_REMAINDER_EQ;
537 if (_char == QLatin1Char('=')) {
539 if (_char == QLatin1Char('=')) {
// string literal: ch is the opening quote
549 const QChar quote = ch;
550 bool multilineStringLiteral = false;
552 const QChar *startCode = _codePtr;
// first pass: a string without newline or backslash is referenced directly in the source text
555 while (!_char.isNull()) {
556 if (_char == QLatin1Char('\n') || _char == QLatin1Char('\\')) {
558 } else if (_char == quote) {
559 _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode);
562 return T_STRING_LITERAL;
// second pass: build the decoded text in _tokenText, starting with what was scanned so far
568 _validTokenText = true;
569 _tokenText.resize(0);
571 while (startCode != _codePtr - 1)
572 _tokenText += *startCode++;
574 while (! _char.isNull()) {
575 if (_char == QLatin1Char('\n')) {
576 multilineStringLiteral = true;
579 } else if (_char == quote) {
583 _tokenSpell = _engine->newStringRef(_tokenText);
585 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
586 } else if (_char == QLatin1Char('\\')) {
592 switch (_char.unicode()) {
593 // unicode escape sequence
595 u = decodeUnicodeEscapeCharacter(&ok);
600 // hex escape sequence
603 if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
606 const QChar c1 = _char;
609 const QChar c2 = _char;
612 u = convertHex(c1, c2);
618 // single character escape sequence
619 case '\\': u = QLatin1Char('\\'); scanChar(); break;
620 case '\'': u = QLatin1Char('\''); scanChar(); break;
621 case '\"': u = QLatin1Char('\"'); scanChar(); break;
622 case 'b': u = QLatin1Char('\b'); scanChar(); break;
623 case 'f': u = QLatin1Char('\f'); scanChar(); break;
624 case 'n': u = QLatin1Char('\n'); scanChar(); break;
625 case 'r': u = QLatin1Char('\r'); scanChar(); break;
626 case 't': u = QLatin1Char('\t'); scanChar(); break;
627 case 'v': u = QLatin1Char('\v'); scanChar(); break;
630 if (! _codePtr[1].isDigit()) {
632 u = QLatin1Char('\0');
634 // ### parse deprecated octal escape sequence ?
640 while (_char == QLatin1Char('\r'))
643 if (_char == QLatin1Char('\n')) {
647 u = QLatin1Char('\n');
658 // non escape character
// end of input reached before the closing quote
670 _errorCode = UnclosedStringLiteral;
671 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unclosed string at end of line");
// identifier or keyword: starts with a letter, '$', '_' or a \u escape
676 if (ch.isLetter() || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) {
677 bool identifierWithEscapeChars = false;
678 if (ch == QLatin1Char('\\')) {
679 identifierWithEscapeChars = true;
680 _tokenText.resize(0);
682 _tokenText += decodeUnicodeEscapeCharacter(&ok);
683 _validTokenText = true;
685 _errorCode = IllegalUnicodeEscapeSequence;
686 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal unicode escape sequence");
691 if (_char.isLetterOrNumber() || _char == QLatin1Char('$') || _char == QLatin1Char('_')) {
692 if (identifierWithEscapeChars)
696 } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
// first escape inside the identifier: switch to _tokenText, seeded with the characters scanned so far
697 if (! identifierWithEscapeChars) {
698 identifierWithEscapeChars = true;
699 _tokenText.resize(0);
700 _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
701 _validTokenText = true;
704 scanChar(); // skip '\\'
706 _tokenText += decodeUnicodeEscapeCharacter(&ok);
708 _errorCode = IllegalUnicodeEscapeSequence;
709 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal unicode escape sequence");
713 _tokenLength = _codePtr - _tokenStartPtr - 1;
715 int kind = T_IDENTIFIER;
// only identifiers written without escapes are looked up as keywords
717 if (! identifierWithEscapeChars)
718 kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
721 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
722 _tokenSpell = _engine->newStringRef(_tokenText);
724 _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
// numeric literal
730 } else if (ch.isDigit()) {
// plain decimal integers are accumulated directly, without going through qstrtod
731 if (ch != QLatin1Char('0')) {
732 double integer = ch.unicode() - '0';
735 const QChar *code = _codePtr;
736 while (n.isDigit()) {
737 integer = integer * 10 + (n.unicode() - '0');
741 if (n != QLatin1Char('.') && n != QLatin1Char('e') && n != QLatin1Char('E')) {
742 if (code != _codePtr) {
746 _tokenValue = integer;
747 return T_NUMERIC_LITERAL;
751 QVarLengthArray<char,32> chars;
752 chars.append(ch.unicode());
754 if (ch == QLatin1Char('0') && (_char == QLatin1Char('x') || _char == QLatin1Char('X'))) {
755 // parse hex integer literal
757 chars.append(_char.unicode());
758 scanChar(); // consume `x'
760 while (isHexDigit(_char)) {
761 chars.append(_char.unicode());
765 _tokenValue = integerFromString(chars.constData(), chars.size(), 16);
766 return T_NUMERIC_LITERAL;
769 // decimal integer literal
770 while (_char.isDigit()) {
771 chars.append(_char.unicode());
772 scanChar(); // consume the digit
775 if (_char == QLatin1Char('.')) {
776 chars.append(_char.unicode());
777 scanChar(); // consume `.'
779 while (_char.isDigit()) {
780 chars.append(_char.unicode());
784 if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
785 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
786 _codePtr[1].isDigit())) {
788 chars.append(_char.unicode());
789 scanChar(); // consume `e'
791 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
792 chars.append(_char.unicode());
793 scanChar(); // consume the sign
796 while (_char.isDigit()) {
797 chars.append(_char.unicode());
802 } else if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
803 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
804 _codePtr[1].isDigit())) {
806 chars.append(_char.unicode());
807 scanChar(); // consume `e'
809 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
810 chars.append(_char.unicode());
811 scanChar(); // consume the sign
814 while (_char.isDigit()) {
815 chars.append(_char.unicode());
// same conversion and length check as for literals that start with '.'
823 const char *begin = chars.constData();
827 _tokenValue = qstrtod(begin, &end, &ok);
829 if (end - begin != chars.size() - 1) {
830 _errorCode = IllegalExponentIndicator;
831 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal syntax for exponential number");
835 return T_NUMERIC_LITERAL;
// Scans a regular expression literal, collecting its text in _tokenText and
// its flags in _patternFlags.
//   prefix - when EqualPrefix, the collected text starts with '='
// Errors are reported through _errorMessage: unterminated literal,
// unterminated backslash sequence, unterminated character class and invalid
// flag.
// NOTE(review): the return statements and several case labels are not shown
// here.
844 bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
846 _tokenText.resize(0);
847 _validTokenText = true;
850 if (prefix == EqualPrefix)
851 _tokenText += QLatin1Char('=');
854 switch (_char.unicode()) {
856 case '\n': case '\r': // line terminator
857 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression literal");
// trailing flags: every identifier character must map to a known flag
865 while (isIdentLetter(_char)) {
866 int flag = regExpFlagFromChar(_char);
868 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Invalid regular expression flag '%0'")
872 _patternFlags |= flag;
876 _tokenLength = _codePtr - _tokenStartPtr - 1;
880 // regular expression backslash sequence
// a backslash must not be followed by end of input or a line terminator
884 if (_char.isNull() || isLineTerminator()) {
885 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression backslash sequence");
894 // regular expression class
// scan up to the closing ']', stopping at end of input or a line terminator
898 while (! _char.isNull() && ! isLineTerminator()) {
899 if (_char == QLatin1Char(']'))
901 else if (_char == QLatin1Char('\\')) {
902 // regular expression backslash sequence
906 if (_char.isNull() || isLineTerminator()) {
907 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression backslash sequence");
// the loop ended without reaching ']'
919 if (_char != QLatin1Char(']')) {
920 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression class");
925 scanChar(); // skip ]
// Returns true when the current character (_char) is '\n' or '\r'.
937 bool Lexer::isLineTerminator() const
939 return (_char == QLatin1Char('\n') || _char == QLatin1Char('\r'));
// Classifies ch for identifier scanning. ASCII letters, '$' and '_' are tested
// first, then any other character below 128; only non-ASCII characters reach
// the QChar::isLetterOrNumber() call.
// NOTE(review): the results of the two early tests are not shown here.
942 bool Lexer::isIdentLetter(QChar ch)
944 // ASCII-biased, since all reserved words are ASCII, and hence the
945 // bulk of content to be parsed.
946 if ((ch >= QLatin1Char('a') && ch <= QLatin1Char('z'))
947 || (ch >= QLatin1Char('A') && ch <= QLatin1Char('Z'))
948 || ch == QLatin1Char('$')
949 || ch == QLatin1Char('_'))
951 if (ch.unicode() < 128)
953 return ch.isLetterOrNumber();
// Returns true when c is an ASCII decimal digit ('0'..'9').
956 bool Lexer::isDecimalDigit(ushort c)
958 return (c >= '0' && c <= '9');
// Returns true when c is an ASCII hexadecimal digit: '0'..'9', 'a'..'f' or
// 'A'..'F'.
961 bool Lexer::isHexDigit(QChar c)
963 return ((c >= QLatin1Char('0') && c <= QLatin1Char('9'))
964 || (c >= QLatin1Char('a') && c <= QLatin1Char('f'))
965 || (c >= QLatin1Char('A') && c <= QLatin1Char('F')));
// Returns true when c is an ASCII octal digit ('0'..'7').
968 bool Lexer::isOctalDigit(ushort c)
970 return (c >= '0' && c <= '7');
973 int Lexer::tokenKind() const
// Returns the offset of the current token's first character, counted from the
// start of the _code buffer.
978 int Lexer::tokenOffset() const
980 return _tokenStartPtr - _code.unicode();
983 int Lexer::tokenLength() const
988 int Lexer::tokenStartLine() const
// Returns the column at which the current token starts: its distance from
// _tokenLinePtr plus one, so the first column is 1.
993 int Lexer::tokenStartColumn() const
995 return _tokenStartPtr - _tokenLinePtr + 1;
// Returns the line on which the current token ends, which is the lexer's
// current line number.
998 int Lexer::tokenEndLine() const
1000 return _currentLineNumber;
// Returns the column at which the current token ends: the distance between
// the read position (_codePtr) and the start of the current line
// (_lastLinePtr).
1003 int Lexer::tokenEndColumn() const
1005 return _codePtr - _lastLinePtr;
1008 QStringRef Lexer::tokenSpell() const
1013 double Lexer::tokenValue() const
// Returns the text of the current token.
// For a T_STRING_LITERAL the source span is returned without its first and
// last character (the quotes); for any other token the raw source span of
// _tokenLength characters starting at _tokenStartPtr is returned.
// NOTE(review): the value returned when _validTokenText is set is not shown
// here; presumably the decoded text in _tokenText — confirm.
1018 QString Lexer::tokenText() const
1020 if (_validTokenText)
1023 if (_tokenKind == T_STRING_LITERAL)
1024 return QString(_tokenStartPtr + 1, _tokenLength - 2);
1026 return QString(_tokenStartPtr, _tokenLength);
1029 Lexer::Error Lexer::errorCode() const
// Returns the message stored for the most recent lexing error.
1034 QString Lexer::errorMessage() const
1036 return _errorMessage;
// Updates _prohibitAutomaticSemicolon from the parentheses tracking state.
// When a balanced "(...)" group has just been closed, automatic semicolon
// insertion is prohibited and the tracking state is reset to
// IgnoreParentheses; the flag is cleared on the other visible path.
1039 void Lexer::syncProhibitAutomaticSemicolon()
1041 if (_parenthesesState == BalancedParentheses) {
1042 // we have seen something like "if (foo)", which means we should
1043 // never insert an automatic semicolon at this point, since it would
1044 // then be expanded into an empty statement (ECMA-262 7.9.1)
1045 _prohibitAutomaticSemicolon = true;
1046 _parenthesesState = IgnoreParentheses;
1048 _prohibitAutomaticSemicolon = false;
1052 bool Lexer::prevTerminator() const
// Returns the _followsClosingBrace flag, i.e. whether the token before the
// current one was a closing brace.
1057 bool Lexer::followsClosingBrace() const
1059 return _followsClosingBrace;
// Tells whether an automatic semicolon may be inserted before the given token.
// True when token is T_RBRACE or EOF_SYMBOL, or when the previous token was a
// closing brace.
// NOTE(review): one further operand of this expression is not shown here.
1062 bool Lexer::canInsertAutomaticSemicolon(int token) const
1064 return token == T_RBRACE
1065 || token == EOF_SYMBOL
1067 || _followsClosingBrace;
// Scans the leading ".pragma library" and ".import ..." directives and reports
// each one to the given Directives object through pragmaLibrary(),
// importFile() or importModule().
// Returns false as soon as a directive is malformed: unknown directive name,
// missing `library', missing version number, missing `as', missing module
// name, or a directive that spans more than one line.
// Scanning repeats for as long as the next token is T_DOT.
// NOTE(review): the result on the success path is not shown here.
1070 bool Lexer::scanDirectives(Directives *directives)
1073 // the directives are a Javascript-only extension.
1077 lex(); // fetch the first token
1079 if (_tokenKind != T_DOT)
1083 lex(); // skip T_DOT
// remembered so the single-line rule can be checked at the end of the directive
1085 const int lineNumber = tokenStartLine();
1087 if (! (_tokenKind == T_IDENTIFIER || _tokenKind == T_RESERVED_WORD))
1088 return false; // expected a valid QML/JS directive
1090 const QString directiveName = tokenText();
1092 if (! (directiveName == QLatin1String("pragma") ||
1093 directiveName == QLatin1String("import")))
1094 return false; // not a valid directive name
1096 // it must be a pragma or an import directive.
1097 if (directiveName == QLatin1String("pragma")) {
1099 if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library")))
1100 return false; // expected `library'
1102 // we found a .pragma library directive
1103 directives->pragmaLibrary();
1106 Q_ASSERT(directiveName == QLatin1String("import"));
1107 lex(); // skip .import
1111 bool fileImport = false; // file or uri import
1113 if (_tokenKind == T_STRING_LITERAL) {
1114 // .import T_STRING_LITERAL as T_IDENTIFIER
1117 pathOrUri = tokenText();
1119 } else if (_tokenKind == T_IDENTIFIER) {
1120 // .import T_IDENTIFIER (. T_IDENTIFIER)* T_NUMERIC_LITERAL as T_IDENTIFIER
1122 pathOrUri = tokenText();
1124 lex(); // skip the first T_IDENTIFIER
// append each ".identifier" segment of the dotted module URI
1125 for (; _tokenKind == T_DOT; lex()) {
1126 if (lex() != T_IDENTIFIER)
1129 pathOrUri += QLatin1Char('.');
1130 pathOrUri += tokenText();
1133 if (_tokenKind != T_NUMERIC_LITERAL)
1134 return false; // expected the module version number
1136 version = tokenText();
1140 // recognize the mandatory `as' followed by the module name
1142 if (! (lex() == T_RESERVED_WORD && tokenText() == QLatin1String("as")))
1143 return false; // expected `as'
1145 if (lex() != T_IDENTIFIER)
1146 return false; // expected module name
1148 const QString module = tokenText();
1151 directives->importFile(pathOrUri, module);
1153 directives->importModule(pathOrUri, version, module);
1156 if (tokenStartLine() != lineNumber)
1157 return false; // the directives cannot span over multiple lines
1159 // fetch the first token after the .pragma/.import directive
1161 } while (_tokenKind == T_DOT);
1166 #include "qdeclarativejskeywords_p.h"