1 /****************************************************************************
3 ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4 ** All rights reserved.
5 ** Contact: Nokia Corporation (qt-info@nokia.com)
7 ** This file is part of the QtDeclarative module of the Qt Toolkit.
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** GNU Lesser General Public License Usage
11 ** This file may be used under the terms of the GNU Lesser General Public
12 ** License version 2.1 as published by the Free Software Foundation and
13 ** appearing in the file LICENSE.LGPL included in the packaging of this
14 ** file. Please review the following information to ensure the GNU Lesser
15 ** General Public License version 2.1 requirements will be met:
16 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
18 ** In addition, as a special exception, Nokia gives you certain additional
19 ** rights. These rights are described in the Nokia Qt LGPL Exception
20 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
22 ** GNU General Public License Usage
23 ** Alternatively, this file may be used under the terms of the GNU General
24 ** Public License version 3.0 as published by the Free Software Foundation
25 ** and appearing in the file LICENSE.GPL included in the packaging of this
26 ** file. Please review the following information to ensure the GNU General
27 ** Public License version 3.0 requirements will be met:
28 ** http://www.gnu.org/copyleft/gpl.html.
31 ** Alternatively, this file may be used in accordance with the terms and
32 ** conditions contained in a signed written agreement between you and Nokia.
40 ****************************************************************************/
42 #include "qdeclarativejslexer_p.h"
43 #include "qdeclarativejsengine_p.h"
44 #include "qdeclarativejsmemorypool_p.h"
46 #include <private/qdeclarativeutils_p.h>
47 #include <QtCore/QCoreApplication>
48 #include <QtCore/QVarLengthArray>
49 #include <QtCore/QDebug>
52 Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok);
55 using namespace QDeclarativeJS;
// Maps a regular-expression flag character to its flag constant:
// 'g' -> Global, 'i' -> IgnoreCase, 'm' -> Multiline.
// NOTE(review): the value returned for any other character is not visible in
// this listing; scanRegExp() reports "Invalid regular expression flag" based on
// the result, so presumably a sentinel is returned -- confirm.
63 static int flagFromChar(const QChar &ch)
65 switch (ch.unicode()) {
66 case 'g': return Global;
67 case 'i': return IgnoreCase;
68 case 'm': return Multiline;
// Converts one hexadecimal digit, given as a UTF-16 code unit, to its numeric value.
// 'a'..'f' map to 10..15. Every character that is neither '0'..'9' nor 'a'..'f'
// takes the final branch and is offset from 'A' without validation, so callers
// are expected to pass a hex digit (presumably pre-checked with isHexDigit()).
// NOTE(review): the return statement of the '0'..'9' branch is not visible here.
73 static unsigned char convertHex(ushort c)
75 if (c >= '0' && c <= '9')
77 else if (c >= 'a' && c <= 'f')
78 return (c - 'a' + 10);
80 return (c - 'A' + 10);
// Combines two hexadecimal digits into one byte: c1 supplies the high nibble,
// c2 the low nibble.
83 static unsigned char convertHex(QChar c1, QChar c2)
85 return ((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
// Builds a QChar from the four hexadecimal digits of a \uXXXX escape.
// The first argument of the QChar constructor is computed from c3/c4 and the
// second from c1/c2; with QChar(cell, row) that makes c1c2 the high byte and
// c3c4 the low byte of the code unit.
88 static QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4)
90 return QChar((convertHex(c3.unicode()) << 4) + convertHex(c4.unicode()),
91 (convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
// Constructs a lexer in its initial state and registers it with the engine
// through Engine::setLexer().
// NOTE(review): several member initializers are not visible in this listing.
94 Lexer::Lexer(Engine *engine)
// The current character starts out as a newline -- presumably so the first
// scanToken() call skips it as whitespace and loads the first real character;
// confirm against the whitespace loop in scanToken().
100 , _char(QLatin1Char('\n'))
101 , _errorCode(NoError)
102 , _currentLineNumber(0)
104 , _parenthesesState(IgnoreParentheses)
105 , _parenthesesCount(0)
110 , _validTokenText(false)
111 , _prohibitAutomaticSemicolon(false)
112 , _restrictedKeyword(false)
117 engine->setLexer(this);
// Accessor for the source text of the lexer.
// NOTE(review): the body is not visible in this listing; presumably it returns
// the text last passed to setCode() -- confirm.
120 QString Lexer::code() const
// Resets the lexer so that it scans `code` from its beginning.
//   code   - the source text; it is also forwarded to the engine.
//   lineno - the value assigned to the current line counter.
// All scanning pointers are rewound to the start of `code`, and the error,
// parentheses-tracking and automatic-semicolon state is reset.
// NOTE(review): some statements of this function are not visible in this listing.
125 void Lexer::setCode(const QString &code, int lineno)
128 _engine->setCode(code);
132 _tokenText.reserve(1024);
133 _errorMessage.clear();
134 _tokenSpell = QStringRef();
// Every position pointer starts at the first character of the text.
136 _codePtr = code.unicode();
137 _lastLinePtr = _codePtr;
138 _tokenLinePtr = _codePtr;
139 _tokenStartPtr = _codePtr;
// Same initial current character as in the constructor.
141 _char = QLatin1Char('\n');
142 _errorCode = NoError;
144 _currentLineNumber = lineno;
148 _parenthesesState = IgnoreParentheses;
149 _parenthesesCount = 0;
157 _validTokenText = false;
158 _prohibitAutomaticSemicolon = false;
159 _restrictedKeyword = false;
// Advances the lexer by one character and keeps the line bookkeeping current:
// when the current character is a newline, _lastLinePtr is moved to the start
// of the following line and the line counter is incremented.
// NOTE(review): the statement that loads the next character into _char is not
// visible in this listing.
164 void Lexer::scanChar()
168 if (_char == QLatin1Char('\n')) {
169 _lastLinePtr = _codePtr; // points to the first character after the newline
170 ++_currentLineNumber;
// NOTE(review): the function header for this body is not present in this
// listing (presumably Lexer::lex() -- confirm).
// Fetches the next token via scanToken(), records its length, and updates the
// restricted-keyword flag and the parentheses-tracking state that
// syncProhibitAutomaticSemicolon() consults.
176 _tokenSpell = QStringRef();
177 int token = scanToken();
// _codePtr is one past the current (look-ahead) character, hence the -1.
178 _tokenLength = _codePtr - _tokenStartPtr - 1;
181 _restrictedKeyword = false;
// NOTE(review): the token switch that selects among the following assignments
// is not visible in this listing.
194 _parenthesesState = CountParentheses;
195 _parenthesesCount = 0;
199 _parenthesesState = BalancedParentheses;
206 _restrictedKeyword = true;
210 // update the parentheses state
211 switch (_parenthesesState) {
212 case IgnoreParentheses:
215 case CountParentheses:
216 if (token == T_RPAREN) {
// A count of zero after a ')' switches the state to BalancedParentheses.
218 if (_parenthesesCount == 0)
219 _parenthesesState = BalancedParentheses;
220 } else if (token == T_LPAREN) {
225 case BalancedParentheses:
226 _parenthesesState = IgnoreParentheses;
// Tests whether the four characters starting at `chars` are all hexadecimal
// digits, i.e. whether they can form the XXXX part of a \uXXXX escape.
// The checks short-circuit, so reading stops at the first non-hex character.
233 bool Lexer::isUnicodeEscapeSequence(const QChar *chars)
235 if (isHexDigit(chars[0]) && isHexDigit(chars[1]) && isHexDigit(chars[2]) && isHexDigit(chars[3]))
// Decodes a uXXXX escape at the current position (the leading backslash has
// already been consumed by the caller) and returns the resulting character.
// The escape is only decoded when _char is 'u' and the next four characters
// are hexadecimal digits.
// NOTE(review): how *ok is set and what is returned on failure is not visible
// in this listing; callers in scanToken() pass `ok` and report
// IllegalUnicodeEscapeSequence.
241 QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
243 if (_char == QLatin1Char('u') && isUnicodeEscapeSequence(&_codePtr[0])) {
244 scanChar(); // skip u
// Capture the four hex digits one at a time.
246 const QChar c1 = _char;
249 const QChar c2 = _char;
252 const QChar c3 = _char;
255 const QChar c4 = _char;
261 return convertUnicode(c1, c2, c3, c4);
// Scans the next token starting at _char and returns its token kind (T_*).
//
// A token previously queued in _stackToken is handled first. Otherwise leading
// whitespace is skipped (a newline seen while _restrictedKeyword is set takes
// the automatic-semicolon path), the token start pointer and line are recorded,
// and the token is recognized by switching on its first character: punctuators
// and operators, comments, numeric literals, string literals, and finally
// identifiers/keywords and digits. Lexical errors are reported through
// _errorCode and _errorMessage.
//
// Throughout this function _char is the current character and _codePtr[0] is
// the character that follows it.
268 int Lexer::scanToken()
270 if (_stackToken != -1) {
271 int tk = _stackToken;
279 _validTokenText = false;
280 _tokenLinePtr = _lastLinePtr;
// Skip whitespace, tracking line starts for column computation.
282 while (QDeclarativeUtils::isSpace(_char)) {
283 if (_char == QLatin1Char('\n')) {
284 _tokenLinePtr = _codePtr;
286 if (_restrictedKeyword) {
287 // automatic semicolon insertion
288 _tokenLine = _currentLineNumber;
289 _tokenStartPtr = _codePtr - 1; // ### TODO: insert it before the optional \r sequence.
293 syncProhibitAutomaticSemicolon();
// _codePtr is one past _char, so the token starts at _codePtr - 1.
300 _tokenStartPtr = _codePtr - 1;
301 _tokenLine = _currentLineNumber;
306 const QChar ch = _char;
309 switch (ch.unicode()) {
310 case '~': return T_TILDE;
311 case '}': return T_RBRACE;
314 if (_char == QLatin1Char('|')) {
317 } else if (_char == QLatin1Char('=')) {
323 case '{': return T_LBRACE;
326 if (_char == QLatin1Char('=')) {
332 case ']': return T_RBRACKET;
333 case '[': return T_LBRACKET;
334 case '?': return T_QUESTION;
337 if (_char == QLatin1Char('>')) {
339 if (_char == QLatin1Char('>')) {
341 if (_char == QLatin1Char('=')) {
343 return T_GT_GT_GT_EQ;
346 } else if (_char == QLatin1Char('=')) {
351 } else if (_char == QLatin1Char('=')) {
358 if (_char == QLatin1Char('=')) {
360 if (_char == QLatin1Char('=')) {
369 if (_char == QLatin1Char('=')) {
372 } else if (_char == QLatin1Char('<')) {
374 if (_char == QLatin1Char('=')) {
382 case ';': return T_SEMICOLON;
383 case ':': return T_COLON;
// '/' : block comment, line comment, or a divide operator.
386 if (_char == QLatin1Char('*')) {
388 while (!_char.isNull()) {
389 if (_char == QLatin1Char('*')) {
391 if (_char == QLatin1Char('/')) {
399 } else if (_char == QLatin1Char('/')) {
400 while (!_char.isNull() && _char != QLatin1Char('\n')) {
404 } if (_char == QLatin1Char('=')) {
// '.' followed by a digit: a numeric literal with a leading decimal point.
411 if (QDeclarativeUtils::isDigit(_char)) {
412 QVarLengthArray<char,32> chars;
414 chars.append(ch.unicode()); // append the `.'
416 while (QDeclarativeUtils::isDigit(_char)) {
417 chars.append(_char.unicode());
421 if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
// Only treat 'e' as an exponent when digits (optionally signed) follow it.
422 if (QDeclarativeUtils::isDigit(_codePtr[0]) || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
423 QDeclarativeUtils::isDigit(_codePtr[1]))) {
425 chars.append(_char.unicode());
426 scanChar(); // consume `e'
428 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
429 chars.append(_char.unicode());
430 scanChar(); // consume the sign
433 while (QDeclarativeUtils::isDigit(_char)) {
434 chars.append(_char.unicode());
442 const char *begin = chars.constData();
446 _tokenValue = qstrtod(begin, &end, &ok);
448 if (end - begin != chars.size() - 1) {
449 _errorCode = IllegalExponentIndicator;
450 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal syntax for exponential number");
454 return T_NUMERIC_LITERAL;
459 if (_char == QLatin1Char('=')) {
462 } else if (_char == QLatin1Char('-')) {
465 if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
// Queue the "--" that was just consumed; it is picked up from _stackToken on
// the next call (the '+' case below queues T_PLUS_PLUS the same way).
466 _stackToken = T_MINUS_MINUS;
470 return T_MINUS_MINUS;
474 case ',': return T_COMMA;
477 if (_char == QLatin1Char('=')) {
480 } else if (_char == QLatin1Char('+')) {
483 if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
484 _stackToken = T_PLUS_PLUS;
493 if (_char == QLatin1Char('=')) {
499 case ')': return T_RPAREN;
500 case '(': return T_LPAREN;
503 if (_char == QLatin1Char('=')) {
506 } else if (_char == QLatin1Char('&')) {
513 if (_char == QLatin1Char('=')) {
515 return T_REMAINDER_EQ;
520 if (_char == QLatin1Char('=')) {
522 if (_char == QLatin1Char('=')) {
// String literal. `ch` is the opening quote character.
532 const QChar quote = ch;
533 _validTokenText = true;
535 bool multilineStringLiteral = false;
537 const QChar *startCode = _codePtr;
// Fast path: a literal without escapes or newlines is referenced directly in
// the source text instead of being copied into _tokenText.
539 while (!_char.isNull()) {
540 if (_char == QLatin1Char('\n') || _char == QLatin1Char('\\')) {
542 } else if (_char == quote) {
543 _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode);
546 return T_STRING_LITERAL;
// Slow path: copy what was scanned so far, then decode escapes one by one.
551 _tokenText.resize(0);
553 while (startCode != _codePtr - 1)
554 _tokenText += *startCode++;
556 while (! _char.isNull()) {
557 if (_char == QLatin1Char('\n')) {
558 multilineStringLiteral = true;
561 } else if (_char == quote) {
565 _tokenSpell = _engine->newStringRef(_tokenText);
567 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
568 } else if (_char == QLatin1Char('\\')) {
574 switch (_char.unicode()) {
575 // unicode escape sequence
577 u = decodeUnicodeEscapeCharacter(&ok);
582 // hex escape sequence
585 if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
588 const QChar c1 = _char;
591 const QChar c2 = _char;
594 u = convertHex(c1, c2);
600 // single character escape sequence
// An escaped backslash denotes a single backslash character.
601 case '\\': u = QLatin1Char('\\'); scanChar(); break;
602 case '\'': u = QLatin1Char('\''); scanChar(); break;
603 case '\"': u = QLatin1Char('\"'); scanChar(); break;
604 case 'b': u = QLatin1Char('\b'); scanChar(); break;
605 case 'f': u = QLatin1Char('\f'); scanChar(); break;
606 case 'n': u = QLatin1Char('\n'); scanChar(); break;
607 case 'r': u = QLatin1Char('\r'); scanChar(); break;
608 case 't': u = QLatin1Char('\t'); scanChar(); break;
609 case 'v': u = QLatin1Char('\v'); scanChar(); break;
// _char is '0' here, so the character that follows it is _codePtr[0]
// (same look-ahead convention as the hex escape above).
612 if (! _codePtr[0].isDigit()) {
614 u = QLatin1Char('\0');
616 // ### parse deprecated octal escape sequence ?
622 while (_char == QLatin1Char('\r'))
629 u = QLatin1Char('\n');
640 // non escape character
652 _errorCode = UnclosedStringLiteral;
653 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unclosed string at end of line");
// Identifier or keyword; may start with, or contain, \uXXXX escapes.
658 if (QDeclarativeUtils::isLetter(ch) || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) {
659 bool identifierWithEscapeChars = false;
660 if (ch == QLatin1Char('\\')) {
661 identifierWithEscapeChars = true;
662 _tokenText.resize(0);
664 _tokenText += decodeUnicodeEscapeCharacter(&ok);
665 _validTokenText = true;
667 _errorCode = IllegalUnicodeEscapeSequence;
668 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal unicode escape sequence");
673 if (QDeclarativeUtils::isLetterOrNumber(_char) || _char == QLatin1Char('$') || _char == QLatin1Char('_')) {
674 if (identifierWithEscapeChars)
678 } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
// First escape inside the identifier: switch from referencing the source text
// to building the decoded spelling in _tokenText.
679 if (! identifierWithEscapeChars) {
680 identifierWithEscapeChars = true;
681 _tokenText.resize(0);
682 _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
683 _validTokenText = true;
686 scanChar(); // skip '\\'
688 _tokenText += decodeUnicodeEscapeCharacter(&ok);
690 _errorCode = IllegalUnicodeEscapeSequence;
691 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal unicode escape sequence");
695 _tokenLength = _codePtr - _tokenStartPtr - 1;
697 int kind = T_IDENTIFIER;
// Only identifiers written without escapes are classified as keywords.
699 if (! identifierWithEscapeChars)
700 kind = classify(_tokenStartPtr, _tokenLength);
703 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
704 _tokenSpell = _engine->newStringRef(_tokenText);
706 _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
712 } else if (QDeclarativeUtils::isDigit(ch)) {
// Fast path for plain decimal integers. The value is accumulated as a double:
// _tokenValue is a double, and an int accumulator overflows (undefined
// behavior) on literals with many digits.
714 double integer = ch.unicode() - '0';
717 const QChar *code = _codePtr;
718 while (QDeclarativeUtils::isDigit(n)) {
719 integer = integer * 10 + (n.unicode() - '0');
723 if (n != QLatin1Char('.') && n != QLatin1Char('e') && n != QLatin1Char('E')) {
726 _tokenValue = integer;
727 return T_NUMERIC_LITERAL;
// General path: collect the literal's characters and convert them afterwards.
731 QVarLengthArray<char,32> chars;
732 chars.append(ch.unicode());
734 if (ch == QLatin1Char('0') && (_char == 'x' || _char == 'X')) {
735 // parse hex integer literal
737 chars.append(_char.unicode());
738 scanChar(); // consume `x'
740 while (isHexDigit(_char)) {
741 chars.append(_char.unicode());
745 _tokenValue = integerFromString(chars.constData(), chars.size(), 16);
746 return T_NUMERIC_LITERAL;
749 // decimal integer literal
750 while (QDeclarativeUtils::isDigit(_char)) {
751 chars.append(_char.unicode());
752 scanChar(); // consume the digit
755 if (_char == QLatin1Char('.')) {
756 chars.append(_char.unicode());
757 scanChar(); // consume `.'
759 while (QDeclarativeUtils::isDigit(_char)) {
760 chars.append(_char.unicode());
764 if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
765 if (QDeclarativeUtils::isDigit(_codePtr[0]) || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
766 QDeclarativeUtils::isDigit(_codePtr[1]))) {
768 chars.append(_char.unicode());
769 scanChar(); // consume `e'
771 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
772 chars.append(_char.unicode());
773 scanChar(); // consume the sign
776 while (QDeclarativeUtils::isDigit(_char)) {
777 chars.append(_char.unicode());
782 } else if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
783 if (QDeclarativeUtils::isDigit(_codePtr[0]) || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
784 QDeclarativeUtils::isDigit(_codePtr[1]))) {
786 chars.append(_char.unicode());
787 scanChar(); // consume `e'
789 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
790 chars.append(_char.unicode());
791 scanChar(); // consume the sign
794 while (QDeclarativeUtils::isDigit(_char)) {
795 chars.append(_char.unicode());
803 const char *begin = chars.constData();
807 _tokenValue = qstrtod(begin, &end, &ok);
// qstrtod() must have consumed the whole buffer, otherwise the literal is malformed.
809 if (end - begin != chars.size() - 1) {
810 _errorCode = IllegalExponentIndicator;
811 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal syntax for exponential number");
815 return T_NUMERIC_LITERAL;
// Scans the remainder of a regular-expression literal into _tokenText and
// collects its flags into _patternFlags.
//   prefix - when EqualPrefix, a '=' is put at the start of the pattern text.
// On malformed input a translated message is stored in _errorMessage.
// NOTE(review): the return statements and the enclosing loop are not visible
// in this listing; the bool result presumably signals success -- confirm.
824 bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
826 _tokenText.resize(0);
827 _validTokenText = true;
830 if (prefix == EqualPrefix)
831 _tokenText += QLatin1Char('=');
834 switch (_char.unicode()) {
836 case '\n': case '\r': // line terminator
837 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression literal");
// Flags: every identifier letter must map to a known flag.
845 while (isIdentLetter(_char)) {
846 int flag = flagFromChar(_char);
848 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Invalid regular expression flag '%0'")
852 _patternFlags |= flag;
858 // regular expression backslash sequence
862 if (_char.isNull() || isLineTerminator()) {
863 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression backslash sequence");
872 // regular expression class
// Scan up to the closing ']' on the same line, allowing backslash sequences.
876 while (! _char.isNull() && ! isLineTerminator()) {
877 if (_char == QLatin1Char(']'))
879 else if (_char == QLatin1Char('\\')) {
880 // regular expression backslash sequence
884 if (_char.isNull() || isLineTerminator()) {
885 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression backslash sequence");
897 if (_char != QLatin1Char(']')) {
898 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression class");
903 scanChar(); // skip ]
// Returns true when the current character is a line feed or a carriage return.
915 bool Lexer::isLineTerminator() const
917 return (_char == QLatin1Char('\n') || _char == QLatin1Char('\r'));
// Tests whether `ch` may appear in an identifier: ASCII letters, '$' and '_'
// are checked first; characters outside the ASCII range are decided by
// QChar::isLetterOrNumber().
// NOTE(review): the return statements of the two ASCII branches are not
// visible in this listing.
920 bool Lexer::isIdentLetter(QChar ch)
922 // ASCII-biased, since all reserved words are ASCII, and hence the
923 // bulk of content to be parsed.
924 if ((ch >= QLatin1Char('a') && ch <= QLatin1Char('z'))
925 || (ch >= QLatin1Char('A') && ch <= QLatin1Char('Z'))
926 || ch == QLatin1Char('$')
927 || ch == QLatin1Char('_'))
929 if (ch.unicode() < 128)
931 return ch.isLetterOrNumber();
// Returns true when the code unit `c` is one of '0'..'9'.
934 bool Lexer::isDecimalDigit(ushort c)
936 return (c >= '0' && c <= '9');
// Returns true when `c` is a hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F'.
939 bool Lexer::isHexDigit(QChar c)
941 return ((c >= QLatin1Char('0') && c <= QLatin1Char('9'))
942 || (c >= QLatin1Char('a') && c <= QLatin1Char('f'))
943 || (c >= QLatin1Char('A') && c <= QLatin1Char('F')));
// Returns true when the code unit `c` is one of '0'..'7'.
946 bool Lexer::isOctalDigit(ushort c)
948 return (c >= '0' && c <= '7');
// Returns the offset of the current token's first character, measured in
// QChars from the start of _code.
951 int Lexer::tokenOffset() const
953 return _tokenStartPtr - _code.unicode();
// Accessor for the length of the current token.
// NOTE(review): the body is not visible in this listing; presumably it returns
// _tokenLength -- confirm.
956 int Lexer::tokenLength() const
// Accessor for the line on which the current token starts.
// NOTE(review): the body is not visible in this listing; presumably it returns
// _tokenLine -- confirm.
961 int Lexer::tokenStartLine() const
// Returns the 1-based column of the current token's first character, relative
// to the start of the token's line (_tokenLinePtr).
966 int Lexer::tokenStartColumn() const
968 return _tokenStartPtr - _tokenLinePtr + 1;
// Returns the current line number, i.e. the line the scanner has reached after
// the current token.
971 int Lexer::tokenEndLine() const
973 return _currentLineNumber;
// Returns the distance from the start of the current line (_lastLinePtr) to
// the scan pointer (_codePtr).
976 int Lexer::tokenEndColumn() const
978 return _codePtr - _lastLinePtr;
// Accessor for the spelling of the current token as a QStringRef.
// NOTE(review): the body is not visible in this listing; presumably it returns
// _tokenSpell -- confirm.
981 QStringRef Lexer::tokenSpell() const
// Accessor for the numeric value of the current token.
// NOTE(review): the body is not visible in this listing; presumably it returns
// _tokenValue, which scanToken() sets for numeric literals -- confirm.
986 double Lexer::tokenValue() const
// Returns the text of the current token. The visible path copies _tokenLength
// characters starting at _tokenStartPtr from the source text.
// NOTE(review): an earlier branch of this function is not visible in this
// listing (presumably it returns _tokenText when _validTokenText is set --
// confirm).
991 QString Lexer::tokenText() const
996 return QString(_tokenStartPtr, _tokenLength);
// Accessor for the code of the last lexical error.
// NOTE(review): the body is not visible in this listing; presumably it returns
// _errorCode -- confirm.
999 Lexer::Error Lexer::errorCode() const
// Returns the message stored in _errorMessage by the scanning functions
// (empty after setCode(), which clears it).
1004 QString Lexer::errorMessage() const
1006 return _errorMessage;
// Updates _prohibitAutomaticSemicolon from the parentheses-tracking state:
// it becomes true when the state is BalancedParentheses (and the state is then
// reset to IgnoreParentheses); the other visible assignment clears it.
1009 void Lexer::syncProhibitAutomaticSemicolon()
1011 if (_parenthesesState == BalancedParentheses) {
1012 // we have seen something like "if (foo)", which means we should
1013 // never insert an automatic semicolon at this point, since it would
1014 // then be expanded into an empty statement (ECMA-262 7.9.1)
1015 _prohibitAutomaticSemicolon = true;
1016 _parenthesesState = IgnoreParentheses;
1018 _prohibitAutomaticSemicolon = false;
// NOTE(review): the body is not visible in this listing; judging by the name
// it presumably reports whether a line terminator preceded the current token
// -- confirm.
1022 bool Lexer::prevTerminator() const
1027 #include "qdeclarativejskeywords_p.h"