src/declarative/qml/parser/qdeclarativejslexer.cpp

   1 /****************************************************************************
   2 **
   3 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
   4 ** Contact: http://www.qt-project.org/
   5 **
   6 ** This file is part of the QtDeclarative module of the Qt Toolkit.
   7 **
   8 ** $QT_BEGIN_LICENSE:LGPL$
   9 ** GNU Lesser General Public License Usage
  10 ** This file may be used under the terms of the GNU Lesser General Public
  11 ** License version 2.1 as published by the Free Software Foundation and
  12 ** appearing in the file LICENSE.LGPL included in the packaging of this
  13 ** file. Please review the following information to ensure the GNU Lesser
  14 ** General Public License version 2.1 requirements will be met:
  15 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
  16 **
  17 ** In addition, as a special exception, Nokia gives you certain additional
  18 ** rights. These rights are described in the Nokia Qt LGPL Exception
  19 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
  20 **
  21 ** GNU General Public License Usage
  22 ** Alternatively, this file may be used under the terms of the GNU General
  23 ** Public License version 3.0 as published by the Free Software Foundation
  24 ** and appearing in the file LICENSE.GPL included in the packaging of this
  25 ** file. Please review the following information to ensure the GNU General
  26 ** Public License version 3.0 requirements will be met:
  27 ** http://www.gnu.org/copyleft/gpl.html.
  28 **
  29 ** Other Usage
  30 ** Alternatively, this file may be used in accordance with the terms and
  31 ** conditions contained in a signed written agreement between you and Nokia.
  32 **
  33 **
  34 **
  35 **
  36 **
  37 **
  38 ** $QT_END_LICENSE$
  39 **
  40 ****************************************************************************/
  41
  42 #include "qdeclarativejslexer_p.h"
  43 #include "qdeclarativejsengine_p.h"
  44 #include "qdeclarativejsmemorypool_p.h"
  45
  46 #include <QtCore/QCoreApplication>
  47 #include <QtCore/QVarLengthArray>
  48 #include <QtCore/QDebug>
  49
  50 QT_BEGIN_NAMESPACE
  51 Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok);
  52 QT_END_NAMESPACE
  53
  54 using namespace QDeclarativeJS;
  55
  56 static int regExpFlagFromChar(const QChar &ch)
  57 {
  58     switch (ch.unicode()) {
  59     case 'g': return Lexer::RegExp_Global;
  60     case 'i': return Lexer::RegExp_IgnoreCase;
  61     case 'm': return Lexer::RegExp_Multiline;
  62     }
  63     return 0;
  64 }
  65
  66 static unsigned char convertHex(ushort c)
  67 {
  68     if (c >= '0' && c <= '9')
  69         return (c - '0');
  70     else if (c >= 'a' && c <= 'f')
  71         return (c - 'a' + 10);
  72     else
  73         return (c - 'A' + 10);
  74 }
  75
  76 static QChar convertHex(QChar c1, QChar c2)
  77 {
  78     return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
  79 }
  80
  81 static QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4)
  82 {
  83     return QChar((convertHex(c3.unicode()) << 4) + convertHex(c4.unicode()),
  84                  (convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
  85 }
  86
  87 Lexer::Lexer(Engine *engine)
  88     : _engine(engine)
  89     , _codePtr(0)
  90     , _lastLinePtr(0)
  91     , _tokenLinePtr(0)
  92     , _tokenStartPtr(0)
  93     , _char(QLatin1Char('\n'))
  94     , _errorCode(NoError)
  95     , _currentLineNumber(0)
  96     , _tokenValue(0)
  97     , _parenthesesState(IgnoreParentheses)
  98     , _parenthesesCount(0)
  99     , _stackToken(-1)
 100     , _patternFlags(0)
 101     , _tokenKind(0)
 102     , _tokenLength(0)
 103     , _tokenLine(0)
 104     , _validTokenText(false)
 105     , _prohibitAutomaticSemicolon(false)
 106     , _restrictedKeyword(false)
 107     , _terminator(false)
 108     , _followsClosingBrace(false)
 109     , _delimited(true)
 110     , _qmlMode(true)
 111 {
 112     if (engine)
 113         engine->setLexer(this);
 114 }
 115
 116 bool Lexer::qmlMode() const
 117 {
 118     return _qmlMode;
 119 }
 120
 121 QString Lexer::code() const
 122 {
 123     return _code;
 124 }
 125
 126 void Lexer::setCode(const QString &code, int lineno, bool qmlMode)
 127 {
 128     if (_engine)
 129         _engine->setCode(code);
 130
 131     _qmlMode = qmlMode;
 132     _code = code;
 133     _tokenText.clear();
 134     _tokenText.reserve(1024);
 135     _errorMessage.clear();
 136     _tokenSpell = QStringRef();
 137
 138     _codePtr = code.unicode();
 139     _lastLinePtr = _codePtr;
 140     _tokenLinePtr = _codePtr;
 141     _tokenStartPtr = _codePtr;
 142
 143     _char = QLatin1Char('\n');
 144     _errorCode = NoError;
 145
 146     _currentLineNumber = lineno;
 147     _tokenValue = 0;
 148
 149     // parentheses state
 150     _parenthesesState = IgnoreParentheses;
 151     _parenthesesCount = 0;
 152
 153     _stackToken = -1;
 154
 155     _patternFlags = 0;
 156     _tokenLength = 0;
 157     _tokenLine = lineno;
 158
 159     _validTokenText = false;
 160     _prohibitAutomaticSemicolon = false;
 161     _restrictedKeyword = false;
 162     _terminator = false;
 163     _followsClosingBrace = false;
 164     _delimited = true;
 165 }
 166
 167 void Lexer::scanChar()
 168 {
 169     _char = *_codePtr++;
 170
 171     if (_char == QLatin1Char('\n')) {
 172         _lastLinePtr = _codePtr; // points to the first character after the newline
 173         ++_currentLineNumber;
 174     }
 175 }
 176
 177 int Lexer::lex()
 178 {
 179     const int previousTokenKind = _tokenKind;
 180
 181     _tokenSpell = QStringRef();
 182     _tokenKind = scanToken();
 183     _tokenLength = _codePtr - _tokenStartPtr - 1;
 184
 185     _delimited = false;
 186     _restrictedKeyword = false;
 187     _followsClosingBrace = (previousTokenKind == T_RBRACE);
 188
 189     // update the flags
 190     switch (_tokenKind) {
 191     case T_LBRACE:
 192     case T_SEMICOLON:
 193     case T_COLON:
 194         _delimited = true;
 195         break;
 196
 197     case T_IF:
 198     case T_FOR:
 199     case T_WHILE:
 200     case T_WITH:
 201         _parenthesesState = CountParentheses;
 202         _parenthesesCount = 0;
 203         break;
 204
 205     case T_DO:
 206         _parenthesesState = BalancedParentheses;
 207         break;
 208
 209     case T_CONTINUE:
 210     case T_BREAK:
 211     case T_RETURN:
 212     case T_THROW:
 213         _restrictedKeyword = true;
 214         break;
 215     } // switch
 216
 217     // update the parentheses state
 218     switch (_parenthesesState) {
 219     case IgnoreParentheses:
 220         break;
 221
 222     case CountParentheses:
 223         if (_tokenKind == T_RPAREN) {
 224             --_parenthesesCount;
 225             if (_parenthesesCount == 0)
 226                 _parenthesesState = BalancedParentheses;
 227         } else if (_tokenKind == T_LPAREN) {
 228             ++_parenthesesCount;
 229         }
 230         break;
 231
 232     case BalancedParentheses:
 233         _parenthesesState = IgnoreParentheses;
 234         break;
 235     } // switch
 236
 237     return _tokenKind;
 238 }
 239
 240 bool Lexer::isUnicodeEscapeSequence(const QChar *chars)
 241 {
 242     if (isHexDigit(chars[0]) && isHexDigit(chars[1]) && isHexDigit(chars[2]) && isHexDigit(chars[3]))
 243         return true;
 244
 245     return false;
 246 }
 247
 248 QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
 249 {
 250     if (_char == QLatin1Char('u') && isUnicodeEscapeSequence(&_codePtr[0])) {
 251         scanChar(); // skip u
 252
 253         const QChar c1 = _char;
 254         scanChar();
 255
 256         const QChar c2 = _char;
 257         scanChar();
 258
 259         const QChar c3 = _char;
 260         scanChar();
 261
 262         const QChar c4 = _char;
 263         scanChar();
 264
 265         if (ok)
 266             *ok = true;
 267
 268         return convertUnicode(c1, c2, c3, c4);
 269     }
 270
 271     *ok = false;
 272     return QChar();
 273 }
 274
 275 int Lexer::scanToken()
 276 {
 277     if (_stackToken != -1) {
 278         int tk = _stackToken;
 279         _stackToken = -1;
 280         return tk;
 281     }
 282
 283     _terminator = false;
 284
 285 again:
 286     _validTokenText = false;
 287     _tokenLinePtr = _lastLinePtr;
 288
 289     while (_char.isSpace()) {
 290         if (_char == QLatin1Char('\n')) {
 291             _tokenLinePtr = _codePtr;
 292
 293             if (_restrictedKeyword) {
 294                 // automatic semicolon insertion
 295                 _tokenLine = _currentLineNumber;
 296                 _tokenStartPtr = _codePtr - 1; // ### TODO: insert it before the optional \r sequence.
 297                 return T_SEMICOLON;
 298             } else {
 299                 _terminator = true;
 300                 syncProhibitAutomaticSemicolon();
 301             }
 302         }
 303
 304         scanChar();
 305     }
 306
 307     _tokenStartPtr = _codePtr - 1;
 308     _tokenLine = _currentLineNumber;
 309
 310     if (_char.isNull())
 311         return EOF_SYMBOL;
 312
 313     const QChar ch = _char;
 314     scanChar();
 315
 316     switch (ch.unicode()) {
 317     case '~': return T_TILDE;
 318     case '}': return T_RBRACE;
 319
 320     case '|':
 321         if (_char == QLatin1Char('|')) {
 322             scanChar();
 323             return T_OR_OR;
 324         } else if (_char == QLatin1Char('=')) {
 325             scanChar();
 326             return T_OR_EQ;
 327         }
 328         return T_OR;
 329
 330     case '{': return T_LBRACE;
 331
 332     case '^':
 333         if (_char == QLatin1Char('=')) {
 334             scanChar();
 335             return T_XOR_EQ;
 336         }
 337         return T_XOR;
 338
 339     case ']': return T_RBRACKET;
 340     case '[': return T_LBRACKET;
 341     case '?': return T_QUESTION;
 342
 343     case '>':
 344         if (_char == QLatin1Char('>')) {
 345             scanChar();
 346             if (_char == QLatin1Char('>')) {
 347                 scanChar();
 348                 if (_char == QLatin1Char('=')) {
 349                     scanChar();
 350                     return T_GT_GT_GT_EQ;
 351                 }
 352                 return T_GT_GT_GT;
 353             } else if (_char == QLatin1Char('=')) {
 354                 scanChar();
 355                 return T_GT_GT_EQ;
 356             }
 357             return T_GT_GT;
 358         } else if (_char == QLatin1Char('=')) {
 359             scanChar();
 360             return T_GE;
 361         }
 362         return T_GT;
 363
 364     case '=':
 365         if (_char == QLatin1Char('=')) {
 366             scanChar();
 367             if (_char == QLatin1Char('=')) {
 368                 scanChar();
 369                 return T_EQ_EQ_EQ;
 370             }
 371             return T_EQ_EQ;
 372         }
 373         return T_EQ;
 374
 375     case '<':
 376         if (_char == QLatin1Char('=')) {
 377             scanChar();
 378             return T_LE;
 379         } else if (_char == QLatin1Char('<')) {
 380             scanChar();
 381             if (_char == QLatin1Char('=')) {
 382                 scanChar();
 383                 return T_LT_LT_EQ;
 384             }
 385             return T_LT_LT;
 386         }
 387         return T_LT;
 388
 389     case ';': return T_SEMICOLON;
 390     case ':': return T_COLON;
 391
 392     case '/':
 393         if (_char == QLatin1Char('*')) {
 394             scanChar();
 395             while (!_char.isNull()) {
 396                 if (_char == QLatin1Char('*')) {
 397                     scanChar();
 398                     if (_char == QLatin1Char('/')) {
 399                         scanChar();
 400
 401                         if (_engine) {
 402                             _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 4,
 403                                                 tokenStartLine(), tokenStartColumn() + 2);
 404                         }
 405
 406                         goto again;
 407                     }
 408                 } else {
 409                     scanChar();
 410                 }
 411             }
 412         } else if (_char == QLatin1Char('/')) {
 413             while (!_char.isNull() && _char != QLatin1Char('\n')) {
 414                 scanChar();
 415             }
 416             if (_engine) {
 417                 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2,
 418                                     tokenStartLine(), tokenStartColumn() + 2);
 419             }
 420             goto again;
 421         } if (_char == QLatin1Char('=')) {
 422             scanChar();
 423             return T_DIVIDE_EQ;
 424         }
 425         return T_DIVIDE_;
 426
 427     case '.':
 428         if (_char.isDigit()) {
 429             QVarLengthArray<char,32> chars;
 430
 431             chars.append(ch.unicode()); // append the `.'
 432
 433             while (_char.isDigit()) {
 434                 chars.append(_char.unicode());
 435                 scanChar();
 436             }
 437
 438             if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
 439                 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
 440                                               _codePtr[1].isDigit())) {
 441
 442                     chars.append(_char.unicode());
 443                     scanChar(); // consume `e'
 444
 445                     if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
 446                         chars.append(_char.unicode());
 447                         scanChar(); // consume the sign
 448                     }
 449
 450                     while (_char.isDigit()) {
 451                         chars.append(_char.unicode());
 452                         scanChar();
 453                     }
 454                 }
 455             }
 456
 457             chars.append('\0');
 458
 459             const char *begin = chars.constData();
 460             const char *end = 0;
 461             bool ok = false;
 462
 463             _tokenValue = qstrtod(begin, &end, &ok);
 464
 465             if (end - begin != chars.size() - 1) {
 466                 _errorCode = IllegalExponentIndicator;
 467                 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal syntax for exponential number");
 468                 return T_ERROR;
 469             }
 470
 471             return T_NUMERIC_LITERAL;
 472         }
 473         return T_DOT;
 474
 475     case '-':
 476         if (_char == QLatin1Char('=')) {
 477             scanChar();
 478             return T_MINUS_EQ;
 479         } else if (_char == QLatin1Char('-')) {
 480             scanChar();
 481
 482             if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
 483                 _stackToken = T_MINUS_MINUS;
 484                 return T_SEMICOLON;
 485             }
 486
 487             return T_MINUS_MINUS;
 488         }
 489         return T_MINUS;
 490
 491     case ',': return T_COMMA;
 492
 493     case '+':
 494         if (_char == QLatin1Char('=')) {
 495             scanChar();
 496             return T_PLUS_EQ;
 497         } else if (_char == QLatin1Char('+')) {
 498             scanChar();
 499
 500             if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
 501                 _stackToken = T_PLUS_PLUS;
 502                 return T_SEMICOLON;
 503             }
 504
 505             return T_PLUS_PLUS;
 506         }
 507         return T_PLUS;
 508
 509     case '*':
 510         if (_char == QLatin1Char('=')) {
 511             scanChar();
 512             return T_STAR_EQ;
 513         }
 514         return T_STAR;
 515
 516     case ')': return T_RPAREN;
 517     case '(': return T_LPAREN;
 518
 519     case '&':
 520         if (_char == QLatin1Char('=')) {
 521             scanChar();
 522             return T_AND_EQ;
 523         } else if (_char == QLatin1Char('&')) {
 524             scanChar();
 525             return T_AND_AND;
 526         }
 527         return T_AND;
 528
 529     case '%':
 530         if (_char == QLatin1Char('=')) {
 531             scanChar();
 532             return T_REMAINDER_EQ;
 533         }
 534         return T_REMAINDER;
 535
 536     case '!':
 537         if (_char == QLatin1Char('=')) {
 538             scanChar();
 539             if (_char == QLatin1Char('=')) {
 540                 scanChar();
 541                 return T_NOT_EQ_EQ;
 542             }
 543             return T_NOT_EQ;
 544         }
 545         return T_NOT;
 546
 547     case '\'':
 548     case '"': {
 549         const QChar quote = ch;
 550         bool multilineStringLiteral = false;
 551
 552         const QChar *startCode = _codePtr;
 553
 554         if (_engine) {
 555             while (!_char.isNull()) {
 556                 if (_char == QLatin1Char('\n') || _char == QLatin1Char('\\')) {
 557                     break;
 558                 } else if (_char == quote) {
 559                     _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode);
 560                     scanChar();
 561
 562                     return T_STRING_LITERAL;
 563                 }
 564                 scanChar();
 565             }
 566         }
 567
 568         _validTokenText = true;
 569         _tokenText.resize(0);
 570         startCode--;
 571         while (startCode != _codePtr - 1)
 572             _tokenText += *startCode++;
 573
 574         while (! _char.isNull()) {
 575             if (_char == QLatin1Char('\n')) {
 576                 multilineStringLiteral = true;
 577                 _tokenText += _char;
 578                 scanChar();
 579             } else if (_char == quote) {
 580                 scanChar();
 581
 582                 if (_engine)
 583                     _tokenSpell = _engine->newStringRef(_tokenText);
 584
 585                 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
 586             } else if (_char == QLatin1Char('\\')) {
 587                 scanChar();
 588
 589                 QChar u;
 590                 bool ok = false;
 591
 592                 switch (_char.unicode()) {
 593                 // unicode escape sequence
 594                 case 'u':
 595                     u = decodeUnicodeEscapeCharacter(&ok);
 596                     if (! ok)
 597                         u = _char;
 598                     break;
 599
 600                 // hex escape sequence
 601                 case 'x':
 602                 case 'X':
 603                     if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
 604                         scanChar();
 605
 606                         const QChar c1 = _char;
 607                         scanChar();
 608
 609                         const QChar c2 = _char;
 610                         scanChar();
 611
 612                         u = convertHex(c1, c2);
 613                     } else {
 614                         u = _char;
 615                     }
 616                     break;
 617
 618                 // single character escape sequence
 619                 case '\\': u = QLatin1Char('\\'); scanChar(); break;
 620                 case '\'': u = QLatin1Char('\''); scanChar(); break;
 621                 case '\"': u = QLatin1Char('\"'); scanChar(); break;
 622                 case 'b':  u = QLatin1Char('\b'); scanChar(); break;
 623                 case 'f':  u = QLatin1Char('\f'); scanChar(); break;
 624                 case 'n':  u = QLatin1Char('\n'); scanChar(); break;
 625                 case 'r':  u = QLatin1Char('\r'); scanChar(); break;
 626                 case 't':  u = QLatin1Char('\t'); scanChar(); break;
 627                 case 'v':  u = QLatin1Char('\v'); scanChar(); break;
 628
 629                 case '0':
 630                     if (! _codePtr[1].isDigit()) {
 631                         scanChar();
 632                         u = QLatin1Char('\0');
 633                     } else {
 634                         // ### parse deprecated octal escape sequence ?
 635                         u = _char;
 636                     }
 637                     break;
 638
 639                 case '\r':
 640                     while (_char == QLatin1Char('\r'))
 641                         scanChar();
 642
 643                     if (_char == QLatin1Char('\n')) {
 644                         u = _char;
 645                         scanChar();
 646                     } else {
 647                         u = QLatin1Char('\n');
 648                     }
 649
 650                     break;
 651
 652                 case '\n':
 653                     u = _char;
 654                     scanChar();
 655                     break;
 656
 657                 default:
 658                     // non escape character
 659                     u = _char;
 660                     scanChar();
 661                 }
 662
 663                 _tokenText += u;
 664             } else {
 665                 _tokenText += _char;
 666                 scanChar();
 667             }
 668         }
 669
 670         _errorCode = UnclosedStringLiteral;
 671         _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unclosed string at end of line");
 672         return T_ERROR;
 673     }
 674
 675     default:
 676         if (ch.isLetter() || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) {
 677             bool identifierWithEscapeChars = false;
 678             if (ch == QLatin1Char('\\')) {
 679                 identifierWithEscapeChars = true;
 680                 _tokenText.resize(0);
 681                 bool ok = false;
 682                 _tokenText += decodeUnicodeEscapeCharacter(&ok);
 683                 _validTokenText = true;
 684                 if (! ok) {
 685                     _errorCode = IllegalUnicodeEscapeSequence;
 686                     _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal unicode escape sequence");
 687                     return T_ERROR;
 688                 }
 689             }
 690             while (true) {
 691                 if (_char.isLetterOrNumber() || _char == QLatin1Char('$') || _char == QLatin1Char('_')) {
 692                     if (identifierWithEscapeChars)
 693                         _tokenText += _char;
 694
 695                     scanChar();
 696                 } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
 697                     if (! identifierWithEscapeChars) {
 698                         identifierWithEscapeChars = true;
 699                         _tokenText.resize(0);
 700                         _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
 701                         _validTokenText = true;
 702                     }
 703
 704                     scanChar(); // skip '\\'
 705                     bool ok = false;
 706                     _tokenText += decodeUnicodeEscapeCharacter(&ok);
 707                     if (! ok) {
 708                         _errorCode = IllegalUnicodeEscapeSequence;
 709                         _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal unicode escape sequence");
 710                         return T_ERROR;
 711                     }
 712                 } else {
 713                     _tokenLength = _codePtr - _tokenStartPtr - 1;
 714
 715                     int kind = T_IDENTIFIER;
 716
 717                     if (! identifierWithEscapeChars)
 718                         kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
 719
 720                     if (_engine) {
 721                         if (kind == T_IDENTIFIER && identifierWithEscapeChars)
 722                             _tokenSpell = _engine->newStringRef(_tokenText);
 723                         else
 724                             _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
 725                     }
 726
 727                     return kind;
 728                 }
 729             }
 730         } else if (ch.isDigit()) {
 731             if (ch != QLatin1Char('0')) {
 732                 double integer = ch.unicode() - '0';
 733
 734                 QChar n = _char;
 735                 const QChar *code = _codePtr;
 736                 while (n.isDigit()) {
 737                     integer = integer * 10 + (n.unicode() - '0');
 738                     n = *code++;
 739                 }
 740
 741                 if (n != QLatin1Char('.') && n != QLatin1Char('e') && n != QLatin1Char('E')) {
 742                     if (code != _codePtr) {
 743                         _codePtr = code - 1;
 744                         scanChar();
 745                     }
 746                     _tokenValue = integer;
 747                     return T_NUMERIC_LITERAL;
 748                 }
 749             }
 750
 751             QVarLengthArray<char,32> chars;
 752             chars.append(ch.unicode());
 753
 754             if (ch == QLatin1Char('0') && (_char == QLatin1Char('x') || _char == QLatin1Char('X'))) {
 755                 // parse hex integer literal
 756
 757                 chars.append(_char.unicode());
 758                 scanChar(); // consume `x'
 759
 760                 while (isHexDigit(_char)) {
 761                     chars.append(_char.unicode());
 762                     scanChar();
 763                 }
 764
 765                 _tokenValue = integerFromString(chars.constData(), chars.size(), 16);
 766                 return T_NUMERIC_LITERAL;
 767             }
 768
 769             // decimal integer literal
 770             while (_char.isDigit()) {
 771                 chars.append(_char.unicode());
 772                 scanChar(); // consume the digit
 773             }
 774
 775             if (_char == QLatin1Char('.')) {
 776                 chars.append(_char.unicode());
 777                 scanChar(); // consume `.'
 778
 779                 while (_char.isDigit()) {
 780                     chars.append(_char.unicode());
 781                     scanChar();
 782                 }
 783
 784                 if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
 785                     if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
 786                                                   _codePtr[1].isDigit())) {
 787
 788                         chars.append(_char.unicode());
 789                         scanChar(); // consume `e'
 790
 791                         if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
 792                             chars.append(_char.unicode());
 793                             scanChar(); // consume the sign
 794                         }
 795
 796                         while (_char.isDigit()) {
 797                             chars.append(_char.unicode());
 798                             scanChar();
 799                         }
 800                     }
 801                 }
 802             } else if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
 803                 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
 804                                               _codePtr[1].isDigit())) {
 805
 806                     chars.append(_char.unicode());
 807                     scanChar(); // consume `e'
 808
 809                     if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
 810                         chars.append(_char.unicode());
 811                         scanChar(); // consume the sign
 812                     }
 813
 814                     while (_char.isDigit()) {
 815                         chars.append(_char.unicode());
 816                         scanChar();
 817                     }
 818                 }
 819             }
 820
 821             chars.append('\0');
 822
 823             const char *begin = chars.constData();
 824             const char *end = 0;
 825             bool ok = false;
 826
 827             _tokenValue = qstrtod(begin, &end, &ok);
 828
 829             if (end - begin != chars.size() - 1) {
 830                 _errorCode = IllegalExponentIndicator;
 831                 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Illegal syntax for exponential number");
 832                 return T_ERROR;
 833             }
 834
 835             return T_NUMERIC_LITERAL;
 836         }
 837
 838         break;
 839     }
 840
 841     return T_ERROR;
 842 }
 843
 844 bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
 845 {
 846     _tokenText.resize(0);
 847     _validTokenText = true;
 848     _patternFlags = 0;
 849
 850     if (prefix == EqualPrefix)
 851         _tokenText += QLatin1Char('=');
 852
 853     while (true) {
 854         switch (_char.unicode()) {
 855         case 0: // eof
 856         case '\n': case '\r': // line terminator
 857             _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression literal");
 858             return false;
 859
 860         case '/':
 861             scanChar();
 862
 863             // scan the flags
 864             _patternFlags = 0;
 865             while (isIdentLetter(_char)) {
 866                 int flag = regExpFlagFromChar(_char);
 867                 if (flag == 0) {
 868                     _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Invalid regular expression flag '%0'")
 869                              .arg(QChar(_char));
 870                     return false;
 871                 }
 872                 _patternFlags |= flag;
 873                 scanChar();
 874             }
 875
 876             _tokenLength = _codePtr - _tokenStartPtr - 1;
 877             return true;
 878
 879         case '\\':
 880             // regular expression backslash sequence
 881             _tokenText += _char;
 882             scanChar();
 883
 884             if (_char.isNull() || isLineTerminator()) {
 885                 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression backslash sequence");
 886                 return false;
 887             }
 888
 889             _tokenText += _char;
 890             scanChar();
 891             break;
 892
 893         case '[':
 894             // regular expression class
 895             _tokenText += _char;
 896             scanChar();
 897
 898             while (! _char.isNull() && ! isLineTerminator()) {
 899                 if (_char == QLatin1Char(']'))
 900                     break;
 901                 else if (_char == QLatin1Char('\\')) {
 902                     // regular expression backslash sequence
 903                     _tokenText += _char;
 904                     scanChar();
 905
 906                     if (_char.isNull() || isLineTerminator()) {
 907                         _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression backslash sequence");
 908                         return false;
 909                     }
 910
 911                     _tokenText += _char;
 912                     scanChar();
 913                 } else {
 914                     _tokenText += _char;
 915                     scanChar();
 916                 }
 917             }
 918
 919             if (_char != QLatin1Char(']')) {
 920                 _errorMessage = QCoreApplication::translate("QDeclarativeParser", "Unterminated regular expression class");
 921                 return false;
 922             }
 923
 924             _tokenText += _char;
 925             scanChar(); // skip ]
 926             break;
 927
 928         default:
 929             _tokenText += _char;
 930             scanChar();
 931         } // switch
 932     } // while
 933
 934     return false;
 935 }
 936
 937 bool Lexer::isLineTerminator() const
 938 {
 939     return (_char == QLatin1Char('\n') || _char == QLatin1Char('\r'));
 940 }
 941
 942 bool Lexer::isIdentLetter(QChar ch)
 943 {
 944     // ASCII-biased, since all reserved words are ASCII, aand hence the
 945     // bulk of content to be parsed.
 946     if ((ch >= QLatin1Char('a') && ch <= QLatin1Char('z'))
 947             || (ch >= QLatin1Char('A') && ch <= QLatin1Char('Z'))
 948             || ch == QLatin1Char('$')
 949             || ch == QLatin1Char('_'))
 950         return true;
 951     if (ch.unicode() < 128)
 952         return false;
 953     return ch.isLetterOrNumber();
 954 }
 955
 956 bool Lexer::isDecimalDigit(ushort c)
 957 {
 958     return (c >= '0' && c <= '9');
 959 }
 960
 961 bool Lexer::isHexDigit(QChar c)
 962 {
 963     return ((c >= QLatin1Char('0') && c <= QLatin1Char('9'))
 964             || (c >= QLatin1Char('a') && c <= QLatin1Char('f'))
 965             || (c >= QLatin1Char('A') && c <= QLatin1Char('F')));
 966 }
 967
 968 bool Lexer::isOctalDigit(ushort c)
 969 {
 970     return (c >= '0' && c <= '7');
 971 }
 972
 973 int Lexer::tokenKind() const
 974 {
 975     return _tokenKind;
 976 }
 977
 978 int Lexer::tokenOffset() const
 979 {
 980     return _tokenStartPtr - _code.unicode();
 981 }
 982
 983 int Lexer::tokenLength() const
 984 {
 985     return _tokenLength;
 986 }
 987
 988 int Lexer::tokenStartLine() const
 989 {
 990     return _tokenLine;
 991 }
 992
 993 int Lexer::tokenStartColumn() const
 994 {
 995     return _tokenStartPtr - _tokenLinePtr + 1;
 996 }
 997
 998 int Lexer::tokenEndLine() const
 999 {
1000     return _currentLineNumber;
1001 }
1002
1003 int Lexer::tokenEndColumn() const
1004 {
1005     return _codePtr - _lastLinePtr;
1006 }
1007
1008 QStringRef Lexer::tokenSpell() const
1009 {
1010     return _tokenSpell;
1011 }
1012
1013 double Lexer::tokenValue() const
1014 {
1015     return _tokenValue;
1016 }
1017
1018 QString Lexer::tokenText() const
1019 {
1020     if (_validTokenText)
1021         return _tokenText;
1022
1023     if (_tokenKind == T_STRING_LITERAL)
1024         return QString(_tokenStartPtr + 1, _tokenLength - 2);
1025
1026     return QString(_tokenStartPtr, _tokenLength);
1027 }
1028
1029 Lexer::Error Lexer::errorCode() const
1030 {
1031     return _errorCode;
1032 }
1033
1034 QString Lexer::errorMessage() const
1035 {
1036     return _errorMessage;
1037 }
1038
1039 void Lexer::syncProhibitAutomaticSemicolon()
1040 {
1041     if (_parenthesesState == BalancedParentheses) {
1042         // we have seen something like "if (foo)", which means we should
1043         // never insert an automatic semicolon at this point, since it would
1044         // then be expanded into an empty statement (ECMA-262 7.9.1)
1045         _prohibitAutomaticSemicolon = true;
1046         _parenthesesState = IgnoreParentheses;
1047     } else {
1048         _prohibitAutomaticSemicolon = false;
1049     }
1050 }
1051
1052 bool Lexer::prevTerminator() const
1053 {
1054     return _terminator;
1055 }
1056
1057 bool Lexer::followsClosingBrace() const
1058 {
1059     return _followsClosingBrace;
1060 }
1061
1062 bool Lexer::canInsertAutomaticSemicolon(int token) const
1063 {
1064     return token == T_RBRACE
1065             || token == EOF_SYMBOL
1066             || _terminator
1067             || _followsClosingBrace;
1068 }
1069
1070 bool Lexer::scanDirectives(Directives *directives)
1071 {
1072     if (_qmlMode) {
1073         // the directives are a Javascript-only extension.
1074         return false;
1075     }
1076
1077     lex(); // fetch the first token
1078
1079     if (_tokenKind != T_DOT)
1080         return true;
1081
1082     do {
1083         lex(); // skip T_DOT
1084
1085         const int lineNumber = tokenStartLine();
1086
1087         if (! (_tokenKind == T_IDENTIFIER || _tokenKind == T_RESERVED_WORD))
1088             return false; // expected a valid QML/JS directive
1089
1090         const QString directiveName = tokenText();
1091
1092         if (! (directiveName == QLatin1String("pragma") ||
1093                directiveName == QLatin1String("import")))
1094             return false; // not a valid directive name
1095
1096         // it must be a pragma or an import directive.
1097         if (directiveName == QLatin1String("pragma")) {
1098             // .pragma library
1099             if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library")))
1100                 return false; // expected `library
1101
1102             // we found a .pragma library directive
1103             directives->pragmaLibrary();
1104
1105         } else {
1106             Q_ASSERT(directiveName == QLatin1String("import"));
1107             lex(); // skip .import
1108
1109             QString pathOrUri;
1110             QString version;
1111             bool fileImport = false; // file or uri import
1112
1113             if (_tokenKind == T_STRING_LITERAL) {
1114                 // .import T_STRING_LITERAL as T_IDENTIFIER
1115
1116                 fileImport = true;
1117                 pathOrUri = tokenText();
1118
1119             } else if (_tokenKind == T_IDENTIFIER) {
1120                 // .import T_IDENTIFIER (. T_IDENTIFIER)* T_NUMERIC_LITERAL as T_IDENTIFIER
1121
1122                 pathOrUri = tokenText();
1123
1124                 lex(); // skip the first T_IDENTIFIER
1125                 for (; _tokenKind == T_DOT; lex()) {
1126                     if (lex() != T_IDENTIFIER)
1127                         return false;
1128
1129                     pathOrUri += QLatin1Char('.');
1130                     pathOrUri += tokenText();
1131                 }
1132
1133                 if (_tokenKind != T_NUMERIC_LITERAL)
1134                     return false; // expected the module version number
1135
1136                 version = tokenText();
1137             }
1138
1139             //
1140             // recognize the mandatory `as' followed by the module name
1141             //
1142             if (! (lex() == T_RESERVED_WORD && tokenText() == QLatin1String("as")))
1143                 return false; // expected `as'
1144
1145             if (lex() != T_IDENTIFIER)
1146                 return false; // expected module name
1147
1148             const QString module = tokenText();
1149
1150             if (fileImport)
1151                 directives->importFile(pathOrUri, module);
1152             else
1153                 directives->importModule(pathOrUri, version, module);
1154         }
1155
1156         if (tokenStartLine() != lineNumber)
1157             return false; // the directives cannot span over multiple lines
1158
1159         // fetch the first token after the .pragma/.import directive
1160         lex();
1161     } while (_tokenKind == T_DOT);
1162
1163     return true;
1164 }
1165
1166 #include "qdeclarativejskeywords_p.h"