src/qml/qml/parser/qqmljslexer.cpp

   1 /****************************************************************************
   2 **
   3 ** Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies).
   4 ** Contact: http://www.qt-project.org/legal
   5 **
   6 ** This file is part of the QtQml module of the Qt Toolkit.
   7 **
   8 ** $QT_BEGIN_LICENSE:LGPL$
   9 ** Commercial License Usage
  10 ** Licensees holding valid commercial Qt licenses may use this file in
  11 ** accordance with the commercial license agreement provided with the
  12 ** Software or, alternatively, in accordance with the terms contained in
  13 ** a written agreement between you and Digia.  For licensing terms and
  14 ** conditions see http://qt.digia.com/licensing.  For further information
  15 ** use the contact form at http://qt.digia.com/contact-us.
  16 **
  17 ** GNU Lesser General Public License Usage
  18 ** Alternatively, this file may be used under the terms of the GNU Lesser
  19 ** General Public License version 2.1 as published by the Free Software
  20 ** Foundation and appearing in the file LICENSE.LGPL included in the
  21 ** packaging of this file.  Please review the following information to
  22 ** ensure the GNU Lesser General Public License version 2.1 requirements
  23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
  24 **
  25 ** In addition, as a special exception, Digia gives you certain additional
  26 ** rights.  These rights are described in the Digia Qt LGPL Exception
  27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
  28 **
  29 ** GNU General Public License Usage
  30 ** Alternatively, this file may be used under the terms of the GNU
  31 ** General Public License version 3.0 as published by the Free Software
  32 ** Foundation and appearing in the file LICENSE.GPL included in the
  33 ** packaging of this file.  Please review the following information to
  34 ** ensure the GNU General Public License version 3.0 requirements will be
  35 ** met: http://www.gnu.org/copyleft/gpl.html.
  36 **
  37 **
  38 ** $QT_END_LICENSE$
  39 **
  40 ****************************************************************************/
  41
  42 #include "qqmljslexer_p.h"
  43 #include "qqmljsengine_p.h"
  44 #include "qqmljsmemorypool_p.h"
  45
  46 #include <QtCore/QCoreApplication>
  47 #include <QtCore/QVarLengthArray>
  48 #include <QtCore/QDebug>
  49
  50 QT_BEGIN_NAMESPACE
  51 Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok);
  52 QT_END_NAMESPACE
  53
  54 using namespace QQmlJS;
  55
  56 static int regExpFlagFromChar(const QChar &ch)
  57 {
  58     switch (ch.unicode()) {
  59     case 'g': return Lexer::RegExp_Global;
  60     case 'i': return Lexer::RegExp_IgnoreCase;
  61     case 'm': return Lexer::RegExp_Multiline;
  62     }
  63     return 0;
  64 }
  65
  66 static unsigned char convertHex(ushort c)
  67 {
  68     if (c >= '0' && c <= '9')
  69         return (c - '0');
  70     else if (c >= 'a' && c <= 'f')
  71         return (c - 'a' + 10);
  72     else
  73         return (c - 'A' + 10);
  74 }
  75
  76 static QChar convertHex(QChar c1, QChar c2)
  77 {
  78     return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
  79 }
  80
  81 static QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4)
  82 {
  83     return QChar((convertHex(c3.unicode()) << 4) + convertHex(c4.unicode()),
  84                  (convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
  85 }
  86
  87 Lexer::Lexer(Engine *engine)
  88     : _engine(engine)
  89     , _codePtr(0)
  90     , _lastLinePtr(0)
  91     , _tokenLinePtr(0)
  92     , _tokenStartPtr(0)
  93     , _char(QLatin1Char('\n'))
  94     , _errorCode(NoError)
  95     , _currentLineNumber(0)
  96     , _tokenValue(0)
  97     , _parenthesesState(IgnoreParentheses)
  98     , _parenthesesCount(0)
  99     , _stackToken(-1)
 100     , _patternFlags(0)
 101     , _tokenKind(0)
 102     , _tokenLength(0)
 103     , _tokenLine(0)
 104     , _validTokenText(false)
 105     , _prohibitAutomaticSemicolon(false)
 106     , _restrictedKeyword(false)
 107     , _terminator(false)
 108     , _followsClosingBrace(false)
 109     , _delimited(true)
 110     , _qmlMode(true)
 111 {
 112     if (engine)
 113         engine->setLexer(this);
 114 }
 115
 116 bool Lexer::qmlMode() const
 117 {
 118     return _qmlMode;
 119 }
 120
 121 QString Lexer::code() const
 122 {
 123     return _code;
 124 }
 125
 126 void Lexer::setCode(const QString &code, int lineno, bool qmlMode)
 127 {
 128     if (_engine)
 129         _engine->setCode(code);
 130
 131     _qmlMode = qmlMode;
 132     _code = code;
 133     _tokenText.clear();
 134     _tokenText.reserve(1024);
 135     _errorMessage.clear();
 136     _tokenSpell = QStringRef();
 137
 138     _codePtr = code.unicode();
 139     _lastLinePtr = _codePtr;
 140     _tokenLinePtr = _codePtr;
 141     _tokenStartPtr = _codePtr;
 142
 143     _char = QLatin1Char('\n');
 144     _errorCode = NoError;
 145
 146     _currentLineNumber = lineno;
 147     _tokenValue = 0;
 148
 149     // parentheses state
 150     _parenthesesState = IgnoreParentheses;
 151     _parenthesesCount = 0;
 152
 153     _stackToken = -1;
 154
 155     _patternFlags = 0;
 156     _tokenLength = 0;
 157     _tokenLine = lineno;
 158
 159     _validTokenText = false;
 160     _prohibitAutomaticSemicolon = false;
 161     _restrictedKeyword = false;
 162     _terminator = false;
 163     _followsClosingBrace = false;
 164     _delimited = true;
 165 }
 166
 167 void Lexer::scanChar()
 168 {
 169     _char = *_codePtr++;
 170
 171     if (_char == QLatin1Char('\n')) {
 172         _lastLinePtr = _codePtr; // points to the first character after the newline
 173         ++_currentLineNumber;
 174     }
 175 }
 176
 177 int Lexer::lex()
 178 {
 179     const int previousTokenKind = _tokenKind;
 180
 181     _tokenSpell = QStringRef();
 182     _tokenKind = scanToken();
 183     _tokenLength = _codePtr - _tokenStartPtr - 1;
 184
 185     _delimited = false;
 186     _restrictedKeyword = false;
 187     _followsClosingBrace = (previousTokenKind == T_RBRACE);
 188
 189     // update the flags
 190     switch (_tokenKind) {
 191     case T_LBRACE:
 192     case T_SEMICOLON:
 193     case T_COLON:
 194         _delimited = true;
 195         break;
 196
 197     case T_IF:
 198     case T_FOR:
 199     case T_WHILE:
 200     case T_WITH:
 201         _parenthesesState = CountParentheses;
 202         _parenthesesCount = 0;
 203         break;
 204
 205     case T_DO:
 206         _parenthesesState = BalancedParentheses;
 207         break;
 208
 209     case T_CONTINUE:
 210     case T_BREAK:
 211     case T_RETURN:
 212     case T_THROW:
 213         _restrictedKeyword = true;
 214         break;
 215     } // switch
 216
 217     // update the parentheses state
 218     switch (_parenthesesState) {
 219     case IgnoreParentheses:
 220         break;
 221
 222     case CountParentheses:
 223         if (_tokenKind == T_RPAREN) {
 224             --_parenthesesCount;
 225             if (_parenthesesCount == 0)
 226                 _parenthesesState = BalancedParentheses;
 227         } else if (_tokenKind == T_LPAREN) {
 228             ++_parenthesesCount;
 229         }
 230         break;
 231
 232     case BalancedParentheses:
 233         _parenthesesState = IgnoreParentheses;
 234         break;
 235     } // switch
 236
 237     return _tokenKind;
 238 }
 239
 240 bool Lexer::isUnicodeEscapeSequence(const QChar *chars)
 241 {
 242     if (isHexDigit(chars[0]) && isHexDigit(chars[1]) && isHexDigit(chars[2]) && isHexDigit(chars[3]))
 243         return true;
 244
 245     return false;
 246 }
 247
 248 QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
 249 {
 250     if (_char == QLatin1Char('u') && isUnicodeEscapeSequence(&_codePtr[0])) {
 251         scanChar(); // skip u
 252
 253         const QChar c1 = _char;
 254         scanChar();
 255
 256         const QChar c2 = _char;
 257         scanChar();
 258
 259         const QChar c3 = _char;
 260         scanChar();
 261
 262         const QChar c4 = _char;
 263         scanChar();
 264
 265         if (ok)
 266             *ok = true;
 267
 268         return convertUnicode(c1, c2, c3, c4);
 269     }
 270
 271     *ok = false;
 272     return QChar();
 273 }
 274
 275 int Lexer::scanToken()
 276 {
 277     if (_stackToken != -1) {
 278         int tk = _stackToken;
 279         _stackToken = -1;
 280         return tk;
 281     }
 282
 283     _terminator = false;
 284
 285 again:
 286     _validTokenText = false;
 287     _tokenLinePtr = _lastLinePtr;
 288
 289     while (_char.isSpace()) {
 290         if (_char == QLatin1Char('\n')) {
 291             _tokenLinePtr = _codePtr;
 292
 293             if (_restrictedKeyword) {
 294                 // automatic semicolon insertion
 295                 _tokenLine = _currentLineNumber;
 296                 _tokenStartPtr = _codePtr - 1; // ### TODO: insert it before the optional \r sequence.
 297                 return T_SEMICOLON;
 298             } else {
 299                 _terminator = true;
 300                 syncProhibitAutomaticSemicolon();
 301             }
 302         }
 303
 304         scanChar();
 305     }
 306
 307     _tokenStartPtr = _codePtr - 1;
 308     _tokenLine = _currentLineNumber;
 309
 310     if (_char.isNull())
 311         return EOF_SYMBOL;
 312
 313     const QChar ch = _char;
 314     scanChar();
 315
 316     switch (ch.unicode()) {
 317     case '~': return T_TILDE;
 318     case '}': return T_RBRACE;
 319
 320     case '|':
 321         if (_char == QLatin1Char('|')) {
 322             scanChar();
 323             return T_OR_OR;
 324         } else if (_char == QLatin1Char('=')) {
 325             scanChar();
 326             return T_OR_EQ;
 327         }
 328         return T_OR;
 329
 330     case '{': return T_LBRACE;
 331
 332     case '^':
 333         if (_char == QLatin1Char('=')) {
 334             scanChar();
 335             return T_XOR_EQ;
 336         }
 337         return T_XOR;
 338
 339     case ']': return T_RBRACKET;
 340     case '[': return T_LBRACKET;
 341     case '?': return T_QUESTION;
 342
 343     case '>':
 344         if (_char == QLatin1Char('>')) {
 345             scanChar();
 346             if (_char == QLatin1Char('>')) {
 347                 scanChar();
 348                 if (_char == QLatin1Char('=')) {
 349                     scanChar();
 350                     return T_GT_GT_GT_EQ;
 351                 }
 352                 return T_GT_GT_GT;
 353             } else if (_char == QLatin1Char('=')) {
 354                 scanChar();
 355                 return T_GT_GT_EQ;
 356             }
 357             return T_GT_GT;
 358         } else if (_char == QLatin1Char('=')) {
 359             scanChar();
 360             return T_GE;
 361         }
 362         return T_GT;
 363
 364     case '=':
 365         if (_char == QLatin1Char('=')) {
 366             scanChar();
 367             if (_char == QLatin1Char('=')) {
 368                 scanChar();
 369                 return T_EQ_EQ_EQ;
 370             }
 371             return T_EQ_EQ;
 372         }
 373         return T_EQ;
 374
 375     case '<':
 376         if (_char == QLatin1Char('=')) {
 377             scanChar();
 378             return T_LE;
 379         } else if (_char == QLatin1Char('<')) {
 380             scanChar();
 381             if (_char == QLatin1Char('=')) {
 382                 scanChar();
 383                 return T_LT_LT_EQ;
 384             }
 385             return T_LT_LT;
 386         }
 387         return T_LT;
 388
 389     case ';': return T_SEMICOLON;
 390     case ':': return T_COLON;
 391
 392     case '/':
 393         if (_char == QLatin1Char('*')) {
 394             scanChar();
 395             while (!_char.isNull()) {
 396                 if (_char == QLatin1Char('*')) {
 397                     scanChar();
 398                     if (_char == QLatin1Char('/')) {
 399                         scanChar();
 400
 401                         if (_engine) {
 402                             _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 4,
 403                                                 tokenStartLine(), tokenStartColumn() + 2);
 404                         }
 405
 406                         goto again;
 407                     }
 408                 } else {
 409                     scanChar();
 410                 }
 411             }
 412         } else if (_char == QLatin1Char('/')) {
 413             while (!_char.isNull() && _char != QLatin1Char('\n')) {
 414                 scanChar();
 415             }
 416             if (_engine) {
 417                 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2,
 418                                     tokenStartLine(), tokenStartColumn() + 2);
 419             }
 420             goto again;
 421         } if (_char == QLatin1Char('=')) {
 422             scanChar();
 423             return T_DIVIDE_EQ;
 424         }
 425         return T_DIVIDE_;
 426
 427     case '.':
 428         if (_char.isDigit()) {
 429             QVarLengthArray<char,32> chars;
 430
 431             chars.append(ch.unicode()); // append the `.'
 432
 433             while (_char.isDigit()) {
 434                 chars.append(_char.unicode());
 435                 scanChar();
 436             }
 437
 438             if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
 439                 if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
 440                                               _codePtr[1].isDigit())) {
 441
 442                     chars.append(_char.unicode());
 443                     scanChar(); // consume `e'
 444
 445                     if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
 446                         chars.append(_char.unicode());
 447                         scanChar(); // consume the sign
 448                     }
 449
 450                     while (_char.isDigit()) {
 451                         chars.append(_char.unicode());
 452                         scanChar();
 453                     }
 454                 }
 455             }
 456
 457             chars.append('\0');
 458
 459             const char *begin = chars.constData();
 460             const char *end = 0;
 461             bool ok = false;
 462
 463             _tokenValue = qstrtod(begin, &end, &ok);
 464
 465             if (end - begin != chars.size() - 1) {
 466                 _errorCode = IllegalExponentIndicator;
 467                 _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number");
 468                 return T_ERROR;
 469             }
 470
 471             return T_NUMERIC_LITERAL;
 472         }
 473         return T_DOT;
 474
 475     case '-':
 476         if (_char == QLatin1Char('=')) {
 477             scanChar();
 478             return T_MINUS_EQ;
 479         } else if (_char == QLatin1Char('-')) {
 480             scanChar();
 481
 482             if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
 483                 _stackToken = T_MINUS_MINUS;
 484                 return T_SEMICOLON;
 485             }
 486
 487             return T_MINUS_MINUS;
 488         }
 489         return T_MINUS;
 490
 491     case ',': return T_COMMA;
 492
 493     case '+':
 494         if (_char == QLatin1Char('=')) {
 495             scanChar();
 496             return T_PLUS_EQ;
 497         } else if (_char == QLatin1Char('+')) {
 498             scanChar();
 499
 500             if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
 501                 _stackToken = T_PLUS_PLUS;
 502                 return T_SEMICOLON;
 503             }
 504
 505             return T_PLUS_PLUS;
 506         }
 507         return T_PLUS;
 508
 509     case '*':
 510         if (_char == QLatin1Char('=')) {
 511             scanChar();
 512             return T_STAR_EQ;
 513         }
 514         return T_STAR;
 515
 516     case ')': return T_RPAREN;
 517     case '(': return T_LPAREN;
 518
 519     case '&':
 520         if (_char == QLatin1Char('=')) {
 521             scanChar();
 522             return T_AND_EQ;
 523         } else if (_char == QLatin1Char('&')) {
 524             scanChar();
 525             return T_AND_AND;
 526         }
 527         return T_AND;
 528
 529     case '%':
 530         if (_char == QLatin1Char('=')) {
 531             scanChar();
 532             return T_REMAINDER_EQ;
 533         }
 534         return T_REMAINDER;
 535
 536     case '!':
 537         if (_char == QLatin1Char('=')) {
 538             scanChar();
 539             if (_char == QLatin1Char('=')) {
 540                 scanChar();
 541                 return T_NOT_EQ_EQ;
 542             }
 543             return T_NOT_EQ;
 544         }
 545         return T_NOT;
 546
 547     case '\'':
 548     case '"': {
 549         const QChar quote = ch;
 550         bool multilineStringLiteral = false;
 551
 552         const QChar *startCode = _codePtr;
 553
 554         if (_engine) {
 555             while (!_char.isNull()) {
 556                 if (_char == QLatin1Char('\n') || _char == QLatin1Char('\\')) {
 557                     break;
 558                 } else if (_char == quote) {
 559                     _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode);
 560                     scanChar();
 561
 562                     return T_STRING_LITERAL;
 563                 }
 564                 scanChar();
 565             }
 566         }
 567
 568         _validTokenText = true;
 569         _tokenText.resize(0);
 570         startCode--;
 571         while (startCode != _codePtr - 1)
 572             _tokenText += *startCode++;
 573
 574         while (! _char.isNull()) {
 575             if (_char == QLatin1Char('\n')) {
 576                 multilineStringLiteral = true;
 577                 _tokenText += _char;
 578                 scanChar();
 579             } else if (_char == quote) {
 580                 scanChar();
 581
 582                 if (_engine)
 583                     _tokenSpell = _engine->newStringRef(_tokenText);
 584
 585                 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
 586             } else if (_char == QLatin1Char('\\')) {
 587                 scanChar();
 588
 589                 QChar u;
 590                 bool ok = false;
 591
 592                 switch (_char.unicode()) {
 593                 // unicode escape sequence
 594                 case 'u':
 595                     u = decodeUnicodeEscapeCharacter(&ok);
 596                     if (! ok)
 597                         u = _char;
 598                     break;
 599
 600                 // hex escape sequence
 601                 case 'x':
 602                 case 'X':
 603                     if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
 604                         scanChar();
 605
 606                         const QChar c1 = _char;
 607                         scanChar();
 608
 609                         const QChar c2 = _char;
 610                         scanChar();
 611
 612                         u = convertHex(c1, c2);
 613                     } else {
 614                         u = _char;
 615                     }
 616                     break;
 617
 618                 // single character escape sequence
 619                 case '\\': u = QLatin1Char('\\'); scanChar(); break;
 620                 case '\'': u = QLatin1Char('\''); scanChar(); break;
 621                 case '\"': u = QLatin1Char('\"'); scanChar(); break;
 622                 case 'b':  u = QLatin1Char('\b'); scanChar(); break;
 623                 case 'f':  u = QLatin1Char('\f'); scanChar(); break;
 624                 case 'n':  u = QLatin1Char('\n'); scanChar(); break;
 625                 case 'r':  u = QLatin1Char('\r'); scanChar(); break;
 626                 case 't':  u = QLatin1Char('\t'); scanChar(); break;
 627                 case 'v':  u = QLatin1Char('\v'); scanChar(); break;
 628
 629                 case '0':
 630                     if (! _codePtr[1].isDigit()) {
 631                         scanChar();
 632                         u = QLatin1Char('\0');
 633                     } else {
 634                         // ### parse deprecated octal escape sequence ?
 635                         u = _char;
 636                     }
 637                     break;
 638
 639                 case '\r':
 640                     while (_char == QLatin1Char('\r'))
 641                         scanChar();
 642
 643                     if (_char == QLatin1Char('\n')) {
 644                         u = _char;
 645                         scanChar();
 646                     } else {
 647                         u = QLatin1Char('\n');
 648                     }
 649
 650                     break;
 651
 652                 case '\n':
 653                     u = _char;
 654                     scanChar();
 655                     break;
 656
 657                 default:
 658                     // non escape character
 659                     u = _char;
 660                     scanChar();
 661                 }
 662
 663                 _tokenText += u;
 664             } else {
 665                 _tokenText += _char;
 666                 scanChar();
 667             }
 668         }
 669
 670         _errorCode = UnclosedStringLiteral;
 671         _errorMessage = QCoreApplication::translate("QQmlParser", "Unclosed string at end of line");
 672         return T_ERROR;
 673     }
 674     case '0':
 675     case '1':
 676     case '2':
 677     case '3':
 678     case '4':
 679     case '5':
 680     case '6':
 681     case '7':
 682     case '8':
 683     case '9':
 684         return scanNumber(ch);
 685
 686     default:
 687         if (ch.isLetter() || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) {
 688             bool identifierWithEscapeChars = false;
 689             if (ch == QLatin1Char('\\')) {
 690                 identifierWithEscapeChars = true;
 691                 _tokenText.resize(0);
 692                 bool ok = false;
 693                 _tokenText += decodeUnicodeEscapeCharacter(&ok);
 694                 _validTokenText = true;
 695                 if (! ok) {
 696                     _errorCode = IllegalUnicodeEscapeSequence;
 697                     _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
 698                     return T_ERROR;
 699                 }
 700             }
 701             while (true) {
 702                 if (_char.isLetterOrNumber() || _char == QLatin1Char('$') || _char == QLatin1Char('_')) {
 703                     if (identifierWithEscapeChars)
 704                         _tokenText += _char;
 705
 706                     scanChar();
 707                 } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
 708                     if (! identifierWithEscapeChars) {
 709                         identifierWithEscapeChars = true;
 710                         _tokenText.resize(0);
 711                         _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
 712                         _validTokenText = true;
 713                     }
 714
 715                     scanChar(); // skip '\\'
 716                     bool ok = false;
 717                     _tokenText += decodeUnicodeEscapeCharacter(&ok);
 718                     if (! ok) {
 719                         _errorCode = IllegalUnicodeEscapeSequence;
 720                         _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
 721                         return T_ERROR;
 722                     }
 723                 } else {
 724                     _tokenLength = _codePtr - _tokenStartPtr - 1;
 725
 726                     int kind = T_IDENTIFIER;
 727
 728                     if (! identifierWithEscapeChars)
 729                         kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
 730
 731                     if (_engine) {
 732                         if (kind == T_IDENTIFIER && identifierWithEscapeChars)
 733                             _tokenSpell = _engine->newStringRef(_tokenText);
 734                         else
 735                             _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
 736                     }
 737
 738                     return kind;
 739                 }
 740             }
 741         }
 742
 743         break;
 744     }
 745
 746     return T_ERROR;
 747 }
 748
 749 int Lexer::scanNumber(QChar ch)
 750 {
 751     if (ch != QLatin1Char('0')) {
 752         double integer = ch.unicode() - '0';
 753
 754         QChar n = _char;
 755         const QChar *code = _codePtr;
 756         while (n.isDigit()) {
 757             integer = integer * 10 + (n.unicode() - '0');
 758             n = *code++;
 759         }
 760
 761         if (n != QLatin1Char('.') && n != QLatin1Char('e') && n != QLatin1Char('E')) {
 762             if (code != _codePtr) {
 763                 _codePtr = code - 1;
 764                 scanChar();
 765             }
 766             _tokenValue = integer;
 767             return T_NUMERIC_LITERAL;
 768         }
 769     }
 770
 771     QVarLengthArray<char,32> chars;
 772     chars.append(ch.unicode());
 773
 774     if (ch == QLatin1Char('0') && (_char == QLatin1Char('x') || _char == QLatin1Char('X'))) {
 775         // parse hex integer literal
 776
 777         chars.append(_char.unicode());
 778         scanChar(); // consume `x'
 779
 780         while (isHexDigit(_char)) {
 781             chars.append(_char.unicode());
 782             scanChar();
 783         }
 784
 785         _tokenValue = integerFromString(chars.constData(), chars.size(), 16);
 786         return T_NUMERIC_LITERAL;
 787     }
 788
 789     // decimal integer literal
 790     while (_char.isDigit()) {
 791         chars.append(_char.unicode());
 792         scanChar(); // consume the digit
 793     }
 794
 795     if (_char == QLatin1Char('.')) {
 796         chars.append(_char.unicode());
 797         scanChar(); // consume `.'
 798
 799         while (_char.isDigit()) {
 800             chars.append(_char.unicode());
 801             scanChar();
 802         }
 803
 804         if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
 805             if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
 806                                           _codePtr[1].isDigit())) {
 807
 808                 chars.append(_char.unicode());
 809                 scanChar(); // consume `e'
 810
 811                 if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
 812                     chars.append(_char.unicode());
 813                     scanChar(); // consume the sign
 814                 }
 815
 816                 while (_char.isDigit()) {
 817                     chars.append(_char.unicode());
 818                     scanChar();
 819                 }
 820             }
 821         }
 822     } else if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
 823         if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
 824                                       _codePtr[1].isDigit())) {
 825
 826             chars.append(_char.unicode());
 827             scanChar(); // consume `e'
 828
 829             if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
 830                 chars.append(_char.unicode());
 831                 scanChar(); // consume the sign
 832             }
 833
 834             while (_char.isDigit()) {
 835                 chars.append(_char.unicode());
 836                 scanChar();
 837             }
 838         }
 839     }
 840
 841     if (chars.length() == 1) {
 842         // if we ended up with a single digit, then it was a '0'
 843         _tokenValue = 0;
 844         return T_NUMERIC_LITERAL;
 845     }
 846
 847     chars.append('\0');
 848
 849     const char *begin = chars.constData();
 850     const char *end = 0;
 851     bool ok = false;
 852
 853     _tokenValue = qstrtod(begin, &end, &ok);
 854
 855     if (end - begin != chars.size() - 1) {
 856         _errorCode = IllegalExponentIndicator;
 857         _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number");
 858         return T_ERROR;
 859     }
 860
 861     return T_NUMERIC_LITERAL;
 862 }
 863
 864 bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
 865 {
 866     _tokenText.resize(0);
 867     _validTokenText = true;
 868     _patternFlags = 0;
 869
 870     if (prefix == EqualPrefix)
 871         _tokenText += QLatin1Char('=');
 872
 873     while (true) {
 874         switch (_char.unicode()) {
 875         case 0: // eof
 876         case '\n': case '\r': // line terminator
 877             _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression literal");
 878             return false;
 879
 880         case '/':
 881             scanChar();
 882
 883             // scan the flags
 884             _patternFlags = 0;
 885             while (isIdentLetter(_char)) {
 886                 int flag = regExpFlagFromChar(_char);
 887                 if (flag == 0) {
 888                     _errorMessage = QCoreApplication::translate("QQmlParser", "Invalid regular expression flag '%0'")
 889                              .arg(QChar(_char));
 890                     return false;
 891                 }
 892                 _patternFlags |= flag;
 893                 scanChar();
 894             }
 895
 896             _tokenLength = _codePtr - _tokenStartPtr - 1;
 897             return true;
 898
 899         case '\\':
 900             // regular expression backslash sequence
 901             _tokenText += _char;
 902             scanChar();
 903
 904             if (_char.isNull() || isLineTerminator()) {
 905                 _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence");
 906                 return false;
 907             }
 908
 909             _tokenText += _char;
 910             scanChar();
 911             break;
 912
 913         case '[':
 914             // regular expression class
 915             _tokenText += _char;
 916             scanChar();
 917
 918             while (! _char.isNull() && ! isLineTerminator()) {
 919                 if (_char == QLatin1Char(']'))
 920                     break;
 921                 else if (_char == QLatin1Char('\\')) {
 922                     // regular expression backslash sequence
 923                     _tokenText += _char;
 924                     scanChar();
 925
 926                     if (_char.isNull() || isLineTerminator()) {
 927                         _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence");
 928                         return false;
 929                     }
 930
 931                     _tokenText += _char;
 932                     scanChar();
 933                 } else {
 934                     _tokenText += _char;
 935                     scanChar();
 936                 }
 937             }
 938
 939             if (_char != QLatin1Char(']')) {
 940                 _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression class");
 941                 return false;
 942             }
 943
 944             _tokenText += _char;
 945             scanChar(); // skip ]
 946             break;
 947
 948         default:
 949             _tokenText += _char;
 950             scanChar();
 951         } // switch
 952     } // while
 953
 954     return false;
 955 }
 956
 957 bool Lexer::isLineTerminator() const
 958 {
 959     return (_char == QLatin1Char('\n') || _char == QLatin1Char('\r'));
 960 }
 961
 962 bool Lexer::isIdentLetter(QChar ch)
 963 {
 964     // ASCII-biased, since all reserved words are ASCII, aand hence the
 965     // bulk of content to be parsed.
 966     if ((ch >= QLatin1Char('a') && ch <= QLatin1Char('z'))
 967             || (ch >= QLatin1Char('A') && ch <= QLatin1Char('Z'))
 968             || ch == QLatin1Char('$')
 969             || ch == QLatin1Char('_'))
 970         return true;
 971     if (ch.unicode() < 128)
 972         return false;
 973     return ch.isLetterOrNumber();
 974 }
 975
 976 bool Lexer::isDecimalDigit(ushort c)
 977 {
 978     return (c >= '0' && c <= '9');
 979 }
 980
 981 bool Lexer::isHexDigit(QChar c)
 982 {
 983     return ((c >= QLatin1Char('0') && c <= QLatin1Char('9'))
 984             || (c >= QLatin1Char('a') && c <= QLatin1Char('f'))
 985             || (c >= QLatin1Char('A') && c <= QLatin1Char('F')));
 986 }
 987
 988 bool Lexer::isOctalDigit(ushort c)
 989 {
 990     return (c >= '0' && c <= '7');
 991 }
 992
 993 int Lexer::tokenEndLine() const
 994 {
 995     return _currentLineNumber;
 996 }
 997
 998 int Lexer::tokenEndColumn() const
 999 {
1000     return _codePtr - _lastLinePtr;
1001 }
1002
1003 QString Lexer::tokenText() const
1004 {
1005     if (_validTokenText)
1006         return _tokenText;
1007
1008     if (_tokenKind == T_STRING_LITERAL)
1009         return QString(_tokenStartPtr + 1, _tokenLength - 2);
1010
1011     return QString(_tokenStartPtr, _tokenLength);
1012 }
1013
1014 Lexer::Error Lexer::errorCode() const
1015 {
1016     return _errorCode;
1017 }
1018
1019 QString Lexer::errorMessage() const
1020 {
1021     return _errorMessage;
1022 }
1023
1024 void Lexer::syncProhibitAutomaticSemicolon()
1025 {
1026     if (_parenthesesState == BalancedParentheses) {
1027         // we have seen something like "if (foo)", which means we should
1028         // never insert an automatic semicolon at this point, since it would
1029         // then be expanded into an empty statement (ECMA-262 7.9.1)
1030         _prohibitAutomaticSemicolon = true;
1031         _parenthesesState = IgnoreParentheses;
1032     } else {
1033         _prohibitAutomaticSemicolon = false;
1034     }
1035 }
1036
1037 bool Lexer::prevTerminator() const
1038 {
1039     return _terminator;
1040 }
1041
1042 bool Lexer::followsClosingBrace() const
1043 {
1044     return _followsClosingBrace;
1045 }
1046
1047 bool Lexer::canInsertAutomaticSemicolon(int token) const
1048 {
1049     return token == T_RBRACE
1050             || token == EOF_SYMBOL
1051             || _terminator
1052             || _followsClosingBrace;
1053 }
1054
1055 bool Lexer::scanDirectives(Directives *directives)
1056 {
1057     if (_qmlMode) {
1058         // the directives are a Javascript-only extension.
1059         return false;
1060     }
1061
1062     lex(); // fetch the first token
1063
1064     if (_tokenKind != T_DOT)
1065         return true;
1066
1067     do {
1068         lex(); // skip T_DOT
1069
1070         const int lineNumber = tokenStartLine();
1071
1072         if (! (_tokenKind == T_IDENTIFIER || _tokenKind == T_RESERVED_WORD))
1073             return false; // expected a valid QML/JS directive
1074
1075         const QString directiveName = tokenText();
1076
1077         if (! (directiveName == QLatin1String("pragma") ||
1078                directiveName == QLatin1String("import")))
1079             return false; // not a valid directive name
1080
1081         // it must be a pragma or an import directive.
1082         if (directiveName == QLatin1String("pragma")) {
1083             // .pragma library
1084             if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library")))
1085                 return false; // expected `library
1086
1087             // we found a .pragma library directive
1088             directives->pragmaLibrary();
1089
1090         } else {
1091             Q_ASSERT(directiveName == QLatin1String("import"));
1092             lex(); // skip .import
1093
1094             QString pathOrUri;
1095             QString version;
1096             bool fileImport = false; // file or uri import
1097
1098             if (_tokenKind == T_STRING_LITERAL) {
1099                 // .import T_STRING_LITERAL as T_IDENTIFIER
1100
1101                 fileImport = true;
1102                 pathOrUri = tokenText();
1103
1104             } else if (_tokenKind == T_IDENTIFIER) {
1105                 // .import T_IDENTIFIER (. T_IDENTIFIER)* T_NUMERIC_LITERAL as T_IDENTIFIER
1106
1107                 pathOrUri = tokenText();
1108
1109                 lex(); // skip the first T_IDENTIFIER
1110                 for (; _tokenKind == T_DOT; lex()) {
1111                     if (lex() != T_IDENTIFIER)
1112                         return false;
1113
1114                     pathOrUri += QLatin1Char('.');
1115                     pathOrUri += tokenText();
1116                 }
1117
1118                 if (_tokenKind != T_NUMERIC_LITERAL)
1119                     return false; // expected the module version number
1120
1121                 version = tokenText();
1122             }
1123
1124             //
1125             // recognize the mandatory `as' followed by the module name
1126             //
1127             if (! (lex() == T_RESERVED_WORD && tokenText() == QLatin1String("as")))
1128                 return false; // expected `as'
1129
1130             if (lex() != T_IDENTIFIER)
1131                 return false; // expected module name
1132
1133             const QString module = tokenText();
1134
1135             if (fileImport)
1136                 directives->importFile(pathOrUri, module);
1137             else
1138                 directives->importModule(pathOrUri, version, module);
1139         }
1140
1141         if (tokenStartLine() != lineNumber)
1142             return false; // the directives cannot span over multiple lines
1143
1144         // fetch the first token after the .pragma/.import directive
1145         lex();
1146     } while (_tokenKind == T_DOT);
1147
1148     return true;
1149 }
1150
1151 #include "qqmljskeywords_p.h"