src/scanner.h

   1 // Copyright 2011 the V8 project authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // Features shared by parsing and pre-parsing scanners.
   6
   7 #ifndef V8_SCANNER_H_
   8 #define V8_SCANNER_H_
   9
  10 #include "src/allocation.h"
  11 #include "src/base/logging.h"
  12 #include "src/char-predicates.h"
  13 #include "src/globals.h"
  14 #include "src/hashmap.h"
  15 #include "src/list.h"
  16 #include "src/token.h"
  17 #include "src/unicode.h"
  18 #include "src/unicode-decoder.h"
  19 #include "src/utils.h"
  20
  21 namespace v8 {
  22 namespace internal {
  23
  24
  25 class AstRawString;
  26 class AstValueFactory;
  27 class ParserRecorder;
  28 class UnicodeCache;
  29
  30
  31 // Returns the value (0 .. 15) of a hexadecimal character c.
  32 // If c is not a legal hexadecimal character, returns a value < 0.
  33 inline int HexValue(uc32 c) {
  34   c -= '0';
  35   if (static_cast<unsigned>(c) <= 9) return c;
  36   c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
  37   if (static_cast<unsigned>(c) <= 5) return c + 10;
  38   return -1;
  39 }
  40
  41
  42 // ---------------------------------------------------------------------
  43 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
// A code unit is a 16 bit value representing either a 16 bit code point
// or one half of a surrogate pair (the two halves together encode a single
// 21 bit code point).
  46
  47 class Utf16CharacterStream {
  48  public:
  49   Utf16CharacterStream() : pos_(0) { }
  50   virtual ~Utf16CharacterStream() { }
  51
  52   // Returns and advances past the next UTF-16 code unit in the input
  53   // stream. If there are no more code units, it returns a negative
  54   // value.
  55   inline uc32 Advance() {
  56     if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
  57       pos_++;
  58       return static_cast<uc32>(*(buffer_cursor_++));
  59     }
  60     // Note: currently the following increment is necessary to avoid a
  61     // parser problem! The scanner treats the final kEndOfInput as
  62     // a code unit with a position, and does math relative to that
  63     // position.
  64     pos_++;
  65
  66     return kEndOfInput;
  67   }
  68
  69   // Return the current position in the code unit stream.
  70   // Starts at zero.
  71   inline size_t pos() const { return pos_; }
  72
  73   // Skips forward past the next code_unit_count UTF-16 code units
  74   // in the input, or until the end of input if that comes sooner.
  75   // Returns the number of code units actually skipped. If less
  76   // than code_unit_count,
  77   inline size_t SeekForward(size_t code_unit_count) {
  78     size_t buffered_chars = buffer_end_ - buffer_cursor_;
  79     if (code_unit_count <= buffered_chars) {
  80       buffer_cursor_ += code_unit_count;
  81       pos_ += code_unit_count;
  82       return code_unit_count;
  83     }
  84     return SlowSeekForward(code_unit_count);
  85   }
  86
  87   // Pushes back the most recently read UTF-16 code unit (or negative
  88   // value if at end of input), i.e., the value returned by the most recent
  89   // call to Advance.
  90   // Must not be used right after calling SeekForward.
  91   virtual void PushBack(int32_t code_unit) = 0;
  92
  93   virtual bool SetBookmark();
  94   virtual void ResetToBookmark();
  95
  96  protected:
  97   static const uc32 kEndOfInput = -1;
  98
  99   // Ensures that the buffer_cursor_ points to the code_unit at
 100   // position pos_ of the input, if possible. If the position
 101   // is at or after the end of the input, return false. If there
 102   // are more code_units available, return true.
 103   virtual bool ReadBlock() = 0;
 104   virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
 105
 106   const uint16_t* buffer_cursor_;
 107   const uint16_t* buffer_end_;
 108   size_t pos_;
 109 };
 110
 111
 112 // ---------------------------------------------------------------------
 113 // DuplicateFinder discovers duplicate symbols.
 114
 115 class DuplicateFinder {
 116  public:
 117   explicit DuplicateFinder(UnicodeCache* constants)
 118       : unicode_constants_(constants),
 119         backing_store_(16),
 120         map_(&Match) { }
 121
 122   int AddOneByteSymbol(Vector<const uint8_t> key, int value);
 123   int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
 124   // Add a a number literal by converting it (if necessary)
 125   // to the string that ToString(ToNumber(literal)) would generate.
 126   // and then adding that string with AddOneByteSymbol.
 127   // This string is the actual value used as key in an object literal,
 128   // and the one that must be different from the other keys.
 129   int AddNumber(Vector<const uint8_t> key, int value);
 130
 131  private:
 132   int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
 133   // Backs up the key and its length in the backing store.
 134   // The backup is stored with a base 127 encoding of the
 135   // length (plus a bit saying whether the string is one byte),
 136   // followed by the bytes of the key.
 137   uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
 138
 139   // Compare two encoded keys (both pointing into the backing store)
 140   // for having the same base-127 encoded lengths and representation.
 141   // and then having the same 'length' bytes following.
 142   static bool Match(void* first, void* second);
 143   // Creates a hash from a sequence of bytes.
 144   static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
 145   // Checks whether a string containing a JS number is its canonical
 146   // form.
 147   static bool IsNumberCanonical(Vector<const uint8_t> key);
 148
 149   // Size of buffer. Sufficient for using it to call DoubleToCString in
 150   // from conversions.h.
 151   static const int kBufferSize = 100;
 152
 153   UnicodeCache* unicode_constants_;
 154   // Backing store used to store strings used as hashmap keys.
 155   SequenceCollector<unsigned char> backing_store_;
 156   HashMap map_;
 157   // Buffer used for string->number->canonical string conversions.
 158   char number_buffer_[kBufferSize];
 159 };
 160
 161
 162 // ----------------------------------------------------------------------------
 163 // LiteralBuffer -  Collector of chars of literals.
 164
 165 class LiteralBuffer {
 166  public:
 167   LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
 168
 169   ~LiteralBuffer() {
 170     if (backing_store_.length() > 0) {
 171       backing_store_.Dispose();
 172     }
 173   }
 174
 175   INLINE(void AddChar(uint32_t code_unit)) {
 176     if (position_ >= backing_store_.length()) ExpandBuffer();
 177     if (is_one_byte_) {
 178       if (code_unit <= unibrow::Latin1::kMaxChar) {
 179         backing_store_[position_] = static_cast<byte>(code_unit);
 180         position_ += kOneByteSize;
 181         return;
 182       }
 183       ConvertToTwoByte();
 184     }
 185     if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
 186       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
 187       position_ += kUC16Size;
 188     } else {
 189       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
 190           unibrow::Utf16::LeadSurrogate(code_unit);
 191       position_ += kUC16Size;
 192       if (position_ >= backing_store_.length()) ExpandBuffer();
 193       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
 194           unibrow::Utf16::TrailSurrogate(code_unit);
 195       position_ += kUC16Size;
 196     }
 197   }
 198
 199   bool is_one_byte() const { return is_one_byte_; }
 200
 201   bool is_contextual_keyword(Vector<const char> keyword) const {
 202     return is_one_byte() && keyword.length() == position_ &&
 203         (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
 204   }
 205
 206   Vector<const uint16_t> two_byte_literal() const {
 207     DCHECK(!is_one_byte_);
 208     DCHECK((position_ & 0x1) == 0);
 209     return Vector<const uint16_t>(
 210         reinterpret_cast<const uint16_t*>(backing_store_.start()),
 211         position_ >> 1);
 212   }
 213
 214   Vector<const uint8_t> one_byte_literal() const {
 215     DCHECK(is_one_byte_);
 216     return Vector<const uint8_t>(
 217         reinterpret_cast<const uint8_t*>(backing_store_.start()),
 218         position_);
 219   }
 220
 221   int length() const {
 222     return is_one_byte_ ? position_ : (position_ >> 1);
 223   }
 224
 225   void ReduceLength(int delta) {
 226     position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
 227   }
 228
 229   void Reset() {
 230     position_ = 0;
 231     is_one_byte_ = true;
 232   }
 233
 234   Handle<String> Internalize(Isolate* isolate) const;
 235
 236   void CopyFrom(const LiteralBuffer* other) {
 237     if (other == nullptr) {
 238       Reset();
 239     } else {
 240       is_one_byte_ = other->is_one_byte_;
 241       position_ = other->position_;
 242       backing_store_.Dispose();
 243       backing_store_ = other->backing_store_.Clone();
 244     }
 245   }
 246
 247  private:
 248   static const int kInitialCapacity = 16;
 249   static const int kGrowthFactory = 4;
 250   static const int kMinConversionSlack = 256;
 251   static const int kMaxGrowth = 1 * MB;
 252   inline int NewCapacity(int min_capacity) {
 253     int capacity = Max(min_capacity, backing_store_.length());
 254     int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
 255     return new_capacity;
 256   }
 257
 258   void ExpandBuffer() {
 259     Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
 260     MemCopy(new_store.start(), backing_store_.start(), position_);
 261     backing_store_.Dispose();
 262     backing_store_ = new_store;
 263   }
 264
 265   void ConvertToTwoByte() {
 266     DCHECK(is_one_byte_);
 267     Vector<byte> new_store;
 268     int new_content_size = position_ * kUC16Size;
 269     if (new_content_size >= backing_store_.length()) {
 270       // Ensure room for all currently read code units as UC16 as well
 271       // as the code unit about to be stored.
 272       new_store = Vector<byte>::New(NewCapacity(new_content_size));
 273     } else {
 274       new_store = backing_store_;
 275     }
 276     uint8_t* src = backing_store_.start();
 277     uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
 278     for (int i = position_ - 1; i >= 0; i--) {
 279       dst[i] = src[i];
 280     }
 281     if (new_store.start() != backing_store_.start()) {
 282       backing_store_.Dispose();
 283       backing_store_ = new_store;
 284     }
 285     position_ = new_content_size;
 286     is_one_byte_ = false;
 287   }
 288
 289   bool is_one_byte_;
 290   int position_;
 291   Vector<byte> backing_store_;
 292
 293   DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
 294 };
 295
 296
 297 // ----------------------------------------------------------------------------
 298 // JavaScript Scanner.
 299
 300 class Scanner {
 301  public:
 302   // Scoped helper for literal recording. Automatically drops the literal
 303   // if aborting the scanning before it's complete.
 304   class LiteralScope {
 305    public:
 306     explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
 307       scanner_->StartLiteral();
 308     }
 309      ~LiteralScope() {
 310        if (!complete_) scanner_->DropLiteral();
 311      }
 312     void Complete() {
 313       complete_ = true;
 314     }
 315
 316    private:
 317     Scanner* scanner_;
 318     bool complete_;
 319   };
 320
 321   // Scoped helper for a re-settable bookmark.
 322   class BookmarkScope {
 323    public:
 324     explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) {
 325       DCHECK_NOT_NULL(scanner_);
 326     }
 327     ~BookmarkScope() { scanner_->DropBookmark(); }
 328
 329     bool Set() { return scanner_->SetBookmark(); }
 330     void Reset() { scanner_->ResetToBookmark(); }
 331     bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); }
 332     bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); }
 333
 334    private:
 335     Scanner* scanner_;
 336
 337     DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
 338   };
 339
 340   // Representation of an interval of source positions.
 341   struct Location {
 342     Location(int b, int e) : beg_pos(b), end_pos(e) { }
 343     Location() : beg_pos(0), end_pos(0) { }
 344
 345     bool IsValid() const {
 346       return beg_pos >= 0 && end_pos >= beg_pos;
 347     }
 348
 349     static Location invalid() { return Location(-1, -1); }
 350
 351     int beg_pos;
 352     int end_pos;
 353   };
 354
 355   // -1 is outside of the range of any real source code.
 356   static const int kNoOctalLocation = -1;
 357
 358   explicit Scanner(UnicodeCache* scanner_contants);
 359
 360   void Initialize(Utf16CharacterStream* source);
 361
 362   // Returns the next token and advances input.
 363   Token::Value Next();
 364   // Returns the token following peek()
 365   Token::Value PeekAhead();
 366   // Returns the current token again.
 367   Token::Value current_token() { return current_.token; }
 368   // Returns the location information for the current token
 369   // (the token last returned by Next()).
 370   Location location() const { return current_.location; }
 371
 372   // Similar functions for the upcoming token.
 373
 374   // One token look-ahead (past the token returned by Next()).
 375   Token::Value peek() const { return next_.token; }
 376
 377   Location peek_location() const { return next_.location; }
 378
 379   bool literal_contains_escapes() const {
 380     Location location = current_.location;
 381     int source_length = (location.end_pos - location.beg_pos);
 382     if (current_.token == Token::STRING) {
 383       // Subtract delimiters.
 384       source_length -= 2;
 385     }
 386     return current_.literal_chars->length() != source_length;
 387   }
 388   bool is_literal_contextual_keyword(Vector<const char> keyword) {
 389     DCHECK_NOT_NULL(current_.literal_chars);
 390     return current_.literal_chars->is_contextual_keyword(keyword);
 391   }
 392   bool is_next_contextual_keyword(Vector<const char> keyword) {
 393     DCHECK_NOT_NULL(next_.literal_chars);
 394     return next_.literal_chars->is_contextual_keyword(keyword);
 395   }
 396
 397   const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
 398   const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
 399   const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
 400
 401   double DoubleValue();
 402   bool ContainsDot();
 403   bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
 404     if (is_literal_one_byte() &&
 405         literal_length() == length &&
 406         (allow_escapes || !literal_contains_escapes())) {
 407       const char* token =
 408           reinterpret_cast<const char*>(literal_one_byte_string().start());
 409       return !strncmp(token, data, length);
 410     }
 411     return false;
 412   }
 413   inline bool UnescapedLiteralMatches(const char* data, int length) {
 414     return LiteralMatches(data, length, false);
 415   }
 416
 417   void IsGetOrSet(bool* is_get, bool* is_set) {
 418     if (is_literal_one_byte() &&
 419         literal_length() == 3 &&
 420         !literal_contains_escapes()) {
 421       const char* token =
 422           reinterpret_cast<const char*>(literal_one_byte_string().start());
 423       *is_get = strncmp(token, "get", 3) == 0;
 424       *is_set = !*is_get && strncmp(token, "set", 3) == 0;
 425     }
 426   }
 427
 428   int FindSymbol(DuplicateFinder* finder, int value);
 429
 430   UnicodeCache* unicode_cache() { return unicode_cache_; }
 431
 432   // Returns the location of the last seen octal literal.
 433   Location octal_position() const { return octal_pos_; }
 434   void clear_octal_position() { octal_pos_ = Location::invalid(); }
 435
 436   // Returns the value of the last smi that was scanned.
 437   int smi_value() const { return current_.smi_value_; }
 438
 439   // Seek forward to the given position.  This operation does not
 440   // work in general, for instance when there are pushed back
 441   // characters, but works for seeking forward until simple delimiter
 442   // tokens, which is what it is used for.
 443   void SeekForward(int pos);
 444
 445   // Returns true if there was a line terminator before the peek'ed token,
 446   // possibly inside a multi-line comment.
 447   bool HasAnyLineTerminatorBeforeNext() const {
 448     return has_line_terminator_before_next_ ||
 449            has_multiline_comment_before_next_;
 450   }
 451
 452   // Scans the input as a regular expression pattern, previous
 453   // character(s) must be /(=). Returns true if a pattern is scanned.
 454   bool ScanRegExpPattern(bool seen_equal);
 455   // Returns true if regexp flags are scanned (always since flags can
 456   // be empty).
 457   bool ScanRegExpFlags();
 458
 459   // Scans the input as a template literal
 460   Token::Value ScanTemplateStart();
 461   Token::Value ScanTemplateContinuation();
 462
 463   const LiteralBuffer* source_url() const { return &source_url_; }
 464   const LiteralBuffer* source_mapping_url() const {
 465     return &source_mapping_url_;
 466   }
 467
 468   bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
 469
 470  private:
 471   // The current and look-ahead token.
 472   struct TokenDesc {
 473     Token::Value token;
 474     Location location;
 475     LiteralBuffer* literal_chars;
 476     LiteralBuffer* raw_literal_chars;
 477     int smi_value_;
 478   };
 479
 480   static const int kCharacterLookaheadBufferSize = 1;
 481
 482   // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
 483   template <bool capture_raw>
 484   uc32 ScanOctalEscape(uc32 c, int length);
 485
 486   // Call this after setting source_ to the input.
 487   void Init() {
 488     // Set c0_ (one character ahead)
 489     STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
 490     Advance();
 491     // Initialize current_ to not refer to a literal.
 492     current_.literal_chars = NULL;
 493     current_.raw_literal_chars = NULL;
 494     next_next_.token = Token::UNINITIALIZED;
 495   }
 496
 497   // Support BookmarkScope functionality.
 498   bool SetBookmark();
 499   void ResetToBookmark();
 500   bool BookmarkHasBeenSet();
 501   bool BookmarkHasBeenReset();
 502   void DropBookmark();
 503   static void CopyTokenDesc(TokenDesc* to, TokenDesc* from);
 504
 505   // Literal buffer support
 506   inline void StartLiteral() {
 507     LiteralBuffer* free_buffer =
 508         (current_.literal_chars == &literal_buffer0_)
 509             ? &literal_buffer1_
 510             : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
 511                                                             : &literal_buffer0_;
 512     free_buffer->Reset();
 513     next_.literal_chars = free_buffer;
 514   }
 515
 516   inline void StartRawLiteral() {
 517     LiteralBuffer* free_buffer =
 518         (current_.raw_literal_chars == &raw_literal_buffer0_)
 519             ? &raw_literal_buffer1_
 520             : (current_.raw_literal_chars == &raw_literal_buffer1_)
 521                   ? &raw_literal_buffer2_
 522                   : &raw_literal_buffer0_;
 523     free_buffer->Reset();
 524     next_.raw_literal_chars = free_buffer;
 525   }
 526
 527   INLINE(void AddLiteralChar(uc32 c)) {
 528     DCHECK_NOT_NULL(next_.literal_chars);
 529     next_.literal_chars->AddChar(c);
 530   }
 531
 532   INLINE(void AddRawLiteralChar(uc32 c)) {
 533     DCHECK_NOT_NULL(next_.raw_literal_chars);
 534     next_.raw_literal_chars->AddChar(c);
 535   }
 536
 537   INLINE(void ReduceRawLiteralLength(int delta)) {
 538     DCHECK_NOT_NULL(next_.raw_literal_chars);
 539     next_.raw_literal_chars->ReduceLength(delta);
 540   }
 541
 542   // Stops scanning of a literal and drop the collected characters,
 543   // e.g., due to an encountered error.
 544   inline void DropLiteral() {
 545     next_.literal_chars = NULL;
 546     next_.raw_literal_chars = NULL;
 547   }
 548
 549   inline void AddLiteralCharAdvance() {
 550     AddLiteralChar(c0_);
 551     Advance();
 552   }
 553
 554   // Low-level scanning support.
 555   template <bool capture_raw = false, bool check_surrogate = true>
 556   void Advance() {
 557     if (capture_raw) {
 558       AddRawLiteralChar(c0_);
 559     }
 560     c0_ = source_->Advance();
 561     if (check_surrogate) HandleLeadSurrogate();
 562   }
 563
 564   void HandleLeadSurrogate() {
 565     if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
 566       uc32 c1 = source_->Advance();
 567       if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
 568         source_->PushBack(c1);
 569       } else {
 570         c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
 571       }
 572     }
 573   }
 574
 575   void PushBack(uc32 ch) {
 576     if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
 577       source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
 578       source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
 579     } else {
 580       source_->PushBack(c0_);
 581     }
 582     c0_ = ch;
 583   }
 584
 585   inline Token::Value Select(Token::Value tok) {
 586     Advance();
 587     return tok;
 588   }
 589
 590   inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
 591     Advance();
 592     if (c0_ == next) {
 593       Advance();
 594       return then;
 595     } else {
 596       return else_;
 597     }
 598   }
 599
 600   // Returns the literal string, if any, for the current token (the
 601   // token last returned by Next()). The string is 0-terminated.
 602   // Literal strings are collected for identifiers, strings, numbers as well
 603   // as for template literals. For template literals we also collect the raw
 604   // form.
 605   // These functions only give the correct result if the literal was scanned
 606   // when a LiteralScope object is alive.
 607   Vector<const uint8_t> literal_one_byte_string() {
 608     DCHECK_NOT_NULL(current_.literal_chars);
 609     return current_.literal_chars->one_byte_literal();
 610   }
 611   Vector<const uint16_t> literal_two_byte_string() {
 612     DCHECK_NOT_NULL(current_.literal_chars);
 613     return current_.literal_chars->two_byte_literal();
 614   }
 615   bool is_literal_one_byte() {
 616     DCHECK_NOT_NULL(current_.literal_chars);
 617     return current_.literal_chars->is_one_byte();
 618   }
 619   int literal_length() const {
 620     DCHECK_NOT_NULL(current_.literal_chars);
 621     return current_.literal_chars->length();
 622   }
 623   // Returns the literal string for the next token (the token that
 624   // would be returned if Next() were called).
 625   Vector<const uint8_t> next_literal_one_byte_string() {
 626     DCHECK_NOT_NULL(next_.literal_chars);
 627     return next_.literal_chars->one_byte_literal();
 628   }
 629   Vector<const uint16_t> next_literal_two_byte_string() {
 630     DCHECK_NOT_NULL(next_.literal_chars);
 631     return next_.literal_chars->two_byte_literal();
 632   }
 633   bool is_next_literal_one_byte() {
 634     DCHECK_NOT_NULL(next_.literal_chars);
 635     return next_.literal_chars->is_one_byte();
 636   }
 637   Vector<const uint8_t> raw_literal_one_byte_string() {
 638     DCHECK_NOT_NULL(current_.raw_literal_chars);
 639     return current_.raw_literal_chars->one_byte_literal();
 640   }
 641   Vector<const uint16_t> raw_literal_two_byte_string() {
 642     DCHECK_NOT_NULL(current_.raw_literal_chars);
 643     return current_.raw_literal_chars->two_byte_literal();
 644   }
 645   bool is_raw_literal_one_byte() {
 646     DCHECK_NOT_NULL(current_.raw_literal_chars);
 647     return current_.raw_literal_chars->is_one_byte();
 648   }
 649
 650   template <bool capture_raw>
 651   uc32 ScanHexNumber(int expected_length);
 652   // Scan a number of any length but not bigger than max_value. For example, the
 653   // number can be 000000001, so it's very long in characters but its value is
 654   // small.
 655   template <bool capture_raw>
 656   uc32 ScanUnlimitedLengthHexNumber(int max_value);
 657
 658   // Scans a single JavaScript token.
 659   void Scan();
 660
 661   bool SkipWhiteSpace();
 662   Token::Value SkipSingleLineComment();
 663   Token::Value SkipSourceURLComment();
 664   void TryToParseSourceURLComment();
 665   Token::Value SkipMultiLineComment();
 666   // Scans a possible HTML comment -- begins with '<!'.
 667   Token::Value ScanHtmlComment();
 668
 669   void ScanDecimalDigits();
 670   Token::Value ScanNumber(bool seen_period);
 671   Token::Value ScanIdentifierOrKeyword();
 672   Token::Value ScanIdentifierSuffix(LiteralScope* literal);
 673
 674   Token::Value ScanString();
 675
 676   // Scans an escape-sequence which is part of a string and adds the
 677   // decoded character to the current literal. Returns true if a pattern
 678   // is scanned.
 679   template <bool capture_raw, bool in_template_literal>
 680   bool ScanEscape();
 681
 682   // Decodes a Unicode escape-sequence which is part of an identifier.
 683   // If the escape sequence cannot be decoded the result is kBadChar.
 684   uc32 ScanIdentifierUnicodeEscape();
 685   // Helper for the above functions.
 686   template <bool capture_raw>
 687   uc32 ScanUnicodeEscape();
 688
 689   Token::Value ScanTemplateSpan();
 690
 691   // Return the current source position.
 692   int source_pos() {
 693     return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
 694   }
 695
 696   UnicodeCache* unicode_cache_;
 697
 698   // Buffers collecting literal strings, numbers, etc.
 699   LiteralBuffer literal_buffer0_;
 700   LiteralBuffer literal_buffer1_;
 701   LiteralBuffer literal_buffer2_;
 702
 703   // Values parsed from magic comments.
 704   LiteralBuffer source_url_;
 705   LiteralBuffer source_mapping_url_;
 706
 707   // Buffer to store raw string values
 708   LiteralBuffer raw_literal_buffer0_;
 709   LiteralBuffer raw_literal_buffer1_;
 710   LiteralBuffer raw_literal_buffer2_;
 711
 712   TokenDesc current_;    // desc for current token (as returned by Next())
 713   TokenDesc next_;       // desc for next token (one token look-ahead)
 714   TokenDesc next_next_;  // desc for the token after next (after PeakAhead())
 715
 716   // Variables for Scanner::BookmarkScope and the *Bookmark implementation.
 717   // These variables contain the scanner state when a bookmark is set.
 718   //
 719   // We will use bookmark_c0_ as a 'control' variable, where:
 720   // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_.
 721   // - bookmark_c0_ == -1: No bookmark has been set.
 722   // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark).
 723   //
 724   // Which state is being bookmarked? The parser state is distributed over
 725   // several variables, roughly like this:
 726   //   ...    1234        +       5678 ..... [character stream]
 727   //       [current_] [next_] c0_ |      [scanner state]
 728   // So when the scanner is logically at the beginning of an expression
 729   // like "1234 + 4567", then:
 730   // - current_ contains "1234"
 731   // - next_ contains "+"
 732   // - c0_ contains ' ' (the space between "+" and "5678",
 733   // - the source_ character stream points to the beginning of "5678".
 734   // To be able to restore this state, we will keep copies of current_, next_,
 735   // and c0_; we'll ask the stream to bookmark itself, and we'll copy the
 736   // contents of current_'s and next_'s literal buffers to bookmark_*_literal_.
 737   static const uc32 kNoBookmark = -1;
 738   static const uc32 kBookmarkWasApplied = -2;
 739   uc32 bookmark_c0_;
 740   TokenDesc bookmark_current_;
 741   TokenDesc bookmark_next_;
 742   LiteralBuffer bookmark_current_literal_;
 743   LiteralBuffer bookmark_current_raw_literal_;
 744   LiteralBuffer bookmark_next_literal_;
 745   LiteralBuffer bookmark_next_raw_literal_;
 746
 747   // Input stream. Must be initialized to an Utf16CharacterStream.
 748   Utf16CharacterStream* source_;
 749
 750
 751   // Start position of the octal literal last scanned.
 752   Location octal_pos_;
 753
 754   // One Unicode character look-ahead; c0_ < 0 at the end of the input.
 755   uc32 c0_;
 756
 757   // Whether there is a line terminator whitespace character after
 758   // the current token, and  before the next. Does not count newlines
 759   // inside multiline comments.
 760   bool has_line_terminator_before_next_;
 761   // Whether there is a multi-line comment that contains a
 762   // line-terminator after the current token, and before the next.
 763   bool has_multiline_comment_before_next_;
 764 };
 765
 766 } }  // namespace v8::internal
 767
 768 #endif  // V8_SCANNER_H_