Merged Scanner and JavaScriptScanner.

author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 1 Nov 2011 07:47:15 +0000 (07:47 +0000)

committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 1 Nov 2011 07:47:15 +0000 (07:47 +0000)
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 1 Nov 2011 07:47:15 +0000 (07:47 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 1 Nov 2011 07:47:15 +0000 (07:47 +0000)
diff --git a/src/parser.cc b/src/parser.cc

index 52ba66a45430957432b8d31eab22b07dca7586e0..0a635fcbb0bc547f01c2c3029a03341a30e08d9b 100644 (file)
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -5302,7 +5302,7 @@ static ScriptDataImpl* DoPreParse(UC16CharacterStream* source,
                                    int flags,
                                    ParserRecorder* recorder) {
    Isolate* isolate = Isolate::Current();
-  JavaScriptScanner scanner(isolate->unicode_cache());
+  Scanner scanner(isolate->unicode_cache());
    scanner.SetHarmonyScoping((flags & kHarmonyScoping) != 0);
    scanner.Initialize(source);
    intptr_t stack_limit = isolate->stack_guard()->real_climit();
diff --git a/src/parser.h b/src/parser.h

index 2e82d4ebbb3f797daae8d5dab9cb1c9f3388f4a3..9624301fcdd65a1c4ef08f1cc89c60bd573c05e0 100644 (file)
--- a/src/parser.h
+++ b/src/parser.h
@@ -486,7 +486,7 @@ class Parser {
    void ReportMessage(const char* message, Vector<const char*> args);
  
    bool inside_with() const { return top_scope_->inside_with(); }
-  JavaScriptScanner& scanner()  { return scanner_; }
+  Scanner& scanner()  { return scanner_; }
    Mode mode() const { return mode_; }
    ScriptDataImpl* pre_data() const { return pre_data_; }
  
@@ -726,7 +726,7 @@ class Parser {
    ZoneList<Handle<String> > symbol_cache_;
  
    Handle<Script> script_;
-  JavaScriptScanner scanner_;
+  Scanner scanner_;
  
    Scope* top_scope_;
  
diff --git a/src/preparser-api.cc b/src/preparser-api.cc

index 25c7a823c3980ec3fa7fa19bc497fbf6ff0e6df4..1bca9a3333227058ca897ffb533bbd4e9df8ef01 100644 (file)
--- a/src/preparser-api.cc
+++ b/src/preparser-api.cc
@@ -182,7 +182,7 @@ PreParserData Preparse(UnicodeInputStream* input, size_t max_stack) {
    internal::InputStreamUTF16Buffer buffer(input);
    uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&buffer) - max_stack;
    internal::UnicodeCache unicode_cache;
-  internal::JavaScriptScanner scanner(&unicode_cache);
+  internal::Scanner scanner(&unicode_cache);
    scanner.Initialize(&buffer);
    internal::CompleteParserRecorder recorder;
    preparser::PreParser::PreParseResult result =
diff --git a/src/preparser.cc b/src/preparser.cc

index b1628eb5c54d5dde11b6dedb1372be5bf54acfce..291dfd303f8d80be4fd0775e2e10f5290707d9fd 100644 (file)
--- a/src/preparser.cc
+++ b/src/preparser.cc
@@ -72,7 +72,7 @@ void PreParser::ReportUnexpectedToken(i::Token::Value token) {
    if (token == i::Token::ILLEGAL && stack_overflow_) {
      return;
    }
-  i::JavaScriptScanner::Location source_location = scanner_->location();
+  i::Scanner::Location source_location = scanner_->location();
  
    // Four of the tokens are treated specially
    switch (token) {
@@ -647,7 +647,7 @@ PreParser::Statement PreParser::ParseThrowStatement(bool* ok) {
  
    Expect(i::Token::THROW, CHECK_OK);
    if (scanner_->HasAnyLineTerminatorBeforeNext()) {
-    i::JavaScriptScanner::Location pos = scanner_->location();
+    i::Scanner::Location pos = scanner_->location();
      ReportMessageAt(pos, "newline_after_throw", NULL);
      *ok = false;
      return Statement::Default();
diff --git a/src/preparser.h b/src/preparser.h

index 45e81e9ac5e6253584ec72ea2ee5b99f154a7ad7..647a142b1a152d31fe059fbd72224450bca9d3b5 100644 (file)
--- a/src/preparser.h
+++ b/src/preparser.h
@@ -116,7 +116,7 @@ class PreParser {
    // success (even if parsing failed, the pre-parse data successfully
    // captured the syntax error), and false if a stack-overflow happened
    // during parsing.
-  static PreParseResult PreParseProgram(i::JavaScriptScanner* scanner,
+  static PreParseResult PreParseProgram(i::Scanner* scanner,
                                          i::ParserRecorder* log,
                                          int flags,
                                          uintptr_t stack_limit) {
@@ -449,7 +449,7 @@ class PreParser {
    };
  
    // Private constructor only used in PreParseProgram.
-  PreParser(i::JavaScriptScanner* scanner,
+  PreParser(i::Scanner* scanner,
              i::ParserRecorder* log,
              uintptr_t stack_limit,
              bool allow_lazy,
@@ -619,7 +619,7 @@ class PreParser {
                                       Identifier identifier,
                                       bool* ok);
  
-  i::JavaScriptScanner* scanner_;
+  i::Scanner* scanner_;
    i::ParserRecorder* log_;
    Scope* scope_;
    uintptr_t stack_limit_;
diff --git a/src/scanner.cc b/src/scanner.cc

index 95748f241729f0a7620881a88ec74fbcb399d630..01fe81c64646ccf1ac039ed9a9b4d2a47a1f4461 100755 (executable)
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -36,30 +36,26 @@ namespace v8 {
  namespace internal {
  
  // ----------------------------------------------------------------------------
-// Scanner::LiteralScope
-
-Scanner::LiteralScope::LiteralScope(Scanner* self)
-    : scanner_(self), complete_(false) {
-  self->StartLiteral();
-}
-
+// Scanner
  
-Scanner::LiteralScope::~LiteralScope() {
-  if (!complete_) scanner_->DropLiteral();
-}
+Scanner::Scanner(UnicodeCache* unicode_cache)
+    : unicode_cache_(unicode_cache),
+      octal_pos_(Location::invalid()),
+      harmony_scoping_(false) { }
  
  
-void Scanner::LiteralScope::Complete() {
-  scanner_->TerminateLiteral();
-  complete_ = true;
+void Scanner::Initialize(UC16CharacterStream* source) {
+  source_ = source;
+  // Need to capture identifiers in order to recognize "get" and "set"
+  // in object literals.
+  Init();
+  // Skip initial whitespace allowing HTML comment ends just like
+  // after a newline and scan first token.
+  has_line_terminator_before_next_ = true;
+  SkipWhiteSpace();
+  Scan();
  }
  
-// ----------------------------------------------------------------------------
-// Scanner
-
-Scanner::Scanner(UnicodeCache* unicode_cache)
-    : unicode_cache_(unicode_cache) { }
-
  
  uc32 Scanner::ScanHexNumber(int expected_length) {
    ASSERT(expected_length <= 4);  // prevent overflow
@@ -88,29 +84,6 @@ uc32 Scanner::ScanHexNumber(int expected_length) {
  }
  
  
-
-// ----------------------------------------------------------------------------
-// JavaScriptScanner
-
-JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)
-    : Scanner(scanner_contants),
-      octal_pos_(Location::invalid()),
-      harmony_scoping_(false) { }
-
-
-void JavaScriptScanner::Initialize(UC16CharacterStream* source) {
-  source_ = source;
-  // Need to capture identifiers in order to recognize "get" and "set"
-  // in object literals.
-  Init();
-  // Skip initial whitespace allowing HTML comment ends just like
-  // after a newline and scan first token.
-  has_line_terminator_before_next_ = true;
-  SkipWhiteSpace();
-  Scan();
-}
-
-
  // Ensure that tokens can be stored in a byte.
  STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
  
@@ -247,7 +220,7 @@ static const byte one_char_tokens[] = {
  };
  
  
-Token::Value JavaScriptScanner::Next() {
+Token::Value Scanner::Next() {
    current_ = next_;
    has_line_terminator_before_next_ = false;
    has_multiline_comment_before_next_ = false;
@@ -279,7 +252,7 @@ static inline bool IsByteOrderMark(uc32 c) {
  }
  
  
-bool JavaScriptScanner::SkipWhiteSpace() {
+bool Scanner::SkipWhiteSpace() {
    int start_position = source_pos();
  
    while (true) {
@@ -319,7 +292,7 @@ bool JavaScriptScanner::SkipWhiteSpace() {
  }
  
  
-Token::Value JavaScriptScanner::SkipSingleLineComment() {
+Token::Value Scanner::SkipSingleLineComment() {
    Advance();
  
    // The line terminator at the end of the line is not considered
@@ -335,7 +308,7 @@ Token::Value JavaScriptScanner::SkipSingleLineComment() {
  }
  
  
-Token::Value JavaScriptScanner::SkipMultiLineComment() {
+Token::Value Scanner::SkipMultiLineComment() {
    ASSERT(c0_ == '*');
    Advance();
  
@@ -361,7 +334,7 @@ Token::Value JavaScriptScanner::SkipMultiLineComment() {
  }
  
  
-Token::Value JavaScriptScanner::ScanHtmlComment() {
+Token::Value Scanner::ScanHtmlComment() {
    // Check for <!-- comments.
    ASSERT(c0_ == '!');
    Advance();
@@ -376,7 +349,7 @@ Token::Value JavaScriptScanner::ScanHtmlComment() {
  }
  
  
-void JavaScriptScanner::Scan() {
+void Scanner::Scan() {
    next_.literal_chars = NULL;
    Token::Value token;
    do {
@@ -616,7 +589,7 @@ void JavaScriptScanner::Scan() {
  }
  
  
-void JavaScriptScanner::SeekForward(int pos) {
+void Scanner::SeekForward(int pos) {
    // After this call, we will have the token at the given position as
    // the "next" token. The "current" token will be invalid.
    if (pos == next_.location.beg_pos) return;
@@ -637,7 +610,7 @@ void JavaScriptScanner::SeekForward(int pos) {
  }
  
  
-void JavaScriptScanner::ScanEscape() {
+void Scanner::ScanEscape() {
    uc32 c = c0_;
    Advance();
  
@@ -689,7 +662,7 @@ void JavaScriptScanner::ScanEscape() {
  
  // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
  // ECMA-262. Other JS VMs support them.
-uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) {
+uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
    uc32 x = c - '0';
    int i = 0;
    for (; i < length; i++) {
@@ -712,7 +685,7 @@ uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) {
  }
  
  
-Token::Value JavaScriptScanner::ScanString() {
+Token::Value Scanner::ScanString() {
    uc32 quote = c0_;
    Advance();  // consume quote
  
@@ -736,13 +709,13 @@ Token::Value JavaScriptScanner::ScanString() {
  }
  
  
-void JavaScriptScanner::ScanDecimalDigits() {
+void Scanner::ScanDecimalDigits() {
    while (IsDecimalDigit(c0_))
      AddLiteralCharAdvance();
  }
  
  
-Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
+Token::Value Scanner::ScanNumber(bool seen_period) {
    ASSERT(IsDecimalDigit(c0_));  // the first digit of the number or the fraction
  
    enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
@@ -827,7 +800,7 @@ Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
  }
  
  
-uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
+uc32 Scanner::ScanIdentifierUnicodeEscape() {
    Advance();
    if (c0_ != 'u') return -1;
    Advance();
@@ -944,7 +917,7 @@ static Token::Value KeywordOrIdentifierToken(const char* input,
  }
  
  
-Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
+Token::Value Scanner::ScanIdentifierOrKeyword() {
    ASSERT(unicode_cache_->IsIdentifierStart(c0_));
    LiteralScope literal(this);
    // Scan identifier start character.
@@ -989,7 +962,7 @@ Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
  }
  
  
-Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
+Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {
    // Scan the rest of the identifier characters.
    while (unicode_cache_->IsIdentifierPart(c0_)) {
      if (c0_ == '\\') {
@@ -1012,7 +985,7 @@ Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
  }
  
  
-bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
+bool Scanner::ScanRegExpPattern(bool seen_equal) {
    // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
    bool in_character_class = false;
  
@@ -1059,7 +1032,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
  }
  
  
-bool JavaScriptScanner::ScanLiteralUnicodeEscape() {
+bool Scanner::ScanLiteralUnicodeEscape() {
    ASSERT(c0_ == '\\');
    uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};
    Advance();
@@ -1089,7 +1062,7 @@ bool JavaScriptScanner::ScanLiteralUnicodeEscape() {
  }
  
  
-bool JavaScriptScanner::ScanRegExpFlags() {
+bool Scanner::ScanRegExpFlags() {
    // Scan regular expression flags.
    LiteralScope literal(this);
    while (unicode_cache_->IsIdentifierPart(c0_)) {
diff --git a/src/scanner.h b/src/scanner.h

index 6e689b606a0beee8fff3058b9b83535609eabed0..88e3bceb1b4d5119f13890c45b0f7eeed89b5b90 100644 (file)
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -260,35 +260,32 @@ class LiteralBuffer {
  
  
  // ----------------------------------------------------------------------------
-// Scanner base-class.
+// JavaScript Scanner.
  
-// Generic functionality used by both JSON and JavaScript scanners.
  class Scanner {
   public:
-  // -1 is outside of the range of any real source code.
-  static const int kNoOctalLocation = -1;
-
-  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
-
+  // Scoped helper for literal recording. Automatically drops the literal
+  // if aborting the scanning before it's complete.
    class LiteralScope {
     public:
-    explicit LiteralScope(Scanner* self);
-    ~LiteralScope();
-    void Complete();
+    explicit LiteralScope(Scanner* self)
+        : scanner_(self), complete_(false) {
+      scanner_->StartLiteral();
+    }
+     ~LiteralScope() {
+       if (!complete_) scanner_->DropLiteral();
+     }
+    void Complete() {
+      scanner_->TerminateLiteral();
+      complete_ = true;
+    }
  
     private:
      Scanner* scanner_;
      bool complete_;
    };
  
-  explicit Scanner(UnicodeCache* scanner_contants);
-
-  // Returns the current token again.
-  Token::Value current_token() { return current_.token; }
-
-  // One token look-ahead (past the token returned by Next()).
-  Token::Value peek() const { return next_.token; }
-
+  // Representation of an interval of source positions.
    struct Location {
      Location(int b, int e) : beg_pos(b), end_pos(e) { }
      Location() : beg_pos(0), end_pos(0) { }
@@ -303,21 +300,28 @@ class Scanner {
      int end_pos;
    };
  
+  // -1 is outside of the range of any real source code.
+  static const int kNoOctalLocation = -1;
+
+  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
+
+  explicit Scanner(UnicodeCache* scanner_contants);
+
+  void Initialize(UC16CharacterStream* source);
+
+  // Returns the next token and advances input.
+  Token::Value Next();
+  // Returns the current token again.
+  Token::Value current_token() { return current_.token; }
    // Returns the location information for the current token
-  // (the token returned by Next()).
+  // (the token last returned by Next()).
    Location location() const { return current_.location; }
-  Location peek_location() const { return next_.location; }
-
    // Returns the literal string, if any, for the current token (the
-  // token returned by Next()). The string is 0-terminated and in
-  // UTF-8 format; they may contain 0-characters. Literal strings are
-  // collected for identifiers, strings, and numbers.
+  // token last returned by Next()). The string is 0-terminated.
+  // Literal strings are collected for identifiers, strings, and
+  // numbers.
    // These functions only give the correct result if the literal
    // was scanned between calls to StartLiteral() and TerminateLiteral().
-  bool is_literal_ascii() {
-    ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->is_ascii();
-  }
    Vector<const char> literal_ascii_string() {
      ASSERT_NOT_NULL(current_.literal_chars);
      return current_.literal_chars->ascii_literal();
@@ -326,6 +330,10 @@ class Scanner {
      ASSERT_NOT_NULL(current_.literal_chars);
      return current_.literal_chars->uc16_literal();
    }
+  bool is_literal_ascii() {
+    ASSERT_NOT_NULL(current_.literal_chars);
+    return current_.literal_chars->is_ascii();
+  }
    int literal_length() const {
      ASSERT_NOT_NULL(current_.literal_chars);
      return current_.literal_chars->length();
@@ -341,12 +349,15 @@ class Scanner {
      return current_.literal_chars->length() != source_length;
    }
  
+  // Similar functions for the upcoming token.
+
+  // One token look-ahead (past the token returned by Next()).
+  Token::Value peek() const { return next_.token; }
+
+  Location peek_location() const { return next_.location; }
+
    // Returns the literal string for the next token (the token that
    // would be returned if Next() were called).
-  bool is_next_literal_ascii() {
-    ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->is_ascii();
-  }
    Vector<const char> next_literal_ascii_string() {
      ASSERT_NOT_NULL(next_.literal_chars);
      return next_.literal_chars->ascii_literal();
@@ -355,6 +366,10 @@ class Scanner {
      ASSERT_NOT_NULL(next_.literal_chars);
      return next_.literal_chars->uc16_literal();
    }
+  bool is_next_literal_ascii() {
+    ASSERT_NOT_NULL(next_.literal_chars);
+    return next_.literal_chars->is_ascii();
+  }
    int next_literal_length() const {
      ASSERT_NOT_NULL(next_.literal_chars);
      return next_.literal_chars->length();
@@ -364,7 +379,46 @@ class Scanner {
  
    static const int kCharacterLookaheadBufferSize = 1;
  
- protected:
+  // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
+  uc32 ScanOctalEscape(uc32 c, int length);
+
+  // Returns the location of the last seen octal literal.
+  Location octal_position() const { return octal_pos_; }
+  void clear_octal_position() { octal_pos_ = Location::invalid(); }
+
+  // Seek forward to the given position.  This operation does not
+  // work in general, for instance when there are pushed back
+  // characters, but works for seeking forward until simple delimiter
+  // tokens, which is what it is used for.
+  void SeekForward(int pos);
+
+  bool HarmonyScoping() const {
+    return harmony_scoping_;
+  }
+  void SetHarmonyScoping(bool block_scoping) {
+    harmony_scoping_ = block_scoping;
+  }
+
+
+  // Returns true if there was a line terminator before the peek'ed token,
+  // possibly inside a multi-line comment.
+  bool HasAnyLineTerminatorBeforeNext() const {
+    return has_line_terminator_before_next_ ||
+           has_multiline_comment_before_next_;
+  }
+
+  // Scans the input as a regular expression pattern, previous
+  // character(s) must be /(=). Returns true if a pattern is scanned.
+  bool ScanRegExpPattern(bool seen_equal);
+  // Returns true if regexp flags are scanned (always since flags can
+  // be empty).
+  bool ScanRegExpFlags();
+
+  // Tells whether the buffer contains an identifier (no escapes).
+  // Used for checking if a property name is an identifier.
+  static bool IsIdentifier(unibrow::CharacterStream* buffer);
+
+ private:
    // The current and look-ahead token.
    struct TokenDesc {
      Token::Value token;
@@ -434,107 +488,14 @@ class Scanner {
  
    uc32 ScanHexNumber(int expected_length);
  
-  // Return the current source position.
-  int source_pos() {
-    return source_->pos() - kCharacterLookaheadBufferSize;
-  }
-
-  UnicodeCache* unicode_cache_;
-
-  // Buffers collecting literal strings, numbers, etc.
-  LiteralBuffer literal_buffer1_;
-  LiteralBuffer literal_buffer2_;
-
-  TokenDesc current_;  // desc for current token (as returned by Next())
-  TokenDesc next_;     // desc for next token (one token look-ahead)
-
-  // Input stream. Must be initialized to an UC16CharacterStream.
-  UC16CharacterStream* source_;
-
-  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
-  uc32 c0_;
-};
-
-// ----------------------------------------------------------------------------
-// JavaScriptScanner - base logic for JavaScript scanning.
-
-class JavaScriptScanner : public Scanner {
- public:
-  // A LiteralScope that disables recording of some types of JavaScript
-  // literals. If the scanner is configured to not record the specific
-  // type of literal, the scope will not call StartLiteral.
-  class LiteralScope {
-   public:
-    explicit LiteralScope(JavaScriptScanner* self)
-        : scanner_(self), complete_(false) {
-      scanner_->StartLiteral();
-    }
-     ~LiteralScope() {
-       if (!complete_) scanner_->DropLiteral();
-     }
-    void Complete() {
-      scanner_->TerminateLiteral();
-      complete_ = true;
-    }
-
-   private:
-    JavaScriptScanner* scanner_;
-    bool complete_;
-  };
-
-  explicit JavaScriptScanner(UnicodeCache* scanner_contants);
-
-  void Initialize(UC16CharacterStream* source);
-
-  // Returns the next token.
-  Token::Value Next();
-
-  // Returns true if there was a line terminator before the peek'ed token,
-  // possibly inside a multi-line comment.
-  bool HasAnyLineTerminatorBeforeNext() const {
-    return has_line_terminator_before_next_ ||
-           has_multiline_comment_before_next_;
-  }
-
-  // Scans the input as a regular expression pattern, previous
-  // character(s) must be /(=). Returns true if a pattern is scanned.
-  bool ScanRegExpPattern(bool seen_equal);
-  // Returns true if regexp flags are scanned (always since flags can
-  // be empty).
-  bool ScanRegExpFlags();
-
-  // Tells whether the buffer contains an identifier (no escapes).
-  // Used for checking if a property name is an identifier.
-  static bool IsIdentifier(unibrow::CharacterStream* buffer);
-
-  // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
-  uc32 ScanOctalEscape(uc32 c, int length);
-
-  // Returns the location of the last seen octal literal
-  Location octal_position() const { return octal_pos_; }
-  void clear_octal_position() { octal_pos_ = Location::invalid(); }
-
-  // Seek forward to the given position.  This operation does not
-  // work in general, for instance when there are pushed back
-  // characters, but works for seeking forward until simple delimiter
-  // tokens, which is what it is used for.
-  void SeekForward(int pos);
-
-  bool HarmonyScoping() const {
-    return harmony_scoping_;
-  }
-  void SetHarmonyScoping(bool block_scoping) {
-    harmony_scoping_ = block_scoping;
-  }
-
+  // Scans a single JavaScript token.
+  void Scan();
  
- protected:
    bool SkipWhiteSpace();
    Token::Value SkipSingleLineComment();
    Token::Value SkipMultiLineComment();
-
-  // Scans a single JavaScript token.
-  void Scan();
+  // Scans a possible HTML comment -- begins with '<!'.
+  Token::Value ScanHtmlComment();
  
    void ScanDecimalDigits();
    Token::Value ScanNumber(bool seen_period);
@@ -544,9 +505,6 @@ class JavaScriptScanner : public Scanner {
    void ScanEscape();
    Token::Value ScanString();
  
-  // Scans a possible HTML comment -- begins with '<!'.
-  Token::Value ScanHtmlComment();
-
    // Decodes a unicode escape-sequence which is part of an identifier.
    // If the escape sequence cannot be decoded the result is kBadChar.
    uc32 ScanIdentifierUnicodeEscape();
@@ -555,9 +513,30 @@ class JavaScriptScanner : public Scanner {
    // flags.
    bool ScanLiteralUnicodeEscape();
  
+  // Return the current source position.
+  int source_pos() {
+    return source_->pos() - kCharacterLookaheadBufferSize;
+  }
+
+  UnicodeCache* unicode_cache_;
+
+  // Buffers collecting literal strings, numbers, etc.
+  LiteralBuffer literal_buffer1_;
+  LiteralBuffer literal_buffer2_;
+
+  TokenDesc current_;  // desc for current token (as returned by Next())
+  TokenDesc next_;     // desc for next token (one token look-ahead)
+
+  // Input stream. Must be initialized to an UC16CharacterStream.
+  UC16CharacterStream* source_;
+
+
    // Start position of the octal literal last scanned.
    Location octal_pos_;
  
+  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
+  uc32 c0_;
+
    // Whether there is a line terminator whitespace character after
    // the current token, and  before the next. Does not count newlines
    // inside multiline comments.
diff --git a/test/cctest/test-parsing.cc b/test/cctest/test-parsing.cc

index 8cfd5f71fdce50a61e3759962460de9e64f1d47a..6cdaef4a973254997d790f9eb77708cb43611088 100755 (executable)
--- a/test/cctest/test-parsing.cc
+++ b/test/cctest/test-parsing.cc
@@ -63,7 +63,7 @@ TEST(ScanKeywords) {
      CHECK(static_cast<int>(sizeof(buffer)) >= length);
      {
        i::Utf8ToUC16CharacterStream stream(keyword, length);
-      i::JavaScriptScanner scanner(&unicode_cache);
+      i::Scanner scanner(&unicode_cache);
        // The scanner should parse 'let' as Token::LET for this test.
        scanner.SetHarmonyScoping(true);
        scanner.Initialize(&stream);
@@ -73,7 +73,7 @@ TEST(ScanKeywords) {
      // Removing characters will make keyword matching fail.
      {
        i::Utf8ToUC16CharacterStream stream(keyword, length - 1);
-      i::JavaScriptScanner scanner(&unicode_cache);
+      i::Scanner scanner(&unicode_cache);
        scanner.Initialize(&stream);
        CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
        CHECK_EQ(i::Token::EOS, scanner.Next());
@@ -84,7 +84,7 @@ TEST(ScanKeywords) {
        memmove(buffer, keyword, length);
        buffer[length] = chars_to_append[j];
        i::Utf8ToUC16CharacterStream stream(buffer, length + 1);
-      i::JavaScriptScanner scanner(&unicode_cache);
+      i::Scanner scanner(&unicode_cache);
        scanner.Initialize(&stream);
        CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
        CHECK_EQ(i::Token::EOS, scanner.Next());
@@ -94,7 +94,7 @@ TEST(ScanKeywords) {
        memmove(buffer, keyword, length);
        buffer[length - 1] = '_';
        i::Utf8ToUC16CharacterStream stream(buffer, length);
-      i::JavaScriptScanner scanner(&unicode_cache);
+      i::Scanner scanner(&unicode_cache);
        scanner.Initialize(&stream);
        CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
        CHECK_EQ(i::Token::EOS, scanner.Next());
@@ -257,7 +257,7 @@ TEST(StandAlonePreParser) {
          reinterpret_cast<const i::byte*>(program),
          static_cast<unsigned>(strlen(program)));
      i::CompleteParserRecorder log;
-    i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+    i::Scanner scanner(i::Isolate::Current()->unicode_cache());
      scanner.Initialize(&stream);
  
      int flags = i::kAllowLazy | i::kAllowNativesSyntax;
@@ -293,7 +293,7 @@ TEST(StandAlonePreParserNoNatives) {
          reinterpret_cast<const i::byte*>(program),
          static_cast<unsigned>(strlen(program)));
      i::CompleteParserRecorder log;
-    i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+    i::Scanner scanner(i::Isolate::Current()->unicode_cache());
      scanner.Initialize(&stream);
  
      // Flags don't allow natives syntax.
@@ -394,7 +394,7 @@ TEST(PreParseOverflow) {
        reinterpret_cast<const i::byte*>(*program),
        static_cast<unsigned>(kProgramSize));
    i::CompleteParserRecorder log;
-  i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+  i::Scanner scanner(i::Isolate::Current()->unicode_cache());
    scanner.Initialize(&stream);
  
  
@@ -612,7 +612,7 @@ void TestStreamScanner(i::UC16CharacterStream* stream,
                         i::Token::Value* expected_tokens,
                         int skip_pos = 0,  // Zero means not skipping.
                         int skip_to = 0) {
-  i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+  i::Scanner scanner(i::Isolate::Current()->unicode_cache());
    scanner.Initialize(stream);
  
    int i = 0;
@@ -693,7 +693,7 @@ void TestScanRegExp(const char* re_source, const char* expected) {
    i::Utf8ToUC16CharacterStream stream(
         reinterpret_cast<const i::byte*>(re_source),
         static_cast<unsigned>(strlen(re_source)));
-  i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+  i::Scanner scanner(i::Isolate::Current()->unicode_cache());
    scanner.Initialize(&stream);
  
    i::Token::Value start = scanner.peek();
author	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 1 Nov 2011 07:47:15 +0000 (07:47 +0000)
committer	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 1 Nov 2011 07:47:15 +0000 (07:47 +0000)
src/parser.cc		patch \| blob \| history
src/parser.h		patch \| blob \| history
src/preparser-api.cc		patch \| blob \| history
src/preparser.cc		patch \| blob \| history
src/preparser.h		patch \| blob \| history
src/scanner.cc		patch \| blob \| history
src/scanner.h		patch \| blob \| history
test/cctest/test-parsing.cc		patch \| blob \| history