Ensure that scanner state is correctly reset when an error is encountered.

author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 24 Aug 2010 12:29:50 +0000 (12:29 +0000)

committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 24 Aug 2010 12:29:50 +0000 (12:29 +0000)
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 24 Aug 2010 12:29:50 +0000 (12:29 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 24 Aug 2010 12:29:50 +0000 (12:29 +0000)
diff --git a/src/scanner.cc b/src/scanner.cc

index 7911be0..1a8d721 100755 (executable)
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -319,6 +319,26 @@ void KeywordMatcher::Step(uc32 input) {
  }
  
  
+
+// ----------------------------------------------------------------------------
+// Scanner::LiteralScope
+
+Scanner::LiteralScope::LiteralScope(Scanner* self)
+    : scanner_(self), complete_(false) {
+  self->StartLiteral();
+}
+
+
+Scanner::LiteralScope::~LiteralScope() {
+  if (!complete_) scanner_->DropLiteral();
+}
+
+
+void Scanner::LiteralScope::Complete() {
+  scanner_->TerminateLiteral();
+  complete_ = true;
+}
+
  // ----------------------------------------------------------------------------
  // Scanner
  
@@ -386,8 +406,10 @@ void Scanner::Init(Handle<String> source,
    // Set c0_ (one character ahead)
    ASSERT(kCharacterLookaheadBufferSize == 1);
    Advance();
-  // Initialise current_ to not refer to a literal.
+  // Initialize current_ to not refer to a literal.
    current_.literal_chars = Vector<const char>();
+  // Reset literal buffer.
+  literal_buffer_.Reset();
  
    // Skip initial whitespace allowing HTML comment ends just like
    // after a newline and scan first token.
@@ -423,11 +445,17 @@ void Scanner::AddChar(uc32 c) {
    literal_buffer_.AddChar(c);
  }
  
+
  void Scanner::TerminateLiteral() {
    next_.literal_chars = literal_buffer_.EndLiteral();
  }
  
  
+void Scanner::DropLiteral() {
+  literal_buffer_.DropLiteral();
+}
+
+
  void Scanner::AddCharAdvance() {
    AddChar(c0_);
    Advance();
@@ -636,7 +664,7 @@ void Scanner::ScanJson() {
  Token::Value Scanner::ScanJsonString() {
    ASSERT_EQ('"', c0_);
    Advance();
-  StartLiteral();
+  LiteralScope literal(this);
    while (c0_ != '"' && c0_ > 0) {
      // Check for control character (0x00-0x1f) or unterminated string (<0).
      if (c0_ < 0x20) return Token::ILLEGAL;
@@ -670,7 +698,9 @@ Token::Value Scanner::ScanJsonString() {
            for (int i = 0; i < 4; i++) {
              Advance();
              int digit = HexValue(c0_);
-            if (digit < 0) return Token::ILLEGAL;
+            if (digit < 0) {
+              return Token::ILLEGAL;
+            }
              value = value * 16 + digit;
            }
            AddChar(value);
@@ -685,14 +715,14 @@ Token::Value Scanner::ScanJsonString() {
    if (c0_ != '"') {
      return Token::ILLEGAL;
    }
-  TerminateLiteral();
+  literal.Complete();
    Advance();
    return Token::STRING;
  }
  
  
  Token::Value Scanner::ScanJsonNumber() {
-  StartLiteral();
+  LiteralScope literal(this);
    if (c0_ == '-') AddCharAdvance();
    if (c0_ == '0') {
      AddCharAdvance();
@@ -720,21 +750,21 @@ Token::Value Scanner::ScanJsonNumber() {
        AddCharAdvance();
      } while (c0_ >= '0' && c0_ <= '9');
    }
-  TerminateLiteral();
+  literal.Complete();
    return Token::NUMBER;
  }
  
  
  Token::Value Scanner::ScanJsonIdentifier(const char* text,
                                           Token::Value token) {
-  StartLiteral();
+  LiteralScope literal(this);
    while (*text != '\0') {
      if (c0_ != *text) return Token::ILLEGAL;
      Advance();
      text++;
    }
    if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
-  TerminateLiteral();
+  literal.Complete();
    return token;
  }
  
@@ -1077,7 +1107,7 @@ Token::Value Scanner::ScanString() {
    uc32 quote = c0_;
    Advance();  // consume quote
  
-  StartLiteral();
+  LiteralScope literal(this);
    while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
      uc32 c = c0_;
      Advance();
@@ -1088,10 +1118,8 @@ Token::Value Scanner::ScanString() {
        AddChar(c);
      }
    }
-  if (c0_ != quote) {
-    return Token::ILLEGAL;
-  }
-  TerminateLiteral();
+  if (c0_ != quote) return Token::ILLEGAL;
+  literal.Complete();
  
    Advance();  // consume quote
    return Token::STRING;
@@ -1127,7 +1155,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
  
    enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
  
-  StartLiteral();
+  LiteralScope literal(this);
    if (seen_period) {
      // we have already seen a decimal point of the float
      AddChar('.');
@@ -1143,12 +1171,13 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
          // hex number
          kind = HEX;
          AddCharAdvance();
-        if (!IsHexDigit(c0_))
+        if (!IsHexDigit(c0_)) {
            // we must have at least one hex digit after 'x'/'X'
            return Token::ILLEGAL;
-        while (IsHexDigit(c0_))
+        }
+        while (IsHexDigit(c0_)) {
            AddCharAdvance();
-
+        }
        } else if ('0' <= c0_ && c0_ <= '7') {
          // (possible) octal number
          kind = OCTAL;
@@ -1181,12 +1210,12 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
      AddCharAdvance();
      if (c0_ == '+' || c0_ == '-')
        AddCharAdvance();
-    if (!IsDecimalDigit(c0_))
+    if (!IsDecimalDigit(c0_)) {
        // we must have at least one decimal digit after 'e'/'E'
        return Token::ILLEGAL;
+    }
      ScanDecimalDigits();
    }
-  TerminateLiteral();
  
    // The source character immediately following a numeric literal must
    // not be an identifier start or a decimal digit; see ECMA-262
@@ -1195,6 +1224,8 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
    if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
      return Token::ILLEGAL;
  
+  literal.Complete();
+
    return Token::NUMBER;
  }
  
@@ -1214,7 +1245,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
  Token::Value Scanner::ScanIdentifier() {
    ASSERT(kIsIdentifierStart.get(c0_));
  
-  StartLiteral();
+  LiteralScope literal(this);
    KeywordMatcher keyword_match;
  
    // Scan identifier start character.
@@ -1244,7 +1275,7 @@ Token::Value Scanner::ScanIdentifier() {
        Advance();
      }
    }
-  TerminateLiteral();
+  literal.Complete();
  
    return keyword_match.token();
  }
@@ -1274,36 +1305,32 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
    // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
    // the scanner should pass uninterpreted bodies to the RegExp
    // constructor.
-  StartLiteral();
+  LiteralScope literal(this);
    if (seen_equal)
      AddChar('=');
  
    while (c0_ != '/' || in_character_class) {
-    if (kIsLineTerminator.get(c0_) || c0_ < 0)
-      return false;
+    if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
      if (c0_ == '\\') {  // escaped character
        AddCharAdvance();
-      if (kIsLineTerminator.get(c0_) || c0_ < 0)
-        return false;
+      if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
        AddCharAdvance();
      } else {  // unescaped character
-      if (c0_ == '[')
-        in_character_class = true;
-      if (c0_ == ']')
-        in_character_class = false;
+      if (c0_ == '[') in_character_class = true;
+      if (c0_ == ']') in_character_class = false;
        AddCharAdvance();
      }
    }
    Advance();  // consume '/'
  
-  TerminateLiteral();
+  literal.Complete();
  
    return true;
  }
  
  bool Scanner::ScanRegExpFlags() {
    // Scan regular expression flags.
-  StartLiteral();
+  LiteralScope literal(this);
    while (kIsIdentifierPart.get(c0_)) {
      if (c0_ == '\\') {
        uc32 c = ScanIdentifierUnicodeEscape();
@@ -1316,7 +1343,7 @@ bool Scanner::ScanRegExpFlags() {
      }
      AddCharAdvance();
    }
-  TerminateLiteral();
+  literal.Complete();
  
    next_.location.end_pos = source_pos() - 1;
    return true;
diff --git a/src/scanner.h b/src/scanner.h

index 1c256f1..53aaad6 100644 (file)
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -58,6 +58,14 @@ class UTF8Buffer {
      return Vector<const char>(sequence.start(), sequence.length());
    }
  
+  void DropLiteral() {
+    buffer_.DropSequence();
+  }
+
+  void Reset() {
+    buffer_.Reset();
+  }
+
    // The end marker added after a parsed literal.
    // Using zero allows the usage of strlen and similar functions on
    // identifiers and numbers (but not strings, since they may contain zero
@@ -262,6 +270,17 @@ class Scanner {
   public:
    typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
  
+  class LiteralScope {
+   public:
+    explicit LiteralScope(Scanner* self);
+    ~LiteralScope();
+    void Complete();
+
+   private:
+    Scanner* scanner_;
+    bool complete_;
+  };
+
    // Construction
    explicit Scanner(ParserMode parse_mode);
  
@@ -382,6 +401,8 @@ class Scanner {
    inline void AddChar(uc32 ch);
    inline void AddCharAdvance();
    inline void TerminateLiteral();
+  // Stops scanning of a literal, e.g., due to an encountered error.
+  inline void DropLiteral();
  
    // Low-level scanning support.
    void Advance() { c0_ = source_->Advance(); }
diff --git a/src/utils.h b/src/utils.h

index 8eddb13..90fa74f 100644 (file)
--- a/src/utils.h
+++ b/src/utils.h
@@ -560,6 +560,15 @@ class Collector {
      return Vector<T>(new_store, total_length);
    }
  
+  // Resets the collector to be empty.
+  virtual void Reset() {
+    for (int i = chunks_.length() - 1; i >= 0; i--) {
+      chunks_.at(i).Dispose();
+    }
+    chunks_.Rewind(0);
+    index_ = 0;
+  }
+
   protected:
    static const int kMinCapacity = 16;
    List<Vector<T> > chunks_;
@@ -632,6 +641,18 @@ class SequenceCollector : public Collector<T> {
                       this->index_ - sequence_start);
    }
  
+  // Drops the currently added sequence, and all collected elements in it.
+  void DropSequence() {
+    ASSERT(sequence_start_ != kNoSequence);
+    this->index_ = sequence_start_;
+    sequence_start_ = kNoSequence;
+  }
+
+  virtual void Reset() {
+    sequence_start_ = kNoSequence;
+    this->Collector<T>::Reset();
+  }
+
   private:
    static const int kNoSequence = -1;
    int sequence_start_;
author	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 24 Aug 2010 12:29:50 +0000 (12:29 +0000)
committer	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 24 Aug 2010 12:29:50 +0000 (12:29 +0000)
src/scanner.cc		patch | blob | history
src/scanner.h		patch | blob | history
src/utils.h		patch | blob | history