Reapply 3246 and 3247 after fixing valgrind warning.

author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 10 Nov 2009 10:23:23 +0000 (10:23 +0000)

committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 10 Nov 2009 10:23:23 +0000 (10:23 +0000)
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 10 Nov 2009 10:23:23 +0000 (10:23 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 10 Nov 2009 10:23:23 +0000 (10:23 +0000)
diff --git a/src/scanner.cc b/src/scanner.cc

index 23b8aff..0d3b789 100644 (file)
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -49,17 +49,11 @@ StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
  // ----------------------------------------------------------------------------
  // UTF8Buffer
  
-UTF8Buffer::UTF8Buffer() {
-  static const int kInitialCapacity = 1 * KB;
-  data_ = NewArray<char>(kInitialCapacity);
-  limit_ = ComputeLimit(data_, kInitialCapacity);
-  Reset();
-  ASSERT(Capacity() == kInitialCapacity && pos() == 0);
-}
+UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }
  
  
  UTF8Buffer::~UTF8Buffer() {
-  DeleteArray(data_);
+  if (data_ != NULL) DeleteArray(data_);
  }
  
  
@@ -69,7 +63,7 @@ void UTF8Buffer::AddCharSlow(uc32 c) {
      int old_capacity = Capacity();
      int old_position = pos();
      int new_capacity =
-        Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit);
+        Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit);
      char* new_data = NewArray<char>(new_capacity);
      memcpy(new_data, data_, old_position);
      DeleteArray(data_);
@@ -346,12 +340,11 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
  
    position_ = position;
  
-  // Reset literals buffer
-  literals_.Reset();
-
    // Set c0_ (one character ahead)
    ASSERT(kCharacterLookaheadBufferSize == 1);
    Advance();
+  // Initialize current_ to not refer to a literal buffer.
+  current_.literal_buffer = NULL;
  
    // Skip initial whitespace allowing HTML comment ends just like
    // after a newline and scan first token.
@@ -384,17 +377,23 @@ Token::Value Scanner::Next() {
  
  
  void Scanner::StartLiteral() {
-  next_.literal_pos = literals_.pos();
+  // Use the first buffer unless it's currently in use by the current_ token.
+  // In most cases we won't have two literals/identifiers in a row, so
+  // the second buffer won't be used very often and is unlikely to grow much.
+  UTF8Buffer* free_buffer =
+      (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_
+                                                      : &literal_buffer_2_;
+  next_.literal_buffer = free_buffer;
+  free_buffer->Reset();
  }
  
  
  void Scanner::AddChar(uc32 c) {
-  literals_.AddChar(c);
+  next_.literal_buffer->AddChar(c);
  }
  
  
  void Scanner::TerminateLiteral() {
-  next_.literal_end = literals_.pos();
    AddChar(0);
  }
  
@@ -514,6 +513,7 @@ Token::Value Scanner::ScanHtmlComment() {
  
  
  void Scanner::Scan() {
+  next_.literal_buffer = NULL;
    Token::Value token;
    has_line_terminator_before_next_ = false;
    do {
diff --git a/src/scanner.h b/src/scanner.h

index 201803d..dc903cd 100644 (file)
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -41,6 +41,7 @@ class UTF8Buffer {
    ~UTF8Buffer();
  
    void AddChar(uc32 c) {
+    ASSERT_NOT_NULL(data_);
      if (cursor_ <= limit_ &&
          static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
        *cursor_++ = static_cast<char>(c);
@@ -49,16 +50,29 @@ class UTF8Buffer {
      }
    }
  
-  void Reset() { cursor_ = data_; }
-  int pos() const { return cursor_ - data_; }
+  void Reset() {
+    if (data_ == NULL) {
+      data_ = NewArray<char>(kInitialCapacity);
+      limit_ = ComputeLimit(data_, kInitialCapacity);
+    }
+    cursor_ = data_;
+  }
+
+  int pos() const {
+    ASSERT_NOT_NULL(data_);
+    return cursor_ - data_;
+  }
+
    char* data() const { return data_; }
  
   private:
+  static const int kInitialCapacity = 256;
    char* data_;
    char* cursor_;
    char* limit_;
  
    int Capacity() const {
+    ASSERT_NOT_NULL(data_);
      return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize;
    }
  
@@ -278,26 +292,30 @@ class Scanner {
    // token returned by Next()). The string is 0-terminated and in
    // UTF-8 format; they may contain 0-characters. Literal strings are
    // collected for identifiers, strings, and numbers.
+  // These functions only give the correct result if the literal
+  // was scanned between calls to StartLiteral() and TerminateLiteral().
    const char* literal_string() const {
-    return &literals_.data()[current_.literal_pos];
+    return current_.literal_buffer->data();
    }
    int literal_length() const {
-    return current_.literal_end - current_.literal_pos;
-  }
-
-  Vector<const char> next_literal() const {
-    return Vector<const char>(next_literal_string(), next_literal_length());
+    // Excluding terminal '\0' added by TerminateLiteral().
+    return current_.literal_buffer->pos() - 1;
    }
  
    // Returns the literal string for the next token (the token that
    // would be returned if Next() were called).
    const char* next_literal_string() const {
-    return &literals_.data()[next_.literal_pos];
+    return next_.literal_buffer->data();
    }
    // Returns the length of the next token (that would be returned if
    // Next() were called).
    int next_literal_length() const {
-    return next_.literal_end - next_.literal_pos;
+    return next_.literal_buffer->pos() - 1;
+  }
+
+  Vector<const char> next_literal() const {
+    return Vector<const char>(next_literal_string(),
+                              next_literal_length());
    }
  
    // Scans the input as a regular expression pattern, previous
@@ -339,7 +357,8 @@ class Scanner {
  
    // Buffer to hold literal values (identifiers, strings, numbers)
    // using 0-terminated UTF-8 encoding.
-  UTF8Buffer literals_;
+  UTF8Buffer literal_buffer_1_;
+  UTF8Buffer literal_buffer_2_;
  
    bool stack_overflow_;
    static StaticResource<Utf8Decoder> utf8_decoder_;
@@ -351,7 +370,7 @@ class Scanner {
    struct TokenDesc {
      Token::Value token;
      Location location;
-    int literal_pos, literal_end;
+    UTF8Buffer* literal_buffer;
    };
  
    TokenDesc current_;  // desc for current token (as returned by Next())
author	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 10 Nov 2009 10:23:23 +0000 (10:23 +0000)
committer	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 10 Nov 2009 10:23:23 +0000 (10:23 +0000)
src/scanner.cc		patch \| blob \| history
src/scanner.h		patch \| blob \| history