Fix DCHECK on SetBookmark.
author vogelheim <vogelheim@chromium.org>
Wed, 27 May 2015 15:43:30 +0000 (08:43 -0700)
committer Commit bot <commit-bot@chromium.org>
Wed, 27 May 2015 15:43:42 +0000 (15:43 +0000)
The DCHECK was wrong. The idea was that, when a bookmark is set,
the scanner must be at a character boundary, and hence the
bookmark does not need to save a 'partial' UTF-8 code point. The
first part is true - the Scanner is always at a character boundary -
but the 'partial' UTF-8 code point sits at the end of a block, not at
the current character position of the Scanner.
Hence, the 'partial' character needs to be saved as well.

jkummerow: Thanks for noticing.

BUG=chromium:470930
R=jochen@chromium.org, jkummerow@chromium.org
LOG=N

Review URL: https://codereview.chromium.org/1154773004

Cr-Commit-Position: refs/heads/master@{#28661}

src/scanner-character-streams.cc
src/scanner-character-streams.h

index 98e79aad76fd458715ad69649b692f3083a4ee40..40d2abcf13f79e08c33e75e58445184248c6a696 100644 (file)
@@ -388,14 +388,13 @@ size_t ExternalStreamingStream::FillBuffer(size_t position) {
 
 
 bool ExternalStreamingStream::SetBookmark() {
-  DCHECK(utf8_split_char_buffer_length_ == 0);  // We can't be within a char.
-
   // Bookmarking for this stream is a bit more complex than expected, since
   // the stream state is distributed over several places:
   // - pos_ (inherited from Utf16CharacterStream)
   // - buffer_cursor_ and buffer_end_ (also from Utf16CharacterStream)
   // - buffer_ (from BufferedUtf16CharacterStream)
   // - current_data_ (+ .._offset_ and .._length) (this class)
+  // - utf8_split_char_buffer_* (a partial utf8 symbol at the block boundary)
   //
   // The underlying source_stream_ instance likely could re-construct this
   // local data for us, but with the given interfaces we have no way of
@@ -405,6 +404,7 @@ bool ExternalStreamingStream::SetBookmark() {
   // - pos_  =>  bookmark_
   // - buffer_[buffer_cursor_ .. buffer_end_]  =>  bookmark_buffer_
   // - current_data_[.._offset_ .. .._length_]  =>  bookmark_data_
+  // - utf8_split_char_buffer_* => bookmark_utf8_split...
 
   bookmark_ = pos_;
 
@@ -419,6 +419,11 @@ bool ExternalStreamingStream::SetBookmark() {
   CopyBytes(bookmark_data_.start(), current_data_ + current_data_offset_,
             data_length);
 
+  bookmark_utf8_split_char_buffer_length_ = utf8_split_char_buffer_length_;
+  for (size_t i = 0; i < utf8_split_char_buffer_length_; i++) {
+    bookmark_utf8_split_char_buffer_[i] = utf8_split_char_buffer_[i];
+  }
+
   return source_stream_->SetBookmark();
 }
 
@@ -439,6 +444,12 @@ void ExternalStreamingStream::ResetToBookmark() {
                     bookmark_buffer_.length());
   buffer_cursor_ = buffer_;
   buffer_end_ = buffer_ + bookmark_buffer_.length();
+
+  // utf8 split char buffer
+  utf8_split_char_buffer_length_ = bookmark_utf8_split_char_buffer_length_;
+  for (size_t i = 0; i < bookmark_utf8_split_char_buffer_length_; i++) {
+    utf8_split_char_buffer_[i] = bookmark_utf8_split_char_buffer_[i];
+  }
 }
 
 
index e9047d3212e101675e777151e7a70f58ce290124..582165710db84c1171f74a163e3dcfdeaa8170ac 100644 (file)
@@ -93,7 +93,8 @@ class ExternalStreamingStream : public BufferedUtf16CharacterStream {
         current_data_offset_(0),
         current_data_length_(0),
         utf8_split_char_buffer_length_(0),
-        bookmark_(0) {}
+        bookmark_(0),
+        bookmark_utf8_split_char_buffer_length_(0) {}
 
   virtual ~ExternalStreamingStream() {
     delete[] current_data_;
@@ -133,6 +134,8 @@ class ExternalStreamingStream : public BufferedUtf16CharacterStream {
   size_t bookmark_;
   Vector<uint16_t> bookmark_buffer_;
   Vector<uint8_t> bookmark_data_;
+  uint8_t bookmark_utf8_split_char_buffer_[4];
+  size_t bookmark_utf8_split_char_buffer_length_;
 };