Scanner / Unicode decoding: use size_t instead of unsigned.
author marja <marja@chromium.org>
Thu, 5 Feb 2015 07:54:24 +0000 (23:54 -0800)
committer Commit bot <commit-bot@chromium.org>
Thu, 5 Feb 2015 07:54:34 +0000 (07:54 +0000)
size_t is the correct data type for this purpose. Our APIs (in particular
ExternalSourceStream::GetMoreData) already use it, so the internal code needed
static_casts to convert between size_t and unsigned.

This CL is not intended to fix all of V8; it only covers the smallest part that
makes sense on its own, namely the code around the scanner character streams.

BUG=

Review URL: https://codereview.chromium.org/864273005

Cr-Commit-Position: refs/heads/master@{#26449}

14 files changed:
src/factory.cc
src/heap-snapshot-generator.cc
src/heap/heap.cc
src/objects.cc
src/scanner-character-streams.cc
src/scanner-character-streams.h
src/scanner.h
src/unicode-decoder.cc
src/unicode-decoder.h
src/unicode-inl.h
src/unicode.cc
src/unicode.h
src/utils.h
test/cctest/test-parsing.cc

index ce99df2e08cf243716496b06263b07d5d640af04..faded1923a7753f56198118f75cdce2a68961bed 100644 (file)
@@ -240,7 +240,7 @@ MaybeHandle<String> Factory::NewStringFromUtf8(Vector<const char> string,
       decoder(isolate()->unicode_cache()->utf8_decoder());
   decoder->Reset(string.start() + non_ascii_start,
                  length - non_ascii_start);
-  int utf16_length = decoder->Utf16Length();
+  int utf16_length = static_cast<int>(decoder->Utf16Length());
   DCHECK(utf16_length > 0);
   // Allocate string.
   Handle<SeqTwoByteString> result;
index ad6536e3580921add872e3b4fa17c6c3ae151c5b..8e185184cea3c6b5a9aca3af727b92cc0fac975a 100644 (file)
@@ -3102,7 +3102,7 @@ void HeapSnapshotJSONSerializer::SerializeString(const unsigned char* s) {
           WriteUChar(writer_, *s);
         } else {
           // Convert UTF-8 into \u UTF-16 literal.
-          unsigned length = 1, cursor = 0;
+          size_t length = 1, cursor = 0;
           for ( ; length <= 4 && *(s + length) != '\0'; ++length) { }
           unibrow::uchar c = unibrow::Utf8::CalculateValue(s, length, &cursor);
           if (c != unibrow::Utf8::kBadChar) {
index d5417acf17ae696d7f83fc3172ea2c83b6bcaab6..05c302fb99742a3e109c2dfc96ee84bca8f4c9f9 100644 (file)
@@ -3999,9 +3999,9 @@ static inline void WriteOneByteData(Vector<const char> vector, uint8_t* chars,
 static inline void WriteTwoByteData(Vector<const char> vector, uint16_t* chars,
                                     int len) {
   const uint8_t* stream = reinterpret_cast<const uint8_t*>(vector.start());
-  unsigned stream_length = vector.length();
+  size_t stream_length = vector.length();
   while (stream_length != 0) {
-    unsigned consumed = 0;
+    size_t consumed = 0;
     uint32_t c = unibrow::Utf8::ValueOf(stream, stream_length, &consumed);
     DCHECK(c != unibrow::Utf8::kBadChar);
     DCHECK(consumed <= stream_length);
index da73e8e8c9704b10d5c56bcfe5e50b0e0c1c2832..867f8f7ae819df3bab20cd0643862d0a6fce4482 100644 (file)
@@ -9115,10 +9115,10 @@ bool String::IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match) {
     return false;
   }
   int i;
-  unsigned remaining_in_str = static_cast<unsigned>(str_len);
+  size_t remaining_in_str = static_cast<size_t>(str_len);
   const uint8_t* utf8_data = reinterpret_cast<const uint8_t*>(str.start());
   for (i = 0; i < slen && remaining_in_str > 0; i++) {
-    unsigned cursor = 0;
+    size_t cursor = 0;
     uint32_t r = unibrow::Utf8::ValueOf(utf8_data, remaining_in_str, &cursor);
     DCHECK(cursor > 0 && cursor <= remaining_in_str);
     if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
@@ -9292,13 +9292,13 @@ uint32_t StringHasher::ComputeUtf8Hash(Vector<const char> chars,
   // Start with a fake length which won't affect computation.
   // It will be updated later.
   StringHasher hasher(String::kMaxArrayIndexSize, seed);
-  unsigned remaining = static_cast<unsigned>(vector_length);
+  size_t remaining = static_cast<size_t>(vector_length);
   const uint8_t* stream = reinterpret_cast<const uint8_t*>(chars.start());
   int utf16_length = 0;
   bool is_index = true;
   DCHECK(hasher.is_array_index_);
   while (remaining > 0) {
-    unsigned consumed = 0;
+    size_t consumed = 0;
     uint32_t c = unibrow::Utf8::ValueOf(stream, remaining, &consumed);
     DCHECK(consumed > 0 && consumed <= remaining);
     stream += consumed;
index 50c3955c1bdc888200f593cad0f45ec69900ae18..cc4a18b540f3555a2ea53ab9b9c66147f9979b99 100644 (file)
@@ -15,9 +15,9 @@ namespace internal {
 
 namespace {
 
-unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,
-                         unsigned* src_pos, unsigned src_length,
-                         ScriptCompiler::StreamedSource::Encoding encoding) {
+size_t CopyCharsHelper(uint16_t* dest, size_t length, const uint8_t* src,
+                       size_t* src_pos, size_t src_length,
+                       ScriptCompiler::StreamedSource::Encoding encoding) {
   // It's possible that this will be called with length 0, but don't assume that
   // the functions this calls handle it gracefully.
   if (length == 0) return 0;
@@ -27,7 +27,7 @@ unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,
         dest, length, src, src_pos, src_length);
   }
 
-  unsigned to_fill = length;
+  size_t to_fill = length;
   if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
 
   if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {
@@ -110,13 +110,13 @@ bool BufferedUtf16CharacterStream::ReadBlock() {
     if (buffer_cursor_ < buffer_end_) return true;
     // Otherwise read a new block.
   }
-  unsigned length = FillBuffer(pos_);
+  size_t length = FillBuffer(pos_);
   buffer_end_ = buffer_ + length;
   return length > 0;
 }
 
 
-unsigned BufferedUtf16CharacterStream::SlowSeekForward(unsigned delta) {
+size_t BufferedUtf16CharacterStream::SlowSeekForward(size_t delta) {
   // Leave pushback mode (i.e., ignore that there might be valid data
   // in the buffer before the pushback_limit_ point).
   pushback_limit_ = NULL;
@@ -129,11 +129,8 @@ unsigned BufferedUtf16CharacterStream::SlowSeekForward(unsigned delta) {
 
 
 GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream(
-    Handle<String> data,
-    unsigned start_position,
-    unsigned end_position)
-    : string_(data),
-      length_(end_position) {
+    Handle<String> data, size_t start_position, size_t end_position)
+    : string_(data), length_(end_position) {
   DCHECK(end_position >= start_position);
   pos_ = start_position;
 }
@@ -142,21 +139,22 @@ GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream(
 GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { }
 
 
-unsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) {
-  unsigned old_pos = pos_;
+size_t GenericStringUtf16CharacterStream::BufferSeekForward(size_t delta) {
+  size_t old_pos = pos_;
   pos_ = Min(pos_ + delta, length_);
   ReadBlock();
   return pos_ - old_pos;
 }
 
 
-unsigned GenericStringUtf16CharacterStream::FillBuffer(unsigned from_pos) {
+size_t GenericStringUtf16CharacterStream::FillBuffer(size_t from_pos) {
   if (from_pos >= length_) return 0;
-  unsigned length = kBufferSize;
+  size_t length = kBufferSize;
   if (from_pos + length > length_) {
     length = length_ - from_pos;
   }
-  String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length);
+  String::WriteToFlat<uc16>(*string_, buffer_, static_cast<int>(from_pos),
+                            static_cast<int>(from_pos + length));
   return length;
 }
 
@@ -164,7 +162,7 @@ unsigned GenericStringUtf16CharacterStream::FillBuffer(unsigned from_pos) {
 // ----------------------------------------------------------------------------
 // Utf8ToUtf16CharacterStream
 Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,
-                                                       unsigned length)
+                                                       size_t length)
     : BufferedUtf16CharacterStream(),
       raw_data_(data),
       raw_data_length_(length),
@@ -177,12 +175,11 @@ Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,
 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
 
 
-unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,
-                                               const byte* src,
-                                               unsigned* src_pos,
-                                               unsigned src_length) {
+size_t Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, size_t length,
+                                             const byte* src, size_t* src_pos,
+                                             size_t src_length) {
   static const unibrow::uchar kMaxUtf16Character = 0xffff;
-  unsigned i = 0;
+  size_t i = 0;
   // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer
   // one character early (in the normal case), because we need to have at least
   // two free spaces in the buffer to be sure that the next character will fit.
@@ -206,9 +203,9 @@ unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,
 }
 
 
-unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
-  unsigned old_pos = pos_;
-  unsigned target_pos = pos_ + delta;
+size_t Utf8ToUtf16CharacterStream::BufferSeekForward(size_t delta) {
+  size_t old_pos = pos_;
+  size_t target_pos = pos_ + delta;
   SetRawPosition(target_pos);
   pos_ = raw_character_position_;
   ReadBlock();
@@ -216,15 +213,15 @@ unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
 }
 
 
-unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {
+size_t Utf8ToUtf16CharacterStream::FillBuffer(size_t char_position) {
   SetRawPosition(char_position);
   if (raw_character_position_ != char_position) {
     // char_position was not a valid position in the stream (hit the end
     // while spooling to it).
     return 0u;
   }
-  unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
-                         raw_data_length_);
+  size_t i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
+                       raw_data_length_);
   raw_character_position_ = char_position + i;
   return i;
 }
@@ -249,7 +246,7 @@ static bool IsUtf8MultiCharacterFollower(byte later_byte) {
 
 // Move the cursor back to point at the preceding UTF-8 character start
 // in the buffer.
-static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {
+static inline void Utf8CharacterBack(const byte* buffer, size_t* cursor) {
   byte character = buffer[--*cursor];
   if (character > unibrow::Utf8::kMaxOneByteChar) {
     DCHECK(IsUtf8MultiCharacterFollower(character));
@@ -264,7 +261,7 @@ static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {
 
 // Move the cursor forward to point at the next following UTF-8 character start
 // in the buffer.
-static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
+static inline void Utf8CharacterForward(const byte* buffer, size_t* cursor) {
   byte character = buffer[(*cursor)++];
   if (character > unibrow::Utf8::kMaxOneByteChar) {
     // First character of a multi-byte character encoding.
@@ -279,7 +276,7 @@ static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
     // 2 if value in range 0xE0 .. 0xEF.
     // 3 if value in range 0xF0 .. 0xF7.
     // Encode that in a single value.
-    unsigned additional_bytes =
+    size_t additional_bytes =
         ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;
     *cursor += additional_bytes;
     DCHECK(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));
@@ -291,11 +288,11 @@ static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
 // is no position in the UTF8 stream that corresponds to that.  This assumes
 // that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence.  If
 // it is illegally coded as two 3 byte sequences then there is no problem here.
-void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {
+void Utf8ToUtf16CharacterStream::SetRawPosition(size_t target_position) {
   if (raw_character_position_ > target_position) {
     // Spool backwards in utf8 buffer.
     do {
-      int old_pos = raw_data_pos_;
+      size_t old_pos = raw_data_pos_;
       Utf8CharacterBack(raw_data_, &raw_data_pos_);
       raw_character_position_--;
       DCHECK(old_pos - raw_data_pos_ <= 4);
@@ -309,7 +306,7 @@ void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {
   // Spool forwards in the utf8 buffer.
   while (raw_character_position_ < target_position) {
     if (raw_data_pos_ == raw_data_length_) return;
-    int old_pos = raw_data_pos_;
+    size_t old_pos = raw_data_pos_;
     Utf8CharacterForward(raw_data_, &raw_data_pos_);
     raw_character_position_++;
     DCHECK(raw_data_pos_ - old_pos <= 4);
@@ -320,10 +317,10 @@ void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {
 }
 
 
-unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
+size_t ExternalStreamingStream::FillBuffer(size_t position) {
   // Ignore "position" which is the position in the decoded data. Instead,
   // ExternalStreamingStream keeps track of the position in the raw data.
-  unsigned data_in_buffer = 0;
+  size_t data_in_buffer = 0;
   // Note that the UTF-8 decoder might not be able to fill the buffer
   // completely; it will typically leave the last character empty (see
   // Utf8ToUtf16CharacterStream::CopyChars).
@@ -331,10 +328,8 @@ unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
     if (current_data_ == NULL) {
       // GetSomeData will wait until the embedder has enough data. Here's an
       // interface between the API which uses size_t (which is the correct type
-      // here) and the internal parts which use unsigned. TODO(marja): make the
-      // internal parts use size_t too.
-      current_data_length_ =
-          static_cast<unsigned>(source_stream_->GetMoreData(&current_data_));
+      // here) and the internal parts, which now use size_t as well.
+      current_data_length_ = source_stream_->GetMoreData(&current_data_);
       current_data_offset_ = 0;
       bool data_ends = current_data_length_ == 0;
 
@@ -363,8 +358,8 @@ unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
     }
 
     // Fill the buffer from current_data_.
-    unsigned new_offset = 0;
-    unsigned new_chars_in_buffer =
+    size_t new_offset = 0;
+    size_t new_chars_in_buffer =
         CopyCharsHelper(buffer_ + data_in_buffer, kBufferSize - data_in_buffer,
                         current_data_ + current_data_offset_, &new_offset,
                         current_data_length_ - current_data_offset_, encoding_);
@@ -384,7 +379,7 @@ unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
 }
 
 void ExternalStreamingStream::HandleUtf8SplitCharacters(
-    unsigned* data_in_buffer) {
+    size_t* data_in_buffer) {
   // Note the following property of UTF-8 which makes this function possible:
   // Given any byte, we can always read its local environment (in both
   // directions) to find out the (possibly multi-byte) character it belongs
@@ -407,8 +402,8 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters(
     }
 
     // Convert the data in utf8_split_char_buffer_.
-    unsigned new_offset = 0;
-    unsigned new_chars_in_buffer =
+    size_t new_offset = 0;
+    size_t new_chars_in_buffer =
         CopyCharsHelper(buffer_ + *data_in_buffer,
                         kBufferSize - *data_in_buffer, utf8_split_char_buffer_,
                         &new_offset, utf8_split_char_buffer_length_, encoding_);
@@ -439,7 +434,7 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters(
     }
   }
   CHECK(utf8_split_char_buffer_length_ <= 4);
-  for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
+  for (size_t i = 0; i < utf8_split_char_buffer_length_; ++i) {
     utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];
   }
 }
index 3c1cccc4802a74b519e72d39fbc7ab760e71aef5..a391a659a4a829c6c31c078a643abb043602dc7f 100644 (file)
@@ -21,15 +21,15 @@ class BufferedUtf16CharacterStream: public Utf16CharacterStream {
   virtual void PushBack(uc32 character);
 
  protected:
-  static const unsigned kBufferSize = 512;
-  static const unsigned kPushBackStepSize = 16;
+  static const size_t kBufferSize = 512;
+  static const size_t kPushBackStepSize = 16;
 
-  virtual unsigned SlowSeekForward(unsigned delta);
+  virtual size_t SlowSeekForward(size_t delta);
   virtual bool ReadBlock();
   virtual void SlowPushBack(uc16 character);
 
-  virtual unsigned BufferSeekForward(unsigned delta) = 0;
-  virtual unsigned FillBuffer(unsigned position) = 0;
+  virtual size_t BufferSeekForward(size_t delta) = 0;
+  virtual size_t FillBuffer(size_t position) = 0;
 
   const uc16* pushback_limit_;
   uc16 buffer_[kBufferSize];
@@ -39,40 +39,39 @@ class BufferedUtf16CharacterStream: public Utf16CharacterStream {
 // Generic string stream.
 class GenericStringUtf16CharacterStream: public BufferedUtf16CharacterStream {
  public:
-  GenericStringUtf16CharacterStream(Handle<String> data,
-                                    unsigned start_position,
-                                    unsigned end_position);
+  GenericStringUtf16CharacterStream(Handle<String> data, size_t start_position,
+                                    size_t end_position);
   virtual ~GenericStringUtf16CharacterStream();
 
  protected:
-  virtual unsigned BufferSeekForward(unsigned delta);
-  virtual unsigned FillBuffer(unsigned position);
+  virtual size_t BufferSeekForward(size_t delta);
+  virtual size_t FillBuffer(size_t position);
 
   Handle<String> string_;
-  unsigned length_;
+  size_t length_;
 };
 
 
 // Utf16 stream based on a literal UTF-8 string.
 class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
  public:
-  Utf8ToUtf16CharacterStream(const byte* data, unsigned length);
+  Utf8ToUtf16CharacterStream(const byte* data, size_t length);
   virtual ~Utf8ToUtf16CharacterStream();
 
-  static unsigned CopyChars(uint16_t* dest, unsigned length, const byte* src,
-                            unsigned* src_pos, unsigned src_length);
+  static size_t CopyChars(uint16_t* dest, size_t length, const byte* src,
+                          size_t* src_pos, size_t src_length);
 
  protected:
-  virtual unsigned BufferSeekForward(unsigned delta);
-  virtual unsigned FillBuffer(unsigned char_position);
-  void SetRawPosition(unsigned char_position);
+  virtual size_t BufferSeekForward(size_t delta);
+  virtual size_t FillBuffer(size_t char_position);
+  void SetRawPosition(size_t char_position);
 
   const byte* raw_data_;
-  unsigned raw_data_length_;  // Measured in bytes, not characters.
-  unsigned raw_data_pos_;
+  size_t raw_data_length_;  // Measured in bytes, not characters.
+  size_t raw_data_pos_;
   // The character position of the character at raw_data[raw_data_pos_].
   // Not necessarily the same as pos_.
-  unsigned raw_character_position_;
+  size_t raw_character_position_;
 };
 
 
@@ -91,7 +90,7 @@ class ExternalStreamingStream : public BufferedUtf16CharacterStream {
 
   virtual ~ExternalStreamingStream() { delete[] current_data_; }
 
-  unsigned BufferSeekForward(unsigned delta) OVERRIDE {
+  size_t BufferSeekForward(size_t delta) OVERRIDE {
     // We never need to seek forward when streaming scripts. We only seek
     // forward when we want to parse a function whose location we already know,
     // and when streaming, we don't know the locations of anything we haven't
@@ -100,19 +99,19 @@ class ExternalStreamingStream : public BufferedUtf16CharacterStream {
     return 0;
   }
 
-  unsigned FillBuffer(unsigned position) OVERRIDE;
+  size_t FillBuffer(size_t position) OVERRIDE;
 
  private:
-  void HandleUtf8SplitCharacters(unsigned* data_in_buffer);
+  void HandleUtf8SplitCharacters(size_t* data_in_buffer);
 
   ScriptCompiler::ExternalSourceStream* source_stream_;
   v8::ScriptCompiler::StreamedSource::Encoding encoding_;
   const uint8_t* current_data_;
-  unsigned current_data_offset_;
-  unsigned current_data_length_;
+  size_t current_data_offset_;
+  size_t current_data_length_;
   // For converting UTF-8 characters which are split across two data chunks.
   uint8_t utf8_split_char_buffer_[4];
-  unsigned utf8_split_char_buffer_length_;
+  size_t utf8_split_char_buffer_length_;
 };
 
 
@@ -131,7 +130,7 @@ class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
   }
 
  protected:
-  virtual unsigned SlowSeekForward(unsigned delta) {
+  virtual size_t SlowSeekForward(size_t delta) {
     // Fast case always handles seeking.
     return 0;
   }
index 8537c5308c9f73e8dbf2f5f97e8188f195d531cc..86a0098f86c0b4c97b4958148c4f6d27f04ce9b8 100644 (file)
@@ -67,15 +67,14 @@ class Utf16CharacterStream {
 
   // Return the current position in the code unit stream.
   // Starts at zero.
-  inline unsigned pos() const { return pos_; }
+  inline size_t pos() const { return pos_; }
 
   // Skips forward past the next code_unit_count UTF-16 code units
   // in the input, or until the end of input if that comes sooner.
   // Returns the number of code units actually skipped. If less
   // than code_unit_count,
-  inline unsigned SeekForward(unsigned code_unit_count) {
-    unsigned buffered_chars =
-        static_cast<unsigned>(buffer_end_ - buffer_cursor_);
+  inline size_t SeekForward(size_t code_unit_count) {
+    size_t buffered_chars = buffer_end_ - buffer_cursor_;
     if (code_unit_count <= buffered_chars) {
       buffer_cursor_ += code_unit_count;
       pos_ += code_unit_count;
@@ -98,11 +97,11 @@ class Utf16CharacterStream {
   // is at or after the end of the input, return false. If there
   // are more code_units available, return true.
   virtual bool ReadBlock() = 0;
-  virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
+  virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
 
   const uint16_t* buffer_cursor_;
   const uint16_t* buffer_end_;
-  unsigned pos_;
+  size_t pos_;
 };
 
 
@@ -697,7 +696,7 @@ class Scanner {
 
   // Return the current source position.
   int source_pos() {
-    return source_->pos() - kCharacterLookaheadBufferSize;
+    return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
   }
 
   UnicodeCache* unicode_cache_;
index 88eff3ad2660d3b230bfe04922b5c9dfede8a2f9..a3bf829522688b27a84b74e3322eed5a0c4088f3 100644 (file)
 
 namespace unibrow {
 
-void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
-                            const uint8_t* stream, unsigned stream_length) {
+void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
+                            const uint8_t* stream, size_t stream_length) {
   // Assume everything will fit in the buffer and stream won't be needed.
   last_byte_of_buffer_unused_ = false;
   unbuffered_start_ = NULL;
   bool writing_to_buffer = true;
   // Loop until stream is read, writing to buffer as long as buffer has space.
-  unsigned utf16_length = 0;
+  size_t utf16_length = 0;
   while (stream_length != 0) {
-    unsigned cursor = 0;
+    size_t cursor = 0;
     uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
     DCHECK(cursor > 0 && cursor <= stream_length);
     stream += cursor;
@@ -56,9 +56,9 @@ void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
 
 
 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
-                                     unsigned data_length) {
+                                     size_t data_length) {
   while (data_length != 0) {
-    unsigned cursor = 0;
+    size_t cursor = 0;
     uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
     // There's a total lack of bounds checking for stream
     // as it was already done in Reset.
index 35ea30cf1a5ad63ef02a41a9cbc377264edc2904..bfb14a38555244c529761724e901aa41584d8900 100644 (file)
@@ -14,32 +14,32 @@ class Utf8DecoderBase {
  public:
   // Initialization done in subclass.
   inline Utf8DecoderBase();
-  inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
-                         const uint8_t* stream, unsigned stream_length);
-  inline unsigned Utf16Length() const { return utf16_length_; }
+  inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
+                         const uint8_t* stream, size_t stream_length);
+  inline size_t Utf16Length() const { return utf16_length_; }
 
  protected:
   // This reads all characters and sets the utf16_length_.
   // The first buffer_length utf16 chars are cached in the buffer.
-  void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream,
-             unsigned stream_length);
+  void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream,
+             size_t stream_length);
   static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
-                             unsigned length);
+                             size_t length);
   const uint8_t* unbuffered_start_;
-  unsigned utf16_length_;
+  size_t utf16_length_;
   bool last_byte_of_buffer_unused_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
 };
 
-template <unsigned kBufferSize>
+template <size_t kBufferSize>
 class Utf8Decoder : public Utf8DecoderBase {
  public:
   inline Utf8Decoder() {}
-  inline Utf8Decoder(const char* stream, unsigned length);
-  inline void Reset(const char* stream, unsigned length);
-  inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;
+  inline Utf8Decoder(const char* stream, size_t length);
+  inline void Reset(const char* stream, size_t length);
+  inline size_t WriteUtf16(uint16_t* data, size_t length) const;
 
  private:
   uint16_t buffer_[kBufferSize];
@@ -52,35 +52,34 @@ Utf8DecoderBase::Utf8DecoderBase()
       last_byte_of_buffer_unused_(false) {}
 
 
-Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
-                                 const uint8_t* stream,
-                                 unsigned stream_length) {
+Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
+                                 const uint8_t* stream, size_t stream_length) {
   Reset(buffer, buffer_length, stream, stream_length);
 }
 
 
-template <unsigned kBufferSize>
-Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
+template <size_t kBufferSize>
+Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, size_t length)
     : Utf8DecoderBase(buffer_, kBufferSize,
                       reinterpret_cast<const uint8_t*>(stream), length) {}
 
 
-template <unsigned kBufferSize>
-void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
+template <size_t kBufferSize>
+void Utf8Decoder<kBufferSize>::Reset(const char* stream, size_t length) {
   Utf8DecoderBase::Reset(buffer_, kBufferSize,
                          reinterpret_cast<const uint8_t*>(stream), length);
 }
 
 
-template <unsigned kBufferSize>
-unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
-                                              unsigned length) const {
+template <size_t kBufferSize>
+size_t Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
+                                            size_t length) const {
   DCHECK(length > 0);
   if (length > utf16_length_) length = utf16_length_;
   // memcpy everything in buffer.
-  unsigned buffer_length =
+  size_t buffer_length =
       last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
-  unsigned memcpy_length = length <= buffer_length ? length : buffer_length;
+  size_t memcpy_length = length <= buffer_length ? length : buffer_length;
   v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
   if (length <= buffer_length) return length;
   DCHECK(unbuffered_start_ != NULL);
index 0f78d39e0656f6ba804a820c748800ae8753c511..b22e4825280dc78836dcb4e032fec65c614dbcd7 100644 (file)
@@ -110,7 +110,7 @@ unsigned Utf8::Encode(char* str,
 }
 
 
-uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) {
+uchar Utf8::ValueOf(const byte* bytes, size_t length, size_t* cursor) {
   if (length <= 0) return kBadChar;
   byte first = bytes[0];
   // Characters between 0000 and 0007F are encoded as a single character
index 26a336afd8dd47dd267d59c076ccd264d44339a6..0d0d63d1775660018b57e4e3e107110371a55661 100644 (file)
@@ -190,9 +190,7 @@ static int LookupMapping(const int32_t* table,
 }
 
 
-uchar Utf8::CalculateValue(const byte* str,
-                           unsigned length,
-                           unsigned* cursor) {
+uchar Utf8::CalculateValue(const byte* str, size_t length, size_t* cursor) {
   // We only get called for non-ASCII characters.
   if (length == 1) {
     *cursor += 1;
index 166681426ff9187ca068ce61cafc6ddcfc2b1ff0..7471a638c045d169e58bceae78087615b06202a9 100644 (file)
@@ -136,9 +136,7 @@ class Utf8 {
                                 uchar c,
                                 int previous,
                                 bool replace_invalid = false);
-  static uchar CalculateValue(const byte* str,
-                              unsigned length,
-                              unsigned* cursor);
+  static uchar CalculateValue(const byte* str, size_t length, size_t* cursor);
 
   // The unicode replacement character, used to signal invalid unicode
   // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding.
@@ -156,9 +154,7 @@ class Utf8 {
   // The maximum size a single UTF-16 code unit may take up when encoded as
   // UTF-8.
   static const unsigned kMax16BitCodeUnitSize  = 3;
-  static inline uchar ValueOf(const byte* str,
-                              unsigned length,
-                              unsigned* cursor);
+  static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor);
 };
 
 struct Uppercase {
index 4491f5584eab3613960d270be77f2bb8f2964ad3..6a43c77228c217e0170a00b2dc17cb7c11240af5 100644 (file)
@@ -733,9 +733,8 @@ class SequenceCollector : public Collector<T, growth_factor, max_growth> {
 
 // Compare 8bit/16bit chars to 8bit/16bit chars.
 template <typename lchar, typename rchar>
-inline int CompareCharsUnsigned(const lchar* lhs,
-                                const rchar* rhs,
-                                int chars) {
+inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs,
+                                size_t chars) {
   const lchar* limit = lhs + chars;
   if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) {
     // memcmp compares byte-by-byte, yielding wrong results for two-byte
@@ -751,8 +750,8 @@ inline int CompareCharsUnsigned(const lchar* lhs,
   return 0;
 }
 
-template<typename lchar, typename rchar>
-inline int CompareChars(const lchar* lhs, const rchar* rhs, int chars) {
+template <typename lchar, typename rchar>
+inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) {
   DCHECK(sizeof(lchar) <= 2);
   DCHECK(sizeof(rchar) <= 2);
   if (sizeof(lchar) == 1) {
@@ -1317,27 +1316,30 @@ Vector<const char> ReadFile(FILE* file,
 
 
 template <typename sourcechar, typename sinkchar>
-INLINE(static void CopyCharsUnsigned(sinkchar* dest,
-                                     const sourcechar* src,
-                                     int chars));
+INLINE(static void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src,
+                                     size_t chars));
 #if defined(V8_HOST_ARCH_ARM)
-INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars));
-INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars));
-INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars));
+INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars));
+INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src,
+                              size_t chars));
+INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src,
+                              size_t chars));
 #elif defined(V8_HOST_ARCH_MIPS)
-INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars));
-INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars));
+INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars));
+INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src,
+                              size_t chars));
 #elif defined(V8_HOST_ARCH_PPC)
-INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars));
-INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars));
+INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars));
+INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src,
+                              size_t chars));
 #endif
 
 // Copy from 8bit/16bit chars to 8bit/16bit chars.
 template <typename sourcechar, typename sinkchar>
-INLINE(void CopyChars(sinkchar* dest, const sourcechar* src, int chars));
+INLINE(void CopyChars(sinkchar* dest, const sourcechar* src, size_t chars));
 
-template<typename sourcechar, typename sinkchar>
-void CopyChars(sinkchar* dest, const sourcechar* src, int chars) {
+template <typename sourcechar, typename sinkchar>
+void CopyChars(sinkchar* dest, const sourcechar* src, size_t chars) {
   DCHECK(sizeof(sourcechar) <= 2);
   DCHECK(sizeof(sinkchar) <= 2);
   if (sizeof(sinkchar) == 1) {
@@ -1364,7 +1366,7 @@ void CopyChars(sinkchar* dest, const sourcechar* src, int chars) {
 }
 
 template <typename sourcechar, typename sinkchar>
-void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars) {
+void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, size_t chars) {
   sinkchar* limit = dest + chars;
   if ((sizeof(*dest) == sizeof(*src)) &&
       (chars >= static_cast<int>(kMinComplexMemCopy / sizeof(*dest)))) {
@@ -1376,7 +1378,7 @@ void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars) {
 
 
 #if defined(V8_HOST_ARCH_ARM)
-void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
+void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars) {
   switch (static_cast<unsigned>(chars)) {
     case 0:
       break;
@@ -1432,7 +1434,7 @@ void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
 }
 
 
-void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars) {
+void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, size_t chars) {
   if (chars >= kMinComplexConvertMemCopy) {
     MemCopyUint16Uint8(dest, src, chars);
   } else {
@@ -1441,7 +1443,7 @@ void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars) {
 }
 
 
-void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
+void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, size_t chars) {
   switch (static_cast<unsigned>(chars)) {
     case 0:
       break;
@@ -1474,7 +1476,7 @@ void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
 
 
 #elif defined(V8_HOST_ARCH_MIPS)
-void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
+void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars) {
   if (chars < kMinComplexMemCopy) {
     memcpy(dest, src, chars);
   } else {
@@ -1482,7 +1484,7 @@ void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
   }
 }
 
-void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
+void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, size_t chars) {
   if (chars < kMinComplexMemCopy) {
     memcpy(dest, src, chars * sizeof(*dest));
   } else {
@@ -1494,7 +1496,7 @@ void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
   case n:                 \
     memcpy(dest, src, n); \
     break
-void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
+void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, size_t chars) {
   switch (static_cast<unsigned>(chars)) {
     case 0:
       break;
@@ -1575,7 +1577,7 @@ void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
   case n:                     \
     memcpy(dest, src, n * 2); \
     break
-void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
+void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, size_t chars) {
   switch (static_cast<unsigned>(chars)) {
     case 0:
       break;
index 9dcbaaaaadf4712a4bf309dd24dc4388f218167b..ec1c9dd5aa2e930810e4629db78a7389ee9a0bd2 100644 (file)
@@ -696,7 +696,7 @@ TEST(Utf8CharacterStream) {
   int i = 0;
   while (stream.pos() < kMaxUC16CharU) {
     CHECK_EQU(i, stream.pos());
-    unsigned progress = stream.SeekForward(12);
+    int progress = static_cast<int>(stream.SeekForward(12));
     i += progress;
     int32_t c = stream.Advance();
     if (i <= kMaxUC16Char) {