Replace ToAsciiVector and ToUC16Vector with single function that returns a tagged...
author: lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 23 Aug 2011 12:22:12 +0000 (12:22 +0000)
committer: lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 23 Aug 2011 12:22:12 +0000 (12:22 +0000)
The tag tells whether the content is ASCII or UC16, or even if the string wasn't flat.

BUG: v8:1633

Review URL: http://codereview.chromium.org/7709024

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8999 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/handles.cc
src/hydrogen-instructions.cc
src/interpreter-irregexp.cc
src/jsregexp.cc
src/objects-inl.h
src/objects.cc
src/objects.h
src/runtime.cc
test/mjsunit/string-split.js

index c9984aa..e7d0f9b 100644 (file)
@@ -617,15 +617,17 @@ Handle<FixedArray> CalculateLineEnds(Handle<String> src,
   {
     AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid.
     // Dispatch on type of strings.
-    if (src->IsAsciiRepresentation()) {
+    String::FlatContent content = src->GetFlatContent(no_heap_allocation);
+    ASSERT(content.IsFlat());
+    if (content.IsAscii()) {
       CalculateLineEnds(isolate,
                         &line_ends,
-                        src->ToAsciiVector(),
+                        content.ToAsciiVector(),
                         with_last_line);
     } else {
       CalculateLineEnds(isolate,
                         &line_ends,
-                        src->ToUC16Vector(),
+                        content.ToUC16Vector(),
                         with_last_line);
     }
   }
index 1762186..df22bfd 100644 (file)
@@ -778,7 +778,8 @@ void HHasInstanceTypeAndBranch::PrintDataTo(StringStream* stream) {
 void HTypeofIsAndBranch::PrintDataTo(StringStream* stream) {
   value()->PrintNameTo(stream);
   stream->Add(" == ");
-  stream->Add(type_literal_->ToAsciiVector());
+  AssertNoAllocation no_alloc;
+  stream->Add(type_literal_->GetFlatContent(no_alloc).ToAsciiVector());
 }
 
 
index 1c6c52c..45f6075 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2011 the V8 project authors. All rights reserved.
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -635,8 +635,9 @@ bool IrregexpInterpreter::Match(Isolate* isolate,
   AssertNoAllocation a;
   const byte* code_base = code_array->GetDataStartAddress();
   uc16 previous_char = '\n';
-  if (subject->IsAsciiRepresentation()) {
-    Vector<const char> subject_vector = subject->ToAsciiVector();
+  String::FlatContent subject_content = subject->GetFlatContent(a);
+  if (subject_content.IsAscii()) {
+    Vector<const char> subject_vector = subject_content.ToAsciiVector();
     if (start_position != 0) previous_char = subject_vector[start_position - 1];
     return RawMatch(isolate,
                     code_base,
@@ -645,7 +646,8 @@ bool IrregexpInterpreter::Match(Isolate* isolate,
                     start_position,
                     previous_char);
   } else {
-    Vector<const uc16> subject_vector = subject->ToUC16Vector();
+    ASSERT(subject_content.IsTwoByte());
+    Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
     if (start_position != 0) previous_char = subject_vector[start_position - 1];
     return RawMatch(isolate,
                     code_base,
index af64437..ed7b968 100644 (file)
@@ -212,19 +212,7 @@ static void SetAtomLastCapture(FixedArray* array,
   RegExpImpl::SetCapture(array, 1, to);
 }
 
-  /* template <typename SubjectChar>, typename PatternChar>
-static int ReStringMatch(Vector<const SubjectChar> sub_vector,
-                         Vector<const PatternChar> pat_vector,
-                         int start_index) {
 
-  int pattern_length = pat_vector.length();
-  if (pattern_length == 0) return start_index;
-
-  int subject_length = sub_vector.length();
-  if (start_index + pattern_length > subject_length) return -1;
-  return SearchString(sub_vector, pat_vector, start_index);
-}
-  */
 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
                                     Handle<String> subject,
                                     int index,
@@ -237,35 +225,41 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
   if (!subject->IsFlat()) FlattenString(subject);
   AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
   // Extract flattened substrings of cons strings before determining asciiness.
-  String* seq_sub = *subject;
-  if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
 
   String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
   int needle_len = needle->length();
+  ASSERT(needle->IsFlat());
 
   if (needle_len != 0) {
-    if (index + needle_len > subject->length())
-        return isolate->factory()->null_value();
+    if (index + needle_len > subject->length()) {
+      return isolate->factory()->null_value();
+    }
 
+    String::FlatContent needle_content =
+        needle->GetFlatContent(no_heap_allocation);
+    String::FlatContent subject_content =
+        subject->GetFlatContent(no_heap_allocation);
+    ASSERT(needle_content.IsFlat());
+    ASSERT(subject_content.IsFlat());
     // dispatch on type of strings
-    index = (needle->IsAsciiRepresentation()
-             ? (seq_sub->IsAsciiRepresentation()
+    index = (needle_content.IsAscii()
+             ? (subject_content.IsAscii()
                 ? SearchString(isolate,
-                               seq_sub->ToAsciiVector(),
-                               needle->ToAsciiVector(),
+                               subject_content.ToAsciiVector(),
+                               needle_content.ToAsciiVector(),
                                index)
                 : SearchString(isolate,
-                               seq_sub->ToUC16Vector(),
-                               needle->ToAsciiVector(),
+                               subject_content.ToUC16Vector(),
+                               needle_content.ToAsciiVector(),
                                index))
-             : (seq_sub->IsAsciiRepresentation()
+             : (subject_content.IsAscii()
                 ? SearchString(isolate,
-                               seq_sub->ToAsciiVector(),
-                               needle->ToUC16Vector(),
+                               subject_content.ToAsciiVector(),
+                               needle_content.ToUC16Vector(),
                                index)
                 : SearchString(isolate,
-                               seq_sub->ToUC16Vector(),
-                               needle->ToUC16Vector(),
+                               subject_content.ToUC16Vector(),
+                               needle_content.ToUC16Vector(),
                                index)));
     if (index == -1) return isolate->factory()->null_value();
   }
index c5fda89..098bd7a 100644 (file)
@@ -297,6 +297,11 @@ StringRepresentationTag StringShape::representation_tag() {
 }
 
 
+uint32_t StringShape::encoding_tag() {
+  return type_ & kStringEncodingMask;
+}
+
+
 uint32_t StringShape::full_representation_tag() {
   return (type_ & (kStringRepresentationMask | kStringEncodingMask));
 }
index 1b29071..031c8bb 100644 (file)
@@ -5038,55 +5038,38 @@ int String::Utf8Length() {
 }
 
 
-Vector<const char> String::ToAsciiVector() {
-  ASSERT(IsAsciiRepresentation());
-  ASSERT(IsFlat());
-
-  int offset = 0;
-  int length = this->length();
-  StringRepresentationTag string_tag = StringShape(this).representation_tag();
-  String* string = this;
-  if (string_tag == kConsStringTag) {
-    ConsString* cons = ConsString::cast(string);
-    ASSERT(cons->second()->length() == 0);
-    string = cons->first();
-    string_tag = StringShape(string).representation_tag();
-  }
-  if (string_tag == kSeqStringTag) {
-    SeqAsciiString* seq = SeqAsciiString::cast(string);
-    char* start = seq->GetChars();
-    return Vector<const char>(start + offset, length);
-  }
-  ASSERT(string_tag == kExternalStringTag);
-  ExternalAsciiString* ext = ExternalAsciiString::cast(string);
-  const char* start = ext->resource()->data();
-  return Vector<const char>(start + offset, length);
-}
-
-
-Vector<const uc16> String::ToUC16Vector() {
-  ASSERT(IsTwoByteRepresentation());
-  ASSERT(IsFlat());
-
-  int offset = 0;
+String::FlatContent String::GetFlatContent(const AssertNoAllocation& promise) {
+  // Argument isn't used, it's only there to ensure that the user is
+  // aware that the extracted vectors may not survive a GC.
   int length = this->length();
-  StringRepresentationTag string_tag = StringShape(this).representation_tag();
+  StringShape shape(this);
   String* string = this;
-  if (string_tag == kConsStringTag) {
+  if (shape.representation_tag() == kConsStringTag) {
     ConsString* cons = ConsString::cast(string);
-    ASSERT(cons->second()->length() == 0);
+    if (cons->second()->length() != 0) {
+      return FlatContent();
+    }
     string = cons->first();
-    string_tag = StringShape(string).representation_tag();
+    shape = StringShape(string);
   }
-  if (string_tag == kSeqStringTag) {
-    SeqTwoByteString* seq = SeqTwoByteString::cast(string);
-    return Vector<const uc16>(seq->GetChars() + offset, length);
+  if (shape.encoding_tag() == kAsciiStringTag) {
+    const char* start;
+    if (shape.representation_tag() == kSeqStringTag) {
+      start = SeqAsciiString::cast(string)->GetChars();
+    } else {
+      start = ExternalAsciiString::cast(string)->resource()->data();
+    }
+    return FlatContent(Vector<const char>(start, length));
+  } else {
+    ASSERT(shape.encoding_tag() == kTwoByteStringTag);
+    const uc16* start;
+    if (shape.representation_tag() == kSeqStringTag) {
+      start = SeqTwoByteString::cast(string)->GetChars();
+    } else {
+      start = ExternalTwoByteString::cast(string)->resource()->data();
+    }
+    return FlatContent(Vector<const uc16>(start, length));
   }
-  ASSERT(string_tag == kExternalStringTag);
-  ExternalTwoByteString* ext = ExternalTwoByteString::cast(string);
-  const uc16* start =
-      reinterpret_cast<const uc16*>(ext->resource()->data());
-  return Vector<const uc16>(start + offset, length);
 }
 
 
@@ -5536,11 +5519,14 @@ void FlatStringReader::PostGarbageCollection() {
   if (str_ == NULL) return;
   Handle<String> str(str_);
   ASSERT(str->IsFlat());
-  is_ascii_ = str->IsAsciiRepresentation();
+  AssertNoAllocation no_alloc;
+  String::FlatContent content = str->GetFlatContent(no_alloc);
+  ASSERT(content.IsFlat());
+  is_ascii_ = content.IsAscii();
   if (is_ascii_) {
-    start_ = str->ToAsciiVector().start();
+    start_ = content.ToAsciiVector().start();
   } else {
-    start_ = str->ToUC16Vector().start();
+    start_ = content.ToUC16Vector().start();
   }
 }
 
@@ -5860,12 +5846,14 @@ template <typename IteratorA>
 static inline bool CompareStringContentsPartial(Isolate* isolate,
                                                 IteratorA* ia,
                                                 String* b) {
-  if (b->IsFlat()) {
-    if (b->IsAsciiRepresentation()) {
-      VectorIterator<char> ib(b->ToAsciiVector());
+  AssertNoAllocation no_alloc;
+  String::FlatContent content = b->GetFlatContent(no_alloc);
+  if (content.IsFlat()) {
+    if (content.IsAscii()) {
+      VectorIterator<char> ib(content.ToAsciiVector());
       return CompareStringContents(ia, &ib);
     } else {
-      VectorIterator<uc16> ib(b->ToUC16Vector());
+      VectorIterator<uc16> ib(content.ToUC16Vector());
       return CompareStringContents(ia, &ib);
     }
   } else {
@@ -5895,6 +5883,8 @@ bool String::SlowEquals(String* other) {
   String* lhs = this->TryFlattenGetString();
   String* rhs = other->TryFlattenGetString();
 
+  AssertNoAllocation no_alloc;
+
   if (StringShape(lhs).IsSequentialAscii() &&
       StringShape(rhs).IsSequentialAscii()) {
     const char* str1 = SeqAsciiString::cast(lhs)->GetChars();
@@ -5904,16 +5894,18 @@ bool String::SlowEquals(String* other) {
   }
 
   Isolate* isolate = GetIsolate();
-  if (lhs->IsFlat()) {
-    if (lhs->IsAsciiRepresentation()) {
-      Vector<const char> vec1 = lhs->ToAsciiVector();
-      if (rhs->IsFlat()) {
-        if (rhs->IsAsciiRepresentation()) {
-          Vector<const char> vec2 = rhs->ToAsciiVector();
+  String::FlatContent lhs_content = lhs->GetFlatContent(no_alloc);
+  String::FlatContent rhs_content = rhs->GetFlatContent(no_alloc);
+  if (lhs_content.IsFlat()) {
+    if (lhs_content.IsAscii()) {
+      Vector<const char> vec1 = lhs_content.ToAsciiVector();
+      if (rhs_content.IsFlat()) {
+        if (rhs_content.IsAscii()) {
+          Vector<const char> vec2 = rhs_content.ToAsciiVector();
           return CompareRawStringContents(vec1, vec2);
         } else {
           VectorIterator<char> buf1(vec1);
-          VectorIterator<uc16> ib(rhs->ToUC16Vector());
+          VectorIterator<uc16> ib(rhs_content.ToUC16Vector());
           return CompareStringContents(&buf1, &ib);
         }
       } else {
@@ -5923,14 +5915,14 @@ bool String::SlowEquals(String* other) {
             isolate->objects_string_compare_buffer_b());
       }
     } else {
-      Vector<const uc16> vec1 = lhs->ToUC16Vector();
-      if (rhs->IsFlat()) {
-        if (rhs->IsAsciiRepresentation()) {
+      Vector<const uc16> vec1 = lhs_content.ToUC16Vector();
+      if (rhs_content.IsFlat()) {
+        if (rhs_content.IsAscii()) {
           VectorIterator<uc16> buf1(vec1);
-          VectorIterator<char> ib(rhs->ToAsciiVector());
+          VectorIterator<char> ib(rhs_content.ToAsciiVector());
           return CompareStringContents(&buf1, &ib);
         } else {
-          Vector<const uc16> vec2(rhs->ToUC16Vector());
+          Vector<const uc16> vec2(rhs_content.ToUC16Vector());
           return CompareRawStringContents(vec1, vec2);
         }
       } else {
@@ -5981,10 +5973,13 @@ bool String::IsEqualTo(Vector<const char> str) {
 
 
 bool String::IsAsciiEqualTo(Vector<const char> str) {
+  AssertNoAllocation no_alloc;
   int slen = length();
   if (str.length() != slen) return false;
-  if (IsFlat() && IsAsciiRepresentation()) {
-    return CompareChars(ToAsciiVector().start(), str.start(), slen) == 0;
+  FlatContent content = GetFlatContent(no_alloc);
+  if (content.IsAscii()) {
+    return CompareChars(content.ToAsciiVector().start(),
+                        str.start(), slen) == 0;
   }
   for (int i = 0; i < slen; i++) {
     if (Get(i) != static_cast<uint16_t>(str[i])) return false;
@@ -5994,10 +5989,12 @@ bool String::IsAsciiEqualTo(Vector<const char> str) {
 
 
 bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
+  AssertNoAllocation no_alloc;
   int slen = length();
   if (str.length() != slen) return false;
-  if (IsFlat() && IsTwoByteRepresentation()) {
-    return CompareChars(ToUC16Vector().start(), str.start(), slen) == 0;
+  FlatContent content = GetFlatContent(no_alloc);
+  if (content.IsTwoByte()) {
+    return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
   }
   for (int i = 0; i < slen; i++) {
     if (Get(i) != str[i]) return false;
index 93f7a1d..76fc0d5 100644 (file)
@@ -5789,6 +5789,7 @@ class StringShape BASE_EMBEDDED {
   inline bool IsSequentialTwoByte();
   inline bool IsSymbol();
   inline StringRepresentationTag representation_tag();
+  inline uint32_t encoding_tag();
   inline uint32_t full_representation_tag();
   inline uint32_t size_tag();
 #ifdef DEBUG
@@ -5820,6 +5821,51 @@ class StringShape BASE_EMBEDDED {
 // All string values have a length field.
 class String: public HeapObject {
  public:
+  // Representation of the flat content of a String.
+  // A non-flat string doesn't have flat content.
+  // A flat string has content that's encoded as a sequence of either
+  // ASCII chars or two-byte UC16.
+  // Returned by String::GetFlatContent().
+  class FlatContent {
+   public:
+    // Returns true if the string is flat and this structure contains content.
+    bool IsFlat() { return state_ != NON_FLAT; }
+    // Returns true if the structure contains ASCII content.
+    bool IsAscii() { return state_ == ASCII; }
+    // Returns true if the structure contains two-byte content.
+    bool IsTwoByte() { return state_ == TWO_BYTE; }
+
+    // Return the ASCII content of the string. Only use if IsAscii() returns
+    // true.
+    Vector<const char> ToAsciiVector() {
+      ASSERT_EQ(ASCII, state_);
+      return Vector<const char>::cast(buffer_);
+    }
+    // Return the two-byte content of the string. Only use if IsTwoByte()
+    // returns true.
+    Vector<const uc16> ToUC16Vector() {
+      ASSERT_EQ(TWO_BYTE, state_);
+      return Vector<const uc16>::cast(buffer_);
+    }
+
+   private:
+    enum State { NON_FLAT, ASCII, TWO_BYTE };
+
+    // Constructors only used by String::GetFlatContent().
+    explicit FlatContent(Vector<const char> chars)
+        : buffer_(Vector<const byte>::cast(chars)),
+          state_(ASCII) { }
+    explicit FlatContent(Vector<const uc16> chars)
+        : buffer_(Vector<const byte>::cast(chars)),
+          state_(TWO_BYTE) { }
+    FlatContent() : buffer_(), state_(NON_FLAT) { }
+
+    Vector<const byte> buffer_;
+    State state_;
+
+    friend class String;
+  };
+
   // Get and set the length of the string.
   inline int length();
   inline void set_length(int value);
@@ -5831,10 +5877,10 @@ class String: public HeapObject {
   inline bool IsAsciiRepresentation();
   inline bool IsTwoByteRepresentation();
 
-  // Returns whether this string has ascii chars, i.e. all of them can
-  // be ascii encoded.  This might be the case even if the string is
+  // Returns whether this string has only ASCII chars, i.e. all of them can
+  // be ASCII encoded.  This might be the case even if the string is
   // two-byte.  Such strings may appear when the embedder prefers
-  // two-byte external representations even for ascii data.
+  // two-byte external representations even for ASCII data.
   //
   // NOTE: this should be considered only a hint.  False negatives are
   // possible.
@@ -5868,8 +5914,12 @@ class String: public HeapObject {
   // string.
   inline String* TryFlattenGetString(PretenureFlag pretenure = NOT_TENURED);
 
-  Vector<const char> ToAsciiVector();
-  Vector<const uc16> ToUC16Vector();
+  // Tries to return the content of a flat string as a structure holding either
+  // a flat vector of char or of uc16.
+  // If the string isn't flat, and therefore doesn't have flat content, the
+  // returned structure will report so, and can't provide a vector of either
+  // kind.
+  FlatContent GetFlatContent(const AssertNoAllocation& safety_promise);
 
   // Mark the string as an undetectable object. It only applies to
   // ascii and two byte string types.
index 6c9fe1b..f942047 100644 (file)
@@ -2663,21 +2663,22 @@ class CompiledReplacement {
 void CompiledReplacement::Compile(Handle<String> replacement,
                                   int capture_count,
                                   int subject_length) {
-  ASSERT(replacement->IsFlat());
-  if (replacement->IsAsciiRepresentation()) {
-    AssertNoAllocation no_alloc;
-    ParseReplacementPattern(&parts_,
-                            replacement->ToAsciiVector(),
-                            capture_count,
-                            subject_length);
-  } else {
-    ASSERT(replacement->IsTwoByteRepresentation());
+  {
     AssertNoAllocation no_alloc;
-
-    ParseReplacementPattern(&parts_,
-                            replacement->ToUC16Vector(),
-                            capture_count,
-                            subject_length);
+    String::FlatContent content = replacement->GetFlatContent(no_alloc);
+    ASSERT(content.IsFlat());
+    if (content.IsAscii()) {
+      ParseReplacementPattern(&parts_,
+                              content.ToAsciiVector(),
+                              capture_count,
+                              subject_length);
+    } else {
+      ASSERT(content.IsTwoByte());
+      ParseReplacementPattern(&parts_,
+                              content.ToUC16Vector(),
+                              capture_count,
+                              subject_length);
+    }
   }
   Isolate* isolate = replacement->GetIsolate();
   // Find substrings of replacement string and create them as String objects.
@@ -3049,34 +3050,32 @@ int Runtime::StringMatch(Isolate* isolate,
 
   AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
   // Extract flattened substrings of cons strings before determining asciiness.
-  String* seq_sub = *sub;
-  if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
-  String* seq_pat = *pat;
-  if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first();
+  String::FlatContent seq_sub = sub->GetFlatContent(no_heap_allocation);
+  String::FlatContent seq_pat = pat->GetFlatContent(no_heap_allocation);
 
   // dispatch on type of strings
-  if (seq_pat->IsAsciiRepresentation()) {
-    Vector<const char> pat_vector = seq_pat->ToAsciiVector();
-    if (seq_sub->IsAsciiRepresentation()) {
+  if (seq_pat.IsAscii()) {
+    Vector<const char> pat_vector = seq_pat.ToAsciiVector();
+    if (seq_sub.IsAscii()) {
       return SearchString(isolate,
-                          seq_sub->ToAsciiVector(),
+                          seq_sub.ToAsciiVector(),
                           pat_vector,
                           start_index);
     }
     return SearchString(isolate,
-                        seq_sub->ToUC16Vector(),
+                        seq_sub.ToUC16Vector(),
                         pat_vector,
                         start_index);
   }
-  Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
-  if (seq_sub->IsAsciiRepresentation()) {
+  Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();
+  if (seq_sub.IsAscii()) {
     return SearchString(isolate,
-                        seq_sub->ToAsciiVector(),
+                        seq_sub.ToAsciiVector(),
                         pat_vector,
                         start_index);
   }
   return SearchString(isolate,
-                      seq_sub->ToUC16Vector(),
+                      seq_sub.ToUC16Vector(),
                       pat_vector,
                       start_index);
 }
@@ -3161,31 +3160,29 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLastIndexOf) {
 
   int position = -1;
   AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
-  // Extract flattened substrings of cons strings before determining asciiness.
-  String* seq_sub = *sub;
-  if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
-  String* seq_pat = *pat;
-  if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first();
-
-  if (seq_pat->IsAsciiRepresentation()) {
-    Vector<const char> pat_vector = seq_pat->ToAsciiVector();
-    if (seq_sub->IsAsciiRepresentation()) {
-      position = StringMatchBackwards(seq_sub->ToAsciiVector(),
+
+  String::FlatContent sub_content = sub->GetFlatContent(no_heap_allocation);
+  String::FlatContent pat_content = pat->GetFlatContent(no_heap_allocation);
+
+  if (pat_content.IsAscii()) {
+    Vector<const char> pat_vector = pat_content.ToAsciiVector();
+    if (sub_content.IsAscii()) {
+      position = StringMatchBackwards(sub_content.ToAsciiVector(),
                                       pat_vector,
                                       start_index);
     } else {
-      position = StringMatchBackwards(seq_sub->ToUC16Vector(),
+      position = StringMatchBackwards(sub_content.ToUC16Vector(),
                                       pat_vector,
                                       start_index);
     }
   } else {
-    Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
-    if (seq_sub->IsAsciiRepresentation()) {
-      position = StringMatchBackwards(seq_sub->ToAsciiVector(),
+    Vector<const uc16> pat_vector = pat_content.ToUC16Vector();
+    if (sub_content.IsAscii()) {
+      position = StringMatchBackwards(sub_content.ToAsciiVector(),
                                       pat_vector,
                                       start_index);
     } else {
-      position = StringMatchBackwards(seq_sub->ToUC16Vector(),
+      position = StringMatchBackwards(sub_content.ToUC16Vector(),
                                       pat_vector,
                                       start_index);
     }
@@ -3403,36 +3400,38 @@ static bool SearchStringMultiple(Isolate* isolate,
   for (;;) {  // Break when search complete.
     builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
     AssertNoAllocation no_gc;
-    if (subject->IsAsciiRepresentation()) {
-      Vector<const char> subject_vector = subject->ToAsciiVector();
-      if (pattern->IsAsciiRepresentation()) {
+    String::FlatContent subject_content = subject->GetFlatContent(no_gc);
+    String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
+    if (subject_content.IsAscii()) {
+      Vector<const char> subject_vector = subject_content.ToAsciiVector();
+      if (pattern_content.IsAscii()) {
         if (SearchStringMultiple(isolate,
                                  subject_vector,
-                                 pattern->ToAsciiVector(),
+                                 pattern_content.ToAsciiVector(),
                                  *pattern,
                                  builder,
                                  &match_pos)) break;
       } else {
         if (SearchStringMultiple(isolate,
                                  subject_vector,
-                                 pattern->ToUC16Vector(),
+                                 pattern_content.ToUC16Vector(),
                                  *pattern,
                                  builder,
                                  &match_pos)) break;
       }
     } else {
-      Vector<const uc16> subject_vector = subject->ToUC16Vector();
-      if (pattern->IsAsciiRepresentation()) {
+      Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
+      if (pattern_content.IsAscii()) {
         if (SearchStringMultiple(isolate,
                                  subject_vector,
-                                 pattern->ToAsciiVector(),
+                                 pattern_content.ToAsciiVector(),
                                  *pattern,
                                  builder,
                                  &match_pos)) break;
       } else {
         if (SearchStringMultiple(isolate,
                                  subject_vector,
-                                 pattern->ToUC16Vector(),
+                                 pattern_content.ToUC16Vector(),
                                  *pattern,
                                  builder,
                                  &match_pos)) break;
@@ -5420,12 +5419,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONString) {
     str = String::cast(flat);
     ASSERT(str->IsFlat());
   }
-  if (str->IsTwoByteRepresentation()) {
+  AssertNoAllocation no_alloc;
+  String::FlatContent flat = str->GetFlatContent(no_alloc);
+  ASSERT(flat.IsFlat());
+  if (flat.IsTwoByte()) {
     return QuoteJsonString<uc16, SeqTwoByteString, false>(isolate,
-                                                          str->ToUC16Vector());
+                                                          flat.ToUC16Vector());
   } else {
     return QuoteJsonString<char, SeqAsciiString, false>(isolate,
-                                                        str->ToAsciiVector());
+                                                        flat.ToAsciiVector());
   }
 }
 
@@ -5442,12 +5444,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONStringComma) {
     str = String::cast(flat);
     ASSERT(str->IsFlat());
   }
-  if (str->IsTwoByteRepresentation()) {
+  AssertNoAllocation no_alloc;
+  String::FlatContent flat = str->GetFlatContent(no_alloc);
+  if (flat.IsTwoByte()) {
     return QuoteJsonString<uc16, SeqTwoByteString, true>(isolate,
-                                                         str->ToUC16Vector());
+                                                         flat.ToUC16Vector());
   } else {
     return QuoteJsonString<char, SeqAsciiString, true>(isolate,
-                                                       str->ToAsciiVector());
+                                                       flat.ToAsciiVector());
   }
 }
 
@@ -5482,14 +5486,16 @@ static MaybeObject* QuoteJsonStringArray(Isolate* isolate,
   for (int i = 0; i < length; i++) {
     if (i != 0) *(write_cursor++) = ',';
     String* str = String::cast(array->get(i));
-    if (str->IsTwoByteRepresentation()) {
+    String::FlatContent content = str->GetFlatContent(no_gc);
+    ASSERT(content.IsFlat());
+    if (content.IsTwoByte()) {
       write_cursor = WriteQuoteJsonString<Char, uc16>(isolate,
                                                       write_cursor,
-                                                      str->ToUC16Vector());
+                                                      content.ToUC16Vector());
     } else {
       write_cursor = WriteQuoteJsonString<Char, char>(isolate,
                                                       write_cursor,
-                                                      str->ToAsciiVector());
+                                                      content.ToAsciiVector());
     }
   }
   *(write_cursor++) = ']';
@@ -5968,11 +5974,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
 
   // No allocation block.
   {
-    AssertNoAllocation nogc;
-    if (subject->IsAsciiRepresentation()) {
-      Vector<const char> subject_vector = subject->ToAsciiVector();
-      if (pattern->IsAsciiRepresentation()) {
-        Vector<const char> pattern_vector = pattern->ToAsciiVector();
+    AssertNoAllocation no_gc;
+    String::FlatContent subject_content = subject->GetFlatContent(no_gc);
+    String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
+    ASSERT(subject_content.IsFlat());
+    ASSERT(pattern_content.IsFlat());
+    if (subject_content.IsAscii()) {
+      Vector<const char> subject_vector = subject_content.ToAsciiVector();
+      if (pattern_content.IsAscii()) {
+        Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
         if (pattern_vector.length() == 1) {
           FindAsciiStringIndices(subject_vector,
                                  pattern_vector[0],
@@ -5988,22 +5998,22 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
       } else {
         FindStringIndices(isolate,
                           subject_vector,
-                          pattern->ToUC16Vector(),
+                          pattern_content.ToUC16Vector(),
                           &indices,
                           limit);
       }
     } else {
-      Vector<const uc16> subject_vector = subject->ToUC16Vector();
+      Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
-      if (pattern->IsAsciiRepresentation()) {
+      if (pattern_content.IsAscii()) {
         FindStringIndices(isolate,
                           subject_vector,
-                          pattern->ToAsciiVector(),
+                          pattern_content.ToAsciiVector(),
                           &indices,
                           limit);
       } else {
         FindStringIndices(isolate,
                           subject_vector,
-                          pattern->ToUC16Vector(),
+                          pattern_content.ToUC16Vector(),
                           &indices,
                           limit);
       }
@@ -6085,36 +6095,40 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToArray) {
   CONVERT_ARG_CHECKED(String, s, 0);
   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[1]);
 
-  s->TryFlatten();
+  s = FlattenGetString(s);
   const int length = static_cast<int>(Min<uint32_t>(s->length(), limit));
 
   Handle<FixedArray> elements;
+  int position = 0;  // First index the final loop still has to fill.
   if (s->IsFlat() && s->IsAsciiRepresentation()) {
+    // Try using cached chars where possible.
     Object* obj;
     { MaybeObject* maybe_obj =
           isolate->heap()->AllocateUninitializedFixedArray(length);
       if (!maybe_obj->ToObject(&obj)) return maybe_obj;
     }
+    AssertNoAllocation no_alloc;  // Ensure the vector stays valid.
     elements = Handle<FixedArray>(FixedArray::cast(obj), isolate);
-
-    Vector<const char> chars = s->ToAsciiVector();
-    // Note, this will initialize all elements (not only the prefix)
-    // to prevent GC from seeing partially initialized array.
-    int num_copied_from_cache = CopyCachedAsciiCharsToArray(isolate->heap(),
-                                                            chars.start(),
-                                                            *elements,
-                                                            length);
-
-    for (int i = num_copied_from_cache; i < length; ++i) {
-      Handle<Object> str = LookupSingleCharacterStringFromCode(chars[i]);
-      elements->set(i, *str);
+    String::FlatContent content = s->GetFlatContent(no_alloc);
+    if (content.IsAscii()) {
+      Vector<const char> chars = content.ToAsciiVector();
+      // Note: this initializes all elements (not only the copied prefix),
+      // so that the GC never sees a partially initialized array.
+      position = CopyCachedAsciiCharsToArray(isolate->heap(),
+                                             chars.start(),
+                                             *elements,
+                                             length);
+    } else {  // Content tag is not ASCII: pre-fill with undefined.
+      MemsetPointer(elements->data_start(),
+                    isolate->heap()->undefined_value(),
+                    length);
     }
   } else {
     elements = isolate->factory()->NewFixedArray(length);
-    for (int i = 0; i < length; ++i) {
-      Handle<Object> str = LookupSingleCharacterStringFromCode(s->Get(i));
-      elements->set(i, *str);
-    }
+  }
+  for (int i = position; i < length; ++i) {  // Fill the remaining slots.
+    Handle<Object> str = LookupSingleCharacterStringFromCode(s->Get(i));
+    elements->set(i, *str);
   }
 
 #ifdef DEBUG
@@ -6916,6 +6930,7 @@ static Object* StringInputBufferCompare(RuntimeState* state,
 static Object* FlatStringCompare(String* x, String* y) {
   ASSERT(x->IsFlat());
   ASSERT(y->IsFlat());
+  AssertNoAllocation no_alloc;  // Ensure the content vectors stay valid.
   Object* equal_prefix_result = Smi::FromInt(EQUAL);
   int prefix_length = x->length();
   if (y->length() < prefix_length) {
@@ -6925,22 +6940,24 @@ static Object* FlatStringCompare(String* x, String* y) {
     equal_prefix_result = Smi::FromInt(LESS);
   }
   int r;
-  if (x->IsAsciiRepresentation()) {
-    Vector<const char> x_chars = x->ToAsciiVector();
-    if (y->IsAsciiRepresentation()) {
-      Vector<const char> y_chars = y->ToAsciiVector();
+  String::FlatContent x_content = x->GetFlatContent(no_alloc);
+  String::FlatContent y_content = y->GetFlatContent(no_alloc);
+  if (x_content.IsAscii()) {  // Dispatch on both content tags.
+    Vector<const char> x_chars = x_content.ToAsciiVector();
+    if (y_content.IsAscii()) {
+      Vector<const char> y_chars = y_content.ToAsciiVector();
       r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
     } else {
-      Vector<const uc16> y_chars = y->ToUC16Vector();
+      Vector<const uc16> y_chars = y_content.ToUC16Vector();
       r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
     }
   } else {
-    Vector<const uc16> x_chars = x->ToUC16Vector();
-    if (y->IsAsciiRepresentation()) {
-      Vector<const char> y_chars = y->ToAsciiVector();
+    Vector<const uc16> x_chars = x_content.ToUC16Vector();
+    if (y_content.IsAscii()) {
+      Vector<const char> y_chars = y_content.ToAsciiVector();
       r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
     } else {
-      Vector<const uc16> y_chars = y->ToUC16Vector();
+      Vector<const uc16> y_chars = y_content.ToUC16Vector();
       r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
     }
   }
@@ -8821,13 +8838,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_DateParseString) {
   FixedArray* output_array = FixedArray::cast(output->elements());
   RUNTIME_ASSERT(output_array->length() >= DateParser::OUTPUT_SIZE);
   bool result;
-  if (str->IsAsciiRepresentation()) {
-    result = DateParser::Parse(str->ToAsciiVector(),
+  String::FlatContent str_content = str->GetFlatContent(no_allocation);
+  if (str_content.IsAscii()) {  // Dispatch on the content tag.
+    result = DateParser::Parse(str_content.ToAsciiVector(),
                                output_array,
                                isolate->unicode_cache());
   } else {
-    ASSERT(str->IsTwoByteRepresentation());
-    result = DateParser::Parse(str->ToUC16Vector(),
+    ASSERT(str_content.IsTwoByte());  // Rules out non-flat content.
+    result = DateParser::Parse(str_content.ToUC16Vector(),
                                output_array,
                                isolate->unicode_cache());
   }
@@ -12805,9 +12823,12 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_ListNatives) {
 
 RUNTIME_FUNCTION(MaybeObject*, Runtime_Log) {
   ASSERT(args.length() == 2);
+  AssertNoAllocation no_alloc;  // Ensure the vector stays valid.
   CONVERT_CHECKED(String, format, args[0]);
   CONVERT_CHECKED(JSArray, elms, args[1]);
-  Vector<const char> chars = format->ToAsciiVector();
+  String::FlatContent format_content = format->GetFlatContent(no_alloc);
+  RUNTIME_ASSERT(format_content.IsAscii());  // LogRuntime takes ASCII chars.
+  Vector<const char> chars = format_content.ToAsciiVector();
   LOGGER->LogRuntime(chars, elms);
   return isolate->heap()->undefined_value();
 }
index 6fcf557..9a5bb43 100644 (file)
@@ -116,3 +116,14 @@ assertEquals(["a", "b", "c"], "abc".split("", 3));
 assertEquals(["a", "b", "c"], "abc".split("", numberObj(3)));
 assertEquals(["a", "b", "c"], "abc".split("", 4));
 assertEquals(["a", "b", "c"], "abc".split("", numberObj(4)));
+
+var all_ascii_chars = [];
+for (var i = 0; i < 128; i++) all_ascii_chars[i] = String.fromCharCode(i);
+var all_ascii_string = all_ascii_chars.join("");
+
+var split_chars = all_ascii_string.split("");
+assertEquals(128, split_chars.length);
+for (var i = 0; i < 128; i++) {
+  assertEquals(1, split_chars[i].length);
+  assertEquals(i, split_chars[i].charCodeAt(0));
+}
\ No newline at end of file