From 1aae797ddde0a2467b42662540152c40ff83f28a Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Wed, 22 Oct 2008 09:09:07 +0000 Subject: [PATCH] Use direct copy and templates to speed up flattening of strings. Review URL: http://codereview.chromium.org/8011 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@549 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/factory.cc | 2 + src/heap.cc | 42 +++++++++-------- src/objects-inl.h | 15 +++++- src/objects.cc | 113 +++++++++++++++++++++++++++++----------------- src/objects.h | 23 ++++++---- src/runtime.cc | 59 ++++++++++++++++-------- src/utils.h | 9 ++++ 7 files changed, 171 insertions(+), 92 deletions(-) diff --git a/src/factory.cc b/src/factory.cc index 7c850e9d8..4e5f5da3f 100644 --- a/src/factory.cc +++ b/src/factory.cc @@ -79,6 +79,8 @@ Handle Factory::NewRawTwoByteString(int length, Handle Factory::NewConsString(Handle first, Handle second) { + if (first->length() == 0) return second; + if (second->length() == 0) return first; CALL_HEAP_FUNCTION(Heap::AllocateConsString(*first, *second), String); } diff --git a/src/heap.cc b/src/heap.cc index 92958bd84..660d777bd 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -1331,29 +1331,33 @@ Object* Heap::AllocateSharedFunctionInfo(Object* name) { Object* Heap::AllocateConsString(String* first, String* second) { - int length = first->length() + second->length(); + int first_length = first->length(); + int second_length = second->length(); + int length = first_length + second_length; bool is_ascii = first->is_ascii_representation() && second->is_ascii_representation(); // If the resulting string is small make a flat string. - if (length < ConsString::kMinLength) { - Object* result = is_ascii - ? AllocateRawAsciiString(length) - : AllocateRawTwoByteString(length); - if (result->IsFailure()) return result; - // Copy the characters into the new object. - String* string_result = String::cast(result); - int first_length = first->length(); - // Copy the content of the first string. - for (int i = 0; i < first_length; i++) { - string_result->Set(i, first->Get(i)); - } - int second_length = second->length(); - // Copy the content of the first string. - for (int i = 0; i < second_length; i++) { - string_result->Set(first_length + i, second->Get(i)); + if (length < String::kMinNonFlatLength) { + ASSERT(first->IsFlat()); + ASSERT(second->IsFlat()); + if (is_ascii) { + Object* result = AllocateRawAsciiString(length); + if (result->IsFailure()) return result; + // Copy the characters into the new object. + char* dest = SeqAsciiString::cast(result)->GetChars(); + String::WriteToFlat(first, dest, 0, first_length); + String::WriteToFlat(second, dest + first_length, 0, second_length); + return result; + } else { + Object* result = AllocateRawTwoByteString(length); + if (result->IsFailure()) return result; + // Copy the characters into the new object. + uc16* dest = SeqTwoByteString::cast(result)->GetChars(); + String::WriteToFlat(first, dest, 0, first_length); + String::WriteToFlat(second, dest + first_length, 0, second_length); + return result; } - return result; } Map* map; @@ -1384,7 +1388,7 @@ Object* Heap::AllocateSlicedString(String* buffer, int start, int end) { int length = end - start; // If the resulting string is small make a sub string. - if (end - start <= SlicedString::kMinLength) { + if (end - start <= String::kMinNonFlatLength) { return Heap::AllocateSubString(buffer, start, end); } diff --git a/src/objects-inl.h b/src/objects-inl.h index 3f7dc5fae..bc259a7ec 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -1372,6 +1372,12 @@ bool String::is_ascii_representation_map(Map* map) { } +int String::full_representation_tag() { + return map()->instance_type() & + (kStringRepresentationMask | kStringEncodingMask); +} + + StringRepresentationTag String::representation_tag() { return map_representation_tag(map()); } @@ -1417,8 +1423,8 @@ Address SeqAsciiString::GetCharsAddress() { } -const char* SeqAsciiString::GetChars() { - return reinterpret_cast(GetCharsAddress()); +char* SeqAsciiString::GetChars() { + return reinterpret_cast(GetCharsAddress()); } @@ -1427,6 +1433,11 @@ Address SeqTwoByteString::GetCharsAddress() { } +uc16* SeqTwoByteString::GetChars() { + return reinterpret_cast(FIELD_ADDR(this, kHeaderSize)); +} + + uint16_t SeqTwoByteString::SeqTwoByteStringGet(int index) { ASSERT(index >= 0 && index < length()); return READ_SHORT_FIELD(this, kHeaderSize + index * kShortSize); diff --git a/src/objects.cc b/src/objects.cc index c8147e453..fb4a74bdb 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -529,12 +529,33 @@ Object* String::Flatten() { // an old space GC. PretenureFlag tenure = Heap::InNewSpace(this) ? NOT_TENURED : TENURED; int len = length(); - Object* object = IsAsciiRepresentation() ? - Heap::AllocateRawAsciiString(len, tenure) : - Heap::AllocateRawTwoByteString(len, tenure); - if (object->IsFailure()) return object; - String* result = String::cast(object); - Flatten(this, result, 0, len, 0); + Object* object; + String* result; + if (IsAsciiRepresentation()) { + object = Heap::AllocateRawAsciiString(len, tenure); + if (object->IsFailure()) return object; + result = String::cast(object); + String* first = String::cast(cs->first()); + int first_length = first->length(); + char* dest = SeqAsciiString::cast(result)->GetChars(); + WriteToFlat(first, dest, 0, first_length); + WriteToFlat(String::cast(cs->second()), + dest + first_length, + 0, + len - first_length); + } else { + object = Heap::AllocateRawTwoByteString(len, tenure); + if (object->IsFailure()) return object; + result = String::cast(object); + uc16* dest = SeqTwoByteString::cast(result)->GetChars(); + String* first = String::cast(cs->first()); + int first_length = first->length(); + WriteToFlat(first, dest, 0, first_length); + WriteToFlat(String::cast(cs->second()), + dest + first_length, + 0, + len - first_length); + } cs->set_first(result); cs->set_second(Heap::empty_string()); return this; @@ -2922,7 +2943,7 @@ Vector String::ToAsciiVector() { } if (string_tag == kSeqStringTag) { SeqAsciiString* seq = SeqAsciiString::cast(string); - char* start = reinterpret_cast(seq->GetCharsAddress()); + char* start = seq->GetChars(); return Vector(start + offset, length); } ASSERT(string_tag == kExternalStringTag); @@ -2953,8 +2974,7 @@ Vector String::ToUC16Vector() { } if (string_tag == kSeqStringTag) { SeqTwoByteString* seq = SeqTwoByteString::cast(string); - uc16* start = reinterpret_cast(seq->GetCharsAddress()); - return Vector(start + offset, length); + return Vector(seq->GetChars() + offset, length); } ASSERT(string_tag == kExternalStringTag); ExternalTwoByteString* ext = ExternalTwoByteString::cast(string); @@ -3122,7 +3142,6 @@ const unibrow::byte* SeqAsciiString::SeqAsciiStringReadBlock( unsigned* remaining, unsigned* offset_ptr, unsigned max_chars) { - // Cast const char* to unibrow::byte* (signedness difference). const unibrow::byte* b = reinterpret_cast(this) - kHeapObjectTag + kHeaderSize + *offset_ptr * kCharSize; *remaining = max_chars; @@ -3589,47 +3608,62 @@ Object* SlicedString::SlicedStringFlatten() { } -void String::Flatten(String* src, - String* sink, - int f, - int t, - int so) { +template +void String::WriteToFlat(String* src, + sinkchar* sink, + int f, + int t) { String* source = src; int from = f; int to = t; - int sink_offset = so; while (true) { ASSERT(0 <= from && from <= to && to <= source->length()); - ASSERT(0 <= sink_offset && sink_offset < sink->length()); - switch (source->representation_tag()) { - case kSeqStringTag: - case kExternalStringTag: { - Access buffer(&string_input_buffer); - buffer->Reset(from, source); - int j = sink_offset; - for (int i = from; i < to; i++) { - uc32 c = buffer->GetNext(); - sink->Set(j++, c); - } + switch (source->full_representation_tag()) { + case kAsciiStringTag | kExternalStringTag: { + CopyChars(sink, + ExternalAsciiString::cast(source)->resource()->data() + from, + to - from); + return; + } + case kTwoByteStringTag | kExternalStringTag: { + const uc16* data = + ExternalTwoByteString::cast(source)->resource()->data(); + CopyChars(sink, + data + from, + to - from); return; } - case kSlicedStringTag: { + case kAsciiStringTag | kSeqStringTag: { + CopyChars(sink, + SeqAsciiString::cast(source)->GetChars() + from, + to - from); + return; + } + case kTwoByteStringTag | kSeqStringTag: { + CopyChars(sink, + SeqTwoByteString::cast(source)->GetChars() + from, + to - from); + return; + } + case kAsciiStringTag | kSlicedStringTag: + case kTwoByteStringTag | kSlicedStringTag: { SlicedString* sliced_string = SlicedString::cast(source); int start = sliced_string->start(); from += start; to += start; source = String::cast(sliced_string->buffer()); + break; } - break; - case kConsStringTag: { + case kAsciiStringTag | kConsStringTag: + case kTwoByteStringTag | kConsStringTag: { ConsString* cons_string = ConsString::cast(source); String* first = String::cast(cons_string->first()); int boundary = first->length(); if (to - boundary >= boundary - from) { // Right hand side is longer. Recurse over left. if (from < boundary) { - Flatten(first, sink, from, boundary, sink_offset); - sink_offset += boundary - from; + WriteToFlat(first, sink, from, boundary); + sink += boundary - from; from = 0; } else { from -= boundary; @@ -3637,22 +3671,19 @@ void String::Flatten(String* src, to -= boundary; source = String::cast(cons_string->second()); } else { - // Left hand side is longer. Recurse over right. The hasher - // needs us to visit the string from left to right so doing - // this invalidates that hash. + // Left hand side is longer. Recurse over right. if (to > boundary) { String* second = String::cast(cons_string->second()); - Flatten(second, - sink, - 0, - to - boundary, - sink_offset + boundary - from); + WriteToFlat(second, + sink + boundary - from, + 0, + to - boundary); to = boundary; } source = first; } + break; } - break; } } } diff --git a/src/objects.h b/src/objects.h index 0fc49e4ef..36ffbb886 100644 --- a/src/objects.h +++ b/src/objects.h @@ -3071,6 +3071,8 @@ class String: public HeapObject { // Get the representation tag. inline StringRepresentationTag representation_tag(); + // Get the representation and ASCII tag. + inline int full_representation_tag(); static inline StringRepresentationTag map_representation_tag(Map* map); // For use during stack traces. Performs rudimentary sanity check. @@ -3097,6 +3099,9 @@ class String: public HeapObject { // Max ascii char code. static const int kMaxAsciiCharCode = unibrow::Utf8::kMaxOneByteChar; + // Minimum lenth for a cons or sliced string. + static const int kMinNonFlatLength = 13; + // Mask constant for checking if a string has a computed hash code // and if it is an array index. The least significant bit indicates // whether a hash code has been computed. If the hash code has been @@ -3134,11 +3139,11 @@ class String: public HeapObject { unsigned* offset); // Helper function for flattening strings. - static void Flatten(String* source, - String* sink, - int from, - int to, - int sink_offset); + template + static void WriteToFlat(String* source, + sinkchar* sink, + int from, + int to); protected: class ReadBlockBuffer { @@ -3215,7 +3220,7 @@ class SeqAsciiString: public SeqString { // Get the address of the characters in this string. inline Address GetCharsAddress(); - inline const char* GetChars(); + inline char* GetChars(); // Casting static inline SeqAsciiString* cast(Object* obj); @@ -3257,6 +3262,8 @@ class SeqTwoByteString: public SeqString { // Get the address of the characters in this string. inline Address GetCharsAddress(); + inline uc16* GetChars(); + // For regexp code. const uint16_t* SeqTwoByteStringGetData(unsigned start); @@ -3328,7 +3335,6 @@ class ConsString: public String { unsigned* offset_ptr, unsigned chars); - // Minimum length for a cons string. static const int kMinLength = 13; @@ -3376,9 +3382,6 @@ class SlicedString: public String { unsigned* offset_ptr, unsigned chars); - // Minimum lenth for a sliced string. - static const int kMinLength = 13; - private: DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString); }; diff --git a/src/runtime.cc b/src/runtime.cc index fd2480cd6..ca94ef15f 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2591,6 +2591,30 @@ static Object* Runtime_StringAdd(Arguments args) { } +template +static inline void StringBuilderConcatHelper(String* special, + sinkchar* sink, + FixedArray* fixed_array, + int array_length) { + int position = 0; + for (int i = 0; i < array_length; i++) { + Object* element = fixed_array->get(i); + if (element->IsSmi()) { + int len = Smi::cast(element)->value(); + int pos = len >> 11; + len &= 0x7ff; + String::WriteToFlat(special, sink + position, pos, pos + len); + position += len; + } else { + String* string = String::cast(element); + int element_length = string->length(); + String::WriteToFlat(string, sink + position, 0, element_length); + position += element_length; + } + } +} + + static Object* Runtime_StringBuilderConcat(Arguments args) { NoHandleAllocation ha; ASSERT(args.length() == 2); @@ -2647,32 +2671,27 @@ static Object* Runtime_StringBuilderConcat(Arguments args) { } int length = position; - position = 0; Object* object; + if (ascii) { object = Heap::AllocateRawAsciiString(length); + if (object->IsFailure()) return object; + SeqAsciiString* answer = SeqAsciiString::cast(object); + StringBuilderConcatHelper(special, + answer->GetChars(), + fixed_array, + array_length); + return answer; } else { object = Heap::AllocateRawTwoByteString(length); + if (object->IsFailure()) return object; + SeqTwoByteString* answer = SeqTwoByteString::cast(object); + StringBuilderConcatHelper(special, + answer->GetChars(), + fixed_array, + array_length); + return answer; } - if (object->IsFailure()) return object; - - String* answer = String::cast(object); - for (int i = 0; i < array_length; i++) { - Object* element = fixed_array->get(i); - if (element->IsSmi()) { - int len = Smi::cast(element)->value(); - int pos = len >> 11; - len &= 0x7ff; - String::Flatten(special, answer, pos, pos + len, position); - position += len; - } else { - String* string = String::cast(element); - int element_length = string->length(); - String::Flatten(string, answer, 0, element_length, position); - position += element_length; - } - } - return answer; } diff --git a/src/utils.h b/src/utils.h index 1f385257a..02127705d 100644 --- a/src/utils.h +++ b/src/utils.h @@ -443,6 +443,15 @@ class StringBuilder { DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder); }; + +// Copy from ASCII/16bit chars to ASCII/16bit chars. +template +static inline void CopyChars(sinkchar* dest, const sourcechar* src, int chars) { + while (chars--) { + *dest++ = *src++; + } +} + } } // namespace v8::internal #endif // V8_UTILS_H_ -- 2.34.1