From f470cf27777021c0a2161cb0233b8a94c8370c86 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Fri, 29 Apr 2011 08:53:36 +0000 Subject: [PATCH] Handle join of sparse arrays with non-empty separator more efficiently. BUG=v8:1028 Review URL: http://codereview.chromium.org/6902144 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7716 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/array.js | 42 +++++++++--- src/runtime.cc | 129 +++++++++++++++++++++++++++++++++++++ src/runtime.h | 1 + test/mjsunit/array-join.js | 29 ++++++++- 4 files changed, 188 insertions(+), 13 deletions(-) diff --git a/src/array.js b/src/array.js index 6ed147608..b0620832b 100644 --- a/src/array.js +++ b/src/array.js @@ -67,6 +67,25 @@ function GetSortedArrayKeys(array, intervals) { } +function SparseJoinWithSeparator(array, len, convert, separator) { + var keys = GetSortedArrayKeys(array, %GetArrayKeys(array, len)); + var totalLength = 0; + var elements = new InternalArray(keys.length * 2); + var previousKey = -1; + for (var i = 0; i < keys.length; i++) { + var key = keys[i]; + if (key != previousKey) { // keys may contain duplicates. + var e = array[key]; + if (!IS_STRING(e)) e = convert(e); + elements[i * 2] = key; + elements[i * 2 + 1] = e; + previousKey = key; + } + } + return %SparseJoinWithSeparator(elements, len, separator); +} + + // Optimized for sparse arrays if separator is ''. function SparseJoin(array, len, convert) { var keys = GetSortedArrayKeys(array, %GetArrayKeys(array, len)); @@ -110,8 +129,12 @@ function Join(array, length, separator, convert) { // Attempt to convert the elements. try { - if (UseSparseVariant(array, length, is_array) && (separator.length == 0)) { - return SparseJoin(array, length, convert); + if (UseSparseVariant(array, length, is_array)) { + if (separator.length == 0) { + return SparseJoin(array, length, convert); + } else { + return SparseJoinWithSeparator(array, length, convert, separator); + } } // Fast case for one-element arrays. @@ -129,10 +152,8 @@ function Join(array, length, separator, convert) { var elements_length = 0; for (var i = 0; i < length; i++) { var e = array[i]; - if (!IS_UNDEFINED(e)) { - if (!IS_STRING(e)) e = convert(e); - elements[elements_length++] = e; - } + if (!IS_STRING(e)) e = convert(e); + elements[elements_length++] = e; } elements.length = elements_length; var result = %_FastAsciiArrayJoin(elements, ''); @@ -151,11 +172,12 @@ function Join(array, length, separator, convert) { } else { for (var i = 0; i < length; i++) { var e = array[i]; - if (IS_NUMBER(e)) elements[i] = %_NumberToString(e); - else { - if (!IS_STRING(e)) e = convert(e); + if (IS_NUMBER(e)) { + e = %_NumberToString(e); + } else if (!IS_STRING(e)) { + e = convert(e); + } elements[i] = e; - } } } var result = %_FastAsciiArrayJoin(elements, separator); diff --git a/src/runtime.cc b/src/runtime.cc index 42357d6cd..44fe64620 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -6159,6 +6159,135 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringBuilderJoin) { return answer; } +template +static void JoinSparseArrayWithSeparator(FixedArray* elements, + int elements_length, + uint32_t array_length, + String* separator, + Vector buffer) { + int previous_separator_position = 0; + int separator_length = separator->length(); + int cursor = 0; + for (int i = 0; i < elements_length; i += 2) { + int position = NumberToInt32(elements->get(i)); + String* string = String::cast(elements->get(i + 1)); + int string_length = string->length(); + if (string->length() > 0) { + while (previous_separator_position < position) { + String::WriteToFlat(separator, &buffer[cursor], + 0, separator_length); + cursor += separator_length; + previous_separator_position++; + } + String::WriteToFlat(string, &buffer[cursor], + 0, string_length); + cursor += string->length(); + } + } + if (separator_length > 0) { + // Array length must be representable as a signed 32-bit number, + // otherwise the total string length would have been too large. + ASSERT(array_length <= 0x7fffffff); // Is int32_t. + int last_array_index = static_cast(array_length - 1); + while (previous_separator_position < last_array_index) { + String::WriteToFlat(separator, &buffer[cursor], + 0, separator_length); + cursor += separator_length; + previous_separator_position++; + } + } + ASSERT(cursor <= buffer.length()); +} + + +RUNTIME_FUNCTION(MaybeObject*, Runtime_SparseJoinWithSeparator) { + NoHandleAllocation ha; + ASSERT(args.length() == 3); + CONVERT_CHECKED(JSArray, elements_array, args[0]); + RUNTIME_ASSERT(elements_array->HasFastElements()); + CONVERT_NUMBER_CHECKED(uint32_t, array_length, Uint32, args[1]); + CONVERT_CHECKED(String, separator, args[2]); + // elements_array is fast-mode JSarray of alternating positions + // (increasing order) and strings. + // array_length is length of original array (used to add separators); + // separator is string to put between elements. Assumed to be non-empty. + + // Find total length of join result. + int string_length = 0; + bool is_ascii = true; + int max_string_length = SeqAsciiString::kMaxLength; + bool overflow = false; + CONVERT_NUMBER_CHECKED(int, elements_length, + Int32, elements_array->length()); + RUNTIME_ASSERT((elements_length & 1) == 0); // Even length. + FixedArray* elements = FixedArray::cast(elements_array->elements()); + for (int i = 0; i < elements_length; i += 2) { + RUNTIME_ASSERT(elements->get(i)->IsNumber()); + CONVERT_CHECKED(String, string, elements->get(i + 1)); + int length = string->length(); + if (is_ascii && !string->IsAsciiRepresentation()) { + is_ascii = false; + max_string_length = SeqTwoByteString::kMaxLength; + } + if (length > max_string_length || + max_string_length - length < string_length) { + overflow = true; + break; + } + string_length += length; + } + int separator_length = separator->length(); + if (!overflow && separator_length > 0) { + if (array_length <= 0x7fffffffu) { + int separator_count = static_cast(array_length) - 1; + int remaining_length = max_string_length - string_length; + if ((remaining_length / separator_length) >= separator_count) { + string_length += separator_length * (array_length - 1); + } else { + // Not room for the separators within the maximal string length. + overflow = true; + } + } else { + // Nonempty separator and at least 2^31-1 separators necessary + // means that the string is too large to create. + STATIC_ASSERT(String::kMaxLength < 0x7fffffff); + overflow = true; + } + } + if (overflow) { + // Throw OutOfMemory exception for creating too large a string. + V8::FatalProcessOutOfMemory("Array join result too large."); + } + + if (is_ascii) { + MaybeObject* result_allocation = + isolate->heap()->AllocateRawAsciiString(string_length); + if (result_allocation->IsFailure()) return result_allocation; + SeqAsciiString* result_string = + SeqAsciiString::cast(result_allocation->ToObjectUnchecked()); + JoinSparseArrayWithSeparator(elements, + elements_length, + array_length, + separator, + Vector(result_string->GetChars(), + string_length)); + return result_string; + } else { + MaybeObject* result_allocation = + isolate->heap()->AllocateRawTwoByteString(string_length); + if (result_allocation->IsFailure()) return result_allocation; + SeqTwoByteString* result_string = + SeqTwoByteString::cast(result_allocation->ToObjectUnchecked()); + JoinSparseArrayWithSeparator(elements, + elements_length, + array_length, + separator, + Vector(result_string->GetChars(), + string_length)); + return result_string; + } +} + RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberOr) { NoHandleAllocation ha; diff --git a/src/runtime.h b/src/runtime.h index bf1ba68b0..f9a9cad30 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -132,6 +132,7 @@ namespace internal { F(StringAdd, 2, 1) \ F(StringBuilderConcat, 3, 1) \ F(StringBuilderJoin, 3, 1) \ + F(SparseJoinWithSeparator, 3, 1) \ \ /* Bit operations */ \ F(NumberOr, 2, 1) \ diff --git a/test/mjsunit/array-join.js b/test/mjsunit/array-join.js index ddd14967f..5c837a5ca 100644 --- a/test/mjsunit/array-join.js +++ b/test/mjsunit/array-join.js @@ -44,7 +44,8 @@ assertEquals('1,2********3********4********5,6********', a.join('********')); assertEquals('1,2**********3**********4**********5,6**********', a.join('**********')); // Replace array.prototype.toString. -Array.prototype.toString = function() { return "array"; } +var oldToString = Array.prototype.toString; +Array.prototype.toString = function() { return "array"; }; assertEquals('array34arrayarray', a.join('')); assertEquals('array*3*4*array*array', a.join('*')); assertEquals('array**3**4**array**array', a.join('**')); @@ -52,7 +53,7 @@ assertEquals('array****3****4****array****array', a.join('****')); assertEquals('array********3********4********array********array', a.join('********')); assertEquals('array**********3**********4**********array**********array', a.join('**********')); -Array.prototype.toString = function() { throw 42; } +Array.prototype.toString = function() { throw 42; }; assertThrows("a.join('')"); assertThrows("a.join('*')"); assertThrows("a.join('**')"); @@ -60,7 +61,7 @@ assertThrows("a.join('****')"); assertThrows("a.join('********')"); assertThrows("a.join('**********')"); -Array.prototype.toString = function() { return "array"; } +Array.prototype.toString = function() { return "array"; }; assertEquals('array34arrayarray', a.join('')); assertEquals('array*3*4*array*array', a.join('*')); assertEquals('array**3**4**array**array', a.join('**')); @@ -68,3 +69,25 @@ assertEquals('array****3****4****array****array', a.join('****')); assertEquals('array********3********4********array********array', a.join('********')); assertEquals('array**********3**********4**********array**********array', a.join('**********')); +// Restore original toString. +delete Array.prototype.toString; +if (Array.prototype.toString != oldToString) { + Array.prototype.toString = oldToString; +} + +var a = new Array(123123123); +assertEquals(123123122, String(a).length); +assertEquals(123123122, a.join(",").length); +assertEquals(246246244, a.join("oo").length); + +a = new Array(Math.pow(2,32) - 1); // Max length. +assertEquals("", a.join("")); +a[123123123] = "o"; +a[1255215215] = "p"; +assertEquals("op", a.join("")); + +a = new Array(100001); +for (var i = 0; i < a.length; i++) a[i] = undefined; +a[5] = "ab"; +a[90000] = "cd"; +assertEquals("abcd", a.join("")); // Must not throw. \ No newline at end of file -- 2.34.1