From 25f84a48ce976bb1123de15ce1555a0b8065edbd Mon Sep 17 00:00:00 2001
From: "erik.corry@gmail.com"
Date: Wed, 21 Mar 2012 13:48:29 +0000
Subject: [PATCH] Speed up WriteUtf8 in the case where the output buffer is
 large enough. Review URL: https://chromiumcodereview.appspot.com/9696032

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11104 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 src/api.cc              | 137 +++++++++++++++++++++++++++++++++++++++++++++++-
 test/cctest/test-api.cc |   6 +++
 2 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/src/api.cc b/src/api.cc
index c852b97..239b41e 100644
--- a/src/api.cc
+++ b/src/api.cc
@@ -3694,6 +3694,102 @@ int String::Utf8Length() const {
 }
 
 
+// Encodes the UTF-16 code units [start, end) of |string| as UTF-8 into
+// |buffer| and returns the number of bytes written.  The caller must make
+// sure the buffer is large enough; no bounds checks are done here.
+// |previous_character| is handed to unibrow::Utf8::Encode together with the
+// first two-byte unit (presumably so a surrogate pair split across string
+// parts is still combined); the last unit seen, or kNoPreviousCharacter after
+// an ASCII part, is stored in |*last_character|.
+// Will fail with a negative answer if the recursion depth is too high, i.e.
+// if straddling cons strings nest deeper than |recursion_budget| levels.
+static int RecursivelySerializeToUtf8(i::String* string,
+                                      char* buffer,
+                                      int start,
+                                      int end,
+                                      int recursion_budget,
+                                      int32_t previous_character,
+                                      int32_t* last_character) {
+  int utf8_bytes = 0;  // Bytes emitted so far by recursive calls.
+  while (true) {
+    if (string->IsAsciiRepresentation()) {
+      i::String::WriteToFlat(string, buffer, start, end);
+      *last_character = unibrow::Utf16::kNoPreviousCharacter;
+      return utf8_bytes + end - start;  // One byte per ASCII character.
+    }
+    switch (i::StringShape(string).representation_tag()) {
+      case i::kExternalStringTag: {
+        const uint16_t* data = i::ExternalTwoByteString::cast(string)->
+            ExternalTwoByteStringGetData(0);
+        char* current = buffer;
+        for (int i = start; i < end; i++) {
+          uint16_t character = data[i];
+          current +=
+              unibrow::Utf8::Encode(current, character, previous_character);
+          previous_character = character;
+        }
+        *last_character = previous_character;
+        return utf8_bytes + current - buffer;
+      }
+      case i::kSeqStringTag: {
+        const uint16_t* data =
+            i::SeqTwoByteString::cast(string)->SeqTwoByteStringGetData(0);
+        char* current = buffer;
+        for (int i = start; i < end; i++) {
+          uint16_t character = data[i];
+          current +=
+              unibrow::Utf8::Encode(current, character, previous_character);
+          previous_character = character;
+        }
+        *last_character = previous_character;
+        return utf8_bytes + current - buffer;
+      }
+      case i::kSlicedStringTag: {
+        i::SlicedString* slice = i::SlicedString::cast(string);
+        unsigned offset = slice->offset();
+        string = slice->parent();  // Retry on the parent, shifted by offset.
+        start += offset;
+        end += offset;
+        continue;
+      }
+      case i::kConsStringTag: {
+        i::ConsString* cons_string = i::ConsString::cast(string);
+        i::String* first = cons_string->first();
+        int boundary = first->length();  // Index where second() begins.
+        if (start >= boundary) {
+          // Only need RHS.
+          string = cons_string->second();
+          start -= boundary;
+          end -= boundary;
+          continue;
+        } else if (end <= boundary) {
+          // Only need LHS.
+          string = first;
+        } else {
+          if (recursion_budget == 0) return -1;  // Too deep; give up.
+          int extra_utf8_bytes =
+              RecursivelySerializeToUtf8(first,
+                                         buffer,
+                                         start,
+                                         boundary,
+                                         recursion_budget - 1,
+                                         previous_character,
+                                         &previous_character);
+          if (extra_utf8_bytes < 0) return extra_utf8_bytes;
+          buffer += extra_utf8_bytes;
+          utf8_bytes += extra_utf8_bytes;
+          string = cons_string->second();  // Loop on for the RHS remainder.
+          start = 0;
+          end -= boundary;
+        }
+      }
+    }
+  }
+  UNREACHABLE();
+  return 0;
+}
+
+
 bool String::MayContainNonAscii() const {
   i::Handle str = Utils::OpenHandle(this);
   if (IsDeadCheck(str->GetIsolate(), "v8::String::MayContainNonAscii()")) {
@@ -3712,11 +3808,12 @@ int String::WriteUtf8(char* buffer,
   LOG_API(isolate, "String::WriteUtf8");
   ENTER_V8(isolate);
   i::Handle str = Utils::OpenHandle(this);
+  int string_length = str->length();
   if (str->IsAsciiRepresentation()) {
     int len;
     if (capacity == -1) {
       capacity = str->length() + 1;
-      len = str->length();
+      len = string_length;
     } else {
       len = i::Min(capacity, str->length());
     }
@@ -3729,6 +3826,44 @@ int String::WriteUtf8(char* buffer,
     return len;
   }
 
+  // Fast path: a UTF-16 code unit needs at most three UTF-8 bytes.  Divide
+  // the capacity instead of tripling the length, which could overflow an int.
+  if (capacity == -1 || capacity / 3 >= string_length) {
+    int32_t previous = unibrow::Utf16::kNoPreviousCharacter;
+    const int kMaxRecursion = 100;
+    int utf8_bytes =
+        RecursivelySerializeToUtf8(*str,
+                                   buffer,
+                                   0,
+                                   string_length,
+                                   kMaxRecursion,
+                                   previous,
+                                   &previous);
+    if (utf8_bytes >= 0) {
+      // Success serializing with recursion.
+      if ((options & NO_NULL_TERMINATION) == 0 &&
+          (capacity > utf8_bytes || capacity == -1)) {
+        buffer[utf8_bytes++] = '\0';
+      }
+      if (nchars_ref != NULL) *nchars_ref = string_length;
+      return utf8_bytes;
+    }
+    FlattenString(str);
+    // Recurse once. This time around the string is flat and the serializing
+    // with recursion will certainly succeed.
+    return WriteUtf8(buffer, capacity, nchars_ref, options);
+  } else if (capacity >= string_length) {
+    // First check that the buffer is large enough. If it is, then recurse
+    // once without a capacity limit, which will get into the other branch of
+    // this 'if'.
+    int utf8_bytes = i::Utf8Length(str);
+    if ((options & NO_NULL_TERMINATION) == 0) utf8_bytes++;
+    if (utf8_bytes <= capacity) {
+      return WriteUtf8(buffer, -1, nchars_ref, options);
+    }
+  }
+
+  // Slow case.
   i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
   isolate->string_tracker()->RecordWrite(str);
   if (options & HINT_MANY_WRITES_EXPECTED) {
diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc
index 8df0b7b..b1a23c1 100644
--- a/test/cctest/test-api.cc
+++ b/test/cctest/test-api.cc
@@ -5870,6 +5870,7 @@ THREADED_TEST(Utf16) {
       "p.push(String.fromCharCode(0xdc00));"
       "var a = [];"
       "var b = [];"
+      "var c = [];"  // Parts for the substring-based c2 variants.
       "var alens = [];"
       "for (var i = 0; i < 3; i++) {"
       " p[1] = String.fromCharCode(lead++);"
@@ -5877,17 +5878,21 @@ THREADED_TEST(Utf16) {
       " p[2] = String.fromCharCode(trail++);"
       " a.push(p[i] + p[j]);"
       " b.push(p[i] + p[j]);"
+      " c.push(p[i] + p[j]);"
       " alens.push(plens[i] + plens[j]);"
       " }"
       "}"
       "alens[5] -= 2;"  // Here the surrogate pairs match up.
       "var a2 = [];"
       "var b2 = [];"
+      "var c2 = [];"
       "var a2lens = [];"
       "for (var m = 0; m < 9; m++) {"
       " for (var n = 0; n < 9; n++) {"
       " a2.push(a[m] + a[n]);"
       " b2.push(b[m] + b[n]);"
+      " var newc = 'x' + c[m] + c[n] + 'y';"
+      " c2.push(newc.substring(1, newc.length - 1));"  // Drop x and y again.
       " var utf = alens[m] + alens[n];"  // And here.
                                           // The 'n's that start with 0xdc.. are 6-8
                                           // The 'm's that end with 0xd8.. are 1, 4 and 7
@@ -5899,6 +5904,7 @@ THREADED_TEST(Utf16) {
   Utf16Helper(context, "a2", "a2lens", 81);
   WriteUtf8Helper(context, "b", "alens", 9);
   WriteUtf8Helper(context, "b2", "a2lens", 81);
+  WriteUtf8Helper(context, "c2", "a2lens", 81);
 }
 
 
-- 
2.7.4