}
+// Serializes characters [start, end) of |string| into |buffer| as UTF-8 by
+// walking the string's representation directly instead of flattening it.
+//
+// Parameters:
+//   string              The string to serialize.  Sliced strings are followed
+//                       to their parent and cons strings are split at the
+//                       boundary between their two halves.
+//   buffer              Destination for the UTF-8 bytes.  No capacity is
+//                       passed in and no bounds checks are made here, so the
+//                       caller must supply a buffer that is large enough.
+//   start, end          Half-open character range, relative to |string|.
+//   recursion_budget    Number of nested recursive calls still allowed.  A
+//                       recursive call is only needed when the range straddles
+//                       both halves of a cons string.
+//   previous_character  The UTF-16 code unit that precedes |start|, or
+//                       unibrow::Utf16::kNoPreviousCharacter.  It is handed to
+//                       unibrow::Utf8::Encode together with each code unit
+//                       (presumably so that a surrogate pair split across two
+//                       pieces can still be combined - confirm in Encode).
+//   last_character      Out parameter, written on successful return: the last
+//                       two-byte code unit that was encoded, or
+//                       kNoPreviousCharacter if the final piece was ASCII.
+//
+// Returns the number of bytes written to |buffer|, or a negative value if the
+// recursion budget ran out.  On failure |buffer| may already hold partial
+// output.
+static int RecursivelySerializeToUtf8(i::String* string,
+                                      char* buffer,
+                                      int start,
+                                      int end,
+                                      int recursion_budget,
+                                      int32_t previous_character,
+                                      int32_t* last_character) {
+  // Bytes already written by recursive calls for earlier pieces; |buffer| is
+  // advanced past them, so it always points at the next free byte.
+  int utf8_bytes = 0;
+  while (true) {
+    // The ASCII test comes before the representation switch, so it applies
+    // whatever the representation tag is.  The piece is copied with
+    // WriteToFlat and accounts for exactly one byte per character.
+    if (string->IsAsciiRepresentation()) {
+      i::String::WriteToFlat(string, buffer, start, end);
+      *last_character = unibrow::Utf16::kNoPreviousCharacter;
+      return utf8_bytes + end - start;
+    }
+    switch (i::StringShape(string).representation_tag()) {
+      case i::kExternalStringTag: {
+        const uint16_t* data = i::ExternalTwoByteString::cast(string)->
+            ExternalTwoByteStringGetData(0);
+        char* current = buffer;
+        for (int i = start; i < end; i++) {
+          uint16_t character = data[i];
+          current +=
+              unibrow::Utf8::Encode(current, character, previous_character);
+          previous_character = character;
+        }
+        *last_character = previous_character;
+        // A pointer difference has type ptrdiff_t, which is wider than int on
+        // 64-bit targets.  Narrow it explicitly rather than relying on an
+        // implicit conversion in the return statement.
+        return utf8_bytes + static_cast<int>(current - buffer);
+      }
+      case i::kSeqStringTag: {
+        const uint16_t* data =
+            i::SeqTwoByteString::cast(string)->SeqTwoByteStringGetData(0);
+        char* current = buffer;
+        for (int i = start; i < end; i++) {
+          uint16_t character = data[i];
+          current +=
+              unibrow::Utf8::Encode(current, character, previous_character);
+          previous_character = character;
+        }
+        *last_character = previous_character;
+        // See the external-string case for why the cast is explicit.
+        return utf8_bytes + static_cast<int>(current - buffer);
+      }
+      case i::kSlicedStringTag: {
+        // Translate the range into the parent's coordinates and retry.
+        i::SlicedString* slice = i::SlicedString::cast(string);
+        unsigned offset = slice->offset();
+        string = slice->parent();
+        start += offset;
+        end += offset;
+        continue;
+      }
+      case i::kConsStringTag: {
+        i::ConsString* cons_string = i::ConsString::cast(string);
+        i::String* first = cons_string->first();
+        int boundary = first->length();
+        if (start >= boundary) {
+          // Only need RHS.
+          string = cons_string->second();
+          start -= boundary;
+          end -= boundary;
+          continue;
+        } else if (end <= boundary) {
+          // Only need LHS.
+          string = first;
+        } else {
+          // The range straddles both halves: recurse into the LHS, then keep
+          // iterating on the RHS.  The recursive call updates
+          // previous_character so the RHS sees the last LHS code unit.
+          if (recursion_budget == 0) return -1;
+          int extra_utf8_bytes =
+              RecursivelySerializeToUtf8(first,
+                                         buffer,
+                                         start,
+                                         boundary,
+                                         recursion_budget - 1,
+                                         previous_character,
+                                         &previous_character);
+          if (extra_utf8_bytes < 0) return extra_utf8_bytes;
+          buffer += extra_utf8_bytes;
+          utf8_bytes += extra_utf8_bytes;
+          string = cons_string->second();
+          start = 0;
+          end -= boundary;
+        }
+        // The LHS-only and straddling branches drop out of the switch here
+        // and go round the enclosing loop again with the updated |string|.
+      }
+    }
+  }
+  UNREACHABLE();
+  return 0;
+}
+
+
bool String::MayContainNonAscii() const {
i::Handle<i::String> str = Utils::OpenHandle(this);
if (IsDeadCheck(str->GetIsolate(), "v8::String::MayContainNonAscii()")) {
LOG_API(isolate, "String::WriteUtf8");
ENTER_V8(isolate);
i::Handle<i::String> str = Utils::OpenHandle(this);
+ int string_length = str->length();
if (str->IsAsciiRepresentation()) {
int len;
if (capacity == -1) {
capacity = str->length() + 1;
- len = str->length();
+ len = string_length;
} else {
len = i::Min(capacity, str->length());
}
return len;
}
+ if (capacity == -1 || capacity >= string_length * 3) {
+ int32_t previous = unibrow::Utf16::kNoPreviousCharacter;
+ const int kMaxRecursion = 100;
+ int utf8_bytes =
+ RecursivelySerializeToUtf8(*str,
+ buffer,
+ 0,
+ string_length,
+ kMaxRecursion,
+ previous,
+ &previous);
+ if (utf8_bytes >= 0) {
+ // Success serializing with recursion.
+ if ((options & NO_NULL_TERMINATION) == 0 &&
+ (capacity > utf8_bytes || capacity == -1)) {
+ buffer[utf8_bytes++] = '\0';
+ }
+ if (nchars_ref != NULL) *nchars_ref = string_length;
+ return utf8_bytes;
+ }
+ FlattenString(str);
+ // Recurse once. This time around the string is flat and the serializing
+ // with recursion will certainly succeed.
+ return WriteUtf8(buffer, capacity, nchars_ref, options);
+ } else if (capacity >= string_length) {
+ // First check that the buffer is large enough. If it is, then recurse
+ // once without a capacity limit, which will get into the other branch of
+ // this 'if'.
+ int utf8_bytes = i::Utf8Length(str);
+ if ((options & NO_NULL_TERMINATION) == 0) utf8_bytes++;
+ if (utf8_bytes <= capacity) {
+ return WriteUtf8(buffer, -1, nchars_ref, options);
+ }
+ }
+
+ // Slow case.
i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
isolate->string_tracker()->RecordWrite(str);
if (options & HINT_MANY_WRITES_EXPECTED) {
"p.push(String.fromCharCode(0xdc00));"
"var a = [];"
"var b = [];"
+ "var c = [];"
"var alens = [];"
"for (var i = 0; i < 3; i++) {"
" p[1] = String.fromCharCode(lead++);"
" p[2] = String.fromCharCode(trail++);"
" a.push(p[i] + p[j]);"
" b.push(p[i] + p[j]);"
+ " c.push(p[i] + p[j]);"
" alens.push(plens[i] + plens[j]);"
" }"
"}"
"alens[5] -= 2;" // Here the surrogate pairs match up.
"var a2 = [];"
"var b2 = [];"
+ "var c2 = [];"
"var a2lens = [];"
"for (var m = 0; m < 9; m++) {"
" for (var n = 0; n < 9; n++) {"
" a2.push(a[m] + a[n]);"
" b2.push(b[m] + b[n]);"
+ " var newc = 'x' + c[m] + c[n] + 'y';"
+ " c2.push(newc.substring(1, newc.length - 1));"
" var utf = alens[m] + alens[n];" // And here.
// The 'n's that start with 0xdc.. are 6-8
// The 'm's that end with 0xd8.. are 1, 4 and 7
Utf16Helper(context, "a2", "a2lens", 81);
WriteUtf8Helper(context, "b", "alens", 9);
WriteUtf8Helper(context, "b2", "a2lens", 81);
+ WriteUtf8Helper(context, "c2", "a2lens", 81);
}