1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "src/arguments.h"
8 #include "src/jsregexp-inl.h"
9 #include "src/jsregexp.h"
10 #include "src/runtime/runtime-utils.h"
11 #include "src/string-builder.h"
12 #include "src/string-search.h"
18 // Perform string match of pattern on subject, starting at start index.
19 // Caller must ensure that 0 <= start_index <= sub->length(),
20 // and should check that pat->length() + start_index <= sub->length().
// Returns start_index for an empty pattern and -1 when the pattern cannot fit
// between start_index and the end of the subject. Otherwise the result is
// whatever SearchString reports for the flattened contents.
21 int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat,
23 DCHECK(0 <= start_index);
24 DCHECK(start_index <= sub->length());
26 int pattern_length = pat->length();
// An empty pattern matches immediately at the requested start position.
27 if (pattern_length == 0) return start_index;
29 int subject_length = sub->length();
// Not enough characters left in the subject to hold the pattern.
30 if (start_index + pattern_length > subject_length) return -1;
// Both strings are flattened before their flat contents are requested below.
32 sub = String::Flatten(sub);
33 pat = String::Flatten(pat);
35 DisallowHeapAllocation no_gc; // ensure vectors stay valid
36 // Extract flattened substrings of cons strings before getting encoding.
37 String::FlatContent seq_sub = sub->GetFlatContent();
38 String::FlatContent seq_pat = pat->GetFlatContent();
40 // dispatch on type of strings
// Four combinations: {one-byte, two-byte} pattern x {one-byte, two-byte}
// subject, each forwarded to SearchString with matching vector types.
41 if (seq_pat.IsOneByte()) {
42 Vector<const uint8_t> pat_vector = seq_pat.ToOneByteVector();
43 if (seq_sub.IsOneByte()) {
44 return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,
47 return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector,
50 Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();
51 if (seq_sub.IsOneByte()) {
52 return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,
55 return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, start_index);
59 // This may return an empty MaybeHandle if an exception is thrown or
60 // we abort due to reaching the recursion limit.
// Builds a string in which one occurrence of |search| in |subject| is replaced
// by |replace|. Cons strings are handled by recursing into their two halves;
// |*found| is read after each recursive call to see whether the replacement
// has already been made.
61 MaybeHandle<String> StringReplaceOneCharWithString(
62 Isolate* isolate, Handle<String> subject, Handle<String> search,
63 Handle<String> replace, bool* found, int recursion_limit) {
64 StackLimitCheck stackLimitCheck(isolate);
// Bail out with an empty handle on stack overflow or an exhausted recursion
// budget.
65 if (stackLimitCheck.HasOverflowed() || (recursion_limit == 0)) {
66 return MaybeHandle<String>();
69 if (subject->IsConsString()) {
70 ConsString* cons = ConsString::cast(*subject);
71 Handle<String> first = Handle<String>(cons->first());
72 Handle<String> second = Handle<String>(cons->second());
73 Handle<String> new_first;
// Try the first half. If the replacement happened there, the second half is
// reused unchanged in the new cons string.
74 if (!StringReplaceOneCharWithString(isolate, first, search, replace, found,
75 recursion_limit).ToHandle(&new_first)) {
76 return MaybeHandle<String>();
78 if (*found) return isolate->factory()->NewConsString(new_first, second);
// Not found in the first half: try the second half and keep the original
// first half.
80 Handle<String> new_second;
81 if (!StringReplaceOneCharWithString(isolate, second, search, replace, found,
83 .ToHandle(&new_second)) {
84 return MaybeHandle<String>();
86 if (*found) return isolate->factory()->NewConsString(first, new_second);
// Search |subject| itself, starting at index 0.
90 int index = StringMatch(isolate, subject, search, 0);
91 if (index == -1) return subject;
// The result is subject[0, index) + replace + subject[index + 1, length):
// exactly one character of the subject is dropped at the match position.
93 Handle<String> first = isolate->factory()->NewSubString(subject, 0, index);
95 ASSIGN_RETURN_ON_EXCEPTION(
96 isolate, cons1, isolate->factory()->NewConsString(first, replace),
98 Handle<String> second =
99 isolate->factory()->NewSubString(subject, index + 1, subject->length());
100 return isolate->factory()->NewConsString(cons1, second);
// Runtime entry point. Arguments: (0) subject string, (1) search string,
// (2) replacement string.
105 RUNTIME_FUNCTION(Runtime_StringReplaceOneCharWithString) {
106 HandleScope scope(isolate);
107 DCHECK(args.length() == 3);
108 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
109 CONVERT_ARG_HANDLE_CHECKED(String, search, 1);
110 CONVERT_ARG_HANDLE_CHECKED(String, replace, 2);
112 // If the cons string tree is too deep, we simply abort the recursion and
113 // retry with a flattened subject string.
114 const int kRecursionLimit = 0x1000;
116 Handle<String> result;
// First attempt, on the subject as given, with a bounded recursion depth.
117 if (StringReplaceOneCharWithString(isolate, subject, search, replace, &found,
118 kRecursionLimit).ToHandle(&result)) {
// An empty result with a pending exception is a real failure and is
// propagated; without one, fall through to the flattened retry.
121 if (isolate->has_pending_exception()) return isolate->heap()->exception();
123 subject = String::Flatten(subject);
124 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
126 StringReplaceOneCharWithString(isolate, subject, search, replace, &found,
// Runtime entry point. Arguments: (0) subject string, (1) pattern string,
// (2) start index. Returns the position reported by StringMatch as a Smi.
132 RUNTIME_FUNCTION(Runtime_StringIndexOf) {
133 HandleScope scope(isolate);
134 DCHECK(args.length() == 3);
136 CONVERT_ARG_HANDLE_CHECKED(String, sub, 0);
137 CONVERT_ARG_HANDLE_CHECKED(String, pat, 1);
138 CONVERT_ARG_HANDLE_CHECKED(Object, index, 2);
140 uint32_t start_index;
// A start index that is not a valid array index yields -1 (no match).
141 if (!index->ToArrayIndex(&start_index)) return Smi::FromInt(-1);
// Enforces StringMatch's precondition start_index <= sub->length().
143 RUNTIME_ASSERT(start_index <= static_cast<uint32_t>(sub->length()));
144 int position = StringMatch(isolate, sub, pat, start_index);
145 return Smi::FromInt(position);
// Backwards search helper: scans |subject| from position |idx| down to 0 for
// an occurrence of |pattern|. Requires a non-empty pattern and
// idx + pattern.length() <= subject.length().
149 template <typename schar, typename pchar>
150 static int StringMatchBackwards(Vector<const schar> subject,
151 Vector<const pchar> pattern, int idx) {
152 int pattern_length = pattern.length();
153 DCHECK(pattern_length >= 1);
154 DCHECK(idx + pattern_length <= subject.length());
// Only when the subject is one-byte and the pattern is wider: inspect each
// pattern character for values above String::kMaxOneByteCharCode.
// NOTE(review): presumably such a pattern can never match a one-byte
// subject; confirm against the body of this branch.
156 if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
157 for (int i = 0; i < pattern_length; i++) {
159 if (c > String::kMaxOneByteCharCode) {
165 pchar pattern_first_char = pattern[0];
// Candidate start positions are visited from idx downwards.
166 for (int i = idx; i >= 0; i--) {
// Cheap filter on the first character before comparing the rest.
167 if (subject[i] != pattern_first_char) continue;
169 while (j < pattern_length) {
170 if (pattern[j] != subject[i + j]) {
// j reaching pattern_length means every pattern character matched at i.
175 if (j == pattern_length) {
// Runtime entry point. Arguments: (0) subject string, (1) pattern string,
// (2) start index. Dispatches to StringMatchBackwards on the flattened
// contents and returns the resulting position as a Smi.
183 RUNTIME_FUNCTION(Runtime_StringLastIndexOf) {
184 HandleScope scope(isolate);
185 DCHECK(args.length() == 3);
187 CONVERT_ARG_HANDLE_CHECKED(String, sub, 0);
188 CONVERT_ARG_HANDLE_CHECKED(String, pat, 1);
189 CONVERT_ARG_HANDLE_CHECKED(Object, index, 2);
191 uint32_t start_index;
// A start index that is not a valid array index yields -1 (no match).
192 if (!index->ToArrayIndex(&start_index)) return Smi::FromInt(-1);
194 uint32_t pat_length = pat->length();
195 uint32_t sub_length = sub->length();
// Clamp the start so that the pattern fits inside the subject.
// NOTE(review): all three values are uint32_t. If pat_length > sub_length the
// subtraction wraps, and start_index + pat_length can itself wrap for very
// large start indices. Confirm that callers clamp before calling.
197 if (start_index + pat_length > sub_length) {
198 start_index = sub_length - pat_length;
// An empty pattern matches at the (already clamped) start index.
201 if (pat_length == 0) {
202 return Smi::FromInt(start_index);
205 sub = String::Flatten(sub);
206 pat = String::Flatten(pat);
209 DisallowHeapAllocation no_gc; // ensure vectors stay valid
211 String::FlatContent sub_content = sub->GetFlatContent();
212 String::FlatContent pat_content = pat->GetFlatContent();
// Dispatch on the one-byte / two-byte representation of pattern and subject.
214 if (pat_content.IsOneByte()) {
215 Vector<const uint8_t> pat_vector = pat_content.ToOneByteVector();
216 if (sub_content.IsOneByte()) {
217 position = StringMatchBackwards(sub_content.ToOneByteVector(), pat_vector,
220 position = StringMatchBackwards(sub_content.ToUC16Vector(), pat_vector,
224 Vector<const uc16> pat_vector = pat_content.ToUC16Vector();
225 if (sub_content.IsOneByte()) {
226 position = StringMatchBackwards(sub_content.ToOneByteVector(), pat_vector,
229 position = StringMatchBackwards(sub_content.ToUC16Vector(), pat_vector,
234 return Smi::FromInt(position);
// Runtime entry point. Arguments: (0) str1, (1) str2. Returns a Smi that is
// zero for equal strings and otherwise carries the sign of the comparison.
// The comparison visible here is by character code (differences of Get(i)
// values); no locale-specific collation appears in this function.
238 RUNTIME_FUNCTION(Runtime_StringLocaleCompare) {
239 HandleScope handle_scope(isolate);
240 DCHECK(args.length() == 2);
242 CONVERT_ARG_HANDLE_CHECKED(String, str1, 0);
243 CONVERT_ARG_HANDLE_CHECKED(String, str2, 1);
245 if (str1.is_identical_to(str2)) return Smi::FromInt(0); // Equal.
246 int str1_length = str1->length();
247 int str2_length = str2->length();
249 // Decide trivial cases without flattening.
// With one side empty, the result is plus or minus the other side's length.
250 if (str1_length == 0) {
251 if (str2_length == 0) return Smi::FromInt(0); // Equal.
252 return Smi::FromInt(-str2_length);
254 if (str2_length == 0) return Smi::FromInt(str1_length);
// Length of the common range that has to be compared character by character.
257 int end = str1_length < str2_length ? str1_length : str2_length;
259 // No need to flatten if we are going to find the answer on the first
260 // character. At this point we know there is at least one character
261 // in each string, due to the trivial case handling above.
262 int d = str1->Get(0) - str2->Get(0);
263 if (d != 0) return Smi::FromInt(d);
265 str1 = String::Flatten(str1);
266 str2 = String::Flatten(str2);
268 DisallowHeapAllocation no_gc;
269 String::FlatContent flat1 = str1->GetFlatContent();
270 String::FlatContent flat2 = str2->GetFlatContent();
// The first differing character decides; the result is the code difference.
272 for (int i = 0; i < end; i++) {
273 if (flat1.Get(i) != flat2.Get(i)) {
274 return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
// One string is a prefix of the other (or they are equal): compare lengths.
278 return Smi::FromInt(str1_length - str2_length);
// Runtime entry point. Arguments: (0) string, (1) from, (2) to. Returns the
// substring [start, end) created by Factory::NewSubString.
282 RUNTIME_FUNCTION(Runtime_SubString) {
283 HandleScope scope(isolate);
284 DCHECK(args.length() == 3);
286 CONVERT_ARG_HANDLE_CHECKED(String, string, 0);
288 // We have a fast integer-only case here to avoid a conversion to double in
289 // the common case where from and to are Smis.
290 if (args[1]->IsSmi() && args[2]->IsSmi()) {
291 CONVERT_SMI_ARG_CHECKED(from_number, 1);
292 CONVERT_SMI_ARG_CHECKED(to_number, 2);
// General case: both bounds are read as doubles and converted with
// FastD2IChecked.
296 CONVERT_DOUBLE_ARG_CHECKED(from_number, 1);
297 CONVERT_DOUBLE_ARG_CHECKED(to_number, 2);
298 start = FastD2IChecked(from_number);
299 end = FastD2IChecked(to_number);
// Together these checks require 0 <= start <= end <= string->length().
301 RUNTIME_ASSERT(end >= start);
302 RUNTIME_ASSERT(start >= 0);
303 RUNTIME_ASSERT(end <= string->length());
304 isolate->counters()->sub_string_runtime()->Increment();
306 return *isolate->factory()->NewSubString(string, start, end);
// Runtime entry point. Arguments: (0) str1, (1) str2. Concatenates the two
// strings via Factory::NewConsString; a failure there is returned as an
// exception by ASSIGN_RETURN_FAILURE_ON_EXCEPTION.
310 RUNTIME_FUNCTION(Runtime_StringAdd) {
311 HandleScope scope(isolate);
312 DCHECK(args.length() == 2);
313 CONVERT_ARG_HANDLE_CHECKED(String, str1, 0);
314 CONVERT_ARG_HANDLE_CHECKED(String, str2, 1);
// Count every trip through this runtime path.
315 isolate->counters()->string_add_runtime()->Increment();
316 Handle<String> result;
317 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
318 isolate, result, isolate->factory()->NewConsString(str1, str2));
// Runtime entry point. Argument: (0) string. Returns the result of
// Factory::InternalizeString for that string.
323 RUNTIME_FUNCTION(Runtime_InternalizeString) {
324 HandleScope handles(isolate);
// The argument count is checked with RUNTIME_ASSERT here, whereas the
// neighbouring runtime functions use DCHECK.
325 RUNTIME_ASSERT(args.length() == 1);
326 CONVERT_ARG_HANDLE_CHECKED(String, string, 0);
327 return *isolate->factory()->InternalizeString(string);
// Runtime entry point. Arguments: (0) subject string, (1) regexp,
// (2) regexp_info array (the last-match info to update). Collects all matches
// produced by a RegExpImpl::GlobalCache and returns a JSArray of the matched
// substrings, or null when there is no match.
331 RUNTIME_FUNCTION(Runtime_StringMatch) {
332 HandleScope handles(isolate);
333 DCHECK(args.length() == 3);
335 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
336 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
337 CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2);
339 RUNTIME_ASSERT(regexp_info->HasFastObjectElements());
341 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
342 if (global_cache.HasException()) return isolate->heap()->exception();
344 int capture_count = regexp->CaptureCount();
// Match offsets are collected in a zone-allocated list as (start, end) pairs.
346 ZoneScope zone_scope(isolate->runtime_zone());
347 ZoneList<int> offsets(8, zone_scope.zone());
// FetchNext() returning NULL ends the collection.
350 int32_t* match = global_cache.FetchNext();
351 if (match == NULL) break;
352 offsets.Add(match[0], zone_scope.zone()); // start
353 offsets.Add(match[1], zone_scope.zone()); // end
// Fetching may have ended because of an exception rather than exhaustion.
356 if (global_cache.HasException()) return isolate->heap()->exception();
358 if (offsets.length() == 0) {
359 // Not a single match.
360 return isolate->heap()->null_value();
363 RegExpImpl::SetLastMatchInfo(regexp_info, subject, capture_count,
364 global_cache.LastSuccessfulMatch());
// Two offsets per match.
366 int matches = offsets.length() / 2;
367 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches);
// The first match is created with NewSubString; the remaining ones use
// NewProperSubString inside a per-iteration HandleScope.
368 Handle<String> substring =
369 isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1));
370 elements->set(0, *substring);
371 for (int i = 1; i < matches; i++) {
372 HandleScope temp_scope(isolate);
373 int from = offsets.at(i * 2);
374 int to = offsets.at(i * 2 + 1);
375 Handle<String> substring =
376 isolate->factory()->NewProperSubString(subject, from, to);
377 elements->set(i, *substring);
379 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements);
380 result->set_length(Smi::FromInt(matches));
// Runtime entry point. Arguments: (0) subject string, (1) index (converted to
// uint32_t). Returns the character code at that index as a Smi, or NaN when
// the index is not below the string length.
385 RUNTIME_FUNCTION(Runtime_StringCharCodeAtRT) {
386 HandleScope handle_scope(isolate);
387 DCHECK(args.length() == 2);
389 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
390 CONVERT_NUMBER_CHECKED(uint32_t, i, Uint32, args[1]);
392 // Flatten the string. If someone wants to get a char at an index
393 // in a cons string, it is likely that more indices will be
395 subject = String::Flatten(subject);
// Out-of-range index: the result is NaN.
397 if (i >= static_cast<uint32_t>(subject->length())) {
398 return isolate->heap()->nan_value();
401 return Smi::FromInt(subject->Get(i));
// Runtime entry point. Argument: (0) character code. For a numeric argument
// the single-character string for that code is looked up through the
// factory; a non-number argument yields the empty string.
405 RUNTIME_FUNCTION(Runtime_CharFromCode) {
406 HandleScope handlescope(isolate);
407 DCHECK(args.length() == 1);
408 if (args[0]->IsNumber()) {
409 CONVERT_NUMBER_CHECKED(uint32_t, code, Uint32, args[0]);
411 return *isolate->factory()->LookupSingleCharacterStringFromCode(code);
// Reached for a non-number argument.
413 return isolate->heap()->empty_string();
// Runtime entry point. Arguments: (0) x, (1) y. Produces one of the Smi
// values LESS, EQUAL or GREATER for the pair of strings.
417 RUNTIME_FUNCTION(Runtime_StringCompare) {
418 HandleScope handle_scope(isolate);
419 DCHECK(args.length() == 2);
421 CONVERT_ARG_HANDLE_CHECKED(String, x, 0);
422 CONVERT_ARG_HANDLE_CHECKED(String, y, 1);
424 isolate->counters()->string_compare_runtime()->Increment();
426 // A few fast case tests before we flatten.
427 if (x.is_identical_to(y)) return Smi::FromInt(EQUAL);
// An empty string compares LESS than any non-empty string.
428 if (y->length() == 0) {
429 if (x->length() == 0) return Smi::FromInt(EQUAL);
430 return Smi::FromInt(GREATER);
431 } else if (x->length() == 0) {
432 return Smi::FromInt(LESS);
// Difference of the first characters, available without flattening.
435 int d = x->Get(0) - y->Get(0);
437 return Smi::FromInt(LESS);
439 return Smi::FromInt(GREATER);
442 x = String::Flatten(x);
443 y = String::Flatten(y);
445 DisallowHeapAllocation no_gc;
// equal_prefix_result is the outcome to use when the common prefix compares
// equal: it depends only on which string is longer.
446 Object* equal_prefix_result = Smi::FromInt(EQUAL);
447 int prefix_length = x->length();
448 if (y->length() < prefix_length) {
449 prefix_length = y->length();
450 equal_prefix_result = Smi::FromInt(GREATER);
451 } else if (y->length() > prefix_length) {
452 equal_prefix_result = Smi::FromInt(LESS);
455 String::FlatContent x_content = x->GetFlatContent();
456 String::FlatContent y_content = y->GetFlatContent();
// Compare the first prefix_length characters, dispatching on the one-byte /
// two-byte representation of each side.
457 if (x_content.IsOneByte()) {
458 Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
459 if (y_content.IsOneByte()) {
460 Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
461 r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
463 Vector<const uc16> y_chars = y_content.ToUC16Vector();
464 r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
467 Vector<const uc16> x_chars = x_content.ToUC16Vector();
468 if (y_content.IsOneByte()) {
469 Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
470 r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
472 Vector<const uc16> y_chars = y_content.ToUC16Vector();
473 r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
// The final result is either the length-based outcome or the sign of r.
478 result = equal_prefix_result;
480 result = (r < 0) ? Smi::FromInt(LESS) : Smi::FromInt(GREATER);
// Runtime entry point. Arguments: (0) array of parts, (1) number of array
// entries to use, (2) the "special" string passed on to the concat helpers.
// Computes the total length with StringBuilderConcatLength, allocates a
// one-byte or two-byte result and fills it with StringBuilderConcatHelper.
486 RUNTIME_FUNCTION(Runtime_StringBuilderConcat) {
487 HandleScope scope(isolate);
488 DCHECK(args.length() == 3);
489 CONVERT_ARG_HANDLE_CHECKED(JSArray, array, 0);
490 int32_t array_length;
// A length argument that does not convert to int32 raises the
// invalid-string-length error.
491 if (!args[1]->ToInt32(&array_length)) {
492 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewInvalidStringLengthError());
494 CONVERT_ARG_HANDLE_CHECKED(String, special, 2);
// The requested length must be non-negative and no larger than the array's
// own length.
496 size_t actual_array_length = 0;
498 TryNumberToSize(isolate, array->length(), &actual_array_length));
499 RUNTIME_ASSERT(array_length >= 0);
500 RUNTIME_ASSERT(static_cast<size_t>(array_length) <= actual_array_length);
502 // This assumption is used by the slice encoding in one or two smis.
503 DCHECK(Smi::kMaxValue >= String::kMaxLength);
505 RUNTIME_ASSERT(array->HasFastElements());
506 JSObject::EnsureCanContainHeapObjectElements(array);
508 int special_length = special->length();
509 if (!array->HasFastObjectElements()) {
510 return isolate->Throw(isolate->heap()->illegal_argument_string());
// Initial guess for the result encoding; StringBuilderConcatLength receives
// a pointer to it below.
514 bool one_byte = special->HasOnlyOneByteChars();
517 DisallowHeapAllocation no_gc;
518 FixedArray* fixed_array = FixedArray::cast(array->elements());
// Never read past the end of the backing store.
519 if (fixed_array->length() < array_length) {
520 array_length = fixed_array->length();
// Shortcuts: no parts gives the empty string; a single string part is
// returned as is.
523 if (array_length == 0) {
524 return isolate->heap()->empty_string();
525 } else if (array_length == 1) {
526 Object* first = fixed_array->get(0);
527 if (first->IsString()) return first;
529 length = StringBuilderConcatLength(special_length, fixed_array,
530 array_length, &one_byte);
534 return isolate->Throw(isolate->heap()->illegal_argument_string());
// One-byte result.
538 Handle<SeqOneByteString> answer;
539 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
540 isolate, answer, isolate->factory()->NewRawOneByteString(length));
541 StringBuilderConcatHelper(*special, answer->GetChars(),
542 FixedArray::cast(array->elements()),
// Two-byte result.
546 Handle<SeqTwoByteString> answer;
547 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
548 isolate, answer, isolate->factory()->NewRawTwoByteString(length));
549 StringBuilderConcatHelper(*special, answer->GetChars(),
550 FixedArray::cast(array->elements()),
// Runtime entry point. Arguments: (0) array of strings, (1) number of array
// entries to join, (2) separator string (must be non-empty). Writes
// element, separator, element, ... into a freshly allocated two-byte string.
557 RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {
558 HandleScope scope(isolate);
559 DCHECK(args.length() == 3);
560 CONVERT_ARG_HANDLE_CHECKED(JSArray, array, 0);
561 int32_t array_length;
562 if (!args[1]->ToInt32(&array_length)) {
563 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewInvalidStringLengthError());
565 CONVERT_ARG_HANDLE_CHECKED(String, separator, 2);
566 RUNTIME_ASSERT(array->HasFastObjectElements());
567 RUNTIME_ASSERT(array_length >= 0);
569 Handle<FixedArray> fixed_array(FixedArray::cast(array->elements()));
// Never read past the end of the backing store.
570 if (fixed_array->length() < array_length) {
571 array_length = fixed_array->length();
574 if (array_length == 0) {
575 return isolate->heap()->empty_string();
576 } else if (array_length == 1) {
577 Object* first = fixed_array->get(0);
578 RUNTIME_ASSERT(first->IsString());
582 int separator_length = separator->length();
583 RUNTIME_ASSERT(separator_length > 0);
// Upper bound (rounded up) on how many separators fit in a string of
// String::kMaxLength; more than that cannot produce a valid result.
584 int max_nof_separators =
585 (String::kMaxLength + separator_length - 1) / separator_length;
586 if (max_nof_separators < (array_length - 1)) {
587 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewInvalidStringLengthError());
// Total length: all separators first, then each element's length is added
// while guarding against exceeding String::kMaxLength.
589 int length = (array_length - 1) * separator_length;
590 for (int i = 0; i < array_length; i++) {
591 Object* element_obj = fixed_array->get(i);
592 RUNTIME_ASSERT(element_obj->IsString());
593 String* element = String::cast(element_obj);
594 int increment = element->length();
595 if (increment > String::kMaxLength - length) {
596 STATIC_ASSERT(String::kMaxLength < kMaxInt);
597 length = kMaxInt; // Provoke exception;
603 Handle<SeqTwoByteString> answer;
604 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
605 isolate, answer, isolate->factory()->NewRawTwoByteString(length));
607 DisallowHeapAllocation no_gc;
// sink is the write cursor into the result; end is only used by the DCHECKs
// below.
609 uc16* sink = answer->GetChars();
611 uc16* end = sink + length;
614 RUNTIME_ASSERT(fixed_array->get(0)->IsString());
615 String* first = String::cast(fixed_array->get(0));
616 String* separator_raw = *separator;
617 int first_length = first->length();
618 String::WriteToFlat(first, sink, 0, first_length);
619 sink += first_length;
// Every following element is preceded by one separator.
621 for (int i = 1; i < array_length; i++) {
622 DCHECK(sink + separator_length <= end);
623 String::WriteToFlat(separator_raw, sink, 0, separator_length);
624 sink += separator_length;
626 RUNTIME_ASSERT(fixed_array->get(i)->IsString());
627 String* element = String::cast(fixed_array->get(i));
628 int element_length = element->length();
629 DCHECK(sink + element_length <= end);
630 String::WriteToFlat(element, sink, 0, element_length);
631 sink += element_length;
635 // Use %_FastOneByteArrayJoin instead.
636 DCHECK(!answer->IsOneByteRepresentation());
// Writes the join of a sparse array into |buffer|. |elements| holds
// alternating (position, string) entries; |array_length| is the length of the
// original array and determines how many separators follow the last string.
// Char is the character type of the output buffer.
640 template <typename Char>
641 static void JoinSparseArrayWithSeparator(FixedArray* elements,
643 uint32_t array_length,
645 Vector<Char> buffer) {
646 DisallowHeapAllocation no_gc;
// Number of separators written so far.
647 int previous_separator_position = 0;
648 int separator_length = separator->length();
650 for (int i = 0; i < elements_length; i += 2) {
651 int position = NumberToInt32(elements->get(i));
652 String* string = String::cast(elements->get(i + 1));
653 int string_length = string->length();
// Empty strings are skipped entirely. For a non-empty string, separators are
// emitted until the count reaches this entry's position, then the string.
654 if (string->length() > 0) {
655 while (previous_separator_position < position) {
656 String::WriteToFlat<Char>(separator, &buffer[cursor], 0,
658 cursor += separator_length;
659 previous_separator_position++;
661 String::WriteToFlat<Char>(string, &buffer[cursor], 0, string_length);
662 cursor += string->length();
// Trailing separators, up to array_length - 1 separators in total.
665 if (separator_length > 0) {
666 // Array length must be representable as a signed 32-bit number,
667 // otherwise the total string length would have been too large.
668 DCHECK(array_length <= 0x7fffffff); // Is int32_t.
669 int last_array_index = static_cast<int>(array_length - 1);
670 while (previous_separator_position < last_array_index) {
671 String::WriteToFlat<Char>(separator, &buffer[cursor], 0,
673 cursor += separator_length;
674 previous_separator_position++;
677 DCHECK(cursor <= buffer.length());
// Runtime entry point. Arguments: (0) elements_array of alternating positions
// and strings, (1) length of the original array, (2) separator string.
// Validates the input, computes the length of the joined string, and fills a
// one-byte or two-byte result via JoinSparseArrayWithSeparator.
681 RUNTIME_FUNCTION(Runtime_SparseJoinWithSeparator) {
682 HandleScope scope(isolate);
683 DCHECK(args.length() == 3);
684 CONVERT_ARG_HANDLE_CHECKED(JSArray, elements_array, 0);
685 CONVERT_NUMBER_CHECKED(uint32_t, array_length, Uint32, args[1]);
686 CONVERT_ARG_HANDLE_CHECKED(String, separator, 2);
687 // elements_array is fast-mode JSarray of alternating positions
688 // (increasing order) and strings.
689 RUNTIME_ASSERT(elements_array->HasFastSmiOrObjectElements());
690 // array_length is length of original array (used to add separators);
691 // separator is string to put between elements. Assumed to be non-empty.
692 RUNTIME_ASSERT(array_length > 0);
694 // Find total length of join result.
695 int string_length = 0;
696 bool is_one_byte = separator->IsOneByteRepresentation();
697 bool overflow = false;
698 CONVERT_NUMBER_CHECKED(int, elements_length, Int32, elements_array->length());
699 RUNTIME_ASSERT(elements_length <= elements_array->elements()->length());
700 RUNTIME_ASSERT((elements_length & 1) == 0); // Even length.
701 FixedArray* elements = FixedArray::cast(elements_array->elements());
// First pass: validate every (position, string) pair.
702 for (int i = 0; i < elements_length; i += 2) {
703 RUNTIME_ASSERT(elements->get(i)->IsNumber());
704 CONVERT_NUMBER_CHECKED(uint32_t, position, Uint32, elements->get(i));
705 RUNTIME_ASSERT(position < array_length);
706 RUNTIME_ASSERT(elements->get(i + 1)->IsString());
// Second pass: sum the string lengths, tracking the encoding and whether the
// total would exceed String::kMaxLength.
710 DisallowHeapAllocation no_gc;
711 for (int i = 0; i < elements_length; i += 2) {
712 String* string = String::cast(elements->get(i + 1));
713 int length = string->length();
714 if (is_one_byte && !string->IsOneByteRepresentation()) {
717 if (length > String::kMaxLength ||
718 String::kMaxLength - length < string_length) {
722 string_length += length;
// Add room for array_length - 1 separators if they still fit.
726 int separator_length = separator->length();
727 if (!overflow && separator_length > 0) {
728 if (array_length <= 0x7fffffffu) {
729 int separator_count = static_cast<int>(array_length) - 1;
730 int remaining_length = String::kMaxLength - string_length;
731 if ((remaining_length / separator_length) >= separator_count) {
732 string_length += separator_length * (array_length - 1);
734 // Not room for the separators within the maximal string length.
738 // Nonempty separator and at least 2^31-1 separators necessary
739 // means that the string is too large to create.
740 STATIC_ASSERT(String::kMaxLength < 0x7fffffff);
745 // Throw an exception if the resulting string is too large. See
746 // https://code.google.com/p/chromium/issues/detail?id=336820
748 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewInvalidStringLengthError());
// One-byte result.
752 Handle<SeqOneByteString> result = isolate->factory()
753 ->NewRawOneByteString(string_length)
755 JoinSparseArrayWithSeparator<uint8_t>(
756 FixedArray::cast(elements_array->elements()), elements_length,
757 array_length, *separator,
758 Vector<uint8_t>(result->GetChars(), string_length));
// Two-byte result.
761 Handle<SeqTwoByteString> result = isolate->factory()
762 ->NewRawTwoByteString(string_length)
764 JoinSparseArrayWithSeparator<uc16>(
765 FixedArray::cast(elements_array->elements()), elements_length,
766 array_length, *separator,
767 Vector<uc16>(result->GetChars(), string_length));
773 // Copies Latin1 characters to the given fixed array looking up
774 // one-char strings in the cache. Gives up on the first char that is
775 // not in the cache and fills the remainder with smi zeros. Returns
776 // the length of the successfully copied prefix.
777 static int CopyCachedOneByteCharsToArray(Heap* heap, const uint8_t* chars,
778 FixedArray* elements, int length) {
779 DisallowHeapAllocation no_gc;
780 FixedArray* one_byte_cache = heap->single_character_string_cache();
781 Object* undefined = heap->undefined_value();
783 WriteBarrierMode mode = elements->GetWriteBarrierMode(no_gc);
// The cache is indexed by character code; an undefined slot means that
// character has no cached string, which ends the copied prefix.
784 for (i = 0; i < length; ++i) {
785 Object* value = one_byte_cache->get(chars[i]);
786 if (value == undefined) break;
787 elements->set(i, value, mode);
// Zero-fill the remaining slots. The memset is only valid because Smi zero
// is the all-zero bit pattern, which the DCHECK below asserts.
790 DCHECK(Smi::FromInt(0) == 0);
791 memset(elements->data_start() + i, 0, kPointerSize * (length - i));
// Consistency check: every slot is either Smi zero or a valid-looking string.
794 for (int j = 0; j < length; ++j) {
795 Object* element = elements->get(j);
796 DCHECK(element == Smi::FromInt(0) ||
797 (element->IsString() && String::cast(element)->LooksValid()));
804 // Converts a String to JSArray.
805 // For example, "foo" => ["f", "o", "o"].
// Arguments: (0) the string, (1) limit (uint32). At most |limit| leading
// characters are converted.
806 RUNTIME_FUNCTION(Runtime_StringToArray) {
807 HandleScope scope(isolate);
808 DCHECK(args.length() == 2);
809 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
810 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[1]);
812 s = String::Flatten(s);
// Number of elements in the result: the shorter of string length and limit.
813 const int length = static_cast<int>(Min<uint32_t>(s->length(), limit));
815 Handle<FixedArray> elements;
817 if (s->IsFlat() && s->IsOneByteRepresentation()) {
818 // Try using cached chars where possible.
819 elements = isolate->factory()->NewUninitializedFixedArray(length);
821 DisallowHeapAllocation no_gc;
822 String::FlatContent content = s->GetFlatContent();
823 if (content.IsOneByte()) {
824 Vector<const uint8_t> chars = content.ToOneByteVector();
825 // Note, this will initialize all elements (not only the prefix)
826 // to prevent GC from seeing partially initialized array.
827 position = CopyCachedOneByteCharsToArray(isolate->heap(), chars.start(),
// Alternative initialization: fill the array with undefined.
830 MemsetPointer(elements->data_start(), isolate->heap()->undefined_value(),
// Array for strings that do not take the fast path above.
834 elements = isolate->factory()->NewFixedArray(length);
// Fill every slot from |position| onwards with a single-character string
// obtained from the factory.
836 for (int i = position; i < length; ++i) {
838 isolate->factory()->LookupSingleCharacterStringFromCode(s->Get(i));
839 elements->set(i, *str);
// Consistency check: every element is a string of length 1.
843 for (int i = 0; i < length; ++i) {
844 DCHECK(String::cast(elements->get(i))->length() == 1);
848 return *isolate->factory()->NewJSArrayWithElements(elements);
// Returns true exactly when |character| is 0xff or 0xb5, the two code points
// named in the comment below.
852 static inline bool ToUpperOverflows(uc32 character) {
853 // y with umlauts and the micro sign are the only characters that stop
854 // fitting into one-byte when converting to uppercase.
855 static const uc32 yuml_code = 0xff;
856 static const uc32 micro_code = 0xb5;
857 return (character == yuml_code || character == micro_code);
// Runs |mapping| over the characters of |string| and stores the converted
// characters in |result|, which is treated as having |result_length|
// characters. When a conversion does not fit the assumption that the result
// is as long as the input, the function instead returns a Smi holding the
// exact length required; that length is negated when a ToUpperOverflows
// character was seen and overflow is not being ignored.
861 template <class Converter>
862 MUST_USE_RESULT static Object* ConvertCaseHelper(
863 Isolate* isolate, String* string, SeqString* result, int result_length,
864 unibrow::Mapping<Converter, 128>* mapping) {
865 DisallowHeapAllocation no_gc;
866 // We try this twice, once with the assumption that the result is no longer
867 // than the input and, if that assumption breaks, again with the exact
868 // length. This may not be pretty, but it is nicer than what was here before
869 // and I hereby claim my vaffel-is.
871 // NOTE: This assumes that the upper/lower case of an ASCII
872 // character is also ASCII. This is currently the case, but it
873 // might break in the future if we implement more context and locale
874 // dependent upper/lower conversions.
875 bool has_changed_character = false;
877 // Convert all characters to upper case, assuming that they will fit
879 StringCharacterStream stream(string);
880 unibrow::uchar chars[Converter::kMaxWidth];
881 // We can assume that the string is not empty
882 uc32 current = stream.GetNext();
// Overflow out of the one-byte range only matters for to-upper conversion
// into a result that is not a two-byte string.
883 bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
884 for (int i = 0; i < result_length;) {
// The mapping is given the current character together with its successor
// (0 when there is none).
885 bool has_next = stream.HasMore();
886 uc32 next = has_next ? stream.GetNext() : 0;
887 int char_length = mapping->get(current, next, chars);
888 if (char_length == 0) {
889 // The case conversion of this character is the character itself.
890 result->Set(i, current);
892 } else if (char_length == 1 &&
893 (ignore_overflow || !ToUpperOverflows(current))) {
894 // Common case: converting the letter resulted in one character.
895 DCHECK(static_cast<uc32>(chars[0]) != current);
896 result->Set(i, chars[0]);
897 has_changed_character = true;
899 } else if (result_length == string->length()) {
900 bool overflows = ToUpperOverflows(current);
901 // We've assumed that the result would be as long as the
902 // input but here is a character that converts to several
903 // characters. No matter, we calculate the exact length
904 // of the result and try the whole thing again.
906 // Note that this leaves room for optimization. We could just
907 // memcpy what we already have to the result string. Also,
908 // the result string is the last object allocated we could
909 // "realloc" it and probably, in the vast majority of cases,
910 // extend the existing string to be able to hold the full
914 next_length = mapping->get(next, 0, chars);
915 if (next_length == 0) next_length = 1;
// Length so far: i characters already written, plus the current character's
// conversion, plus the already-fetched next character's conversion.
917 int current_length = i + char_length + next_length;
918 while (stream.HasMore()) {
919 current = stream.GetNext();
920 overflows |= ToUpperOverflows(current);
921 // NOTE: we use 0 as the next character here because, while
922 // the next character may affect what a character converts to,
923 // it does not in any case affect the length of what it convert
925 int char_length = mapping->get(current, 0, chars);
926 if (char_length == 0) char_length = 1;
927 current_length += char_length;
928 if (current_length > String::kMaxLength) {
929 AllowHeapAllocation allocate_error_and_return;
930 THROW_NEW_ERROR_RETURN_FAILURE(isolate,
931 NewInvalidStringLengthError());
934 // Try again with the real length. Return signed if we need
935 // to allocate a two-byte string for to uppercase.
936 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
937 : Smi::FromInt(current_length);
// Remaining case: copy each of the char_length converted characters into the
// result.
939 for (int j = 0; j < char_length; j++) {
940 result->Set(i, chars[j]);
943 has_changed_character = true;
947 if (has_changed_character) {
950 // If we didn't actually change anything in doing the conversion
951 // we simply return the result and let the converted string
952 // become garbage; there is no reason to keep two identical strings
// kOneInEveryByte is kUintptrAllBitsSet / 0xFF, i.e. the value 0x01 repeated
// in every byte of the word (given that kUintptrAllBitsSet is all ones).
// kAsciiMask shifts that left by 7, selecting the high bit of every byte.
959 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;
960 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;
962 // Given a word and two range boundaries returns a word with high bit
963 // set in every byte iff the corresponding input byte was strictly in
964 // the range (m, n). All the other bits in the result are cleared.
965 // This function is only useful when it can be inlined and the
966 // boundaries are statically known.
967 // Requires: all bytes in the input word and the boundaries must be
968 // ASCII (less than 0x7F).
969 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
970 // Use strict inequalities since in edge cases the function could be
971 // further simplified.
972 DCHECK(0 < m && m < n);
973 // Has high bit set in every w byte less than n.
974 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
975 // Has high bit set in every w byte greater than m.
976 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);
// Keep only the per-byte high bits where both conditions hold.
977 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
// Verification helper for FastAsciiConvert. Walks dst and src in parallel,
// DCHECKs that every differing byte is a letter shifted by exactly
// 'a' - 'A' in the expected direction, and returns whether the observed
// "anything changed" state agrees with |changed|.
982 static bool CheckFastAsciiConvert(char* dst, const char* src, int length,
983 bool changed, bool is_to_lower) {
984 bool expected_changed = false;
985 for (int i = 0; i < length; i++) {
// Unchanged bytes need no further checking.
986 if (dst[i] == src[i]) continue;
987 expected_changed = true;
// To-lower direction: source must be 'A'..'Z' and dst the lower-case letter.
989 DCHECK('A' <= src[i] && src[i] <= 'Z');
990 DCHECK(dst[i] == src[i] + ('a' - 'A'));
// To-upper direction: source must be 'a'..'z' and dst the upper-case letter.
992 DCHECK('a' <= src[i] && src[i] <= 'z');
993 DCHECK(dst[i] == src[i] - ('a' - 'A'));
996 return (expected_changed == changed);
// Case-converts |length| bytes from |src| into |dst|, a machine word at a
// time where alignment allows. Returns false when a byte with the high bit
// set was accumulated in or_acc (see the kAsciiMask test below); on the
// path that reaches it, *changed_out receives whether any byte was altered.
1001 template <class Converter>
1002 static bool FastAsciiConvert(char* dst, const char* src, int length,
1003 bool* changed_out) {
// Original pointers are kept for the CheckFastAsciiConvert DCHECK at the end.
1005 char* saved_dst = dst;
1006 const char* saved_src = src;
1008 DisallowHeapAllocation no_gc;
1009 // We rely on the distance between upper and lower case letters
1010 // being a known power of 2.
1011 DCHECK('a' - 'A' == (1 << 5));
1012 // Boundaries for the range of input characters that require conversion.
1013 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1;
1014 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;
1015 bool changed = false;
1016 uintptr_t or_acc = 0;
1017 const char* const limit = src + length;
1019 // dst is newly allocated and always aligned.
1020 DCHECK(IsAligned(reinterpret_cast<intptr_t>(dst), sizeof(uintptr_t)));
1021 // Only attempt processing one word at a time if src is also aligned.
1022 if (IsAligned(reinterpret_cast<intptr_t>(src), sizeof(uintptr_t))) {
1023 // Process the prefix of the input that requires no conversion one aligned
1024 // (machine) word at a time.
1025 while (src <= limit - sizeof(uintptr_t)) {
1026 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
// A non-zero mask means this word contains a byte strictly between lo and hi.
1028 if (AsciiRangeMask(w, lo, hi) != 0) {
1032 *reinterpret_cast<uintptr_t*>(dst) = w;
1033 src += sizeof(uintptr_t);
1034 dst += sizeof(uintptr_t);
1036 // Process the remainder of the input performing conversion when
1037 // required one word at a time.
1038 while (src <= limit - sizeof(uintptr_t)) {
1039 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
1041 uintptr_t m = AsciiRangeMask(w, lo, hi);
1042 // The mask has high (7th) bit set in every byte that needs
1043 // conversion and we know that the distance between cases is
// Shifting the mask right by 2 moves bit 7 to bit 5 (value 1 << 5, the case
// distance), so the XOR flips the case bit of exactly the marked bytes.
1045 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
1046 src += sizeof(uintptr_t);
1047 dst += sizeof(uintptr_t);
1050 // Process the last few bytes of the input (or the whole input if
1051 // unaligned access is not supported).
1052 while (src < limit) {
1055 if (lo < c && c < hi) {
// Any high bit collected in or_acc makes the conversion report failure.
1064 if ((or_acc & kAsciiMask) != 0) return false;
1066 DCHECK(CheckFastAsciiConvert(saved_dst, saved_src, length, changed,
1067 Converter::kIsToLower));
1069 *changed_out = changed;
1074 template <class Converter>
1075 MUST_USE_RESULT static Object* ConvertCase(
1076 Handle<String> s, Isolate* isolate,
1077 unibrow::Mapping<Converter, 128>* mapping) {
1078 s = String::Flatten(s);
1079 int length = s->length();
1080 // Assume that the string is not empty; we need this assumption later
1081 if (length == 0) return *s;
1083 // Simpler handling of ASCII strings.
1085 // NOTE: This assumes that the upper/lower case of an ASCII
1086 // character is also ASCII. This is currently the case, but it
1087 // might break in the future if we implement more context and locale
1088 // dependent upper/lower conversions.
1089 if (s->IsOneByteRepresentationUnderneath()) {
1090 // Same length as input.
1091 Handle<SeqOneByteString> result =
1092 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
1093 DisallowHeapAllocation no_gc;
1094 String::FlatContent flat_content = s->GetFlatContent();
1095 DCHECK(flat_content.IsFlat());
1096 bool has_changed_character = false;
1097 bool is_ascii = FastAsciiConvert<Converter>(
1098 reinterpret_cast<char*>(result->GetChars()),
1099 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
1100 length, &has_changed_character);
1101 // If not ASCII, we discard the result and take the 2 byte path.
1102 if (is_ascii) return has_changed_character ? *result : *s;
1105 Handle<SeqString> result; // Same length as input.
1106 if (s->IsOneByteRepresentation()) {
1107 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
1109 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
1112 Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
1113 if (answer->IsException() || answer->IsString()) return answer;
1115 DCHECK(answer->IsSmi());
1116 length = Smi::cast(answer)->value();
1117 if (s->IsOneByteRepresentation() && length > 0) {
1118 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1119 isolate, result, isolate->factory()->NewRawOneByteString(length));
1121 if (length < 0) length = -length;
1122 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1123 isolate, result, isolate->factory()->NewRawTwoByteString(length));
1125 return ConvertCaseHelper(isolate, *s, *result, length, mapping);
1129 RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
1130 HandleScope scope(isolate);
1131 DCHECK(args.length() == 1);
1132 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1133 return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping());
1137 RUNTIME_FUNCTION(Runtime_StringToUpperCase) {
1138 HandleScope scope(isolate);
1139 DCHECK(args.length() == 1);
1140 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1141 return ConvertCase(s, isolate, isolate->runtime_state()->to_upper_mapping());
1145 RUNTIME_FUNCTION(Runtime_StringTrim) {
1146 HandleScope scope(isolate);
1147 DCHECK(args.length() == 3);
1149 CONVERT_ARG_HANDLE_CHECKED(String, string, 0);
1150 CONVERT_BOOLEAN_ARG_CHECKED(trimLeft, 1);
1151 CONVERT_BOOLEAN_ARG_CHECKED(trimRight, 2);
1153 string = String::Flatten(string);
1154 int length = string->length();
1157 UnicodeCache* unicode_cache = isolate->unicode_cache();
1159 while (left < length &&
1160 unicode_cache->IsWhiteSpaceOrLineTerminator(string->Get(left))) {
1169 unicode_cache->IsWhiteSpaceOrLineTerminator(string->Get(right - 1))) {
1174 return *isolate->factory()->NewSubString(string, left, right);
1178 RUNTIME_FUNCTION(Runtime_TruncateString) {
1179 HandleScope scope(isolate);
1180 DCHECK(args.length() == 2);
1181 CONVERT_ARG_HANDLE_CHECKED(SeqString, string, 0);
1182 CONVERT_INT32_ARG_CHECKED(new_length, 1);
1183 RUNTIME_ASSERT(new_length >= 0);
1184 return *SeqString::Truncate(string, new_length);
1188 RUNTIME_FUNCTION(Runtime_NewString) {
1189 HandleScope scope(isolate);
1190 DCHECK(args.length() == 2);
1191 CONVERT_INT32_ARG_CHECKED(length, 0);
1192 CONVERT_BOOLEAN_ARG_CHECKED(is_one_byte, 1);
1193 if (length == 0) return isolate->heap()->empty_string();
1194 Handle<String> result;
1196 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1197 isolate, result, isolate->factory()->NewRawOneByteString(length));
1199 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1200 isolate, result, isolate->factory()->NewRawTwoByteString(length));
1206 RUNTIME_FUNCTION(Runtime_StringEquals) {
1207 HandleScope handle_scope(isolate);
1208 DCHECK(args.length() == 2);
1210 CONVERT_ARG_HANDLE_CHECKED(String, x, 0);
1211 CONVERT_ARG_HANDLE_CHECKED(String, y, 1);
1213 bool not_equal = !String::Equals(x, y);
1214 // This is slightly convoluted because the value that signifies
1215 // equality is 0 and inequality is 1 so we have to negate the result
1216 // from String::Equals.
1217 DCHECK(not_equal == 0 || not_equal == 1);
1218 STATIC_ASSERT(EQUAL == 0);
1219 STATIC_ASSERT(NOT_EQUAL == 1);
1220 return Smi::FromInt(not_equal);
1224 RUNTIME_FUNCTION(Runtime_FlattenString) {
1225 HandleScope scope(isolate);
1226 DCHECK(args.length() == 1);
1227 CONVERT_ARG_HANDLE_CHECKED(String, str, 0);
1228 return *String::Flatten(str);
1232 RUNTIME_FUNCTION(RuntimeReference_StringCharFromCode) {
1233 SealHandleScope shs(isolate);
1234 return __RT_impl_Runtime_CharFromCode(args, isolate);
1238 RUNTIME_FUNCTION(RuntimeReference_StringCharAt) {
1239 SealHandleScope shs(isolate);
1240 DCHECK(args.length() == 2);
1241 if (!args[0]->IsString()) return Smi::FromInt(0);
1242 if (!args[1]->IsNumber()) return Smi::FromInt(0);
1243 if (std::isinf(args.number_at(1))) return isolate->heap()->empty_string();
1244 Object* code = __RT_impl_Runtime_StringCharCodeAtRT(args, isolate);
1245 if (code->IsNaN()) return isolate->heap()->empty_string();
1246 return __RT_impl_Runtime_CharFromCode(Arguments(1, &code), isolate);
1250 RUNTIME_FUNCTION(RuntimeReference_OneByteSeqStringSetChar) {
1251 SealHandleScope shs(isolate);
1252 DCHECK(args.length() == 3);
1253 CONVERT_INT32_ARG_CHECKED(index, 0);
1254 CONVERT_INT32_ARG_CHECKED(value, 1);
1255 CONVERT_ARG_CHECKED(SeqOneByteString, string, 2);
1256 string->SeqOneByteStringSet(index, value);
1261 RUNTIME_FUNCTION(RuntimeReference_TwoByteSeqStringSetChar) {
1262 SealHandleScope shs(isolate);
1263 DCHECK(args.length() == 3);
1264 CONVERT_INT32_ARG_CHECKED(index, 0);
1265 CONVERT_INT32_ARG_CHECKED(value, 1);
1266 CONVERT_ARG_CHECKED(SeqTwoByteString, string, 2);
1267 string->SeqTwoByteStringSet(index, value);
1272 RUNTIME_FUNCTION(RuntimeReference_StringCompare) {
1273 SealHandleScope shs(isolate);
1274 return __RT_impl_Runtime_StringCompare(args, isolate);
1278 RUNTIME_FUNCTION(RuntimeReference_StringCharCodeAt) {
1279 SealHandleScope shs(isolate);
1280 DCHECK(args.length() == 2);
1281 if (!args[0]->IsString()) return isolate->heap()->undefined_value();
1282 if (!args[1]->IsNumber()) return isolate->heap()->undefined_value();
1283 if (std::isinf(args.number_at(1))) return isolate->heap()->nan_value();
1284 return __RT_impl_Runtime_StringCharCodeAtRT(args, isolate);
1288 RUNTIME_FUNCTION(RuntimeReference_SubString) {
1289 SealHandleScope shs(isolate);
1290 return __RT_impl_Runtime_SubString(args, isolate);
1294 RUNTIME_FUNCTION(RuntimeReference_StringAdd) {
1295 SealHandleScope shs(isolate);
1296 return __RT_impl_Runtime_StringAdd(args, isolate);
1300 RUNTIME_FUNCTION(RuntimeReference_IsStringWrapperSafeForDefaultValueOf) {
1305 } // namespace v8::internal