1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "src/arguments.h"
8 #include "src/jsregexp-inl.h"
9 #include "src/jsregexp.h"
10 #include "src/runtime/runtime-utils.h"
11 #include "src/string-builder.h"
12 #include "src/string-search.h"
// StringMatch: forward substring search shared by the runtime functions in
// this file. Returns start_index for an empty pattern, -1 when the pattern
// cannot fit in the remainder of the subject, and otherwise whatever
// SearchString returns for the flattened contents.
18 // Perform string match of pattern on subject, starting at start index.
19 // Caller must ensure that 0 <= start_index <= sub->length(),
20 // and should check that pat->length() + start_index <= sub->length().
21 int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat,
// NOTE(review): the last parameter (start_index, used below) is not visible
// in this copy of the file.
23 DCHECK(0 <= start_index);
24 DCHECK(start_index <= sub->length());
26 int pattern_length = pat->length();
// An empty pattern matches immediately at the starting position.
27 if (pattern_length == 0) return start_index;
29 int subject_length = sub->length();
// The pattern would extend past the end of the subject: no match possible.
30 if (start_index + pattern_length > subject_length) return -1;
// Flatten both strings so their characters can be read as flat vectors.
32 sub = String::Flatten(sub);
33 pat = String::Flatten(pat);
35 DisallowHeapAllocation no_gc; // ensure vectors stay valid
36 // Extract flattened substrings of cons strings before getting encoding.
37 String::FlatContent seq_sub = sub->GetFlatContent();
38 String::FlatContent seq_pat = pat->GetFlatContent();
40 // dispatch on type of strings
// Four combinations: {one-byte, two-byte} pattern x {one-byte, two-byte}
// subject, each forwarded to SearchString with matching vector types.
41 if (seq_pat.IsOneByte()) {
42 Vector<const uint8_t> pat_vector = seq_pat.ToOneByteVector();
43 if (seq_sub.IsOneByte()) {
44 return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,
47 return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector,
50 Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();
51 if (seq_sub.IsOneByte()) {
52 return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,
55 return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, start_index);
// Recursive helper: replaces the first occurrence of |search| in |subject|
// with |replace|. For cons strings it descends into the first half and then
// the second half, rebuilding only the half in which a match was found.
// *found is consulted after each recursive call to decide whether to stop.
59 // This may return an empty MaybeHandle if an exception is thrown or
60 // we abort due to reaching the recursion limit.
61 MaybeHandle<String> StringReplaceOneCharWithString(
62 Isolate* isolate, Handle<String> subject, Handle<String> search,
63 Handle<String> replace, bool* found, int recursion_limit) {
64 StackLimitCheck stackLimitCheck(isolate);
// Bail out with an empty handle on stack overflow or when the recursion
// budget is exhausted; the caller distinguishes the two cases.
65 if (stackLimitCheck.HasOverflowed() || (recursion_limit == 0)) {
66 return MaybeHandle<String>();
69 if (subject->IsConsString()) {
70 ConsString* cons = ConsString::cast(*subject);
71 Handle<String> first = Handle<String>(cons->first());
72 Handle<String> second = Handle<String>(cons->second());
73 Handle<String> new_first;
74 if (!StringReplaceOneCharWithString(isolate, first, search, replace, found,
75 recursion_limit).ToHandle(&new_first)) {
76 return MaybeHandle<String>();
// Match was in the left half: pair the rewritten left with the old right.
78 if (*found) return isolate->factory()->NewConsString(new_first, second);
80 Handle<String> new_second;
81 if (!StringReplaceOneCharWithString(isolate, second, search, replace, found,
83 .ToHandle(&new_second)) {
84 return MaybeHandle<String>();
// Match was in the right half: pair the old left with the rewritten right.
86 if (*found) return isolate->factory()->NewConsString(first, new_second);
// Non-cons case: search the subject directly from index 0.
90 int index = StringMatch(isolate, subject, search, 0);
91 if (index == -1) return subject;
// Build prefix + replace + suffix. The suffix starts at index + 1, i.e.
// exactly one character of the subject is dropped.
93 Handle<String> first = isolate->factory()->NewSubString(subject, 0, index);
95 ASSIGN_RETURN_ON_EXCEPTION(
96 isolate, cons1, isolate->factory()->NewConsString(first, replace),
98 Handle<String> second =
99 isolate->factory()->NewSubString(subject, index + 1, subject->length());
100 return isolate->factory()->NewConsString(cons1, second);
// Runtime entry point taking (subject, search, replace) strings. It first
// tries the recursive helper with a bounded recursion depth; if that yields
// no result and no exception is pending, it flattens the subject and calls
// the helper again.
105 RUNTIME_FUNCTION(Runtime_StringReplaceOneCharWithString) {
106 HandleScope scope(isolate);
107 DCHECK(args.length() == 3);
108 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
109 CONVERT_ARG_HANDLE_CHECKED(String, search, 1);
110 CONVERT_ARG_HANDLE_CHECKED(String, replace, 2);
112 // If the cons string tree is too deep, we simply abort the recursion and
113 // retry with a flattened subject string.
114 const int kRecursionLimit = 0x1000;
// NOTE(review): the declaration of |found| is not visible in this copy.
116 Handle<String> result;
117 if (StringReplaceOneCharWithString(isolate, subject, search, replace, &found,
118 kRecursionLimit).ToHandle(&result)) {
// An empty result with a pending exception is a real failure; propagate it.
121 if (isolate->has_pending_exception()) return isolate->heap()->exception();
// Otherwise the helper gave up without throwing: flatten and retry.
123 subject = String::Flatten(subject);
124 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
126 StringReplaceOneCharWithString(isolate, subject, search, replace, &found,
// Runtime entry point taking (sub, pat, index). Returns, as a Smi, the
// position reported by StringMatch when searching for |pat| in |sub| from
// |index|, or -1 when |index| is not a valid array index.
132 RUNTIME_FUNCTION(Runtime_StringIndexOf) {
133 HandleScope scope(isolate);
134 DCHECK(args.length() == 3);
136 CONVERT_ARG_HANDLE_CHECKED(String, sub, 0);
137 CONVERT_ARG_HANDLE_CHECKED(String, pat, 1);
138 CONVERT_ARG_HANDLE_CHECKED(Object, index, 2);
140 uint32_t start_index;
141 if (!index->ToArrayIndex(&start_index)) return Smi::FromInt(-1);
// StringMatch requires start_index <= sub->length(); enforce it here.
143 RUNTIME_ASSERT(start_index <= static_cast<uint32_t>(sub->length()));
144 int position = StringMatch(isolate, sub, pat, start_index);
145 return Smi::FromInt(position);
// Backward search helper: scans candidate start positions from |idx| down to
// 0, comparing |pattern| against |subject| at each position whose first
// character matches. Requires a non-empty pattern and
// idx + pattern.length() <= subject.length() (both checked with DCHECK).
// NOTE(review): the return statements are not visible in this copy of the
// file; callers below store the result as a match position.
149 template <typename schar, typename pchar>
150 static int StringMatchBackwards(Vector<const schar> subject,
151 Vector<const pchar> pattern, int idx) {
152 int pattern_length = pattern.length();
153 DCHECK(pattern_length >= 1);
154 DCHECK(idx + pattern_length <= subject.length());
// One-byte subject with a wider pattern: pre-scan the pattern for characters
// above String::kMaxOneByteCharCode.
156 if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
157 for (int i = 0; i < pattern_length; i++) {
159 if (c > String::kMaxOneByteCharCode) {
165 pchar pattern_first_char = pattern[0];
166 for (int i = idx; i >= 0; i--) {
// Cheap reject on the first character before comparing the rest.
167 if (subject[i] != pattern_first_char) continue;
169 while (j < pattern_length) {
170 if (pattern[j] != subject[i + j]) {
// j reaching pattern_length means every pattern character matched at i.
175 if (j == pattern_length) {
// Runtime entry point taking (sub, pat, index). Searches backwards for |pat|
// in |sub| via StringMatchBackwards and returns the position as a Smi.
// Returns -1 when |index| is not a valid array index.
183 RUNTIME_FUNCTION(Runtime_StringLastIndexOf) {
184 HandleScope scope(isolate);
185 DCHECK(args.length() == 3);
187 CONVERT_ARG_HANDLE_CHECKED(String, sub, 0);
188 CONVERT_ARG_HANDLE_CHECKED(String, pat, 1);
189 CONVERT_ARG_HANDLE_CHECKED(Object, index, 2);
191 uint32_t start_index;
192 if (!index->ToArrayIndex(&start_index)) return Smi::FromInt(-1);
194 uint32_t pat_length = pat->length();
195 uint32_t sub_length = sub->length();
// Clamp the start so that the pattern fits inside the subject.
// NOTE(review): this is unsigned arithmetic; it presumably relies on the
// caller guaranteeing pat_length <= sub_length -- confirm against callers.
197 if (start_index + pat_length > sub_length) {
198 start_index = sub_length - pat_length;
// An empty pattern matches at the (possibly clamped) start position.
201 if (pat_length == 0) {
202 return Smi::FromInt(start_index);
205 sub = String::Flatten(sub);
206 pat = String::Flatten(pat);
209 DisallowHeapAllocation no_gc; // ensure vectors stay valid
211 String::FlatContent sub_content = sub->GetFlatContent();
212 String::FlatContent pat_content = pat->GetFlatContent();
// Dispatch on the character width of pattern and subject.
214 if (pat_content.IsOneByte()) {
215 Vector<const uint8_t> pat_vector = pat_content.ToOneByteVector();
216 if (sub_content.IsOneByte()) {
217 position = StringMatchBackwards(sub_content.ToOneByteVector(), pat_vector,
220 position = StringMatchBackwards(sub_content.ToUC16Vector(), pat_vector,
224 Vector<const uc16> pat_vector = pat_content.ToUC16Vector();
225 if (sub_content.IsOneByte()) {
226 position = StringMatchBackwards(sub_content.ToOneByteVector(), pat_vector,
229 position = StringMatchBackwards(sub_content.ToUC16Vector(), pat_vector,
234 return Smi::FromInt(position);
// Runtime entry point taking (str1, str2). Returns a Smi that is zero when
// the strings are equal, and otherwise the difference of the first differing
// code units or, if one string is a prefix of the other, the difference of
// the lengths. The visible code compares code units numerically; no
// locale-specific collation appears in the lines shown.
238 RUNTIME_FUNCTION(Runtime_StringLocaleCompare) {
239 HandleScope handle_scope(isolate);
240 DCHECK(args.length() == 2);
242 CONVERT_ARG_HANDLE_CHECKED(String, str1, 0);
243 CONVERT_ARG_HANDLE_CHECKED(String, str2, 1);
245 if (str1.is_identical_to(str2)) return Smi::FromInt(0); // Equal.
246 int str1_length = str1->length();
247 int str2_length = str2->length();
249 // Decide trivial cases without flattening.
250 if (str1_length == 0) {
251 if (str2_length == 0) return Smi::FromInt(0); // Equal.
252 return Smi::FromInt(-str2_length);
254 if (str2_length == 0) return Smi::FromInt(str1_length);
// Only the common prefix needs a character-by-character comparison.
257 int end = str1_length < str2_length ? str1_length : str2_length;
259 // No need to flatten if we are going to find the answer on the first
260 // character. At this point we know there is at least one character
261 // in each string, due to the trivial case handling above.
262 int d = str1->Get(0) - str2->Get(0);
263 if (d != 0) return Smi::FromInt(d);
265 str1 = String::Flatten(str1);
266 str2 = String::Flatten(str2);
268 DisallowHeapAllocation no_gc;
269 String::FlatContent flat1 = str1->GetFlatContent();
270 String::FlatContent flat2 = str2->GetFlatContent();
272 for (int i = 0; i < end; i++) {
273 if (flat1.Get(i) != flat2.Get(i)) {
274 return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
// Common prefix is identical: the length difference decides the result.
278 return Smi::FromInt(str1_length - str2_length);
// Runtime entry point taking (string, from, to). Validates that
// 0 <= start <= end <= string->length() and returns the substring
// [start, end) created by the factory.
282 RUNTIME_FUNCTION(Runtime_SubStringRT) {
283 HandleScope scope(isolate);
284 DCHECK(args.length() == 3);
286 CONVERT_ARG_HANDLE_CHECKED(String, string, 0);
// NOTE(review): the declarations of |start| and |end| and their assignment
// in the Smi branch are not visible in this copy of the file.
288 // We have a fast integer-only case here to avoid a conversion to double in
289 // the common case where from and to are Smis.
290 if (args[1]->IsSmi() && args[2]->IsSmi()) {
291 CONVERT_SMI_ARG_CHECKED(from_number, 1);
292 CONVERT_SMI_ARG_CHECKED(to_number, 2);
// Non-Smi arguments: read them as doubles and convert to int.
296 CONVERT_DOUBLE_ARG_CHECKED(from_number, 1);
297 CONVERT_DOUBLE_ARG_CHECKED(to_number, 2);
298 start = FastD2IChecked(from_number);
299 end = FastD2IChecked(to_number);
// Range validation before the substring is created.
301 RUNTIME_ASSERT(end >= start);
302 RUNTIME_ASSERT(start >= 0);
303 RUNTIME_ASSERT(end <= string->length());
304 isolate->counters()->sub_string_runtime()->Increment();
306 return *isolate->factory()->NewSubString(string, start, end);
// Thin wrapper: forwards the unchanged arguments to the implementation of
// Runtime_SubStringRT under a SealHandleScope.
310 RUNTIME_FUNCTION(Runtime_SubString) {
311 SealHandleScope shs(isolate);
312 return __RT_impl_Runtime_SubStringRT(args, isolate);
// Runtime entry point taking (str1, str2). Concatenates the two strings via
// Factory::NewConsString, bumping the string_add_runtime counter first.
// Failure of NewConsString is propagated by the ASSIGN_..._ON_EXCEPTION
// macro.
316 RUNTIME_FUNCTION(Runtime_StringAddRT) {
317 HandleScope scope(isolate);
318 DCHECK(args.length() == 2);
319 CONVERT_ARG_HANDLE_CHECKED(String, str1, 0);
320 CONVERT_ARG_HANDLE_CHECKED(String, str2, 1);
321 isolate->counters()->string_add_runtime()->Increment();
322 Handle<String> result;
323 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
324 isolate, result, isolate->factory()->NewConsString(str1, str2));
// Thin wrapper: forwards the unchanged arguments to the implementation of
// Runtime_StringAddRT under a SealHandleScope.
329 RUNTIME_FUNCTION(Runtime_StringAdd) {
330 SealHandleScope shs(isolate);
331 return __RT_impl_Runtime_StringAddRT(args, isolate);
// Runtime entry point taking a single string argument; returns the result of
// Factory::InternalizeString for it. The argument count is checked with
// RUNTIME_ASSERT here, whereas the neighbouring functions use DCHECK.
335 RUNTIME_FUNCTION(Runtime_InternalizeString) {
336 HandleScope handles(isolate);
337 RUNTIME_ASSERT(args.length() == 1);
338 CONVERT_ARG_HANDLE_CHECKED(String, string, 0);
339 return *isolate->factory()->InternalizeString(string);
// Runtime entry point taking (subject, regexp, regexp_info). Collects the
// (start, end) offsets of every match delivered by a RegExpImpl::GlobalCache
// and builds a JSArray holding one substring per match. Returns null when
// there is no match, and the exception sentinel when the cache reports an
// exception.
343 RUNTIME_FUNCTION(Runtime_StringMatch) {
344 HandleScope handles(isolate);
345 DCHECK(args.length() == 3);
347 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
348 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
349 CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2);
351 RUNTIME_ASSERT(regexp_info->HasFastObjectElements());
353 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
354 if (global_cache.HasException()) return isolate->heap()->exception();
356 int capture_count = regexp->CaptureCount();
// Offsets are stored pairwise (start, end) in a zone-allocated list.
358 ZoneScope zone_scope(isolate->runtime_zone());
359 ZoneList<int> offsets(8, zone_scope.zone());
// Fetch matches until the cache returns NULL.
362 int32_t* match = global_cache.FetchNext();
363 if (match == NULL) break;
364 offsets.Add(match[0], zone_scope.zone()); // start
365 offsets.Add(match[1], zone_scope.zone()); // end
// FetchNext may also stop because of an exception; check again.
368 if (global_cache.HasException()) return isolate->heap()->exception();
370 if (offsets.length() == 0) {
371 // Not a single match.
372 return isolate->heap()->null_value();
375 RegExpImpl::SetLastMatchInfo(regexp_info, subject, capture_count,
376 global_cache.LastSuccessfulMatch());
// Two offsets per match.
378 int matches = offsets.length() / 2;
379 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches);
// The first match uses NewSubString, the remaining ones NewProperSubString.
380 Handle<String> substring =
381 isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1));
382 elements->set(0, *substring);
383 for (int i = 1; i < matches; i++) {
// A per-iteration scope holds the temporary substring handle.
384 HandleScope temp_scope(isolate);
385 int from = offsets.at(i * 2);
386 int to = offsets.at(i * 2 + 1);
387 Handle<String> substring =
388 isolate->factory()->NewProperSubString(subject, from, to);
389 elements->set(i, *substring);
391 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements);
392 result->set_length(Smi::FromInt(matches));
// Runtime entry point taking (subject, i). Returns the code unit at index
// |i| as a Smi, or NaN when |i| is not below the string length.
397 RUNTIME_FUNCTION(Runtime_StringCharCodeAtRT) {
398 HandleScope handle_scope(isolate);
399 DCHECK(args.length() == 2);
401 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
402 CONVERT_NUMBER_CHECKED(uint32_t, i, Uint32, args[1]);
404 // Flatten the string. If someone wants to get a char at an index
405 // in a cons string, it is likely that more indices will be accessed.
407 subject = String::Flatten(subject);
// Out-of-range index (compared as unsigned): the answer is NaN.
409 if (i >= static_cast<uint32_t>(subject->length())) {
410 return isolate->heap()->nan_value();
413 return Smi::FromInt(subject->Get(i));
// Runtime entry point taking one argument. For a numeric argument it returns
// the single-character string looked up for that code; the final return
// yields the empty string.
// NOTE(review): any statement between the conversion and the lookup is not
// visible in this copy of the file.
417 RUNTIME_FUNCTION(Runtime_CharFromCode) {
418 HandleScope handlescope(isolate);
419 DCHECK(args.length() == 1);
420 if (args[0]->IsNumber()) {
421 CONVERT_NUMBER_CHECKED(uint32_t, code, Uint32, args[0]);
423 return *isolate->factory()->LookupSingleCharacterStringFromCode(code);
425 return isolate->heap()->empty_string();
// Runtime entry point taking (x, y). Returns Smi-encoded LESS, EQUAL or
// GREATER. Identity, empty strings and the first character are handled
// before flattening; otherwise the common prefix is compared with
// CompareChars and ties are broken by length.
429 RUNTIME_FUNCTION(Runtime_StringCompareRT) {
430 HandleScope handle_scope(isolate);
431 DCHECK(args.length() == 2);
433 CONVERT_ARG_HANDLE_CHECKED(String, x, 0);
434 CONVERT_ARG_HANDLE_CHECKED(String, y, 1);
436 isolate->counters()->string_compare_runtime()->Increment();
438 // A few fast case tests before we flatten.
439 if (x.is_identical_to(y)) return Smi::FromInt(EQUAL);
440 if (y->length() == 0) {
441 if (x->length() == 0) return Smi::FromInt(EQUAL);
442 return Smi::FromInt(GREATER);
443 } else if (x->length() == 0) {
444 return Smi::FromInt(LESS);
// Both strings are non-empty here, so reading index 0 is safe.
// NOTE(review): the conditions selecting between the two returns below are
// not visible in this copy of the file.
447 int d = x->Get(0) - y->Get(0);
449 return Smi::FromInt(LESS);
451 return Smi::FromInt(GREATER);
454 x = String::Flatten(x);
455 y = String::Flatten(y);
457 DisallowHeapAllocation no_gc;
// equal_prefix_result is the answer to use if the common prefix compares
// equal: the shorter string orders first.
458 Object* equal_prefix_result = Smi::FromInt(EQUAL);
459 int prefix_length = x->length();
460 if (y->length() < prefix_length) {
461 prefix_length = y->length();
462 equal_prefix_result = Smi::FromInt(GREATER);
463 } else if (y->length() > prefix_length) {
464 equal_prefix_result = Smi::FromInt(LESS);
// Compare the first prefix_length characters for each of the four
// one-byte/two-byte combinations.
467 String::FlatContent x_content = x->GetFlatContent();
468 String::FlatContent y_content = y->GetFlatContent();
469 if (x_content.IsOneByte()) {
470 Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
471 if (y_content.IsOneByte()) {
472 Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
473 r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
475 Vector<const uc16> y_chars = y_content.ToUC16Vector();
476 r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
479 Vector<const uc16> x_chars = x_content.ToUC16Vector();
480 if (y_content.IsOneByte()) {
481 Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
482 r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
484 Vector<const uc16> y_chars = y_content.ToUC16Vector();
485 r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
// The result is either the length-based answer or the sign of r.
490 result = equal_prefix_result;
492 result = (r < 0) ? Smi::FromInt(LESS) : Smi::FromInt(GREATER);
// Thin wrapper: forwards the unchanged arguments to the implementation of
// Runtime_StringCompareRT under a SealHandleScope.
498 RUNTIME_FUNCTION(Runtime_StringCompare) {
499 SealHandleScope shs(isolate);
500 return __RT_impl_Runtime_StringCompareRT(args, isolate);
// Runtime entry point taking (array, array_length, special). Computes the
// result length with StringBuilderConcatLength, allocates a raw one-byte or
// two-byte string of that length, and fills it with
// StringBuilderConcatHelper. Throws an invalid-string-length error when
// array_length is not an int32, and an illegal-argument error on the paths
// shown below.
504 RUNTIME_FUNCTION(Runtime_StringBuilderConcat) {
505 HandleScope scope(isolate);
506 DCHECK(args.length() == 3);
507 CONVERT_ARG_HANDLE_CHECKED(JSArray, array, 0);
508 int32_t array_length;
509 if (!args[1]->ToInt32(&array_length)) {
510 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewInvalidStringLengthError());
512 CONVERT_ARG_HANDLE_CHECKED(String, special, 2);
// The requested length must be non-negative and no larger than the array's
// own length.
514 size_t actual_array_length = 0;
516 TryNumberToSize(isolate, array->length(), &actual_array_length));
517 RUNTIME_ASSERT(array_length >= 0);
518 RUNTIME_ASSERT(static_cast<size_t>(array_length) <= actual_array_length);
520 // This assumption is used by the slice encoding in one or two smis.
521 DCHECK(Smi::kMaxValue >= String::kMaxLength);
523 RUNTIME_ASSERT(array->HasFastElements());
524 JSObject::EnsureCanContainHeapObjectElements(array);
526 int special_length = special->length();
527 if (!array->HasFastObjectElements()) {
528 return isolate->Throw(isolate->heap()->illegal_argument_string());
// Initial guess for the result encoding; StringBuilderConcatLength receives
// a pointer to it below.
532 bool one_byte = special->HasOnlyOneByteChars();
535 DisallowHeapAllocation no_gc;
536 FixedArray* fixed_array = FixedArray::cast(array->elements());
// Never read past the end of the backing store.
537 if (fixed_array->length() < array_length) {
538 array_length = fixed_array->length();
// Shortcuts: no parts yields the empty string; a single string part is
// returned as is.
541 if (array_length == 0) {
542 return isolate->heap()->empty_string();
543 } else if (array_length == 1) {
544 Object* first = fixed_array->get(0);
545 if (first->IsString()) return first;
547 length = StringBuilderConcatLength(special_length, fixed_array,
548 array_length, &one_byte);
552 return isolate->Throw(isolate->heap()->illegal_argument_string());
// One-byte result.
556 Handle<SeqOneByteString> answer;
557 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
558 isolate, answer, isolate->factory()->NewRawOneByteString(length));
559 StringBuilderConcatHelper(*special, answer->GetChars(),
560 FixedArray::cast(array->elements()),
// Two-byte result.
564 Handle<SeqTwoByteString> answer;
565 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
566 isolate, answer, isolate->factory()->NewRawTwoByteString(length));
567 StringBuilderConcatHelper(*special, answer->GetChars(),
568 FixedArray::cast(array->elements()),
// Runtime entry point taking (array, array_length, separator). Joins the
// first array_length string elements of |array| with |separator| into a
// newly allocated two-byte string. Every element must be a string and the
// separator must be non-empty (both checked with RUNTIME_ASSERT).
575 RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {
576 HandleScope scope(isolate);
577 DCHECK(args.length() == 3);
578 CONVERT_ARG_HANDLE_CHECKED(JSArray, array, 0);
579 int32_t array_length;
580 if (!args[1]->ToInt32(&array_length)) {
581 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewInvalidStringLengthError());
583 CONVERT_ARG_HANDLE_CHECKED(String, separator, 2);
584 RUNTIME_ASSERT(array->HasFastObjectElements());
585 RUNTIME_ASSERT(array_length >= 0);
587 Handle<FixedArray> fixed_array(FixedArray::cast(array->elements()));
// Never read past the end of the backing store.
588 if (fixed_array->length() < array_length) {
589 array_length = fixed_array->length();
592 if (array_length == 0) {
593 return isolate->heap()->empty_string();
594 } else if (array_length == 1) {
595 Object* first = fixed_array->get(0);
596 RUNTIME_ASSERT(first->IsString());
600 int separator_length = separator->length();
601 RUNTIME_ASSERT(separator_length > 0);
// Reject inputs whose separators alone would exceed String::kMaxLength;
// this also keeps the multiplication below from overflowing.
602 int max_nof_separators =
603 (String::kMaxLength + separator_length - 1) / separator_length;
604 if (max_nof_separators < (array_length - 1)) {
605 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewInvalidStringLengthError());
// Total length = all separators + the sum of the element lengths.
607 int length = (array_length - 1) * separator_length;
608 for (int i = 0; i < array_length; i++) {
609 Object* element_obj = fixed_array->get(i);
610 RUNTIME_ASSERT(element_obj->IsString());
611 String* element = String::cast(element_obj);
612 int increment = element->length();
// Overflow-safe check: compare against the remaining headroom rather than
// adding first.
613 if (increment > String::kMaxLength - length) {
614 STATIC_ASSERT(String::kMaxLength < kMaxInt);
615 length = kMaxInt; // Provoke exception;
621 Handle<SeqTwoByteString> answer;
622 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
623 isolate, answer, isolate->factory()->NewRawTwoByteString(length));
// From here on raw String* and character pointers are used, so allocation
// is disallowed.
625 DisallowHeapAllocation no_gc;
627 uc16* sink = answer->GetChars();
629 uc16* end = sink + length;
// Write the first element, then (separator, element) for each remaining
// index.
632 RUNTIME_ASSERT(fixed_array->get(0)->IsString());
633 String* first = String::cast(fixed_array->get(0));
634 String* separator_raw = *separator;
635 int first_length = first->length();
636 String::WriteToFlat(first, sink, 0, first_length);
637 sink += first_length;
639 for (int i = 1; i < array_length; i++) {
640 DCHECK(sink + separator_length <= end);
641 String::WriteToFlat(separator_raw, sink, 0, separator_length);
642 sink += separator_length;
644 RUNTIME_ASSERT(fixed_array->get(i)->IsString());
645 String* element = String::cast(fixed_array->get(i));
646 int element_length = element->length();
647 DCHECK(sink + element_length <= end);
648 String::WriteToFlat(element, sink, 0, element_length);
649 sink += element_length;
653 // Use %_FastOneByteArrayJoin instead.
654 DCHECK(!answer->IsOneByteRepresentation());
// Writes the join of a sparse array into |buffer|. |elements| holds
// alternating (position, string) entries. For each non-empty string,
// separators are emitted until the separator count reaches its position,
// then the string itself is written. Trailing separators are added up to
// array_length - 1.
// NOTE(review): the |elements_length| and |separator| parameters and the
// declaration of |cursor| are used below but their declarations are not
// visible in this copy of the file.
658 template <typename Char>
659 static void JoinSparseArrayWithSeparator(FixedArray* elements,
661 uint32_t array_length,
663 Vector<Char> buffer) {
664 DisallowHeapAllocation no_gc;
665 int previous_separator_position = 0;
666 int separator_length = separator->length();
// Entries come in pairs: elements[i] is a position, elements[i + 1] is the
// string at that position.
668 for (int i = 0; i < elements_length; i += 2) {
669 int position = NumberToInt32(elements->get(i));
670 String* string = String::cast(elements->get(i + 1));
671 int string_length = string->length();
672 if (string->length() > 0) {
// Emit the separators for every array index skipped before this element.
673 while (previous_separator_position < position) {
674 String::WriteToFlat<Char>(separator, &buffer[cursor], 0,
676 cursor += separator_length;
677 previous_separator_position++;
679 String::WriteToFlat<Char>(string, &buffer[cursor], 0, string_length);
680 cursor += string->length();
// Separators for the indices after the last written element.
683 if (separator_length > 0) {
684 // Array length must be representable as a signed 32-bit number,
685 // otherwise the total string length would have been too large.
686 DCHECK(array_length <= 0x7fffffff); // Is int32_t.
687 int last_array_index = static_cast<int>(array_length - 1);
688 while (previous_separator_position < last_array_index) {
689 String::WriteToFlat<Char>(separator, &buffer[cursor], 0,
691 cursor += separator_length;
692 previous_separator_position++;
695 DCHECK(cursor <= buffer.length());
// Runtime entry point taking (elements_array, array_length, separator).
// Validates the (position, string) pairs, computes the total result length
// with overflow checks against String::kMaxLength, allocates a one-byte or
// two-byte result, and fills it with JoinSparseArrayWithSeparator.
699 RUNTIME_FUNCTION(Runtime_SparseJoinWithSeparator) {
700 HandleScope scope(isolate);
701 DCHECK(args.length() == 3);
702 CONVERT_ARG_HANDLE_CHECKED(JSArray, elements_array, 0);
703 CONVERT_NUMBER_CHECKED(uint32_t, array_length, Uint32, args[1]);
704 CONVERT_ARG_HANDLE_CHECKED(String, separator, 2);
705 // elements_array is fast-mode JSarray of alternating positions
706 // (increasing order) and strings.
707 RUNTIME_ASSERT(elements_array->HasFastSmiOrObjectElements());
708 // array_length is length of original array (used to add separators);
709 // separator is string to put between elements. Assumed to be non-empty.
710 RUNTIME_ASSERT(array_length > 0);
712 // Find total length of join result.
713 int string_length = 0;
714 bool is_one_byte = separator->IsOneByteRepresentation();
715 bool overflow = false;
716 CONVERT_NUMBER_CHECKED(int, elements_length, Int32, elements_array->length());
717 RUNTIME_ASSERT(elements_length <= elements_array->elements()->length());
718 RUNTIME_ASSERT((elements_length & 1) == 0); // Even length.
// First pass: validate every pair (numeric position below array_length,
// string value) before any raw casts are made.
719 FixedArray* elements = FixedArray::cast(elements_array->elements());
720 for (int i = 0; i < elements_length; i += 2) {
721 RUNTIME_ASSERT(elements->get(i)->IsNumber());
722 CONVERT_NUMBER_CHECKED(uint32_t, position, Uint32, elements->get(i));
723 RUNTIME_ASSERT(position < array_length);
724 RUNTIME_ASSERT(elements->get(i + 1)->IsString());
// Second pass: sum the string lengths and inspect each string's
// representation.
728 DisallowHeapAllocation no_gc;
729 for (int i = 0; i < elements_length; i += 2) {
730 String* string = String::cast(elements->get(i + 1));
731 int length = string->length();
732 if (is_one_byte && !string->IsOneByteRepresentation()) {
// Overflow-safe accumulation: test the headroom before adding.
735 if (length > String::kMaxLength ||
736 String::kMaxLength - length < string_length) {
740 string_length += length;
// Account for the separators, again checking against String::kMaxLength.
744 int separator_length = separator->length();
745 if (!overflow && separator_length > 0) {
746 if (array_length <= 0x7fffffffu) {
747 int separator_count = static_cast<int>(array_length) - 1;
748 int remaining_length = String::kMaxLength - string_length;
749 if ((remaining_length / separator_length) >= separator_count) {
750 string_length += separator_length * (array_length - 1);
752 // Not room for the separators within the maximal string length.
756 // Nonempty separator and at least 2^31-1 separators necessary
757 // means that the string is too large to create.
758 STATIC_ASSERT(String::kMaxLength < 0x7fffffff);
763 // Throw an exception if the resulting string is too large. See
764 // https://code.google.com/p/chromium/issues/detail?id=336820
766 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewInvalidStringLengthError());
// One-byte result.
770 Handle<SeqOneByteString> result = isolate->factory()
771 ->NewRawOneByteString(string_length)
773 JoinSparseArrayWithSeparator<uint8_t>(
774 FixedArray::cast(elements_array->elements()), elements_length,
775 array_length, *separator,
776 Vector<uint8_t>(result->GetChars(), string_length));
// Two-byte result.
779 Handle<SeqTwoByteString> result = isolate->factory()
780 ->NewRawTwoByteString(string_length)
782 JoinSparseArrayWithSeparator<uc16>(
783 FixedArray::cast(elements_array->elements()), elements_length,
784 array_length, *separator,
785 Vector<uc16>(result->GetChars(), string_length));
791 // Copies Latin1 characters to the given fixed array looking up
792 // one-char strings in the cache. Gives up on the first char that is
793 // not in the cache and fills the remainder with smi zeros. Returns
794 // the length of the successfully copied prefix.
795 static int CopyCachedOneByteCharsToArray(Heap* heap, const uint8_t* chars,
796 FixedArray* elements, int length) {
797 DisallowHeapAllocation no_gc;
798 FixedArray* one_byte_cache = heap->single_character_string_cache();
// A cache slot holding undefined means no string is cached for that code.
799 Object* undefined = heap->undefined_value();
// NOTE(review): the declaration of |i| is not visible in this copy; it must
// outlive the loop because it is used by the memset below.
801 WriteBarrierMode mode = elements->GetWriteBarrierMode(no_gc);
802 for (i = 0; i < length; ++i) {
803 Object* value = one_byte_cache->get(chars[i]);
804 if (value == undefined) break;
805 elements->set(i, value, mode);
// Zero bytes are a valid fill value because Smi zero is the all-zero word
// (checked by the DCHECK).
808 DCHECK(Smi::FromInt(0) == 0);
809 memset(elements->data_start() + i, 0, kPointerSize * (length - i));
// Check that every slot is either Smi zero or a valid string.
812 for (int j = 0; j < length; ++j) {
813 Object* element = elements->get(j);
814 DCHECK(element == Smi::FromInt(0) ||
815 (element->IsString() && String::cast(element)->LooksValid()));
822 // Converts a String to JSArray.
823 // For example, "foo" => ["f", "o", "o"].
// Takes (s, limit); at most min(s->length(), limit) characters are
// converted. For flat one-byte strings a prefix is filled from the
// single-character string cache first; the remaining positions are filled
// with LookupSingleCharacterStringFromCode.
824 RUNTIME_FUNCTION(Runtime_StringToArray) {
825 HandleScope scope(isolate);
826 DCHECK(args.length() == 2);
827 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
828 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[1]);
830 s = String::Flatten(s);
831 const int length = static_cast<int>(Min<uint32_t>(s->length(), limit));
// NOTE(review): the declaration of |position| is not visible in this copy.
833 Handle<FixedArray> elements;
835 if (s->IsFlat() && s->IsOneByteRepresentation()) {
836 // Try using cached chars where possible.
837 elements = isolate->factory()->NewUninitializedFixedArray(length);
839 DisallowHeapAllocation no_gc;
840 String::FlatContent content = s->GetFlatContent();
841 if (content.IsOneByte()) {
842 Vector<const uint8_t> chars = content.ToOneByteVector();
843 // Note, this will initialize all elements (not only the prefix)
844 // to prevent GC from seeing partially initialized array.
845 position = CopyCachedOneByteCharsToArray(isolate->heap(), chars.start(),
// Content is not one-byte after all: initialize every slot to undefined.
848 MemsetPointer(elements->data_start(), isolate->heap()->undefined_value(),
// Other strings: start from a regularly allocated array.
852 elements = isolate->factory()->NewFixedArray(length);
// Fill the positions not already covered, one character at a time.
854 for (int i = position; i < length; ++i) {
856 isolate->factory()->LookupSingleCharacterStringFromCode(s->Get(i));
857 elements->set(i, *str);
// Check that every element is a one-character string.
861 for (int i = 0; i < length; ++i) {
862 DCHECK(String::cast(elements->get(i))->length() == 1);
866 return *isolate->factory()->NewJSArrayWithElements(elements);
870 static inline bool ToUpperOverflows(uc32 character) {
871 // y with umlauts and the micro sign are the only characters that stop
872 // fitting into one-byte when converting to uppercase.
873 static const uc32 yuml_code = 0xff;
874 static const uc32 micro_code = 0xb5;
875 return (character == yuml_code || character == micro_code);
// Converts |string| character by character with |mapping|, writing into
// |result|, which has room for |result_length| characters. When a character
// does not fit the assumption that the output is as long as the input, the
// exact output length is computed and returned as a Smi so the caller can
// retry; the Smi is negated when a two-byte result is required for an upper
// case conversion.
879 template <class Converter>
880 MUST_USE_RESULT static Object* ConvertCaseHelper(
881 Isolate* isolate, String* string, SeqString* result, int result_length,
882 unibrow::Mapping<Converter, 128>* mapping) {
883 DisallowHeapAllocation no_gc;
884 // We try this twice, once with the assumption that the result is no longer
885 // than the input and, if that assumption breaks, again with the exact
886 // length. This may not be pretty, but it is nicer than what was here before
887 // and I hereby claim my vaffel-is.
889 // NOTE: This assumes that the upper/lower case of an ASCII
890 // character is also ASCII. This is currently the case, but it
891 // might break in the future if we implement more context and locale
892 // dependent upper/lower conversions.
893 bool has_changed_character = false;
895 // Convert all characters to upper case, assuming that they will fit
897 StringCharacterStream stream(string);
898 unibrow::uchar chars[Converter::kMaxWidth];
899 // We can assume that the string is not empty
900 uc32 current = stream.GetNext();
// One-byte overflow only matters when upper-casing into a one-byte result.
901 bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
// NOTE(review): the increments of |i| and the hand-over from |next| to
// |current| are not visible in this copy of the file.
902 for (int i = 0; i < result_length;) {
903 bool has_next = stream.HasMore();
904 uc32 next = has_next ? stream.GetNext() : 0;
905 int char_length = mapping->get(current, next, chars);
906 if (char_length == 0) {
907 // The case conversion of this character is the character itself.
908 result->Set(i, current);
910 } else if (char_length == 1 &&
911 (ignore_overflow || !ToUpperOverflows(current))) {
912 // Common case: converting the letter resulted in one character.
913 DCHECK(static_cast<uc32>(chars[0]) != current);
914 result->Set(i, chars[0]);
915 has_changed_character = true;
917 } else if (result_length == string->length()) {
918 bool overflows = ToUpperOverflows(current);
919 // We've assumed that the result would be as long as the
920 // input but here is a character that converts to several
921 // characters. No matter, we calculate the exact length
922 // of the result and try the whole thing again.
924 // Note that this leaves room for optimization. We could just
925 // memcpy what we already have to the result string. Also,
926 // the result string is the last object allocated we could
927 // "realloc" it and probably, in the vast majority of cases,
928 // extend the existing string to be able to hold the full
// A mapping length of 0 means the character maps to itself, so it still
// occupies one position in the output.
932 next_length = mapping->get(next, 0, chars);
933 if (next_length == 0) next_length = 1;
// Characters already written (i), plus the current and the look-ahead one.
935 int current_length = i + char_length + next_length;
936 while (stream.HasMore()) {
937 current = stream.GetNext();
938 overflows |= ToUpperOverflows(current);
939 // NOTE: we use 0 as the next character here because, while
940 // the next character may affect what a character converts to,
941 // it does not in any case affect the length of what it convert
943 int char_length = mapping->get(current, 0, chars);
944 if (char_length == 0) char_length = 1;
945 current_length += char_length;
946 if (current_length > String::kMaxLength) {
// Throwing allocates, so allocation is re-enabled for this scope only.
947 AllowHeapAllocation allocate_error_and_return;
948 THROW_NEW_ERROR_RETURN_FAILURE(isolate,
949 NewInvalidStringLengthError());
952 // Try again with the real length. Return signed if we need
953 // to allocate a two-byte string for to uppercase.
954 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
955 : Smi::FromInt(current_length);
// Second attempt: the result has the exact length, so multi-character
// conversions are written out in full.
957 for (int j = 0; j < char_length; j++) {
958 result->Set(i, chars[j]);
961 has_changed_character = true;
965 if (has_changed_character) {
968 // If we didn't actually change anything in doing the conversion
969 // we simply return the result and let the converted string
970 // become garbage; there is no reason to keep two identical strings
// Word-sized bit patterns used by the word-at-a-time ASCII helpers below:
// kOneInEveryByte has the value 0x01 in every byte of a uintptr_t, and
// kAsciiMask has the value 0x80 (the high bit) in every byte.
977 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;
978 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;
980 // Given a word and two range boundaries returns a word with high bit
981 // set in every byte iff the corresponding input byte was strictly in
982 // the range (m, n). All the other bits in the result are cleared.
983 // This function is only useful when it can be inlined and the
984 // boundaries are statically known.
985 // Requires: all bytes in the input word and the boundaries must be
986 // ASCII (less than 0x7F).
987 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
988 // Use strict inequalities since in edge cases the function could be
989 // further simplified.
990 DCHECK(0 < m && m < n);
991 // Has high bit set in every w byte less than n.
992 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
993 // Has high bit set in every w byte greater than m.
994 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);
995 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
// Checking helper for the fast ASCII case conversion: walks |src| and |dst|
// in parallel, DCHECKs that every differing byte is a letter shifted by
// exactly 'a' - 'A' in the expected direction, and returns whether the
// observed "something changed" state equals |changed|.
// NOTE(review): the branch on |is_to_lower| that selects between the two
// DCHECK pairs is not visible in this copy of the file.
1000 static bool CheckFastAsciiConvert(char* dst, const char* src, int length,
1001 bool changed, bool is_to_lower) {
1002 bool expected_changed = false;
1003 for (int i = 0; i < length; i++) {
// Identical bytes need no further checking.
1004 if (dst[i] == src[i]) continue;
1005 expected_changed = true;
// Upper-case source letter moved up by 'a' - 'A'.
1007 DCHECK('A' <= src[i] && src[i] <= 'Z');
1008 DCHECK(dst[i] == src[i] + ('a' - 'A'));
// Lower-case source letter moved down by 'a' - 'A'.
1010 DCHECK('a' <= src[i] && src[i] <= 'z');
1011 DCHECK(dst[i] == src[i] - ('a' - 'A'));
1014 return (expected_changed == changed);
// Copies |length| bytes from src to dst, switching the case of ASCII letters
// in the direction selected by Converter::kIsToLower. When src is word-aligned
// the bulk of the input is handled one machine word at a time; the remaining
// bytes are handled individually.
// Returns false when (or_acc & kAsciiMask) is non-zero.
// NOTE(review): or_acc presumably accumulates every input byte, so this
// detects non-ASCII input -- confirm. The caller (ConvertCase) treats the
// return value as "input was ASCII".
// On the non-failing path, *changed_out receives whether any character was
// converted.
1019 template <class Converter>
1020 static bool FastAsciiConvert(char* dst, const char* src, int length,
1021 bool* changed_out) {
// Remember the original pointers; they are handed to CheckFastAsciiConvert in
// the DCHECK near the end, after dst and src have been advanced.
1023 char* saved_dst = dst;
1024 const char* saved_src = src;
1026 DisallowHeapAllocation no_gc;
1027 // We rely on the distance between upper and lower case letters
1028 // being a known power of 2.
1029 DCHECK('a' - 'A' == (1 << 5));
1030 // Boundaries for the range of input characters that require conversion.
// Both bounds are exclusive: one below the first letter, one above the last.
1031 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1;
1032 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;
1033 bool changed = false;
1034 uintptr_t or_acc = 0;
1035 const char* const limit = src + length;
1037 // dst is newly allocated and always aligned.
1038 DCHECK(IsAligned(reinterpret_cast<intptr_t>(dst), sizeof(uintptr_t)));
1039 // Only attempt processing one word at a time if src is also aligned.
1040 if (IsAligned(reinterpret_cast<intptr_t>(src), sizeof(uintptr_t))) {
1041 // Process the prefix of the input that requires no conversion one aligned
1042 // (machine) word at a time.
// The loop runs while at least one whole word remains before limit.
1043 while (src <= limit - sizeof(uintptr_t)) {
1044 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
// A non-zero mask means some byte of w lies strictly between lo and hi, i.e.
// this word contains a character that needs conversion.
1046 if (AsciiRangeMask(w, lo, hi) != 0) {
// Store the word unchanged and advance both pointers by one word.
1050 *reinterpret_cast<uintptr_t*>(dst) = w;
1051 src += sizeof(uintptr_t);
1052 dst += sizeof(uintptr_t);
1054 // Process the remainder of the input performing conversion when
1055 // required one word at a time.
1056 while (src <= limit - sizeof(uintptr_t)) {
1057 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
1059 uintptr_t m = AsciiRangeMask(w, lo, hi);
1060 // The mask has high (7th) bit set in every byte that needs
1061 // conversion and we know that the distance between cases is
// 0x20 (1 << 5, see the DCHECK above). Shifting the mask right by 2 moves each
// 0x80 to 0x20, and the XOR flips exactly that bit in the marked bytes.
1063 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
1064 src += sizeof(uintptr_t);
1065 dst += sizeof(uintptr_t);
1068 // Process the last few bytes of the input (or the whole input if
1069 // unaligned access is not supported).
1070 while (src < limit) {
// Byte-wise form of the same strict range test used by AsciiRangeMask.
1073 if (lo < c && c < hi) {
// Any bit of kAsciiMask present in or_acc makes the conversion report failure.
1082 if ((or_acc & kAsciiMask) != 0) return false;
1084 DCHECK(CheckFastAsciiConvert(saved_dst, saved_src, length, changed,
1085 Converter::kIsToLower));
1087 *changed_out = changed;
// Converts the case of |s| using |mapping| and returns the result (or an
// exception marker from the helper / allocation macros).
// The string is flattened first. Strings that are one-byte underneath try
// FastAsciiConvert; if that reports ASCII input the answer is either the new
// string or |s| itself when nothing changed. Otherwise the generic
// ConvertCaseHelper path is used, possibly twice: once with a result of the
// input length and, if the helper answers with a Smi, again with a freshly
// allocated result of the length encoded in that Smi.
1092 template <class Converter>
1093 MUST_USE_RESULT static Object* ConvertCase(
1094 Handle<String> s, Isolate* isolate,
1095 unibrow::Mapping<Converter, 128>* mapping) {
1096 s = String::Flatten(s);
1097 int length = s->length();
1098 // Assume that the string is not empty; we need this assumption later
1099 if (length == 0) return *s;
1101 // Simpler handling of ASCII strings.
1103 // NOTE: This assumes that the upper/lower case of an ASCII
1104 // character is also ASCII. This is currently the case, but it
1105 // might break in the future if we implement more context and locale
1106 // dependent upper/lower conversions.
1107 if (s->IsOneByteRepresentationUnderneath()) {
1108 // Same length as input.
1109 Handle<SeqOneByteString> result =
1110 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
// Raw character pointers are taken below, so no allocation may happen from
// here until they are no longer used.
1111 DisallowHeapAllocation no_gc;
1112 String::FlatContent flat_content = s->GetFlatContent();
1113 DCHECK(flat_content.IsFlat());
1114 bool has_changed_character = false;
1115 bool is_ascii = FastAsciiConvert<Converter>(
1116 reinterpret_cast<char*>(result->GetChars()),
1117 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
1118 length, &has_changed_character);
1119 // If not ASCII, we discard the result and take the 2 byte path.
// When nothing changed, the original string is returned instead of the copy.
1120 if (is_ascii) return has_changed_character ? *result : *s;
1123 Handle<SeqString> result; // Same length as input.
1124 if (s->IsOneByteRepresentation()) {
1125 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
1127 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
// First attempt: the helper answers with an exception, a finished string, or
// a Smi.
1130 Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
1131 if (answer->IsException() || answer->IsString()) return answer;
// A Smi answer carries the length to use for the second attempt.
1133 DCHECK(answer->IsSmi());
1134 length = Smi::cast(answer)->value();
1135 if (s->IsOneByteRepresentation() && length > 0) {
1136 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1137 isolate, result, isolate->factory()->NewRawOneByteString(length));
// A negative length is negated before the two-byte result is allocated.
1139 if (length < 0) length = -length;
1140 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1141 isolate, result, isolate->factory()->NewRawTwoByteString(length));
// Second attempt with the resized result; its answer is returned as is.
1143 return ConvertCaseHelper(isolate, *s, *result, length, mapping);
// Runtime entry taking one String argument. Returns the result of ConvertCase
// on it using the isolate's to_lower_mapping().
1147 RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
1148 HandleScope scope(isolate);
1149 DCHECK(args.length() == 1);
1150 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1151 return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping());
// Runtime entry taking one String argument. Returns the result of ConvertCase
// on it using the isolate's to_upper_mapping().
1155 RUNTIME_FUNCTION(Runtime_StringToUpperCase) {
1156 HandleScope scope(isolate);
1157 DCHECK(args.length() == 1);
1158 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1159 return ConvertCase(s, isolate, isolate->runtime_state()->to_upper_mapping());
// Runtime entry taking (string, trimLeft, trimRight). Flattens the string,
// moves |left| forward and |right| backward over characters for which
// IsWhiteSpaceOrLineTerminator is true, and returns the substring
// [left, right).
// NOTE(review): trimLeft / trimRight presumably enable the respective scan --
// confirm.
1163 RUNTIME_FUNCTION(Runtime_StringTrim) {
1164 HandleScope scope(isolate);
1165 DCHECK(args.length() == 3);
1167 CONVERT_ARG_HANDLE_CHECKED(String, string, 0);
1168 CONVERT_BOOLEAN_ARG_CHECKED(trimLeft, 1);
1169 CONVERT_BOOLEAN_ARG_CHECKED(trimRight, 2);
1171 string = String::Flatten(string);
1172 int length = string->length();
1175 UnicodeCache* unicode_cache = isolate->unicode_cache();
// Scan from the front while the current character is trimmable.
1177 while (left < length &&
1178 unicode_cache->IsWhiteSpaceOrLineTerminator(string->Get(left))) {
// Scan from the back: the character examined is the one just before |right|.
1187 unicode_cache->IsWhiteSpaceOrLineTerminator(string->Get(right - 1))) {
1192 return *isolate->factory()->NewSubString(string, left, right);
// Runtime entry taking (SeqString string, int32 new_length). Rejects a
// negative new_length via RUNTIME_ASSERT and returns the result of
// SeqString::Truncate(string, new_length).
1196 RUNTIME_FUNCTION(Runtime_TruncateString) {
1197 HandleScope scope(isolate);
1198 DCHECK(args.length() == 2);
1199 CONVERT_ARG_HANDLE_CHECKED(SeqString, string, 0);
1200 CONVERT_INT32_ARG_CHECKED(new_length, 1);
1201 RUNTIME_ASSERT(new_length >= 0);
1202 return *SeqString::Truncate(string, new_length);
// Runtime entry taking (int32 length, boolean is_one_byte). Returns the empty
// string for length 0; otherwise allocates a raw one-byte or two-byte string
// of the given length, propagating an allocation exception.
// NOTE(review): the choice between the two allocations is presumably made on
// is_one_byte -- confirm.
1206 RUNTIME_FUNCTION(Runtime_NewString) {
1207 HandleScope scope(isolate);
1208 DCHECK(args.length() == 2);
1209 CONVERT_INT32_ARG_CHECKED(length, 0);
1210 CONVERT_BOOLEAN_ARG_CHECKED(is_one_byte, 1);
1211 if (length == 0) return isolate->heap()->empty_string();
1212 Handle<String> result;
1214 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1215 isolate, result, isolate->factory()->NewRawOneByteString(length));
1217 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1218 isolate, result, isolate->factory()->NewRawTwoByteString(length));
// Runtime entry taking (int32 length, boolean is_one_byte, String left,
// String right). Builds a cons string from left and right through the
// factory's one-byte or two-byte cons-string constructor, propagating an
// exception from either.
// NOTE(review): the choice between the two constructors is presumably made on
// is_one_byte -- confirm.
1224 RUNTIME_FUNCTION(Runtime_NewConsString) {
1225 HandleScope scope(isolate);
1226 DCHECK(args.length() == 4);
1227 CONVERT_INT32_ARG_CHECKED(length, 0);
1228 CONVERT_BOOLEAN_ARG_CHECKED(is_one_byte, 1);
1229 CONVERT_ARG_HANDLE_CHECKED(String, left, 2);
1230 CONVERT_ARG_HANDLE_CHECKED(String, right, 3);
1232 Handle<String> result;
1234 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1236 isolate->factory()->NewOneByteConsString(length, left, right));
1238 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1240 isolate->factory()->NewTwoByteConsString(length, left, right));
// Runtime entry taking two String arguments. Returns Smi 0 (EQUAL) when
// String::Equals reports them equal and Smi 1 (NOT_EQUAL) otherwise.
1246 RUNTIME_FUNCTION(Runtime_StringEquals) {
1247 HandleScope handle_scope(isolate);
1248 DCHECK(args.length() == 2);
1250 CONVERT_ARG_HANDLE_CHECKED(String, x, 0);
1251 CONVERT_ARG_HANDLE_CHECKED(String, y, 1);
1253 bool not_equal = !String::Equals(x, y);
1254 // This is slightly convoluted because the value that signifies
1255 // equality is 0 and inequality is 1 so we have to negate the result
1256 // from String::Equals.
1257 DCHECK(not_equal == 0 || not_equal == 1);
// The static asserts pin the numeric values the bool-to-Smi conversion below
// relies on.
1258 STATIC_ASSERT(EQUAL == 0);
1259 STATIC_ASSERT(NOT_EQUAL == 1);
1260 return Smi::FromInt(not_equal);
// Runtime entry taking one String argument. Returns the result of
// String::Flatten on it.
1264 RUNTIME_FUNCTION(Runtime_FlattenString) {
1265 HandleScope scope(isolate);
1266 DCHECK(args.length() == 1);
1267 CONVERT_ARG_HANDLE_CHECKED(String, str, 0);
1268 return *String::Flatten(str);
// Runtime entry that forwards its arguments unchanged to the implementation of
// Runtime_CharFromCode and returns that result.
1272 RUNTIME_FUNCTION(Runtime_StringCharFromCode) {
1273 SealHandleScope shs(isolate);
1274 return __RT_impl_Runtime_CharFromCode(args, isolate);
// Runtime entry taking (receiver, index). Returns Smi 0 when the receiver is
// not a String or the index is not a Number, and the empty string when the
// index is infinite or the char-code lookup yields NaN. Otherwise returns the
// result of Runtime_CharFromCode applied to the char code found by
// Runtime_StringCharCodeAtRT.
1278 RUNTIME_FUNCTION(Runtime_StringCharAt) {
1279 SealHandleScope shs(isolate);
1280 DCHECK(args.length() == 2);
1281 if (!args[0]->IsString()) return Smi::FromInt(0);
1282 if (!args[1]->IsNumber()) return Smi::FromInt(0);
1283 if (std::isinf(args.number_at(1))) return isolate->heap()->empty_string();
1284 Object* code = __RT_impl_Runtime_StringCharCodeAtRT(args, isolate);
1285 if (code->IsNaN()) return isolate->heap()->empty_string();
// Wrap the single char code in a one-element Arguments for the callee.
1286 return __RT_impl_Runtime_CharFromCode(Arguments(1, &code), isolate);
// Runtime entry taking (SeqOneByteString string, int32 index). Returns the
// value of string->SeqOneByteStringGet(index) as a Smi. The index is passed
// through without a range check in this function.
1290 RUNTIME_FUNCTION(Runtime_OneByteSeqStringGetChar) {
1291 SealHandleScope shs(isolate);
1292 DCHECK(args.length() == 2);
1293 CONVERT_ARG_CHECKED(SeqOneByteString, string, 0);
1294 CONVERT_INT32_ARG_CHECKED(index, 1);
1295 return Smi::FromInt(string->SeqOneByteStringGet(index));
// Runtime entry taking (int32 index, int32 value, SeqOneByteString string).
// Note the argument order: the string comes last, unlike the matching GetChar
// entry. Stores value at index via SeqOneByteStringSet; index and value are
// passed through as converted.
1299 RUNTIME_FUNCTION(Runtime_OneByteSeqStringSetChar) {
1300 SealHandleScope shs(isolate);
1301 DCHECK(args.length() == 3);
1302 CONVERT_INT32_ARG_CHECKED(index, 0);
1303 CONVERT_INT32_ARG_CHECKED(value, 1);
1304 CONVERT_ARG_CHECKED(SeqOneByteString, string, 2);
1305 string->SeqOneByteStringSet(index, value);
// Runtime entry taking (SeqTwoByteString string, int32 index). Returns the
// value of string->SeqTwoByteStringGet(index) as a Smi. The index is passed
// through without a range check in this function.
1310 RUNTIME_FUNCTION(Runtime_TwoByteSeqStringGetChar) {
1311 SealHandleScope shs(isolate);
1312 DCHECK(args.length() == 2);
1313 CONVERT_ARG_CHECKED(SeqTwoByteString, string, 0);
1314 CONVERT_INT32_ARG_CHECKED(index, 1);
1315 return Smi::FromInt(string->SeqTwoByteStringGet(index));
// Runtime entry taking (int32 index, int32 value, SeqTwoByteString string).
// Note the argument order: the string comes last, unlike the matching GetChar
// entry. Stores value at index via SeqTwoByteStringSet; index and value are
// passed through as converted.
1319 RUNTIME_FUNCTION(Runtime_TwoByteSeqStringSetChar) {
1320 SealHandleScope shs(isolate);
1321 DCHECK(args.length() == 3);
1322 CONVERT_INT32_ARG_CHECKED(index, 0);
1323 CONVERT_INT32_ARG_CHECKED(value, 1);
1324 CONVERT_ARG_CHECKED(SeqTwoByteString, string, 2);
1325 string->SeqTwoByteStringSet(index, value);
// Runtime entry taking (receiver, index). Returns undefined when the receiver
// is not a String or the index is not a Number, and NaN when the index is
// infinite. Otherwise forwards the arguments to the implementation of
// Runtime_StringCharCodeAtRT and returns its result.
1330 RUNTIME_FUNCTION(Runtime_StringCharCodeAt) {
1331 SealHandleScope shs(isolate);
1332 DCHECK(args.length() == 2);
1333 if (!args[0]->IsString()) return isolate->heap()->undefined_value();
1334 if (!args[1]->IsNumber()) return isolate->heap()->undefined_value();
1335 if (std::isinf(args.number_at(1))) return isolate->heap()->nan_value();
1336 return __RT_impl_Runtime_StringCharCodeAtRT(args, isolate);
1340 RUNTIME_FUNCTION(Runtime_IsStringWrapperSafeForDefaultValueOf) {
// Runtime entry taking one String argument. Returns its length as a Smi.
1346 RUNTIME_FUNCTION(Runtime_StringGetLength) {
1347 HandleScope scope(isolate);
1348 DCHECK(args.length() == 1);
1349 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1350 return Smi::FromInt(s->length());
1353 } // namespace v8::internal