1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "src/arguments.h"
8 #include "src/jsregexp-inl.h"
9 #include "src/jsregexp.h"
10 #include "src/runtime/runtime-utils.h"
11 #include "src/string-builder.h"
12 #include "src/string-search.h"
17 class CompiledReplacement {
19 explicit CompiledReplacement(Zone* zone)
20 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
22 // Return whether the replacement is simple.
23 bool Compile(Handle<String> replacement, int capture_count,
26 // Use Apply only if Compile returned false.
27 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
30 // Number of distinct parts of the replacement pattern.
31 int parts() { return parts_.length(); }
33 Zone* zone() const { return zone_; }
40 REPLACEMENT_SUBSTRING,
45 struct ReplacementPart {
46 static inline ReplacementPart SubjectMatch() {
47 return ReplacementPart(SUBJECT_CAPTURE, 0);
49 static inline ReplacementPart SubjectCapture(int capture_index) {
50 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
52 static inline ReplacementPart SubjectPrefix() {
53 return ReplacementPart(SUBJECT_PREFIX, 0);
55 static inline ReplacementPart SubjectSuffix(int subject_length) {
56 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
58 static inline ReplacementPart ReplacementString() {
59 return ReplacementPart(REPLACEMENT_STRING, 0);
61 static inline ReplacementPart ReplacementSubString(int from, int to) {
64 return ReplacementPart(-from, to);
67 // If tag <= 0 then it is the negation of a start index of a substring of
68 // the replacement pattern, otherwise it's a value from PartType.
69 ReplacementPart(int tag, int data) : tag(tag), data(data) {
70 // Must be non-positive or a PartType value.
71 DCHECK(tag < NUMBER_OF_PART_TYPES);
73 // Either a value of PartType or a non-positive number that is
74 // the negation of an index into the replacement string.
76 // The data value's interpretation depends on the value of tag:
77 // tag == SUBJECT_PREFIX ||
78 // tag == SUBJECT_SUFFIX: data is unused.
79 // tag == SUBJECT_CAPTURE: data is the number of the capture.
80 // tag == REPLACEMENT_SUBSTRING ||
81 // tag == REPLACEMENT_STRING: data is index into array of substrings
82 // of the replacement string.
83 // tag <= 0: Temporary representation of the substring of the replacement
84 // string ranging over -tag .. data.
85 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
90 template <typename Char>
91 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
92 Vector<Char> characters, int capture_count,
93 int subject_length, Zone* zone) {
94 int length = characters.length();
96 for (int i = 0; i < length; i++) {
97 Char c = characters[i];
99 int next_index = i + 1;
100 if (next_index == length) { // No next character!
103 Char c2 = characters[next_index];
107 // There is a substring before. Include the first "$".
109 ReplacementPart::ReplacementSubString(last, next_index),
111 last = next_index + 1; // Continue after the second "$".
113 // Let the next substring start with the second "$".
120 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
122 parts->Add(ReplacementPart::SubjectPrefix(), zone);
128 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
130 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
136 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
138 parts->Add(ReplacementPart::SubjectMatch(), zone);
152 int capture_ref = c2 - '0';
153 if (capture_ref > capture_count) {
157 int second_digit_index = next_index + 1;
158 if (second_digit_index < length) {
159 // Peek ahead to see if we have two digits.
160 Char c3 = characters[second_digit_index];
161 if ('0' <= c3 && c3 <= '9') { // Double digits.
162 int double_digit_ref = capture_ref * 10 + c3 - '0';
163 if (double_digit_ref <= capture_count) {
164 next_index = second_digit_index;
165 capture_ref = double_digit_ref;
169 if (capture_ref > 0) {
171 parts->Add(ReplacementPart::ReplacementSubString(last, i),
174 DCHECK(capture_ref <= capture_count);
175 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
176 last = next_index + 1;
189 // Replacement is simple. Do not use Apply to do the replacement.
192 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
198 ZoneList<ReplacementPart> parts_;
199 ZoneList<Handle<String> > replacement_substrings_;
204 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
205 int subject_length) {
207 DisallowHeapAllocation no_gc;
208 String::FlatContent content = replacement->GetFlatContent();
209 DCHECK(content.IsFlat());
211 if (content.IsOneByte()) {
212 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
213 capture_count, subject_length, zone());
215 DCHECK(content.IsTwoByte());
216 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
217 capture_count, subject_length, zone());
219 if (simple) return true;
222 Isolate* isolate = replacement->GetIsolate();
223 // Find substrings of replacement string and create them as String objects.
224 int substring_index = 0;
225 for (int i = 0, n = parts_.length(); i < n; i++) {
226 int tag = parts_[i].tag;
227 if (tag <= 0) { // A replacement string slice.
229 int to = parts_[i].data;
230 replacement_substrings_.Add(
231 isolate->factory()->NewSubString(replacement, from, to), zone());
232 parts_[i].tag = REPLACEMENT_SUBSTRING;
233 parts_[i].data = substring_index;
235 } else if (tag == REPLACEMENT_STRING) {
236 replacement_substrings_.Add(replacement, zone());
237 parts_[i].data = substring_index;
245 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
246 int match_from, int match_to, int32_t* match) {
247 DCHECK_LT(0, parts_.length());
248 for (int i = 0, n = parts_.length(); i < n; i++) {
249 ReplacementPart part = parts_[i];
252 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
254 case SUBJECT_SUFFIX: {
255 int subject_length = part.data;
256 if (match_to < subject_length) {
257 builder->AddSubjectSlice(match_to, subject_length);
261 case SUBJECT_CAPTURE: {
262 int capture = part.data;
263 int from = match[capture * 2];
264 int to = match[capture * 2 + 1];
265 if (from >= 0 && to > from) {
266 builder->AddSubjectSlice(from, to);
270 case REPLACEMENT_SUBSTRING:
271 case REPLACEMENT_STRING:
272 builder->AddString(replacement_substrings_[part.data]);
281 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
282 ZoneList<int>* indices, unsigned int limit,
285 // Collect indices of pattern in subject using memchr.
286 // Stop after finding at most limit values.
287 const uint8_t* subject_start = subject.start();
288 const uint8_t* subject_end = subject_start + subject.length();
289 const uint8_t* pos = subject_start;
291 pos = reinterpret_cast<const uint8_t*>(
292 memchr(pos, pattern, subject_end - pos));
293 if (pos == NULL) return;
294 indices->Add(static_cast<int>(pos - subject_start), zone);
301 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
302 ZoneList<int>* indices, unsigned int limit,
305 const uc16* subject_start = subject.start();
306 const uc16* subject_end = subject_start + subject.length();
307 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
308 if (*pos == pattern) {
309 indices->Add(static_cast<int>(pos - subject_start), zone);
316 template <typename SubjectChar, typename PatternChar>
317 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
318 Vector<const PatternChar> pattern,
319 ZoneList<int>* indices, unsigned int limit, Zone* zone) {
321 // Collect indices of pattern in subject.
322 // Stop after finding at most limit values.
323 int pattern_length = pattern.length();
325 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
327 index = search.Search(subject, index);
328 if (index < 0) return;
329 indices->Add(index, zone);
330 index += pattern_length;
336 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
337 String* pattern, ZoneList<int>* indices,
338 unsigned int limit, Zone* zone) {
340 DisallowHeapAllocation no_gc;
341 String::FlatContent subject_content = subject->GetFlatContent();
342 String::FlatContent pattern_content = pattern->GetFlatContent();
343 DCHECK(subject_content.IsFlat());
344 DCHECK(pattern_content.IsFlat());
345 if (subject_content.IsOneByte()) {
346 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
347 if (pattern_content.IsOneByte()) {
348 Vector<const uint8_t> pattern_vector =
349 pattern_content.ToOneByteVector();
350 if (pattern_vector.length() == 1) {
351 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
354 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
358 FindStringIndices(isolate, subject_vector,
359 pattern_content.ToUC16Vector(), indices, limit, zone);
362 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
363 if (pattern_content.IsOneByte()) {
364 Vector<const uint8_t> pattern_vector =
365 pattern_content.ToOneByteVector();
366 if (pattern_vector.length() == 1) {
367 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
370 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
374 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
375 if (pattern_vector.length() == 1) {
376 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
379 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
388 template <typename ResultSeqString>
389 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
390 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
391 Handle<String> replacement, Handle<JSArray> last_match_info) {
392 DCHECK(subject->IsFlat());
393 DCHECK(replacement->IsFlat());
395 ZoneScope zone_scope(isolate->runtime_zone());
396 ZoneList<int> indices(8, zone_scope.zone());
397 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
399 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
400 int subject_len = subject->length();
401 int pattern_len = pattern->length();
402 int replacement_len = replacement->length();
404 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
407 int matches = indices.length();
408 if (matches == 0) return *subject;
410 // Detect integer overflow.
411 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
412 static_cast<int64_t>(pattern_len)) *
413 static_cast<int64_t>(matches) +
414 static_cast<int64_t>(subject_len);
416 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
417 STATIC_ASSERT(String::kMaxLength < kMaxInt);
418 result_len = kMaxInt; // Provoke exception.
420 result_len = static_cast<int>(result_len_64);
426 MaybeHandle<SeqString> maybe_res;
427 if (ResultSeqString::kHasOneByteEncoding) {
428 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
430 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
432 Handle<SeqString> untyped_res;
433 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
434 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
436 for (int i = 0; i < matches; i++) {
437 // Copy non-matched subject content.
438 if (subject_pos < indices.at(i)) {
439 String::WriteToFlat(*subject, result->GetChars() + result_pos,
440 subject_pos, indices.at(i));
441 result_pos += indices.at(i) - subject_pos;
445 if (replacement_len > 0) {
446 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
448 result_pos += replacement_len;
451 subject_pos = indices.at(i) + pattern_len;
453 // Add remaining subject content at the end.
454 if (subject_pos < subject_len) {
455 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
459 int32_t match_indices[] = {indices.at(matches - 1),
460 indices.at(matches - 1) + pattern_len};
461 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
467 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
468 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
469 Handle<String> replacement, Handle<JSArray> last_match_info) {
470 DCHECK(subject->IsFlat());
471 DCHECK(replacement->IsFlat());
473 int capture_count = regexp->CaptureCount();
474 int subject_length = subject->length();
476 // CompiledReplacement uses zone allocation.
477 ZoneScope zone_scope(isolate->runtime_zone());
478 CompiledReplacement compiled_replacement(zone_scope.zone());
479 bool simple_replace =
480 compiled_replacement.Compile(replacement, capture_count, subject_length);
482 // Shortcut for simple non-regexp global replacements
483 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
484 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
485 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
486 isolate, subject, regexp, replacement, last_match_info);
488 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
489 isolate, subject, regexp, replacement, last_match_info);
493 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
494 if (global_cache.HasException()) return isolate->heap()->exception();
496 int32_t* current_match = global_cache.FetchNext();
497 if (current_match == NULL) {
498 if (global_cache.HasException()) return isolate->heap()->exception();
502 // Guessing the number of parts that the final result string is built
503 // from. Global regexps can match any number of times, so we guess
505 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
506 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
508 // Number of parts added by compiled replacement plus preceeding
509 // string and possibly suffix after last match. It is possible for
510 // all components to use two elements when encoded as two smis.
511 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
516 builder.EnsureCapacity(parts_added_per_loop);
518 int start = current_match[0];
519 int end = current_match[1];
522 builder.AddSubjectSlice(prev, start);
525 if (simple_replace) {
526 builder.AddString(replacement);
528 compiled_replacement.Apply(&builder, start, end, current_match);
532 current_match = global_cache.FetchNext();
533 } while (current_match != NULL);
535 if (global_cache.HasException()) return isolate->heap()->exception();
537 if (prev < subject_length) {
538 builder.EnsureCapacity(2);
539 builder.AddSubjectSlice(prev, subject_length);
542 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
543 global_cache.LastSuccessfulMatch());
545 Handle<String> result;
546 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
551 template <typename ResultSeqString>
552 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
553 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
554 Handle<JSArray> last_match_info) {
555 DCHECK(subject->IsFlat());
557 // Shortcut for simple non-regexp global replacements
558 if (regexp->TypeTag() == JSRegExp::ATOM) {
559 Handle<String> empty_string = isolate->factory()->empty_string();
560 if (subject->IsOneByteRepresentation()) {
561 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
562 isolate, subject, regexp, empty_string, last_match_info);
564 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
565 isolate, subject, regexp, empty_string, last_match_info);
569 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
570 if (global_cache.HasException()) return isolate->heap()->exception();
572 int32_t* current_match = global_cache.FetchNext();
573 if (current_match == NULL) {
574 if (global_cache.HasException()) return isolate->heap()->exception();
578 int start = current_match[0];
579 int end = current_match[1];
580 int capture_count = regexp->CaptureCount();
581 int subject_length = subject->length();
583 int new_length = subject_length - (end - start);
584 if (new_length == 0) return isolate->heap()->empty_string();
586 Handle<ResultSeqString> answer;
587 if (ResultSeqString::kHasOneByteEncoding) {
588 answer = Handle<ResultSeqString>::cast(
589 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
591 answer = Handle<ResultSeqString>::cast(
592 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
599 start = current_match[0];
600 end = current_match[1];
602 // Add substring subject[prev;start] to answer string.
603 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
604 position += start - prev;
608 current_match = global_cache.FetchNext();
609 } while (current_match != NULL);
611 if (global_cache.HasException()) return isolate->heap()->exception();
613 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
614 global_cache.LastSuccessfulMatch());
616 if (prev < subject_length) {
617 // Add substring subject[prev;length] to answer string.
618 String::WriteToFlat(*subject, answer->GetChars() + position, prev,
620 position += subject_length - prev;
623 if (position == 0) return isolate->heap()->empty_string();
625 // Shorten string and fill
626 int string_size = ResultSeqString::SizeFor(position);
627 int allocated_string_size = ResultSeqString::SizeFor(new_length);
628 int delta = allocated_string_size - string_size;
630 answer->set_length(position);
631 if (delta == 0) return *answer;
633 Address end_of_string = answer->address() + string_size;
634 Heap* heap = isolate->heap();
636 // The trimming is performed on a newly allocated object, which is on a
637 // fresly allocated page or on an already swept page. Hence, the sweeper
638 // thread can not get confused with the filler creation. No synchronization
640 heap->CreateFillerObjectAt(end_of_string, delta);
641 heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR);
646 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
647 HandleScope scope(isolate);
648 DCHECK(args.length() == 4);
650 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
651 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
652 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
653 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
655 RUNTIME_ASSERT(regexp->GetFlags().is_global());
656 RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
658 subject = String::Flatten(subject);
660 if (replacement->length() == 0) {
661 if (subject->HasOnlyOneByteChars()) {
662 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
663 isolate, subject, regexp, last_match_info);
665 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
666 isolate, subject, regexp, last_match_info);
670 replacement = String::Flatten(replacement);
672 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
673 replacement, last_match_info);
677 RUNTIME_FUNCTION(Runtime_StringSplit) {
678 HandleScope handle_scope(isolate);
679 DCHECK(args.length() == 3);
680 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
681 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
682 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
683 RUNTIME_ASSERT(limit > 0);
685 int subject_length = subject->length();
686 int pattern_length = pattern->length();
687 RUNTIME_ASSERT(pattern_length > 0);
689 if (limit == 0xffffffffu) {
690 Handle<Object> cached_answer(
691 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
692 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
694 if (*cached_answer != Smi::FromInt(0)) {
695 // The cache FixedArray is a COW-array and can therefore be reused.
696 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
697 Handle<FixedArray>::cast(cached_answer));
702 // The limit can be very large (0xffffffffu), but since the pattern
703 // isn't empty, we can never create more parts than ~half the length
706 subject = String::Flatten(subject);
707 pattern = String::Flatten(pattern);
709 static const int kMaxInitialListCapacity = 16;
711 ZoneScope zone_scope(isolate->runtime_zone());
713 // Find (up to limit) indices of separator and end-of-string in subject
714 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
715 ZoneList<int> indices(initial_capacity, zone_scope.zone());
717 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
720 if (static_cast<uint32_t>(indices.length()) < limit) {
721 indices.Add(subject_length, zone_scope.zone());
724 // The list indices now contains the end of each part to create.
726 // Create JSArray of substrings separated by separator.
727 int part_count = indices.length();
729 Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
730 JSObject::EnsureCanContainHeapObjectElements(result);
731 result->set_length(Smi::FromInt(part_count));
733 DCHECK(result->HasFastObjectElements());
735 if (part_count == 1 && indices.at(0) == subject_length) {
736 FixedArray::cast(result->elements())->set(0, *subject);
740 Handle<FixedArray> elements(FixedArray::cast(result->elements()));
742 for (int i = 0; i < part_count; i++) {
743 HandleScope local_loop_handle(isolate);
744 int part_end = indices.at(i);
745 Handle<String> substring =
746 isolate->factory()->NewProperSubString(subject, part_start, part_end);
747 elements->set(i, *substring);
748 part_start = part_end + pattern_length;
751 if (limit == 0xffffffffu) {
752 if (result->HasFastObjectElements()) {
753 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
754 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
762 RUNTIME_FUNCTION(Runtime_RegExpExec) {
763 HandleScope scope(isolate);
764 DCHECK(args.length() == 4);
765 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
766 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
767 CONVERT_INT32_ARG_CHECKED(index, 2);
768 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
769 // Due to the way the JS calls are constructed this must be less than the
770 // length of a string, i.e. it is always a Smi. We check anyway for security.
771 RUNTIME_ASSERT(index >= 0);
772 RUNTIME_ASSERT(index <= subject->length());
773 isolate->counters()->regexp_entry_runtime()->Increment();
774 Handle<Object> result;
775 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
777 RegExpImpl::Exec(regexp, subject, index, last_match_info));
782 RUNTIME_FUNCTION(Runtime_RegExpConstructResultRT) {
783 HandleScope handle_scope(isolate);
784 DCHECK(args.length() == 3);
785 CONVERT_SMI_ARG_CHECKED(size, 0);
786 RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
787 CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
788 CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
789 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
790 Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
791 Handle<JSObject> object =
792 isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false);
793 Handle<JSArray> array = Handle<JSArray>::cast(object);
794 array->set_elements(*elements);
795 array->set_length(Smi::FromInt(size));
796 // Write in-object properties after the length of the array.
797 array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
798 array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
803 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
804 SealHandleScope shs(isolate);
805 return __RT_impl_Runtime_RegExpConstructResultRT(args, isolate);
809 static JSRegExp::Flags RegExpFlagsFromString(Handle<String> flags,
811 uint32_t value = JSRegExp::NONE;
812 int length = flags->length();
813 // A longer flags string cannot be valid.
814 if (length > 5) return JSRegExp::Flags(0);
815 for (int i = 0; i < length; i++) {
816 uint32_t flag = JSRegExp::NONE;
817 switch (flags->Get(i)) {
819 flag = JSRegExp::GLOBAL;
822 flag = JSRegExp::IGNORE_CASE;
825 flag = JSRegExp::MULTILINE;
828 if (!FLAG_harmony_unicode_regexps) return JSRegExp::Flags(0);
829 flag = JSRegExp::UNICODE_ESCAPES;
832 if (!FLAG_harmony_regexps) return JSRegExp::Flags(0);
833 flag = JSRegExp::STICKY;
836 return JSRegExp::Flags(0);
839 if (value & flag) return JSRegExp::Flags(0);
843 return JSRegExp::Flags(value);
847 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
848 HandleScope scope(isolate);
849 DCHECK(args.length() == 3);
850 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
851 CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
852 CONVERT_ARG_HANDLE_CHECKED(String, flags_string, 2);
853 Factory* factory = isolate->factory();
854 // If source is the empty string we set it to "(?:)" instead as
855 // suggested by ECMA-262, 5th, section 15.10.4.1.
856 if (source->length() == 0) source = factory->query_colon_string();
858 bool success = false;
859 JSRegExp::Flags flags = RegExpFlagsFromString(flags_string, &success);
861 Handle<FixedArray> element = factory->NewFixedArray(1);
862 element->set(0, *flags_string);
863 Handle<JSArray> args = factory->NewJSArrayWithElements(element);
864 THROW_NEW_ERROR_RETURN_FAILURE(
865 isolate, NewSyntaxError("invalid_regexp_flags", args));
868 Handle<Object> global = factory->ToBoolean(flags.is_global());
869 Handle<Object> ignore_case = factory->ToBoolean(flags.is_ignore_case());
870 Handle<Object> multiline = factory->ToBoolean(flags.is_multiline());
871 Handle<Object> sticky = factory->ToBoolean(flags.is_sticky());
872 Handle<Object> unicode = factory->ToBoolean(flags.is_unicode());
874 Map* map = regexp->map();
875 Object* constructor = map->GetConstructor();
876 if (!FLAG_harmony_regexps && !FLAG_harmony_unicode_regexps &&
877 constructor->IsJSFunction() &&
878 JSFunction::cast(constructor)->initial_map() == map) {
879 // If we still have the original map, set in-object properties directly.
880 // Both true and false are immovable immortal objects so no need for write
882 regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global,
884 regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignore_case,
886 regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline,
888 regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex,
889 Smi::FromInt(0), SKIP_WRITE_BARRIER);
891 // Map has changed, so use generic, but slower, method. We also end here if
892 // the --harmony-regexp flag is set, because the initial map does not have
893 // space for the 'sticky' flag, since it is from the snapshot, but must work
894 // both with and without --harmony-regexp. When sticky comes out from under
895 // the flag, we will be able to use the fast initial map.
896 PropertyAttributes final =
897 static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE);
898 PropertyAttributes writable =
899 static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);
900 Handle<Object> zero(Smi::FromInt(0), isolate);
901 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(),
902 global, final).Check();
903 JSObject::SetOwnPropertyIgnoreAttributes(
904 regexp, factory->ignore_case_string(), ignore_case, final).Check();
905 JSObject::SetOwnPropertyIgnoreAttributes(
906 regexp, factory->multiline_string(), multiline, final).Check();
907 if (FLAG_harmony_regexps) {
908 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
909 sticky, final).Check();
911 if (FLAG_harmony_unicode_regexps) {
912 JSObject::SetOwnPropertyIgnoreAttributes(
913 regexp, factory->unicode_string(), unicode, final).Check();
915 JSObject::SetOwnPropertyIgnoreAttributes(
916 regexp, factory->last_index_string(), zero, writable).Check();
919 Handle<Object> result;
920 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
921 isolate, result, RegExpImpl::Compile(regexp, source, flags));
926 RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) {
927 HandleScope scope(isolate);
928 DCHECK(args.length() == 4);
929 CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0);
930 CONVERT_SMI_ARG_CHECKED(index, 1);
931 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2);
932 CONVERT_ARG_HANDLE_CHECKED(String, flags, 3);
934 Handle<JSFunction> constructor = isolate->regexp_function();
935 // Compute the regular expression literal.
936 Handle<Object> regexp;
937 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
939 RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags));
940 literals->set(index, *regexp);
945 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
946 // separate last match info. See comment on that function.
947 template <bool has_capture>
948 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
949 Handle<JSRegExp> regexp,
950 Handle<JSArray> last_match_array,
951 Handle<JSArray> result_array) {
952 DCHECK(subject->IsFlat());
953 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
955 int capture_count = regexp->CaptureCount();
956 int subject_length = subject->length();
958 static const int kMinLengthToCache = 0x1000;
960 if (subject_length > kMinLengthToCache) {
961 Handle<Object> cached_answer(
962 RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(),
963 RegExpResultsCache::REGEXP_MULTIPLE_INDICES),
965 if (*cached_answer != Smi::FromInt(0)) {
966 Handle<FixedArray> cached_fixed_array =
967 Handle<FixedArray>(FixedArray::cast(*cached_answer));
968 // The cache FixedArray is a COW-array and can therefore be reused.
969 JSArray::SetContent(result_array, cached_fixed_array);
970 // The actual length of the result array is stored in the last element of
971 // the backing store (the backing FixedArray may have a larger capacity).
972 Object* cached_fixed_array_last_element =
973 cached_fixed_array->get(cached_fixed_array->length() - 1);
974 Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);
975 result_array->set_length(js_array_length);
976 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
978 return *result_array;
982 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
983 if (global_cache.HasException()) return isolate->heap()->exception();
985 // Ensured in Runtime_RegExpExecMultiple.
986 DCHECK(result_array->HasFastObjectElements());
987 Handle<FixedArray> result_elements(
988 FixedArray::cast(result_array->elements()));
989 if (result_elements->length() < 16) {
990 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
993 FixedArrayBuilder builder(result_elements);
995 // Position to search from.
996 int match_start = -1;
1000 // Two smis before and after the match, for very long strings.
1001 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1004 int32_t* current_match = global_cache.FetchNext();
1005 if (current_match == NULL) break;
1006 match_start = current_match[0];
1007 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
1008 if (match_end < match_start) {
1009 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1012 match_end = current_match[1];
1014 // Avoid accumulating new handles inside loop.
1015 HandleScope temp_scope(isolate);
1016 Handle<String> match;
1018 match = isolate->factory()->NewProperSubString(subject, match_start,
1022 isolate->factory()->NewSubString(subject, match_start, match_end);
1027 // Arguments array to replace function is match, captures, index and
1028 // subject, i.e., 3 + capture count in total.
1029 Handle<FixedArray> elements =
1030 isolate->factory()->NewFixedArray(3 + capture_count);
1032 elements->set(0, *match);
1033 for (int i = 1; i <= capture_count; i++) {
1034 int start = current_match[i * 2];
1036 int end = current_match[i * 2 + 1];
1037 DCHECK(start <= end);
1038 Handle<String> substring =
1039 isolate->factory()->NewSubString(subject, start, end);
1040 elements->set(i, *substring);
1042 DCHECK(current_match[i * 2 + 1] < 0);
1043 elements->set(i, isolate->heap()->undefined_value());
1046 elements->set(capture_count + 1, Smi::FromInt(match_start));
1047 elements->set(capture_count + 2, *subject);
1048 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1050 builder.Add(*match);
1055 if (global_cache.HasException()) return isolate->heap()->exception();
1057 if (match_start >= 0) {
1058 // Finished matching, with at least one match.
1059 if (match_end < subject_length) {
1060 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1064 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1067 if (subject_length > kMinLengthToCache) {
1068 // Store the length of the result array into the last element of the
1069 // backing FixedArray.
1070 builder.EnsureCapacity(1);
1071 Handle<FixedArray> fixed_array = builder.array();
1072 fixed_array->set(fixed_array->length() - 1,
1073 Smi::FromInt(builder.length()));
1074 // Cache the result and turn the FixedArray into a COW array.
1075 RegExpResultsCache::Enter(isolate, subject,
1076 handle(regexp->data(), isolate), fixed_array,
1077 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1079 return *builder.ToJSArray(result_array);
1081 return isolate->heap()->null_value(); // No matches at all.
1086 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets
1087 // lastMatchInfoOverride to maintain the last match info, so we don't need to
1088 // set any other last match array info.
1089 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1090 HandleScope handles(isolate);
1091 DCHECK(args.length() == 4);
1093 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1094 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1095 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
1096 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1097 RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
1098 RUNTIME_ASSERT(result_array->HasFastObjectElements());
1100 subject = String::Flatten(subject);
1101 RUNTIME_ASSERT(regexp->GetFlags().is_global());
1103 if (regexp->CaptureCount() == 0) {
1104 return SearchRegExpMultiple<false>(isolate, subject, regexp,
1105 last_match_info, result_array);
1107 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1113 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
1114 SealHandleScope shs(isolate);
1115 DCHECK(args.length() == 4);
1116 Object* exception = isolate->pending_exception();
1117 isolate->clear_pending_exception();
1118 return isolate->ReThrow(exception);
1122 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1123 SealHandleScope shs(isolate);
1124 DCHECK(args.length() == 1);
1125 CONVERT_ARG_CHECKED(Object, obj, 0);
1126 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1129 } // namespace v8::internal