Upstream version 11.40.271.0
[platform/framework/web/crosswalk.git] / src / v8 / src / runtime / runtime-regexp.cc
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/v8.h"
6
7 #include "src/arguments.h"
8 #include "src/jsregexp-inl.h"
9 #include "src/jsregexp.h"
10 #include "src/runtime/runtime-utils.h"
11 #include "src/runtime/string-builder.h"
12 #include "src/string-search.h"
13
14 namespace v8 {
15 namespace internal {
16
17 class CompiledReplacement {
18  public:
19   explicit CompiledReplacement(Zone* zone)
20       : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
21
22   // Return whether the replacement is simple.
23   bool Compile(Handle<String> replacement, int capture_count,
24                int subject_length);
25
26   // Use Apply only if Compile returned false.
27   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
28              int32_t* match);
29
30   // Number of distinct parts of the replacement pattern.
31   int parts() { return parts_.length(); }
32
33   Zone* zone() const { return zone_; }
34
35  private:
36   enum PartType {
37     SUBJECT_PREFIX = 1,
38     SUBJECT_SUFFIX,
39     SUBJECT_CAPTURE,
40     REPLACEMENT_SUBSTRING,
41     REPLACEMENT_STRING,
42     NUMBER_OF_PART_TYPES
43   };
44
45   struct ReplacementPart {
46     static inline ReplacementPart SubjectMatch() {
47       return ReplacementPart(SUBJECT_CAPTURE, 0);
48     }
49     static inline ReplacementPart SubjectCapture(int capture_index) {
50       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
51     }
52     static inline ReplacementPart SubjectPrefix() {
53       return ReplacementPart(SUBJECT_PREFIX, 0);
54     }
55     static inline ReplacementPart SubjectSuffix(int subject_length) {
56       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
57     }
58     static inline ReplacementPart ReplacementString() {
59       return ReplacementPart(REPLACEMENT_STRING, 0);
60     }
61     static inline ReplacementPart ReplacementSubString(int from, int to) {
62       DCHECK(from >= 0);
63       DCHECK(to > from);
64       return ReplacementPart(-from, to);
65     }
66
67     // If tag <= 0 then it is the negation of a start index of a substring of
68     // the replacement pattern, otherwise it's a value from PartType.
69     ReplacementPart(int tag, int data) : tag(tag), data(data) {
70       // Must be non-positive or a PartType value.
71       DCHECK(tag < NUMBER_OF_PART_TYPES);
72     }
73     // Either a value of PartType or a non-positive number that is
74     // the negation of an index into the replacement string.
75     int tag;
76     // The data value's interpretation depends on the value of tag:
77     // tag == SUBJECT_PREFIX ||
78     // tag == SUBJECT_SUFFIX:  data is unused.
79     // tag == SUBJECT_CAPTURE: data is the number of the capture.
80     // tag == REPLACEMENT_SUBSTRING ||
81     // tag == REPLACEMENT_STRING:    data is index into array of substrings
82     //                               of the replacement string.
83     // tag <= 0: Temporary representation of the substring of the replacement
84     //           string ranging over -tag .. data.
85     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
86     //           substring objects.
87     int data;
88   };
89
90   template <typename Char>
91   bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
92                                Vector<Char> characters, int capture_count,
93                                int subject_length, Zone* zone) {
94     int length = characters.length();
95     int last = 0;
96     for (int i = 0; i < length; i++) {
97       Char c = characters[i];
98       if (c == '$') {
99         int next_index = i + 1;
100         if (next_index == length) {  // No next character!
101           break;
102         }
103         Char c2 = characters[next_index];
104         switch (c2) {
105           case '$':
106             if (i > last) {
107               // There is a substring before. Include the first "$".
108               parts->Add(
109                   ReplacementPart::ReplacementSubString(last, next_index),
110                   zone);
111               last = next_index + 1;  // Continue after the second "$".
112             } else {
113               // Let the next substring start with the second "$".
114               last = next_index;
115             }
116             i = next_index;
117             break;
118           case '`':
119             if (i > last) {
120               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
121             }
122             parts->Add(ReplacementPart::SubjectPrefix(), zone);
123             i = next_index;
124             last = i + 1;
125             break;
126           case '\'':
127             if (i > last) {
128               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
129             }
130             parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
131             i = next_index;
132             last = i + 1;
133             break;
134           case '&':
135             if (i > last) {
136               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
137             }
138             parts->Add(ReplacementPart::SubjectMatch(), zone);
139             i = next_index;
140             last = i + 1;
141             break;
142           case '0':
143           case '1':
144           case '2':
145           case '3':
146           case '4':
147           case '5':
148           case '6':
149           case '7':
150           case '8':
151           case '9': {
152             int capture_ref = c2 - '0';
153             if (capture_ref > capture_count) {
154               i = next_index;
155               continue;
156             }
157             int second_digit_index = next_index + 1;
158             if (second_digit_index < length) {
159               // Peek ahead to see if we have two digits.
160               Char c3 = characters[second_digit_index];
161               if ('0' <= c3 && c3 <= '9') {  // Double digits.
162                 int double_digit_ref = capture_ref * 10 + c3 - '0';
163                 if (double_digit_ref <= capture_count) {
164                   next_index = second_digit_index;
165                   capture_ref = double_digit_ref;
166                 }
167               }
168             }
169             if (capture_ref > 0) {
170               if (i > last) {
171                 parts->Add(ReplacementPart::ReplacementSubString(last, i),
172                            zone);
173               }
174               DCHECK(capture_ref <= capture_count);
175               parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
176               last = next_index + 1;
177             }
178             i = next_index;
179             break;
180           }
181           default:
182             i = next_index;
183             break;
184         }
185       }
186     }
187     if (length > last) {
188       if (last == 0) {
189         // Replacement is simple.  Do not use Apply to do the replacement.
190         return true;
191       } else {
192         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
193       }
194     }
195     return false;
196   }
197
198   ZoneList<ReplacementPart> parts_;
199   ZoneList<Handle<String> > replacement_substrings_;
200   Zone* zone_;
201 };
202
203
204 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
205                                   int subject_length) {
206   {
207     DisallowHeapAllocation no_gc;
208     String::FlatContent content = replacement->GetFlatContent();
209     DCHECK(content.IsFlat());
210     bool simple = false;
211     if (content.IsOneByte()) {
212       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
213                                        capture_count, subject_length, zone());
214     } else {
215       DCHECK(content.IsTwoByte());
216       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
217                                        capture_count, subject_length, zone());
218     }
219     if (simple) return true;
220   }
221
222   Isolate* isolate = replacement->GetIsolate();
223   // Find substrings of replacement string and create them as String objects.
224   int substring_index = 0;
225   for (int i = 0, n = parts_.length(); i < n; i++) {
226     int tag = parts_[i].tag;
227     if (tag <= 0) {  // A replacement string slice.
228       int from = -tag;
229       int to = parts_[i].data;
230       replacement_substrings_.Add(
231           isolate->factory()->NewSubString(replacement, from, to), zone());
232       parts_[i].tag = REPLACEMENT_SUBSTRING;
233       parts_[i].data = substring_index;
234       substring_index++;
235     } else if (tag == REPLACEMENT_STRING) {
236       replacement_substrings_.Add(replacement, zone());
237       parts_[i].data = substring_index;
238       substring_index++;
239     }
240   }
241   return false;
242 }
243
244
245 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
246                                 int match_from, int match_to, int32_t* match) {
247   DCHECK_LT(0, parts_.length());
248   for (int i = 0, n = parts_.length(); i < n; i++) {
249     ReplacementPart part = parts_[i];
250     switch (part.tag) {
251       case SUBJECT_PREFIX:
252         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
253         break;
254       case SUBJECT_SUFFIX: {
255         int subject_length = part.data;
256         if (match_to < subject_length) {
257           builder->AddSubjectSlice(match_to, subject_length);
258         }
259         break;
260       }
261       case SUBJECT_CAPTURE: {
262         int capture = part.data;
263         int from = match[capture * 2];
264         int to = match[capture * 2 + 1];
265         if (from >= 0 && to > from) {
266           builder->AddSubjectSlice(from, to);
267         }
268         break;
269       }
270       case REPLACEMENT_SUBSTRING:
271       case REPLACEMENT_STRING:
272         builder->AddString(replacement_substrings_[part.data]);
273         break;
274       default:
275         UNREACHABLE();
276     }
277   }
278 }
279
280
281 void FindOneByteStringIndices(Vector<const uint8_t> subject, char pattern,
282                               ZoneList<int>* indices, unsigned int limit,
283                               Zone* zone) {
284   DCHECK(limit > 0);
285   // Collect indices of pattern in subject using memchr.
286   // Stop after finding at most limit values.
287   const uint8_t* subject_start = subject.start();
288   const uint8_t* subject_end = subject_start + subject.length();
289   const uint8_t* pos = subject_start;
290   while (limit > 0) {
291     pos = reinterpret_cast<const uint8_t*>(
292         memchr(pos, pattern, subject_end - pos));
293     if (pos == NULL) return;
294     indices->Add(static_cast<int>(pos - subject_start), zone);
295     pos++;
296     limit--;
297   }
298 }
299
300
301 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
302                               ZoneList<int>* indices, unsigned int limit,
303                               Zone* zone) {
304   DCHECK(limit > 0);
305   const uc16* subject_start = subject.start();
306   const uc16* subject_end = subject_start + subject.length();
307   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
308     if (*pos == pattern) {
309       indices->Add(static_cast<int>(pos - subject_start), zone);
310       limit--;
311     }
312   }
313 }
314
315
316 template <typename SubjectChar, typename PatternChar>
317 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
318                        Vector<const PatternChar> pattern,
319                        ZoneList<int>* indices, unsigned int limit, Zone* zone) {
320   DCHECK(limit > 0);
321   // Collect indices of pattern in subject.
322   // Stop after finding at most limit values.
323   int pattern_length = pattern.length();
324   int index = 0;
325   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
326   while (limit > 0) {
327     index = search.Search(subject, index);
328     if (index < 0) return;
329     indices->Add(index, zone);
330     index += pattern_length;
331     limit--;
332   }
333 }
334
335
336 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
337                                String* pattern, ZoneList<int>* indices,
338                                unsigned int limit, Zone* zone) {
339   {
340     DisallowHeapAllocation no_gc;
341     String::FlatContent subject_content = subject->GetFlatContent();
342     String::FlatContent pattern_content = pattern->GetFlatContent();
343     DCHECK(subject_content.IsFlat());
344     DCHECK(pattern_content.IsFlat());
345     if (subject_content.IsOneByte()) {
346       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
347       if (pattern_content.IsOneByte()) {
348         Vector<const uint8_t> pattern_vector =
349             pattern_content.ToOneByteVector();
350         if (pattern_vector.length() == 1) {
351           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
352                                    limit, zone);
353         } else {
354           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
355                             limit, zone);
356         }
357       } else {
358         FindStringIndices(isolate, subject_vector,
359                           pattern_content.ToUC16Vector(), indices, limit, zone);
360       }
361     } else {
362       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
363       if (pattern_content.IsOneByte()) {
364         Vector<const uint8_t> pattern_vector =
365             pattern_content.ToOneByteVector();
366         if (pattern_vector.length() == 1) {
367           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
368                                    limit, zone);
369         } else {
370           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
371                             limit, zone);
372         }
373       } else {
374         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
375         if (pattern_vector.length() == 1) {
376           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
377                                    limit, zone);
378         } else {
379           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
380                             limit, zone);
381         }
382       }
383     }
384   }
385 }
386
387
388 template <typename ResultSeqString>
389 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
390     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
391     Handle<String> replacement, Handle<JSArray> last_match_info) {
392   DCHECK(subject->IsFlat());
393   DCHECK(replacement->IsFlat());
394
395   ZoneScope zone_scope(isolate->runtime_zone());
396   ZoneList<int> indices(8, zone_scope.zone());
397   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
398   String* pattern =
399       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
400   int subject_len = subject->length();
401   int pattern_len = pattern->length();
402   int replacement_len = replacement->length();
403
404   FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
405                             zone_scope.zone());
406
407   int matches = indices.length();
408   if (matches == 0) return *subject;
409
410   // Detect integer overflow.
411   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
412                            static_cast<int64_t>(pattern_len)) *
413                               static_cast<int64_t>(matches) +
414                           static_cast<int64_t>(subject_len);
415   int result_len;
416   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
417     STATIC_ASSERT(String::kMaxLength < kMaxInt);
418     result_len = kMaxInt;  // Provoke exception.
419   } else {
420     result_len = static_cast<int>(result_len_64);
421   }
422
423   int subject_pos = 0;
424   int result_pos = 0;
425
426   MaybeHandle<SeqString> maybe_res;
427   if (ResultSeqString::kHasOneByteEncoding) {
428     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
429   } else {
430     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
431   }
432   Handle<SeqString> untyped_res;
433   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
434   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
435
436   for (int i = 0; i < matches; i++) {
437     // Copy non-matched subject content.
438     if (subject_pos < indices.at(i)) {
439       String::WriteToFlat(*subject, result->GetChars() + result_pos,
440                           subject_pos, indices.at(i));
441       result_pos += indices.at(i) - subject_pos;
442     }
443
444     // Replace match.
445     if (replacement_len > 0) {
446       String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
447                           replacement_len);
448       result_pos += replacement_len;
449     }
450
451     subject_pos = indices.at(i) + pattern_len;
452   }
453   // Add remaining subject content at the end.
454   if (subject_pos < subject_len) {
455     String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
456                         subject_len);
457   }
458
459   int32_t match_indices[] = {indices.at(matches - 1),
460                              indices.at(matches - 1) + pattern_len};
461   RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
462
463   return *result;
464 }
465
466
467 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
468     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
469     Handle<String> replacement, Handle<JSArray> last_match_info) {
470   DCHECK(subject->IsFlat());
471   DCHECK(replacement->IsFlat());
472
473   int capture_count = regexp->CaptureCount();
474   int subject_length = subject->length();
475
476   // CompiledReplacement uses zone allocation.
477   ZoneScope zone_scope(isolate->runtime_zone());
478   CompiledReplacement compiled_replacement(zone_scope.zone());
479   bool simple_replace =
480       compiled_replacement.Compile(replacement, capture_count, subject_length);
481
482   // Shortcut for simple non-regexp global replacements
483   if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
484     if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
485       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
486           isolate, subject, regexp, replacement, last_match_info);
487     } else {
488       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
489           isolate, subject, regexp, replacement, last_match_info);
490     }
491   }
492
493   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
494   if (global_cache.HasException()) return isolate->heap()->exception();
495
496   int32_t* current_match = global_cache.FetchNext();
497   if (current_match == NULL) {
498     if (global_cache.HasException()) return isolate->heap()->exception();
499     return *subject;
500   }
501
502   // Guessing the number of parts that the final result string is built
503   // from. Global regexps can match any number of times, so we guess
504   // conservatively.
505   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
506   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
507
508   // Number of parts added by compiled replacement plus preceeding
509   // string and possibly suffix after last match.  It is possible for
510   // all components to use two elements when encoded as two smis.
511   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
512
513   int prev = 0;
514
515   do {
516     builder.EnsureCapacity(parts_added_per_loop);
517
518     int start = current_match[0];
519     int end = current_match[1];
520
521     if (prev < start) {
522       builder.AddSubjectSlice(prev, start);
523     }
524
525     if (simple_replace) {
526       builder.AddString(replacement);
527     } else {
528       compiled_replacement.Apply(&builder, start, end, current_match);
529     }
530     prev = end;
531
532     current_match = global_cache.FetchNext();
533   } while (current_match != NULL);
534
535   if (global_cache.HasException()) return isolate->heap()->exception();
536
537   if (prev < subject_length) {
538     builder.EnsureCapacity(2);
539     builder.AddSubjectSlice(prev, subject_length);
540   }
541
542   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
543                                global_cache.LastSuccessfulMatch());
544
545   Handle<String> result;
546   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
547   return *result;
548 }
549
550
551 template <typename ResultSeqString>
552 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
553     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
554     Handle<JSArray> last_match_info) {
555   DCHECK(subject->IsFlat());
556
557   // Shortcut for simple non-regexp global replacements
558   if (regexp->TypeTag() == JSRegExp::ATOM) {
559     Handle<String> empty_string = isolate->factory()->empty_string();
560     if (subject->IsOneByteRepresentation()) {
561       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
562           isolate, subject, regexp, empty_string, last_match_info);
563     } else {
564       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
565           isolate, subject, regexp, empty_string, last_match_info);
566     }
567   }
568
569   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
570   if (global_cache.HasException()) return isolate->heap()->exception();
571
572   int32_t* current_match = global_cache.FetchNext();
573   if (current_match == NULL) {
574     if (global_cache.HasException()) return isolate->heap()->exception();
575     return *subject;
576   }
577
578   int start = current_match[0];
579   int end = current_match[1];
580   int capture_count = regexp->CaptureCount();
581   int subject_length = subject->length();
582
583   int new_length = subject_length - (end - start);
584   if (new_length == 0) return isolate->heap()->empty_string();
585
586   Handle<ResultSeqString> answer;
587   if (ResultSeqString::kHasOneByteEncoding) {
588     answer = Handle<ResultSeqString>::cast(
589         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
590   } else {
591     answer = Handle<ResultSeqString>::cast(
592         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
593   }
594
595   int prev = 0;
596   int position = 0;
597
598   do {
599     start = current_match[0];
600     end = current_match[1];
601     if (prev < start) {
602       // Add substring subject[prev;start] to answer string.
603       String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
604       position += start - prev;
605     }
606     prev = end;
607
608     current_match = global_cache.FetchNext();
609   } while (current_match != NULL);
610
611   if (global_cache.HasException()) return isolate->heap()->exception();
612
613   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
614                                global_cache.LastSuccessfulMatch());
615
616   if (prev < subject_length) {
617     // Add substring subject[prev;length] to answer string.
618     String::WriteToFlat(*subject, answer->GetChars() + position, prev,
619                         subject_length);
620     position += subject_length - prev;
621   }
622
623   if (position == 0) return isolate->heap()->empty_string();
624
625   // Shorten string and fill
626   int string_size = ResultSeqString::SizeFor(position);
627   int allocated_string_size = ResultSeqString::SizeFor(new_length);
628   int delta = allocated_string_size - string_size;
629
630   answer->set_length(position);
631   if (delta == 0) return *answer;
632
633   Address end_of_string = answer->address() + string_size;
634   Heap* heap = isolate->heap();
635
636   // The trimming is performed on a newly allocated object, which is on a
637   // fresly allocated page or on an already swept page. Hence, the sweeper
638   // thread can not get confused with the filler creation. No synchronization
639   // needed.
640   heap->CreateFillerObjectAt(end_of_string, delta);
641   heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR);
642   return *answer;
643 }
644
645
646 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
647   HandleScope scope(isolate);
648   DCHECK(args.length() == 4);
649
650   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
651   CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
652   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
653   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
654
655   RUNTIME_ASSERT(regexp->GetFlags().is_global());
656   RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
657
658   subject = String::Flatten(subject);
659
660   if (replacement->length() == 0) {
661     if (subject->HasOnlyOneByteChars()) {
662       return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
663           isolate, subject, regexp, last_match_info);
664     } else {
665       return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
666           isolate, subject, regexp, last_match_info);
667     }
668   }
669
670   replacement = String::Flatten(replacement);
671
672   return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
673                                              replacement, last_match_info);
674 }
675
676
677 RUNTIME_FUNCTION(Runtime_StringSplit) {
678   HandleScope handle_scope(isolate);
679   DCHECK(args.length() == 3);
680   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
681   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
682   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
683   RUNTIME_ASSERT(limit > 0);
684
685   int subject_length = subject->length();
686   int pattern_length = pattern->length();
687   RUNTIME_ASSERT(pattern_length > 0);
688
689   if (limit == 0xffffffffu) {
690     Handle<Object> cached_answer(
691         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
692                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
693         isolate);
694     if (*cached_answer != Smi::FromInt(0)) {
695       // The cache FixedArray is a COW-array and can therefore be reused.
696       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
697           Handle<FixedArray>::cast(cached_answer));
698       return *result;
699     }
700   }
701
702   // The limit can be very large (0xffffffffu), but since the pattern
703   // isn't empty, we can never create more parts than ~half the length
704   // of the subject.
705
706   subject = String::Flatten(subject);
707   pattern = String::Flatten(pattern);
708
709   static const int kMaxInitialListCapacity = 16;
710
711   ZoneScope zone_scope(isolate->runtime_zone());
712
713   // Find (up to limit) indices of separator and end-of-string in subject
714   int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
715   ZoneList<int> indices(initial_capacity, zone_scope.zone());
716
717   FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
718                             zone_scope.zone());
719
720   if (static_cast<uint32_t>(indices.length()) < limit) {
721     indices.Add(subject_length, zone_scope.zone());
722   }
723
724   // The list indices now contains the end of each part to create.
725
726   // Create JSArray of substrings separated by separator.
727   int part_count = indices.length();
728
729   Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
730   JSObject::EnsureCanContainHeapObjectElements(result);
731   result->set_length(Smi::FromInt(part_count));
732
733   DCHECK(result->HasFastObjectElements());
734
735   if (part_count == 1 && indices.at(0) == subject_length) {
736     FixedArray::cast(result->elements())->set(0, *subject);
737     return *result;
738   }
739
740   Handle<FixedArray> elements(FixedArray::cast(result->elements()));
741   int part_start = 0;
742   for (int i = 0; i < part_count; i++) {
743     HandleScope local_loop_handle(isolate);
744     int part_end = indices.at(i);
745     Handle<String> substring =
746         isolate->factory()->NewProperSubString(subject, part_start, part_end);
747     elements->set(i, *substring);
748     part_start = part_end + pattern_length;
749   }
750
751   if (limit == 0xffffffffu) {
752     if (result->HasFastObjectElements()) {
753       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
754                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
755     }
756   }
757
758   return *result;
759 }
760
761
762 RUNTIME_FUNCTION(Runtime_RegExpCompile) {
763   HandleScope scope(isolate);
764   DCHECK(args.length() == 3);
765   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, re, 0);
766   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
767   CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
768   Handle<Object> result;
769   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result,
770                                      RegExpImpl::Compile(re, pattern, flags));
771   return *result;
772 }
773
774
775 RUNTIME_FUNCTION(Runtime_RegExpExecRT) {
776   HandleScope scope(isolate);
777   DCHECK(args.length() == 4);
778   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
779   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
780   CONVERT_INT32_ARG_CHECKED(index, 2);
781   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
782   // Due to the way the JS calls are constructed this must be less than the
783   // length of a string, i.e. it is always a Smi.  We check anyway for security.
784   RUNTIME_ASSERT(index >= 0);
785   RUNTIME_ASSERT(index <= subject->length());
786   isolate->counters()->regexp_entry_runtime()->Increment();
787   Handle<Object> result;
788   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
789       isolate, result,
790       RegExpImpl::Exec(regexp, subject, index, last_match_info));
791   return *result;
792 }
793
794
795 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
796   HandleScope handle_scope(isolate);
797   DCHECK(args.length() == 3);
798   CONVERT_SMI_ARG_CHECKED(size, 0);
799   RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
800   CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
801   CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
802   Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
803   Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
804   Handle<JSObject> object =
805       isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false);
806   Handle<JSArray> array = Handle<JSArray>::cast(object);
807   array->set_elements(*elements);
808   array->set_length(Smi::FromInt(size));
809   // Write in-object properties after the length of the array.
810   array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
811   array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
812   return *array;
813 }
814
815
816 RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) {
817   HandleScope scope(isolate);
818   DCHECK(args.length() == 6);
819   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
820   CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
821   // If source is the empty string we set it to "(?:)" instead as
822   // suggested by ECMA-262, 5th, section 15.10.4.1.
823   if (source->length() == 0) source = isolate->factory()->query_colon_string();
824
825   CONVERT_ARG_HANDLE_CHECKED(Object, global, 2);
826   if (!global->IsTrue()) global = isolate->factory()->false_value();
827
828   CONVERT_ARG_HANDLE_CHECKED(Object, ignoreCase, 3);
829   if (!ignoreCase->IsTrue()) ignoreCase = isolate->factory()->false_value();
830
831   CONVERT_ARG_HANDLE_CHECKED(Object, multiline, 4);
832   if (!multiline->IsTrue()) multiline = isolate->factory()->false_value();
833
834   CONVERT_ARG_HANDLE_CHECKED(Object, sticky, 5);
835   if (!sticky->IsTrue()) sticky = isolate->factory()->false_value();
836
837   Map* map = regexp->map();
838   Object* constructor = map->constructor();
839   if (!FLAG_harmony_regexps && constructor->IsJSFunction() &&
840       JSFunction::cast(constructor)->initial_map() == map) {
841     // If we still have the original map, set in-object properties directly.
842     regexp->InObjectPropertyAtPut(JSRegExp::kSourceFieldIndex, *source);
843     // Both true and false are immovable immortal objects so no need for write
844     // barrier.
845     regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global,
846                                   SKIP_WRITE_BARRIER);
847     regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignoreCase,
848                                   SKIP_WRITE_BARRIER);
849     regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline,
850                                   SKIP_WRITE_BARRIER);
851     regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex,
852                                   Smi::FromInt(0), SKIP_WRITE_BARRIER);
853     return *regexp;
854   }
855
856   // Map has changed, so use generic, but slower, method.  We also end here if
857   // the --harmony-regexp flag is set, because the initial map does not have
858   // space for the 'sticky' flag, since it is from the snapshot, but must work
859   // both with and without --harmony-regexp.  When sticky comes out from under
860   // the flag, we will be able to use the fast initial map.
861   PropertyAttributes final =
862       static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE);
863   PropertyAttributes writable =
864       static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);
865   Handle<Object> zero(Smi::FromInt(0), isolate);
866   Factory* factory = isolate->factory();
867   JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->source_string(),
868                                            source, final).Check();
869   JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(),
870                                            global, final).Check();
871   JSObject::SetOwnPropertyIgnoreAttributes(
872       regexp, factory->ignore_case_string(), ignoreCase, final).Check();
873   JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->multiline_string(),
874                                            multiline, final).Check();
875   if (FLAG_harmony_regexps) {
876     JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
877                                              sticky, final).Check();
878   }
879   JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->last_index_string(),
880                                            zero, writable).Check();
881   return *regexp;
882 }
883
884
885 RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) {
886   HandleScope scope(isolate);
887   DCHECK(args.length() == 4);
888   CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0);
889   CONVERT_SMI_ARG_CHECKED(index, 1);
890   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2);
891   CONVERT_ARG_HANDLE_CHECKED(String, flags, 3);
892
893   // Get the RegExp function from the context in the literals array.
894   // This is the RegExp function from the context in which the
895   // function was created.  We do not use the RegExp function from the
896   // current native context because this might be the RegExp function
897   // from another context which we should not have access to.
898   Handle<JSFunction> constructor = Handle<JSFunction>(
899       JSFunction::NativeContextFromLiterals(*literals)->regexp_function());
900   // Compute the regular expression literal.
901   Handle<Object> regexp;
902   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
903       isolate, regexp,
904       RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags));
905   literals->set(index, *regexp);
906   return *regexp;
907 }
908
909
910 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
911 // separate last match info.  See comment on that function.
912 template <bool has_capture>
913 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
914                                     Handle<JSRegExp> regexp,
915                                     Handle<JSArray> last_match_array,
916                                     Handle<JSArray> result_array) {
917   DCHECK(subject->IsFlat());
918   DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
919
920   int capture_count = regexp->CaptureCount();
921   int subject_length = subject->length();
922
923   static const int kMinLengthToCache = 0x1000;
924
925   if (subject_length > kMinLengthToCache) {
926     Handle<Object> cached_answer(
927         RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(),
928                                    RegExpResultsCache::REGEXP_MULTIPLE_INDICES),
929         isolate);
930     if (*cached_answer != Smi::FromInt(0)) {
931       Handle<FixedArray> cached_fixed_array =
932           Handle<FixedArray>(FixedArray::cast(*cached_answer));
933       // The cache FixedArray is a COW-array and can therefore be reused.
934       JSArray::SetContent(result_array, cached_fixed_array);
935       // The actual length of the result array is stored in the last element of
936       // the backing store (the backing FixedArray may have a larger capacity).
937       Object* cached_fixed_array_last_element =
938           cached_fixed_array->get(cached_fixed_array->length() - 1);
939       Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);
940       result_array->set_length(js_array_length);
941       RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
942                                    NULL);
943       return *result_array;
944     }
945   }
946
947   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
948   if (global_cache.HasException()) return isolate->heap()->exception();
949
950   // Ensured in Runtime_RegExpExecMultiple.
951   DCHECK(result_array->HasFastObjectElements());
952   Handle<FixedArray> result_elements(
953       FixedArray::cast(result_array->elements()));
954   if (result_elements->length() < 16) {
955     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
956   }
957
958   FixedArrayBuilder builder(result_elements);
959
960   // Position to search from.
961   int match_start = -1;
962   int match_end = 0;
963   bool first = true;
964
965   // Two smis before and after the match, for very long strings.
966   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
967
968   while (true) {
969     int32_t* current_match = global_cache.FetchNext();
970     if (current_match == NULL) break;
971     match_start = current_match[0];
972     builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
973     if (match_end < match_start) {
974       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
975                                                 match_start);
976     }
977     match_end = current_match[1];
978     {
979       // Avoid accumulating new handles inside loop.
980       HandleScope temp_scope(isolate);
981       Handle<String> match;
982       if (!first) {
983         match = isolate->factory()->NewProperSubString(subject, match_start,
984                                                        match_end);
985       } else {
986         match =
987             isolate->factory()->NewSubString(subject, match_start, match_end);
988         first = false;
989       }
990
991       if (has_capture) {
992         // Arguments array to replace function is match, captures, index and
993         // subject, i.e., 3 + capture count in total.
994         Handle<FixedArray> elements =
995             isolate->factory()->NewFixedArray(3 + capture_count);
996
997         elements->set(0, *match);
998         for (int i = 1; i <= capture_count; i++) {
999           int start = current_match[i * 2];
1000           if (start >= 0) {
1001             int end = current_match[i * 2 + 1];
1002             DCHECK(start <= end);
1003             Handle<String> substring =
1004                 isolate->factory()->NewSubString(subject, start, end);
1005             elements->set(i, *substring);
1006           } else {
1007             DCHECK(current_match[i * 2 + 1] < 0);
1008             elements->set(i, isolate->heap()->undefined_value());
1009           }
1010         }
1011         elements->set(capture_count + 1, Smi::FromInt(match_start));
1012         elements->set(capture_count + 2, *subject);
1013         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1014       } else {
1015         builder.Add(*match);
1016       }
1017     }
1018   }
1019
1020   if (global_cache.HasException()) return isolate->heap()->exception();
1021
1022   if (match_start >= 0) {
1023     // Finished matching, with at least one match.
1024     if (match_end < subject_length) {
1025       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1026                                                 subject_length);
1027     }
1028
1029     RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1030                                  NULL);
1031
1032     if (subject_length > kMinLengthToCache) {
1033       // Store the length of the result array into the last element of the
1034       // backing FixedArray.
1035       builder.EnsureCapacity(1);
1036       Handle<FixedArray> fixed_array = builder.array();
1037       fixed_array->set(fixed_array->length() - 1,
1038                        Smi::FromInt(builder.length()));
1039       // Cache the result and turn the FixedArray into a COW array.
1040       RegExpResultsCache::Enter(isolate, subject,
1041                                 handle(regexp->data(), isolate), fixed_array,
1042                                 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1043     }
1044     return *builder.ToJSArray(result_array);
1045   } else {
1046     return isolate->heap()->null_value();  // No matches at all.
1047   }
1048 }
1049
1050
1051 // This is only called for StringReplaceGlobalRegExpWithFunction.  This sets
1052 // lastMatchInfoOverride to maintain the last match info, so we don't need to
1053 // set any other last match array info.
1054 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1055   HandleScope handles(isolate);
1056   DCHECK(args.length() == 4);
1057
1058   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1059   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1060   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
1061   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1062   RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
1063   RUNTIME_ASSERT(result_array->HasFastObjectElements());
1064
1065   subject = String::Flatten(subject);
1066   RUNTIME_ASSERT(regexp->GetFlags().is_global());
1067
1068   if (regexp->CaptureCount() == 0) {
1069     return SearchRegExpMultiple<false>(isolate, subject, regexp,
1070                                        last_match_info, result_array);
1071   } else {
1072     return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1073                                       result_array);
1074   }
1075 }
1076
1077
1078 RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) {
1079   SealHandleScope shs(isolate);
1080   return __RT_impl_Runtime_RegExpConstructResult(args, isolate);
1081 }
1082
1083
1084 RUNTIME_FUNCTION(RuntimeReference_RegExpExec) {
1085   SealHandleScope shs(isolate);
1086   return __RT_impl_Runtime_RegExpExecRT(args, isolate);
1087 }
1088
1089
1090 RUNTIME_FUNCTION(RuntimeReference_IsRegExp) {
1091   SealHandleScope shs(isolate);
1092   DCHECK(args.length() == 1);
1093   CONVERT_ARG_CHECKED(Object, obj, 0);
1094   return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1095 }
1096 }
1097 }  // namespace v8::internal