Revert r12258, r12300 and r12302 (global regexp).
author: yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 21 Aug 2012 09:46:23 +0000 (09:46 +0000)
committer: yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 21 Aug 2012 09:46:23 +0000 (09:46 +0000)
R=ulan@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/10825472

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12350 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/arm/code-stubs-arm.cc
src/assembler.cc
src/ia32/code-stubs-ia32.cc
src/isolate.h
src/jsregexp.cc
src/jsregexp.h
src/mips/code-stubs-mips.cc
src/runtime.cc
src/x64/code-stubs-x64.cc
test/cctest/test-regexp.cc

index d9e3a3d..88178ff 100644 (file)
@@ -4818,7 +4818,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
   STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
   __ add(r2, r2, Operand(2));  // r2 was a smi.
   // Check that the static offsets vector buffer is large enough.
-  __ cmp(r2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize));
+  __ cmp(r2, Operand(OffsetsVector::kStaticOffsetsVectorSize));
   __ b(hi, &runtime);
 
   // r2: Number of capture registers
index a58f77f..6dcd2a0 100644 (file)
@@ -1092,7 +1092,7 @@ ExternalReference ExternalReference::re_word_character_map() {
 ExternalReference ExternalReference::address_of_static_offsets_vector(
     Isolate* isolate) {
   return ExternalReference(
-      reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector()));
+      OffsetsVector::static_offsets_vector_address(isolate));
 }
 
 ExternalReference ExternalReference::address_of_regexp_stack_memory_address(
index 140db8a..80b0f22 100644 (file)
@@ -3748,7 +3748,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
   STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
   __ add(edx, Immediate(2));  // edx was a smi.
   // Check that the static offsets vector buffer is large enough.
-  __ cmp(edx, Isolate::kJSRegexpStaticOffsetsVectorSize);
+  __ cmp(edx, OffsetsVector::kStaticOffsetsVectorSize);
   __ j(above, &runtime);
 
   // ecx: RegExp data (FixedArray)
index 3461f97..f654459 100644 (file)
@@ -308,7 +308,7 @@ class ThreadLocalTop BASE_EMBEDDED {
 
 #define ISOLATE_INIT_ARRAY_LIST(V)                                             \
   /* SerializerDeserializer state. */                                          \
-  V(int32_t, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize) \
+  V(int, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize)     \
   V(int, bad_char_shift_table, kUC16AlphabetSize)                              \
   V(int, good_suffix_shift_table, (kBMMaxShift + 1))                           \
   V(int, suffix_table, (kBMMaxShift + 1))                                      \
index ae25432..e730e14 100644 (file)
@@ -278,12 +278,11 @@ static void SetAtomLastCapture(FixedArray* array,
 }
 
 
-int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
-                            Handle<String> subject,
-                            int index,
-                            int32_t* output,
-                            int output_size) {
-  Isolate* isolate = regexp->GetIsolate();
+Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
+                                    Handle<String> subject,
+                                    int index,
+                                    Handle<JSArray> last_match_info) {
+  Isolate* isolate = re->GetIsolate();
 
   ASSERT(0 <= index);
   ASSERT(index <= subject->length());
@@ -291,16 +290,15 @@ int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
   if (!subject->IsFlat()) FlattenString(subject);
   AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
 
-  String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
+  String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
   int needle_len = needle->length();
   ASSERT(needle->IsFlat());
-  ASSERT_LT(0, needle_len);
 
-  if (index + needle_len > subject->length()) {
-    return RegExpImpl::RE_FAILURE;
-  }
+  if (needle_len != 0) {
+    if (index + needle_len > subject->length()) {
+      return isolate->factory()->null_value();
+    }
 
-  for (int i = 0; i < output_size; i += 2) {
     String::FlatContent needle_content = needle->GetFlatContent();
     String::FlatContent subject_content = subject->GetFlatContent();
     ASSERT(needle_content.IsFlat());
@@ -325,36 +323,15 @@ int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
                                subject_content.ToUC16Vector(),
                                needle_content.ToUC16Vector(),
                                index)));
-    if (index == -1) {
-      return i / 2;  // Return number of matches.
-    } else {
-      output[i] = index;
-      output[i+1] = index + needle_len;
-      index += needle_len;
-    }
+    if (index == -1) return isolate->factory()->null_value();
   }
-  return output_size / 2;
-}
+  ASSERT(last_match_info->HasFastObjectElements());
 
-
-Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
-                                    Handle<String> subject,
-                                    int index,
-                                    Handle<JSArray> last_match_info) {
-  Isolate* isolate = re->GetIsolate();
-
-  static const int kNumRegisters = 2;
-  STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
-  int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
-
-  int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters);
-
-  if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
-
-  ASSERT_EQ(res, RegExpImpl::RE_SUCCESS);
-  NoHandleAllocation no_handles;
-  FixedArray* array = FixedArray::cast(last_match_info->elements());
-  SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);
+  {
+    NoHandleAllocation no_handles;
+    FixedArray* array = FixedArray::cast(last_match_info->elements());
+    SetAtomLastCapture(array, *subject, index, index + needle_len);
+  }
   return last_match_info;
 }
 
@@ -534,11 +511,7 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
 
 #ifdef V8_INTERPRETED_REGEXP
   // Byte-code regexp needs space allocated for all its registers.
-  // The result captures are copied to the start of the registers array
-  // if the match succeeds.  This way those registers are not clobbered
-  // when we set the last match info from last successful match.
-  return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
-         (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
+  return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
 #else  // V8_INTERPRETED_REGEXP
   // Native regexp only needs room to output captures. Registers are handled
   // internally.
@@ -547,11 +520,27 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
 }
 
 
-int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
-                                Handle<String> subject,
-                                int index,
-                                int32_t* output,
-                                int output_size) {
+int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
+                                        int registers_per_match,
+                                        int* max_matches) {
+#ifdef V8_INTERPRETED_REGEXP
+  // Global loop in interpreted regexp is not implemented.  Therefore we choose
+  // the size of the offsets vector so that it can only store one match.
+  *max_matches = 1;
+  return registers_per_match;
+#else  // V8_INTERPRETED_REGEXP
+  int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize);
+  *max_matches = size / registers_per_match;
+  return size;
+#endif  // V8_INTERPRETED_REGEXP
+}
+
+
+int RegExpImpl::IrregexpExecRaw(
+    Handle<JSRegExp> regexp,
+    Handle<String> subject,
+    int index,
+    Vector<int> output) {
   Isolate* isolate = regexp->GetIsolate();
 
   Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
@@ -563,19 +552,15 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
   bool is_ascii = subject->IsAsciiRepresentationUnderneath();
 
 #ifndef V8_INTERPRETED_REGEXP
-  ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
+  ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
   do {
     EnsureCompiledIrregexp(regexp, subject, is_ascii);
     Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
-    // The stack is used to allocate registers for the compiled regexp code.
-    // This means that in case of failure, the output registers array is left
-    // untouched and contains the capture results from the previous successful
-    // match.  We can use that to set the last match info lazily.
     NativeRegExpMacroAssembler::Result res =
         NativeRegExpMacroAssembler::Match(code,
                                           subject,
-                                          output,
-                                          output_size,
+                                          output.start(),
+                                          output.length(),
                                           index,
                                           isolate);
     if (res != NativeRegExpMacroAssembler::RETRY) {
@@ -602,29 +587,22 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
   return RE_EXCEPTION;
 #else  // V8_INTERPRETED_REGEXP
 
-  ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp));
+  ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
   // We must have done EnsureCompiledIrregexp, so we can get the number of
   // registers.
+  int* register_vector = output.start();
   int number_of_capture_registers =
       (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
-  int32_t* raw_output = &output[number_of_capture_registers];
-  // We do not touch the actual capture result registers until we know there
-  // has been a match so that we can use those capture results to set the
-  // last match info.
   for (int i = number_of_capture_registers - 1; i >= 0; i--) {
-    raw_output[i] = -1;
+    register_vector[i] = -1;
   }
   Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
 
   IrregexpResult result = IrregexpInterpreter::Match(isolate,
                                                      byte_codes,
                                                      subject,
-                                                     raw_output,
+                                                     register_vector,
                                                      index);
-  if (result == RE_SUCCESS) {
-    // Copy capture results to the start of the registers array.
-    memcpy(output, raw_output, number_of_capture_registers * sizeof(int32_t));
-  }
   if (result == RE_EXCEPTION) {
     ASSERT(!isolate->has_pending_exception());
     isolate->StackOverflow();
@@ -634,44 +612,50 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
 }
 
 
-Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
                                         Handle<String> subject,
                                         int previous_index,
                                         Handle<JSArray> last_match_info) {
-  Isolate* isolate = regexp->GetIsolate();
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+  Isolate* isolate = jsregexp->GetIsolate();
+  ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
 
   // Prepare space for the return values.
-#if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
+#ifdef V8_INTERPRETED_REGEXP
+#ifdef DEBUG
   if (FLAG_trace_regexp_bytecodes) {
-    String* pattern = regexp->Pattern();
+    String* pattern = jsregexp->Pattern();
     PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
     PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
   }
 #endif
-  int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
+#endif
+  int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
   if (required_registers < 0) {
     // Compiling failed with an exception.
     ASSERT(isolate->has_pending_exception());
     return Handle<Object>::null();
   }
 
-  int32_t* output_registers = NULL;
-  if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
-    output_registers = NewArray<int32_t>(required_registers);
-  }
-  SmartArrayPointer<int32_t> auto_release(output_registers);
-  if (output_registers == NULL) {
-    output_registers = isolate->jsregexp_static_offsets_vector();
-  }
+  OffsetsVector registers(required_registers, isolate);
 
-  int res = RegExpImpl::IrregexpExecRaw(
-      regexp, subject, previous_index, output_registers, required_registers);
+  int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index,
+                                        Vector<int>(registers.vector(),
+                                                    registers.length()));
   if (res == RE_SUCCESS) {
-    int capture_count =
-        IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
-    return SetLastMatchInfo(
-        last_match_info, subject, capture_count, output_registers);
+    int capture_register_count =
+        (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
+    last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
+    AssertNoAllocation no_gc;
+    int* register_vector = registers.vector();
+    FixedArray* array = FixedArray::cast(last_match_info->elements());
+    for (int i = 0; i < capture_register_count; i += 2) {
+      SetCapture(array, i, register_vector[i]);
+      SetCapture(array, i + 1, register_vector[i + 1]);
+    }
+    SetLastCaptureCount(array, capture_register_count);
+    SetLastSubject(array, *subject);
+    SetLastInput(array, *subject);
+    return last_match_info;
   }
   if (res == RE_EXCEPTION) {
     ASSERT(isolate->has_pending_exception());
@@ -682,145 +666,6 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
 }
 
 
-Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
-                                             Handle<String> subject,
-                                             int capture_count,
-                                             int32_t* match) {
-  int capture_register_count = (capture_count + 1) * 2;
-  last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
-  AssertNoAllocation no_gc;
-  FixedArray* array = FixedArray::cast(last_match_info->elements());
-  if (match != NULL) {
-    for (int i = 0; i < capture_register_count; i += 2) {
-      SetCapture(array, i, match[i]);
-      SetCapture(array, i + 1, match[i + 1]);
-    }
-  }
-  SetLastCaptureCount(array, capture_register_count);
-  SetLastSubject(array, *subject);
-  SetLastInput(array, *subject);
-  return last_match_info;
-}
-
-
-RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
-                                     Handle<String> subject,
-                                     bool is_global,
-                                     Isolate* isolate) {
-#ifdef V8_INTERPRETED_REGEXP
-  bool interpreted = true;
-#else
-  bool interpreted = false;
-#endif  // V8_INTERPRETED_REGEXP
-
-  regexp_ = regexp;
-  subject_ = subject;
-
-  if (regexp_->TypeTag() == JSRegExp::ATOM) {
-    static const int kAtomRegistersPerMatch = 2;
-    registers_per_match_ = kAtomRegistersPerMatch;
-    // There is no distinction between interpreted and native for atom regexps.
-    interpreted = false;
-  } else {
-    registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
-    if (registers_per_match_ < 0) {
-      num_matches_ = -1;  // Signal exception.
-      return;
-    }
-  }
-
-  if (is_global && !interpreted) {
-    register_array_size_ =
-        Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
-    max_matches_ = register_array_size_ / registers_per_match_;
-  } else {
-    // Global loop in interpreted regexp is not implemented.  We choose
-    // the size of the offsets vector so that it can only store one match.
-    register_array_size_ = registers_per_match_;
-    max_matches_ = 1;
-  }
-
-  if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
-    register_array_ = NewArray<int32_t>(register_array_size_);
-  } else {
-    register_array_ = isolate->jsregexp_static_offsets_vector();
-  }
-
-  // Set state so that fetching the results the first time triggers a call
-  // to the compiled regexp.
-  current_match_index_ = max_matches_ - 1;
-  num_matches_ = max_matches_;
-  ASSERT(registers_per_match_ >= 2);  // Each match has at least one capture.
-  ASSERT_GE(register_array_size_, registers_per_match_);
-  int32_t* last_match =
-      &register_array_[current_match_index_ * registers_per_match_];
-  last_match[0] = -1;
-  last_match[1] = 0;
-}
-
-
-RegExpImpl::GlobalCache::~GlobalCache() {
-  // Deallocate the register array if we allocated it in the constructor
-  // (as opposed to using the existing jsregexp_static_offsets_vector).
-  if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
-    DeleteArray(register_array_);
-  }
-}
-
-
-int32_t* RegExpImpl::GlobalCache::FetchNext() {
-  current_match_index_++;
-  if (current_match_index_ >= num_matches_) {
-    // Current batch of results exhausted.
-    // Fail if last batch was not even fully filled.
-    if (num_matches_ < max_matches_) {
-      num_matches_ = 0;  // Signal failed match.
-      return NULL;
-    }
-
-    int32_t* last_match =
-        &register_array_[(current_match_index_ - 1) * registers_per_match_];
-    int last_end_index = last_match[1];
-
-    if (regexp_->TypeTag() == JSRegExp::ATOM) {
-      num_matches_ = RegExpImpl::AtomExecRaw(regexp_,
-                                             subject_,
-                                             last_end_index,
-                                             register_array_,
-                                             register_array_size_);
-    } else {
-      int last_start_index = last_match[0];
-      if (last_start_index == last_end_index) last_end_index++;
-      if (last_end_index > subject_->length()) {
-        num_matches_ = 0;  // Signal failed match.
-        return NULL;
-      }
-      num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_,
-                                                 subject_,
-                                                 last_end_index,
-                                                 register_array_,
-                                                 register_array_size_);
-    }
-
-    if (num_matches_ <= 0) return NULL;
-    current_match_index_ = 0;
-    return register_array_;
-  } else {
-    return &register_array_[current_match_index_ * registers_per_match_];
-  }
-}
-
-
-int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() {
-  int index = current_match_index_ * registers_per_match_;
-  if (num_matches_ == 0) {
-    // After a failed match we shift back by one result.
-    index -= registers_per_match_;
-  }
-  return &register_array_[index];
-}
-
-
 // -------------------------------------------------------------------
 // Implementation of the Irregexp regular expression engine.
 //
index 96825ce..9a84237 100644 (file)
@@ -93,14 +93,6 @@ class RegExpImpl {
                           JSRegExp::Flags flags,
                           Handle<String> match_pattern);
 
-
-  static int AtomExecRaw(Handle<JSRegExp> regexp,
-                         Handle<String> subject,
-                         int index,
-                         int32_t* output,
-                         int output_size);
-
-
   static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
                                  Handle<String> subject,
                                  int index,
@@ -113,11 +105,17 @@ class RegExpImpl {
   // This ensures that the regexp is compiled for the subject, and that
   // the subject is flat.
   // Returns the number of integer spaces required by IrregexpExecOnce
-  // as its "registers" argument.  If the regexp cannot be compiled,
+  // as its "registers" argument. If the regexp cannot be compiled,
   // an exception is set as pending, and this function returns negative.
   static int IrregexpPrepare(Handle<JSRegExp> regexp,
                              Handle<String> subject);
 
+  // Calculate the size of offsets vector for the case of global regexp
+  // and the number of matches this vector is able to store.
+  static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
+                                     int registers_per_match,
+                                     int* max_matches);
+
   // Execute a regular expression on the subject, starting from index.
   // If matching succeeds, return the number of matches.  This can be larger
   // than one in the case of global regular expressions.
@@ -127,57 +125,17 @@ class RegExpImpl {
   static int IrregexpExecRaw(Handle<JSRegExp> regexp,
                              Handle<String> subject,
                              int index,
-                             int32_t* output,
-                             int output_size);
+                             Vector<int> registers);
 
   // Execute an Irregexp bytecode pattern.
   // On a successful match, the result is a JSArray containing
-  // captured positions.  On a failure, the result is the null value.
+  // captured positions. On a failure, the result is the null value.
   // Returns an empty handle in case of an exception.
   static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
                                      Handle<String> subject,
                                      int index,
                                      Handle<JSArray> lastMatchInfo);
 
-  // Set last match info.  If match is NULL, then setting captures is omitted.
-  static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info,
-                                          Handle<String> subject,
-                                          int capture_count,
-                                          int32_t* match);
-
-
-  class GlobalCache {
-   public:
-    GlobalCache(Handle<JSRegExp> regexp,
-                Handle<String> subject,
-                bool is_global,
-                Isolate* isolate);
-
-    ~GlobalCache();
-
-    // Fetch the next entry in the cache for global regexp match results.
-    // This does not set the last match info.  Upon failure, NULL is returned.
-    // The cause can be checked with Result().  The previous
-    // result is still in available in memory when a failure happens.
-    int32_t* FetchNext();
-
-    int32_t* LastSuccessfulMatch();
-
-    inline bool HasException() { return num_matches_ < 0; }
-
-   private:
-    int num_matches_;
-    int max_matches_;
-    int current_match_index_;
-    int registers_per_match_;
-    // Pointer to the last set of captures.
-    int32_t* register_array_;
-    int register_array_size_;
-    Handle<JSRegExp> regexp_;
-    Handle<String> subject_;
-  };
-
-
   // Array index in the lastMatchInfo array.
   static const int kLastCaptureCount = 0;
   static const int kLastSubject = 1;
@@ -237,10 +195,30 @@ class RegExpImpl {
   static const int kRegWxpCompiledLimit = 1 * MB;
 
  private:
+  static String* last_ascii_string_;
+  static String* two_byte_cached_string_;
+
   static bool CompileIrregexp(
       Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
   static inline bool EnsureCompiledIrregexp(
       Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
+
+
+  // Set the subject cache.  The previous string buffer is not deleted, so the
+  // caller should ensure that it doesn't leak.
+  static void SetSubjectCache(String* subject,
+                              char* utf8_subject,
+                              int uft8_length,
+                              int character_position,
+                              int utf8_position);
+
+  // A one element cache of the last utf8_subject string and its length.  The
+  // subject JS String object is cached in the heap.  We also cache a
+  // translation between position and utf8 position.
+  static char* utf8_subject_cache_;
+  static int utf8_length_cache_;
+  static int utf8_position_;
+  static int character_position_;
 };
 
 
@@ -1644,6 +1622,40 @@ class RegExpEngine: public AllStatic {
 };
 
 
+class OffsetsVector {
+ public:
+  inline OffsetsVector(int num_registers, Isolate* isolate)
+      : offsets_vector_length_(num_registers) {
+    if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
+      vector_ = NewArray<int>(offsets_vector_length_);
+    } else {
+      vector_ = isolate->jsregexp_static_offsets_vector();
+    }
+  }
+  inline ~OffsetsVector() {
+    if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
+      DeleteArray(vector_);
+      vector_ = NULL;
+    }
+  }
+  inline int* vector() { return vector_; }
+  inline int length() { return offsets_vector_length_; }
+
+  static const int kStaticOffsetsVectorSize =
+      Isolate::kJSRegexpStaticOffsetsVectorSize;
+
+ private:
+  static Address static_offsets_vector_address(Isolate* isolate) {
+    return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector());
+  }
+
+  int* vector_;
+  int offsets_vector_length_;
+
+  friend class ExternalReference;
+};
+
+
 } }  // namespace v8::internal
 
 #endif  // V8_JSREGEXP_H_
index a5c80b8..abc82a7 100644 (file)
@@ -4977,8 +4977,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
   STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
   __ Addu(a2, a2, Operand(2));  // a2 was a smi.
   // Check that the static offsets vector buffer is large enough.
-  __ Branch(
-      &runtime, hi, a2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize));
+  __ Branch(&runtime, hi, a2, Operand(OffsetsVector::kStaticOffsetsVectorSize));
 
   // a2: Number of capture registers
   // regexp_data: RegExp data (FixedArray)
index 3017f3f..4e0a86b 100644 (file)
@@ -2574,24 +2574,28 @@ class ReplacementStringBuilder {
 class CompiledReplacement {
  public:
   explicit CompiledReplacement(Zone* zone)
-      : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
+      : parts_(1, zone), replacement_substrings_(0, zone),
+        simple_hint_(false),
+        zone_(zone) {}
 
-  // Return whether the replacement is simple.
-  bool Compile(Handle<String> replacement,
+  void Compile(Handle<String> replacement,
                int capture_count,
                int subject_length);
 
-  // Use Apply only if Compile returned false.
   void Apply(ReplacementStringBuilder* builder,
              int match_from,
              int match_to,
-             int32_t* match);
+             Handle<JSArray> last_match_info);
 
   // Number of distinct parts of the replacement pattern.
   int parts() {
     return parts_.length();
   }
 
+  bool simple_hint() {
+    return simple_hint_;
+  }
+
   Zone* zone() const { return zone_; }
 
  private:
@@ -2652,11 +2656,11 @@ class CompiledReplacement {
   };
 
   template<typename Char>
-  bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
-                               Vector<Char> characters,
-                               int capture_count,
-                               int subject_length,
-                               Zone* zone) {
+  static bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
+                                      Vector<Char> characters,
+                                      int capture_count,
+                                      int subject_length,
+                                      Zone* zone) {
     int length = characters.length();
     int last = 0;
     for (int i = 0; i < length; i++) {
@@ -2750,7 +2754,7 @@ class CompiledReplacement {
     }
     if (length > last) {
       if (last == 0) {
-        // Replacement is simple.  Do not use Apply to do the replacement.
+        parts->Add(ReplacementPart::ReplacementString(), zone);
         return true;
       } else {
         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
@@ -2761,35 +2765,33 @@ class CompiledReplacement {
 
   ZoneList<ReplacementPart> parts_;
   ZoneList<Handle<String> > replacement_substrings_;
+  bool simple_hint_;
   Zone* zone_;
 };
 
 
-bool CompiledReplacement::Compile(Handle<String> replacement,
+void CompiledReplacement::Compile(Handle<String> replacement,
                                   int capture_count,
                                   int subject_length) {
   {
     AssertNoAllocation no_alloc;
     String::FlatContent content = replacement->GetFlatContent();
     ASSERT(content.IsFlat());
-    bool simple = false;
     if (content.IsAscii()) {
-      simple = ParseReplacementPattern(&parts_,
-                                       content.ToAsciiVector(),
-                                       capture_count,
-                                       subject_length,
-                                       zone());
+      simple_hint_ = ParseReplacementPattern(&parts_,
+                                             content.ToAsciiVector(),
+                                             capture_count,
+                                             subject_length,
+                                             zone());
     } else {
       ASSERT(content.IsTwoByte());
-      simple = ParseReplacementPattern(&parts_,
-                                       content.ToUC16Vector(),
-                                       capture_count,
-                                       subject_length,
-                                       zone());
+      simple_hint_ = ParseReplacementPattern(&parts_,
+                                             content.ToUC16Vector(),
+                                             capture_count,
+                                             subject_length,
+                                             zone());
     }
-    if (simple) return true;
   }
-
   Isolate* isolate = replacement->GetIsolate();
   // Find substrings of replacement string and create them as String objects.
   int substring_index = 0;
@@ -2809,15 +2811,13 @@ bool CompiledReplacement::Compile(Handle<String> replacement,
       substring_index++;
     }
   }
-  return false;
 }
 
 
 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
                                 int match_from,
                                 int match_to,
-                                int32_t* match) {
-  ASSERT_LT(0, parts_.length());
+                                Handle<JSArray> last_match_info) {
   for (int i = 0, n = parts_.length(); i < n; i++) {
     ReplacementPart part = parts_[i];
     switch (part.tag) {
@@ -2833,8 +2833,9 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
       }
       case SUBJECT_CAPTURE: {
         int capture = part.data;
-        int from = match[capture * 2];
-        int to = match[capture * 2 + 1];
+        FixedArray* match_info = FixedArray::cast(last_match_info->elements());
+        int from = RegExpImpl::GetCapture(match_info, capture * 2);
+        int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1);
         if (from >= 0 && to > from) {
           builder->AddSubjectSlice(from, to);
         }
@@ -2956,19 +2957,85 @@ void FindStringIndicesDispatch(Isolate* isolate,
 }
 
 
+// Builder entries reserved per match: two smis before and after the match, for very long strings.
+const int kMaxBuilderEntriesPerRegExpMatch = 5;  // Presumably 2 + 2 + 1 for the match itself.
+
+
+static void SetLastMatchInfoNoCaptures(Handle<String> subject,
+                                       Handle<JSArray> last_match_info,
+                                       int match_start,
+                                       int match_end) {
+  // Fill last_match_info with a single capture: the match itself, found in subject.
+  last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);  // Two capture slots + overhead.
+  AssertNoAllocation no_gc;  // elements is held as a raw pointer below.
+  FixedArray* elements = FixedArray::cast(last_match_info->elements());
+  RegExpImpl::SetLastCaptureCount(elements, 2);  // Slots 0 and 1: match start and end.
+  RegExpImpl::SetLastInput(elements, *subject);
+  RegExpImpl::SetLastSubject(elements, *subject);
+  RegExpImpl::SetCapture(elements, 0, match_start);
+  RegExpImpl::SetCapture(elements, 1, match_end);
+}
+
+
+template <typename SubjectChar, typename PatternChar>
+static bool SearchStringMultiple(Isolate* isolate,
+                                 Vector<const SubjectChar> subject,
+                                 Vector<const PatternChar> pattern,
+                                 String* pattern_string,
+                                 FixedArrayBuilder* builder,
+                                 int* match_pos) {
+  int pos = *match_pos;  // Start of the previous match, as left by the caller.
+  int subject_length = subject.length();
+  int pattern_length = pattern.length();
+  int max_search_start = subject_length - pattern_length;  // Last index where a match can begin.
+  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
+  while (pos <= max_search_start) {
+    if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
+      *match_pos = pos;  // Save progress so a later call can resume here.
+      return false;  // Builder is full; the search is not finished.
+    }
+    // Position of end of previous match.
+    int match_end = pos + pattern_length;
+    int new_pos = search.Search(subject, match_end);
+    if (new_pos >= 0) {
+      // A match.
+      if (new_pos > match_end) {  // Unmatched text lies between the two matches.
+        ReplacementStringBuilder::AddSubjectSlice(builder,
+            match_end,
+            new_pos);
+      }
+      pos = new_pos;
+      builder->Add(pattern_string);  // Record the match as the pattern string.
+    } else {
+      break;  // Negative result: presumably no further match.
+    }
+  }
+
+  if (pos < max_search_start) {  // Append the subject text from pos + pattern_length to the end.
+    ReplacementStringBuilder::AddSubjectSlice(builder,
+                                              pos + pattern_length,
+                                              subject_length);
+  }
+  *match_pos = pos;
+  return true;  // Search ran to completion.
+}
+
+
+
+
 template<typename ResultSeqString>
 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
     Isolate* isolate,
     Handle<String> subject,
     Handle<JSRegExp> pattern_regexp,
     Handle<String> replacement,
-    Handle<JSArray> last_match_info) {
+    Handle<JSArray> last_match_info,
+    Zone* zone) {
   ASSERT(subject->IsFlat());
   ASSERT(replacement->IsFlat());
 
-  Zone* zone = isolate->runtime_zone();
-  ZoneScope zone_space(zone, DELETE_ON_EXIT);
-  ZoneList<int> indices(8, zone);
+  ZoneScope zone_space(isolate->runtime_zone(), DELETE_ON_EXIT);
+  ZoneList<int> indices(8, isolate->runtime_zone());
   ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
   String* pattern =
       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
@@ -2976,8 +3043,8 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
   int pattern_len = pattern->length();
   int replacement_len = replacement->length();
 
-  FindStringIndicesDispatch(
-      isolate, *subject, pattern, &indices, 0xffffffff, zone);
+  FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
+                            zone);
 
   int matches = indices.length();
   if (matches == 0) return *subject;
@@ -3032,9 +3099,10 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
                         subject_len);
   }
 
-  int32_t match_indices[] = { indices.at(matches - 1),
-                            indices.at(matches - 1) + pattern_len };
-  RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
+  SetLastMatchInfoNoCaptures(subject,
+                             last_match_info,
+                             indices.at(matches - 1),
+                             indices.at(matches - 1) + pattern_len);
 
   return *result;
 }
@@ -3042,101 +3110,139 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
 
 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
     Isolate* isolate,
-    Handle<String> subject,
-    Handle<JSRegExp> regexp,
-    Handle<String> replacement,
-    Handle<JSArray> last_match_info) {
+    String* subject,
+    JSRegExp* regexp,
+    String* replacement,
+    JSArray* last_match_info,
+    Zone* zone) {
   ASSERT(subject->IsFlat());
   ASSERT(replacement->IsFlat());
 
-  bool is_global = regexp->GetFlags().is_global();
-  int capture_count = regexp->CaptureCount();
-  int subject_length = subject->length();
+  HandleScope handles(isolate);
+
+  int length = subject->length();
+  Handle<String> subject_handle(subject);
+  Handle<JSRegExp> regexp_handle(regexp);
+  Handle<String> replacement_handle(replacement);
+  Handle<JSArray> last_match_info_handle(last_match_info);
+  Handle<Object> match = RegExpImpl::Exec(regexp_handle,
+                                          subject_handle,
+                                          0,
+                                          last_match_info_handle);
+  if (match.is_null()) {
+    return Failure::Exception();
+  }
+  if (match->IsNull()) {
+    return *subject_handle;
+  }
+
+  int capture_count = regexp_handle->CaptureCount();
 
   // CompiledReplacement uses zone allocation.
-  Zone* zone = isolate->runtime_zone();
   ZoneScope zonescope(zone, DELETE_ON_EXIT);
   CompiledReplacement compiled_replacement(zone);
-  bool simple_replace = compiled_replacement.Compile(replacement,
-                                                     capture_count,
-                                                     subject_length);
+
+  compiled_replacement.Compile(replacement_handle,
+                               capture_count,
+                               length);
+
+  bool is_global = regexp_handle->GetFlags().is_global();
 
   // Shortcut for simple non-regexp global replacements
   if (is_global &&
-      regexp->TypeTag() == JSRegExp::ATOM &&
-      simple_replace) {
-    if (subject->HasOnlyAsciiChars()) {
+          regexp_handle->TypeTag() == JSRegExp::ATOM &&
+           compiled_replacement.simple_hint()) {
+         if (subject_handle->HasOnlyAsciiChars() &&
+             replacement_handle->HasOnlyAsciiChars()) {
       return StringReplaceAtomRegExpWithString<SeqAsciiString>(
-          isolate, subject, regexp, replacement, last_match_info);
-    } else {
+                 isolate,
+                 subject_handle,
+                 regexp_handle,
+                 replacement_handle,
+                 last_match_info_handle,
+                 zone);
+         } else {
       return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
-          isolate, subject, regexp, replacement, last_match_info);
+                 isolate,
+                 subject_handle,
+                 regexp_handle,
+                 replacement_handle,
+                 last_match_info_handle,
+                 zone);
     }
   }
 
-  RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
-  if (global_cache.HasException()) return Failure::Exception();
-
-  int32_t* current_match = global_cache.FetchNext();
-  if (current_match == NULL) {
-    if (global_cache.HasException()) return Failure::Exception();
-    return *subject;
-  }
-
   // Guessing the number of parts that the final result string is built
   // from. Global regexps can match any number of times, so we guess
   // conservatively.
   int expected_parts =
       (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
   ReplacementStringBuilder builder(isolate->heap(),
-                                   subject,
+                                   subject_handle,
                                    expected_parts);
 
+  // Index of end of last match.
+  int prev = 0;
+
+
   // Number of parts added by compiled replacement plus preceding
   // string and possibly suffix after last match.  It is possible for
   // all components to use two elements when encoded as two smis.
   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
-
-  int prev = 0;
-
+  bool matched = true;
   do {
+    ASSERT(last_match_info_handle->HasFastObjectElements());
+    // Increase the capacity of the builder before entering local handle-scope,
+    // so its internal buffer can safely allocate a new handle if it grows.
     builder.EnsureCapacity(parts_added_per_loop);
 
-    int start = current_match[0];
-    int end = current_match[1];
+    HandleScope loop_scope(isolate);
+    int start, end;
+    {
+      AssertNoAllocation match_info_array_is_not_in_a_handle;
+      FixedArray* match_info_array =
+          FixedArray::cast(last_match_info_handle->elements());
+
+      ASSERT_EQ(capture_count * 2 + 2,
+                RegExpImpl::GetLastCaptureCount(match_info_array));
+      start = RegExpImpl::GetCapture(match_info_array, 0);
+      end = RegExpImpl::GetCapture(match_info_array, 1);
+    }
 
     if (prev < start) {
       builder.AddSubjectSlice(prev, start);
     }
+    compiled_replacement.Apply(&builder,
+                               start,
+                               end,
+                               last_match_info_handle);
 
-    if (simple_replace) {
-      builder.AddString(replacement);
-    } else {
-      compiled_replacement.Apply(&builder,
-                                 start,
-                                 end,
-                                 current_match);
-    }
     prev = end;
 
     // Only continue checking for global regexps.
     if (!is_global) break;
 
-    current_match = global_cache.FetchNext();
-  } while (current_match != NULL);
+    // Continue from where the match ended, unless it was an empty match.
+    int next = end;
+    if (start == end) {
+      next = end + 1;
+      if (next > length) break;
+    }
 
-  if (global_cache.HasException()) return Failure::Exception();
+    match = RegExpImpl::Exec(regexp_handle,
+                             subject_handle,
+                             next,
+                             last_match_info_handle);
+    if (match.is_null()) {
+      return Failure::Exception();
+    }
+    matched = !match->IsNull();
+  } while (matched);
 
-  if (prev < subject_length) {
-    builder.EnsureCapacity(2);
-    builder.AddSubjectSlice(prev, subject_length);
+  if (prev < length) {
+    builder.AddSubjectSlice(prev, length);
   }
 
-  RegExpImpl::SetLastMatchInfo(last_match_info,
-                               subject,
-                               capture_count,
-                               global_cache.LastSuccessfulMatch());
-
   return *(builder.ToString());
 }
 
@@ -3144,51 +3250,69 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
 template <typename ResultSeqString>
 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
     Isolate* isolate,
-    Handle<String> subject,
-    Handle<JSRegExp> regexp,
-    Handle<JSArray> last_match_info) {
+    String* subject,
+    JSRegExp* regexp,
+    JSArray* last_match_info,
+    Zone* zone) {
   ASSERT(subject->IsFlat());
 
-  bool is_global = regexp->GetFlags().is_global();
+  HandleScope handles(isolate);
+
+  Handle<String> subject_handle(subject);
+  Handle<JSRegExp> regexp_handle(regexp);
+  Handle<JSArray> last_match_info_handle(last_match_info);
 
   // Shortcut for simple non-regexp global replacements
-  if (is_global &&
-      regexp->TypeTag() == JSRegExp::ATOM) {
-    Handle<String> empty_string(HEAP->empty_string());
-    if (subject->HasOnlyAsciiChars()) {
+  if (regexp_handle->GetFlags().is_global() &&
+      regexp_handle->TypeTag() == JSRegExp::ATOM) {
+    Handle<String> empty_string_handle(HEAP->empty_string());
+    if (subject_handle->HasOnlyAsciiChars()) {
       return StringReplaceAtomRegExpWithString<SeqAsciiString>(
           isolate,
-          subject,
-          regexp,
-          empty_string,
-          last_match_info);
+          subject_handle,
+          regexp_handle,
+          empty_string_handle,
+          last_match_info_handle,
+          zone);
     } else {
       return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
           isolate,
-          subject,
-          regexp,
-          empty_string,
-          last_match_info);
+          subject_handle,
+          regexp_handle,
+          empty_string_handle,
+          last_match_info_handle,
+          zone);
     }
   }
 
-  RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
-  if (global_cache.HasException()) return Failure::Exception();
+  Handle<Object> match = RegExpImpl::Exec(regexp_handle,
+                                          subject_handle,
+                                          0,
+                                          last_match_info_handle);
+  if (match.is_null()) return Failure::Exception();
+  if (match->IsNull()) return *subject_handle;
+
+  ASSERT(last_match_info_handle->HasFastObjectElements());
 
-  int32_t* current_match = global_cache.FetchNext();
-  if (current_match == NULL) {
-    if (global_cache.HasException()) return Failure::Exception();
-    return *subject;
+  int start, end;
+  {
+    AssertNoAllocation match_info_array_is_not_in_a_handle;
+    FixedArray* match_info_array =
+        FixedArray::cast(last_match_info_handle->elements());
+
+    start = RegExpImpl::GetCapture(match_info_array, 0);
+    end = RegExpImpl::GetCapture(match_info_array, 1);
   }
 
-  int start = current_match[0];
-  int end = current_match[1];
-  int capture_count = regexp->CaptureCount();
-  int subject_length = subject->length();
+  bool global = regexp_handle->GetFlags().is_global();
 
-  int new_length = subject_length - (end - start);
-  if (new_length == 0) return isolate->heap()->empty_string();
+  if (start == end && !global) return *subject_handle;
 
+  int length = subject_handle->length();
+  int new_length = length - (end - start);
+  if (new_length == 0) {
+    return isolate->heap()->empty_string();
+  }
   Handle<ResultSeqString> answer;
   if (ResultSeqString::kHasAsciiEncoding) {
     answer = Handle<ResultSeqString>::cast(
@@ -3198,55 +3322,73 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
         isolate->factory()->NewRawTwoByteString(new_length));
   }
 
-  if (!is_global) {
-    RegExpImpl::SetLastMatchInfo(
-        last_match_info, subject, capture_count, current_match);
-    if (start == end) {
-      return *subject;
-    } else {
-      if (start > 0) {
-        String::WriteToFlat(*subject, answer->GetChars(), 0, start);
-      }
-      if (end < subject_length) {
-        String::WriteToFlat(
-            *subject, answer->GetChars() + start, end, subject_length);
-      }
-      return *answer;
+  // If the regexp isn't global, only match once.
+  if (!global) {
+    if (start > 0) {
+      String::WriteToFlat(*subject_handle,
+                          answer->GetChars(),
+                          0,
+                          start);
     }
+    if (end < length) {
+      String::WriteToFlat(*subject_handle,
+                          answer->GetChars() + start,
+                          end,
+                          length);
+    }
+    return *answer;
   }
 
-  int prev = 0;
+  int prev = 0;  // Index of end of last match.
+  int next = 0;  // Start of next search (prev unless last match was empty).
   int position = 0;
 
   do {
-    start = current_match[0];
-    end = current_match[1];
     if (prev < start) {
       // Add substring subject[prev;start] to answer string.
-      String::WriteToFlat(
-          *subject, answer->GetChars() + position, prev, start);
+      String::WriteToFlat(*subject_handle,
+                          answer->GetChars() + position,
+                          prev,
+                          start);
       position += start - prev;
     }
     prev = end;
+    next = end;
+    // Continue from where the match ended, unless it was an empty match.
+    if (start == end) {
+      next++;
+      if (next > length) break;
+    }
+    match = RegExpImpl::Exec(regexp_handle,
+                             subject_handle,
+                             next,
+                             last_match_info_handle);
+    if (match.is_null()) return Failure::Exception();
+    if (match->IsNull()) break;
+
+    ASSERT(last_match_info_handle->HasFastObjectElements());
+    HandleScope loop_scope(isolate);
+    {
+      AssertNoAllocation match_info_array_is_not_in_a_handle;
+      FixedArray* match_info_array =
+          FixedArray::cast(last_match_info_handle->elements());
+      start = RegExpImpl::GetCapture(match_info_array, 0);
+      end = RegExpImpl::GetCapture(match_info_array, 1);
+    }
+  } while (true);
 
-    current_match = global_cache.FetchNext();
-  } while (current_match != NULL);
-
-  if (global_cache.HasException()) return Failure::Exception();
-
-  RegExpImpl::SetLastMatchInfo(last_match_info,
-                               subject,
-                               capture_count,
-                               global_cache.LastSuccessfulMatch());
-
-  if (prev < subject_length) {
+  if (prev < length) {
     // Add substring subject[prev;length] to answer string.
-    String::WriteToFlat(
-        *subject, answer->GetChars() + position, prev, subject_length);
-    position += subject_length - prev;
+    String::WriteToFlat(*subject_handle,
+                        answer->GetChars() + position,
+                        prev,
+                        length);
+    position += length - prev;
   }
 
-  if (position == 0) return isolate->heap()->empty_string();
+  if (position == 0) {
+    return isolate->heap()->empty_string();
+  }
 
   // Shorten string and fill
   int string_size = ResultSeqString::SizeFor(position);
@@ -3269,31 +3411,50 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
   ASSERT(args.length() == 4);
 
-  HandleScope scope(isolate);
-
-  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
-  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
-  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
-  CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
+  CONVERT_ARG_CHECKED(String, subject, 0);
+  if (!subject->IsFlat()) {
+    Object* flat_subject;
+    { MaybeObject* maybe_flat_subject = subject->TryFlatten();
+      if (!maybe_flat_subject->ToObject(&flat_subject)) {
+        return maybe_flat_subject;
+      }
+    }
+    subject = String::cast(flat_subject);
+  }
 
-  if (!subject->IsFlat()) subject = FlattenGetString(subject);
+  CONVERT_ARG_CHECKED(String, replacement, 2);
+  if (!replacement->IsFlat()) {
+    Object* flat_replacement;
+    { MaybeObject* maybe_flat_replacement = replacement->TryFlatten();
+      if (!maybe_flat_replacement->ToObject(&flat_replacement)) {
+        return maybe_flat_replacement;
+      }
+    }
+    replacement = String::cast(flat_replacement);
+  }
 
-  if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
+  CONVERT_ARG_CHECKED(JSRegExp, regexp, 1);
+  CONVERT_ARG_CHECKED(JSArray, last_match_info, 3);
 
   ASSERT(last_match_info->HasFastObjectElements());
 
+  Zone* zone = isolate->runtime_zone();
   if (replacement->length() == 0) {
     if (subject->HasOnlyAsciiChars()) {
       return StringReplaceRegExpWithEmptyString<SeqAsciiString>(
-          isolate, subject, regexp, last_match_info);
+          isolate, subject, regexp, last_match_info, zone);
     } else {
       return StringReplaceRegExpWithEmptyString<SeqTwoByteString>(
-          isolate, subject, regexp, last_match_info);
+          isolate, subject, regexp, last_match_info, zone);
     }
   }
 
-  return StringReplaceRegExpWithString(
-      isolate, subject, regexp, replacement, last_match_info);
+  return StringReplaceRegExpWithString(isolate,
+                                       subject,
+                                       regexp,
+                                       replacement,
+                                       last_match_info,
+                                       zone);
 }
 
 
@@ -3616,45 +3777,46 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
   CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2);
   HandleScope handles;
 
-  RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
-  if (global_cache.HasException()) return Failure::Exception();
-
-  int capture_count = regexp->CaptureCount();
+  Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info);
 
-  Zone* zone = isolate->runtime_zone();
-  ZoneScope zone_space(zone, DELETE_ON_EXIT);
-  ZoneList<int> offsets(8, zone);
-
-  while (true) {
-    int32_t* match = global_cache.FetchNext();
-    if (match == NULL) break;
-    offsets.Add(match[0], zone);  // start
-    offsets.Add(match[1], zone);  // end
+  if (match.is_null()) {
+    return Failure::Exception();
   }
-
-  if (global_cache.HasException()) return Failure::Exception();
-
-  if (offsets.length() == 0) {
-    // Not a single match.
+  if (match->IsNull()) {
     return isolate->heap()->null_value();
   }
+  int length = subject->length();
 
-  RegExpImpl::SetLastMatchInfo(regexp_info,
-                               subject,
-                               capture_count,
-                               global_cache.LastSuccessfulMatch());
-
+  Zone* zone = isolate->runtime_zone();
+  ZoneScope zone_space(zone, DELETE_ON_EXIT);
+  ZoneList<int> offsets(8, zone);
+  int start;
+  int end;
+  do {
+    {
+      AssertNoAllocation no_alloc;
+      FixedArray* elements = FixedArray::cast(regexp_info->elements());
+      start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value();
+      end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value();
+    }
+    offsets.Add(start, zone);
+    offsets.Add(end, zone);
+    if (start == end) if (++end > length) break;
+    match = RegExpImpl::Exec(regexp, subject, end, regexp_info);
+    if (match.is_null()) {
+      return Failure::Exception();
+    }
+  } while (!match->IsNull());
   int matches = offsets.length() / 2;
   Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches);
-  Handle<String> substring =
-      isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1));
+  Handle<String> substring = isolate->factory()->
+    NewSubString(subject, offsets.at(0), offsets.at(1));
   elements->set(0, *substring);
-  for (int i = 1; i < matches; i++) {
-    HandleScope temp_scope(isolate);
+  for (int i = 1; i < matches ; i++) {
     int from = offsets.at(i * 2);
     int to = offsets.at(i * 2 + 1);
-    Handle<String> substring =
-        isolate->factory()->NewProperSubString(subject, from, to);
+    Handle<String> substring = isolate->factory()->
+        NewProperSubString(subject, from, to);
     elements->set(i, *substring);
   }
   Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements);
@@ -3663,100 +3825,149 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
 }
 
 
-// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
-// separate last match info.  See comment on that function.
-template<bool has_capture>
-static int SearchRegExpMultiple(
+static bool SearchStringMultiple(Isolate* isolate,
+                                 Handle<String> subject,
+                                 Handle<String> pattern,
+                                 Handle<JSArray> last_match_info,
+                                 FixedArrayBuilder* builder) {
+  ASSERT(subject->IsFlat());
+  ASSERT(pattern->IsFlat());
+
+  // Pretend a previous match ended right before the first character, so searching starts at index 0.
+  int match_pos = -pattern->length();
+
+  for (;;) {  // Break when search complete; a false result means the builder must grow first.
+    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);  // Done before the no-allocation scope begins.
+    AssertNoAllocation no_gc;
+    String::FlatContent subject_content = subject->GetFlatContent();
+    String::FlatContent pattern_content = pattern->GetFlatContent();
+    if (subject_content.IsAscii()) {
+      Vector<const char> subject_vector = subject_content.ToAsciiVector();
+      if (pattern_content.IsAscii()) {
+        if (SearchStringMultiple(isolate,
+                                 subject_vector,
+                                 pattern_content.ToAsciiVector(),
+                                 *pattern,
+                                 builder,
+                                 &match_pos)) break;
+      } else {
+        if (SearchStringMultiple(isolate,
+                                 subject_vector,
+                                 pattern_content.ToUC16Vector(),
+                                 *pattern,
+                                 builder,
+                                 &match_pos)) break;
+      }
+    } else {
+      Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
+      if (pattern_content.IsAscii()) {
+        if (SearchStringMultiple(isolate,
+                                 subject_vector,
+                                 pattern_content.ToAsciiVector(),
+                                 *pattern,
+                                 builder,
+                                 &match_pos)) break;
+      } else {
+        if (SearchStringMultiple(isolate,
+                                 subject_vector,
+                                 pattern_content.ToUC16Vector(),
+                                 *pattern,
+                                 builder,
+                                 &match_pos)) break;
+      }
+    }
+  }
+
+  if (match_pos >= 0) {  // Still negative if a non-empty pattern never matched.
+    SetLastMatchInfoNoCaptures(subject,
+                               last_match_info,
+                               match_pos,
+                               match_pos + pattern->length());
+    return true;
+  }
+  return false;  // No matches at all.
+}
+
+
+static int SearchRegExpNoCaptureMultiple(
     Isolate* isolate,
     Handle<String> subject,
     Handle<JSRegExp> regexp,
     Handle<JSArray> last_match_array,
     FixedArrayBuilder* builder) {
   ASSERT(subject->IsFlat());
-  ASSERT_NE(has_capture, regexp->CaptureCount() == 0);
-
-  RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
-  if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
-
-  int capture_count = regexp->CaptureCount();
-  int subject_length = subject->length();
-
-  // Position to search from.
+  ASSERT(regexp->CaptureCount() == 0);
   int match_start = -1;
   int match_end = 0;
+  int pos = 0;
+  int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
+  if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
+
+  int max_matches;
+  int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
+                                                          registers_per_match,
+                                                          &max_matches);
+  OffsetsVector registers(num_registers, isolate);
+  Vector<int32_t> register_vector(registers.vector(), registers.length());
+  int subject_length = subject->length();
   bool first = true;
-
-  // Two smis before and after the match, for very long strings.
-  static const int kMaxBuilderEntriesPerRegExpMatch = 5;
-
-  while (true) {
-    int32_t* current_match = global_cache.FetchNext();
-    if (current_match == NULL) break;
-    match_start = current_match[0];
-    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
-    if (match_end < match_start) {
-      ReplacementStringBuilder::AddSubjectSlice(builder,
-                                                match_end,
-                                                match_start);
-    }
-    match_end = current_match[1];
-    {
-      // Avoid accumulating new handles inside loop.
-      HandleScope temp_scope(isolate);
-      Handle<String> match;
-      if (!first) {
-        match = isolate->factory()->NewProperSubString(subject,
-                                                       match_start,
-                                                       match_end);
-      } else {
-        match = isolate->factory()->NewSubString(subject,
-                                                 match_start,
-                                                 match_end);
-        first = false;
+  for (;;) {  // Break on failure, return on exception.
+    int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
+                                                  subject,
+                                                  pos,
+                                                  register_vector);
+    if (num_matches > 0) {
+      for (int match_index = 0; match_index < num_matches; match_index++) {
+        int32_t* current_match = &register_vector[match_index * 2];
+        match_start = current_match[0];
+        builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+        if (match_end < match_start) {
+          ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                    match_end,
+                                                    match_start);
+        }
+        match_end = current_match[1];
+        HandleScope loop_scope(isolate);
+        if (!first) {
+          builder->Add(*isolate->factory()->NewProperSubString(subject,
+                                                               match_start,
+                                                               match_end));
+        } else {
+          builder->Add(*isolate->factory()->NewSubString(subject,
+                                                         match_start,
+                                                         match_end));
+          first = false;
+        }
       }
 
-      if (has_capture) {
-        // Arguments array to replace function is match, captures, index and
-        // subject, i.e., 3 + capture count in total.
-        Handle<FixedArray> elements =
-            isolate->factory()->NewFixedArray(3 + capture_count);
-
-        elements->set(0, *match);
-        for (int i = 1; i <= capture_count; i++) {
-          int start = current_match[i * 2];
-          if (start >= 0) {
-            int end = current_match[i * 2 + 1];
-            ASSERT(start <= end);
-            Handle<String> substring =
-                isolate->factory()->NewSubString(subject, start, end);
-            elements->set(i, *substring);
-          } else {
-            ASSERT(current_match[i * 2 + 1] < 0);
-            elements->set(i, isolate->heap()->undefined_value());
-          }
-        }
-        elements->set(capture_count + 1, Smi::FromInt(match_start));
-        elements->set(capture_count + 2, *subject);
-        builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
+      // If we did not get the maximum number of matches, we can stop here
+      // since there are no matches left.
+      if (num_matches < max_matches) break;
+
+      if (match_start != match_end) {
+        pos = match_end;
       } else {
-        builder->Add(*match);
+        pos = match_end + 1;
+        if (pos > subject_length) break;
       }
+    } else if (num_matches == 0) {
+      break;
+    } else {
+      ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION);
+      return RegExpImpl::RE_EXCEPTION;
     }
   }
 
-  if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
-
   if (match_start >= 0) {
-    // Finished matching, with at least one match.
     if (match_end < subject_length) {
       ReplacementStringBuilder::AddSubjectSlice(builder,
                                                 match_end,
                                                 subject_length);
     }
-
-    RegExpImpl::SetLastMatchInfo(
-        last_match_array, subject, capture_count, NULL);
-
+    SetLastMatchInfoNoCaptures(subject,
+                               last_match_array,
+                               match_start,
+                               match_end);
     return RegExpImpl::RE_SUCCESS;
   } else {
     return RegExpImpl::RE_FAILURE;  // No matches at all.
@@ -3764,6 +3975,147 @@ static int SearchRegExpMultiple(
 }
 
 
+// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
+// separate last match info.  See comment on that function.
+static int SearchRegExpMultiple(  // Returns a RegExpImpl::RE_* result code.
+    Isolate* isolate,
+    Handle<String> subject,  // Must be flat (asserted below).
+    Handle<JSRegExp> regexp,
+    Handle<JSArray> last_match_array,
+    FixedArrayBuilder* builder,  // Receives subject slices and match arrays.
+    Zone* zone) {  // Not referenced in this body.
+
+  ASSERT(subject->IsFlat());
+  int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
+  if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
+
+  int max_matches;  // Out-parameter filled by GlobalOffsetsVectorSize.
+  int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
+                                                          registers_per_match,
+                                                          &max_matches);
+  OffsetsVector registers(num_registers, isolate);
+  Vector<int32_t> register_vector(registers.vector(), registers.length());
+
+  int num_matches = RegExpImpl::IrregexpExecRaw(regexp,  // First batch.
+                                                subject,
+                                                0,  // Start at index 0.
+                                                register_vector);
+
+  int capture_count = regexp->CaptureCount();
+  int subject_length = subject->length();
+
+  // Position to search from.
+  int pos = 0;
+  // End of previous match. Differs from pos if match was empty.
+  int match_end = 0;
+  bool first = true;  // Selects NewSubString vs. NewProperSubString below.
+
+  if (num_matches > 0) {
+    do {
+      int match_start = 0;
+      for (int match_index = 0; match_index < num_matches; match_index++) {
+        int32_t* current_match =  // Registers of this match in the batch.
+            &register_vector[match_index * registers_per_match];
+        match_start = current_match[0];
+        builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+        if (match_end < match_start) {  // Text between previous and this match.
+          ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                    match_end,
+                                                    match_start);
+        }
+        match_end = current_match[1];
+
+        {
+          // Avoid accumulating new handles inside loop.
+          HandleScope temp_scope(isolate);
+          // Arguments array to replace function is match, captures, index and
+          // subject, i.e., 3 + capture count in total.
+          Handle<FixedArray> elements =
+              isolate->factory()->NewFixedArray(3 + capture_count);
+          Handle<String> match;
+          if (!first) {  // Every match after the very first one.
+            match = isolate->factory()->NewProperSubString(subject,
+                                                           match_start,
+                                                           match_end);
+          } else {
+            match = isolate->factory()->NewSubString(subject,
+                                                     match_start,
+                                                     match_end);
+          }
+          elements->set(0, *match);  // Slot 0: the whole match.
+          for (int i = 1; i <= capture_count; i++) {
+            int start = current_match[i * 2];  // Capture i: registers 2i, 2i+1.
+            if (start >= 0) {
+              int end = current_match[i * 2 + 1];
+              ASSERT(start <= end);
+              Handle<String> substring;
+              if (!first) {
+                substring =
+                    isolate->factory()->NewProperSubString(subject, start, end);
+              } else {
+                substring =
+                    isolate->factory()->NewSubString(subject, start, end);
+              }
+              elements->set(i, *substring);
+            } else {  // Negative start: the capture is stored as undefined.
+              ASSERT(current_match[i * 2 + 1] < 0);
+              elements->set(i, isolate->heap()->undefined_value());
+            }
+          }
+          elements->set(capture_count + 1, Smi::FromInt(match_start));
+          elements->set(capture_count + 2, *subject);
+          builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
+        }
+        first = false;
+      }
+
+      // If we did not get the maximum number of matches, we can stop here
+      // since there are no matches left.
+      if (num_matches < max_matches) break;
+
+      if (match_end > match_start) {  // Last match was non-empty.
+        pos = match_end;
+      } else {
+        pos = match_end + 1;  // Empty match: resume one position later.
+        if (pos > subject_length) {
+          break;
+        }
+      }
+
+      num_matches = RegExpImpl::IrregexpExecRaw(regexp,  // Next batch.
+                                                subject,
+                                                pos,
+                                                register_vector);
+    } while (num_matches > 0);  // Ends on no match or a negative result.
+
+    if (num_matches != RegExpImpl::RE_EXCEPTION) {
+      // Finished matching, with at least one match.
+      if (match_end < subject_length) {  // Text after the final match.
+        ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                  match_end,
+                                                  subject_length);
+      }
+
+      int last_match_capture_count = (capture_count + 1) * 2;
+      int last_match_array_size =
+          last_match_capture_count + RegExpImpl::kLastMatchOverhead;
+      last_match_array->EnsureSize(last_match_array_size);
+      AssertNoAllocation no_gc;
+      FixedArray* elements = FixedArray::cast(last_match_array->elements());
+      // We have to set this even though the rest of the last match array is
+      // ignored.
+      RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
+      // These are also read without consulting the override.
+      RegExpImpl::SetLastSubject(elements, *subject);
+      RegExpImpl::SetLastInput(elements, *subject);
+      return RegExpImpl::RE_SUCCESS;
+    }
+  }
+  // No matches at all, return failure or exception result directly.
+  return num_matches;
+}
+
+
 // This is only called for StringReplaceGlobalRegExpWithFunction.  This sets
 // lastMatchInfoOverride to maintain the last match info, so we don't need to
 // set any other last match array info.
@@ -3789,15 +4141,34 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
   }
   FixedArrayBuilder builder(result_elements);
 
+  if (regexp->TypeTag() == JSRegExp::ATOM) {
+    Handle<String> pattern(
+        String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
+    ASSERT(pattern->IsFlat());
+    if (SearchStringMultiple(isolate, subject, pattern,
+                             last_match_info, &builder)) {
+      return *builder.ToJSArray(result_array);
+    }
+    return isolate->heap()->null_value();
+  }
+
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+
   int result;
   if (regexp->CaptureCount() == 0) {
-    result = SearchRegExpMultiple<false>(
-        isolate, subject, regexp, last_match_info, &builder);
+    result = SearchRegExpNoCaptureMultiple(isolate,
+                                           subject,
+                                           regexp,
+                                           last_match_info,
+                                           &builder);
   } else {
-    result = SearchRegExpMultiple<true>(
-        isolate, subject, regexp, last_match_info, &builder);
+    result = SearchRegExpMultiple(isolate,
+                                  subject,
+                                  regexp,
+                                  last_match_info,
+                                  &builder,
+                                  isolate->runtime_zone());
   }
-
   if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
   if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value();
   ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
index 3fa93b2..832616e 100644 (file)
@@ -2791,7 +2791,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
   // Calculate number of capture registers (number_of_captures + 1) * 2.
   __ leal(rdx, Operand(rdx, rdx, times_1, 2));
   // Check that the static offsets vector buffer is large enough.
-  __ cmpl(rdx, Immediate(Isolate::kJSRegexpStaticOffsetsVectorSize));
+  __ cmpl(rdx, Immediate(OffsetsVector::kStaticOffsetsVectorSize));
   __ j(above, &runtime);
 
   // rax: RegExp data (FixedArray)
index e433b92..50356e7 100644 (file)
@@ -267,7 +267,6 @@ TEST(Parser) {
   CHECK_PARSE_EQ("\\u003z", "'u003z'");
   CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
 
-  CHECK_SIMPLE("", false);
   CHECK_SIMPLE("a", true);
   CHECK_SIMPLE("a|b", false);
   CHECK_SIMPLE("a\\n", false);
@@ -1350,7 +1349,7 @@ TEST(MacroAssembler) {
   V8::Initialize(NULL);
   byte codes[1024];
   RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024),
-                                 Isolate::Current()->runtime_zone());
+                                 Isolate::Current()->zone());
   // ^f(o)o.
   Label fail, fail2, start;
   uc16 foo_chars[3];