Reland regexp global optimizations.
author yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 28 Aug 2012 09:37:41 +0000 (09:37 +0000)
committer yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 28 Aug 2012 09:37:41 +0000 (09:37 +0000)
BUG=

Review URL: https://chromiumcodereview.appspot.com/10872010

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12396 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

14 files changed:
src/arm/code-stubs-arm.cc
src/assembler.cc
src/ia32/code-stubs-ia32.cc
src/isolate.h
src/jsregexp.cc
src/jsregexp.h
src/mips/code-stubs-mips.cc
src/runtime.cc
src/unicode-inl.h
src/unicode.h
src/x64/code-stubs-x64.cc
test/cctest/test-regexp.cc
test/cctest/test-strings.cc
test/mjsunit/regexp-global.js

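diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc
index 88178ff..d9e3a3d 100644
--- a/src/arm/code-stubs-arm.cc
+++ b/src/arm/code-stubs-arm.cc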
@@ -4818,7 +4818,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
   STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
   __ add(r2, r2, Operand(2));  // r2 was a smi.
   // Check that the static offsets vector buffer is large enough.
-  __ cmp(r2, Operand(OffsetsVector::kStaticOffsetsVectorSize));
+  __ cmp(r2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize));
   __ b(hi, &runtime);
 
   // r2: Number of capture registers
diff --git a/src/assembler.cc b/src/assembler.cc
index 6dcd2a0..a58f77f 100644
--- a/src/assembler.cc
+++ b/src/assembler.cc
@@ -1092,7 +1092,7 @@ ExternalReference ExternalReference::re_word_character_map() {
 ExternalReference ExternalReference::address_of_static_offsets_vector(
     Isolate* isolate) {
   return ExternalReference(
-      OffsetsVector::static_offsets_vector_address(isolate));
+      reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector()));
 }
 
 ExternalReference ExternalReference::address_of_regexp_stack_memory_address(
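diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc
index 80b0f22..140db8a 100644
--- a/src/ia32/code-stubs-ia32.cc
+++ b/src/ia32/code-stubs-ia32.cc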
@@ -3748,7 +3748,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
   STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
   __ add(edx, Immediate(2));  // edx was a smi.
   // Check that the static offsets vector buffer is large enough.
-  __ cmp(edx, OffsetsVector::kStaticOffsetsVectorSize);
+  __ cmp(edx, Isolate::kJSRegexpStaticOffsetsVectorSize);
   __ j(above, &runtime);
 
   // ecx: RegExp data (FixedArray)
diff --git a/src/isolate.h b/src/isolate.h
index f654459..3461f97 100644
--- a/src/isolate.h
+++ b/src/isolate.h
@@ -308,7 +308,7 @@ class ThreadLocalTop BASE_EMBEDDED {
 
 #define ISOLATE_INIT_ARRAY_LIST(V)                                             \
   /* SerializerDeserializer state. */                                          \
-  V(int, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize)     \
+  V(int32_t, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize) \
   V(int, bad_char_shift_table, kUC16AlphabetSize)                              \
   V(int, good_suffix_shift_table, (kBMMaxShift + 1))                           \
   V(int, suffix_table, (kBMMaxShift + 1))                                      \
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index e730e14..ae25432 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -278,11 +278,12 @@ static void SetAtomLastCapture(FixedArray* array,
 }
 
 
-Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
-                                    Handle<String> subject,
-                                    int index,
-                                    Handle<JSArray> last_match_info) {
-  Isolate* isolate = re->GetIsolate();
+int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
+                            Handle<String> subject,
+                            int index,
+                            int32_t* output,
+                            int output_size) {
+  Isolate* isolate = regexp->GetIsolate();
 
   ASSERT(0 <= index);
   ASSERT(index <= subject->length());
@@ -290,15 +291,16 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
   if (!subject->IsFlat()) FlattenString(subject);
   AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
 
-  String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
+  String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
   int needle_len = needle->length();
   ASSERT(needle->IsFlat());
+  ASSERT_LT(0, needle_len);
 
-  if (needle_len != 0) {
-    if (index + needle_len > subject->length()) {
-      return isolate->factory()->null_value();
-    }
+  if (index + needle_len > subject->length()) {
+    return RegExpImpl::RE_FAILURE;
+  }
 
+  for (int i = 0; i < output_size; i += 2) {
     String::FlatContent needle_content = needle->GetFlatContent();
     String::FlatContent subject_content = subject->GetFlatContent();
     ASSERT(needle_content.IsFlat());
@@ -323,15 +325,36 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
                                subject_content.ToUC16Vector(),
                                needle_content.ToUC16Vector(),
                                index)));
-    if (index == -1) return isolate->factory()->null_value();
+    if (index == -1) {
+      return i / 2;  // Return number of matches.
+    } else {
+      output[i] = index;
+      output[i+1] = index + needle_len;
+      index += needle_len;
+    }
   }
-  ASSERT(last_match_info->HasFastObjectElements());
+  return output_size / 2;
+}
 
-  {
-    NoHandleAllocation no_handles;
-    FixedArray* array = FixedArray::cast(last_match_info->elements());
-    SetAtomLastCapture(array, *subject, index, index + needle_len);
-  }
+
+Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
+                                    Handle<String> subject,
+                                    int index,
+                                    Handle<JSArray> last_match_info) {
+  Isolate* isolate = re->GetIsolate();
+
+  static const int kNumRegisters = 2;
+  STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
+  int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
+
+  int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters);
+
+  if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
+
+  ASSERT_EQ(res, RegExpImpl::RE_SUCCESS);
+  NoHandleAllocation no_handles;
+  FixedArray* array = FixedArray::cast(last_match_info->elements());
+  SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);
   return last_match_info;
 }
 
@@ -511,7 +534,11 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
 
 #ifdef V8_INTERPRETED_REGEXP
   // Byte-code regexp needs space allocated for all its registers.
-  return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
+  // The result captures are copied to the start of the registers array
+  // if the match succeeds.  This way those registers are not clobbered
+  // when we set the last match info from last successful match.
+  return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
+         (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
 #else  // V8_INTERPRETED_REGEXP
   // Native regexp only needs room to output captures. Registers are handled
   // internally.
@@ -520,27 +547,11 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
 }
 
 
-int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
-                                        int registers_per_match,
-                                        int* max_matches) {
-#ifdef V8_INTERPRETED_REGEXP
-  // Global loop in interpreted regexp is not implemented.  Therefore we choose
-  // the size of the offsets vector so that it can only store one match.
-  *max_matches = 1;
-  return registers_per_match;
-#else  // V8_INTERPRETED_REGEXP
-  int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize);
-  *max_matches = size / registers_per_match;
-  return size;
-#endif  // V8_INTERPRETED_REGEXP
-}
-
-
-int RegExpImpl::IrregexpExecRaw(
-    Handle<JSRegExp> regexp,
-    Handle<String> subject,
-    int index,
-    Vector<int> output) {
+int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
+                                Handle<String> subject,
+                                int index,
+                                int32_t* output,
+                                int output_size) {
   Isolate* isolate = regexp->GetIsolate();
 
   Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
@@ -552,15 +563,19 @@ int RegExpImpl::IrregexpExecRaw(
   bool is_ascii = subject->IsAsciiRepresentationUnderneath();
 
 #ifndef V8_INTERPRETED_REGEXP
-  ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
+  ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
   do {
     EnsureCompiledIrregexp(regexp, subject, is_ascii);
     Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
+    // The stack is used to allocate registers for the compiled regexp code.
+    // This means that in case of failure, the output registers array is left
+    // untouched and contains the capture results from the previous successful
+    // match.  We can use that to set the last match info lazily.
     NativeRegExpMacroAssembler::Result res =
         NativeRegExpMacroAssembler::Match(code,
                                           subject,
-                                          output.start(),
-                                          output.length(),
+                                          output,
+                                          output_size,
                                           index,
                                           isolate);
     if (res != NativeRegExpMacroAssembler::RETRY) {
@@ -587,22 +602,29 @@ int RegExpImpl::IrregexpExecRaw(
   return RE_EXCEPTION;
 #else  // V8_INTERPRETED_REGEXP
 
-  ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
+  ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp));
   // We must have done EnsureCompiledIrregexp, so we can get the number of
   // registers.
-  int* register_vector = output.start();
   int number_of_capture_registers =
       (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
+  int32_t* raw_output = &output[number_of_capture_registers];
+  // We do not touch the actual capture result registers until we know there
+  // has been a match so that we can use those capture results to set the
+  // last match info.
   for (int i = number_of_capture_registers - 1; i >= 0; i--) {
-    register_vector[i] = -1;
+    raw_output[i] = -1;
   }
   Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
 
   IrregexpResult result = IrregexpInterpreter::Match(isolate,
                                                      byte_codes,
                                                      subject,
-                                                     register_vector,
+                                                     raw_output,
                                                      index);
+  if (result == RE_SUCCESS) {
+    // Copy capture results to the start of the registers array.
+    memcpy(output, raw_output, number_of_capture_registers * sizeof(int32_t));
+  }
   if (result == RE_EXCEPTION) {
     ASSERT(!isolate->has_pending_exception());
     isolate->StackOverflow();
@@ -612,50 +634,44 @@ int RegExpImpl::IrregexpExecRaw(
 }
 
 
-Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
+Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
                                         Handle<String> subject,
                                         int previous_index,
                                         Handle<JSArray> last_match_info) {
-  Isolate* isolate = jsregexp->GetIsolate();
-  ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
+  Isolate* isolate = regexp->GetIsolate();
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
 
   // Prepare space for the return values.
-#ifdef V8_INTERPRETED_REGEXP
-#ifdef DEBUG
+#if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
   if (FLAG_trace_regexp_bytecodes) {
-    String* pattern = jsregexp->Pattern();
+    String* pattern = regexp->Pattern();
     PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
     PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
   }
 #endif
-#endif
-  int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
+  int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
   if (required_registers < 0) {
     // Compiling failed with an exception.
     ASSERT(isolate->has_pending_exception());
     return Handle<Object>::null();
   }
 
-  OffsetsVector registers(required_registers, isolate);
+  int32_t* output_registers = NULL;
+  if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
+    output_registers = NewArray<int32_t>(required_registers);
+  }
+  SmartArrayPointer<int32_t> auto_release(output_registers);
+  if (output_registers == NULL) {
+    output_registers = isolate->jsregexp_static_offsets_vector();
+  }
 
-  int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index,
-                                        Vector<int>(registers.vector(),
-                                                    registers.length()));
+  int res = RegExpImpl::IrregexpExecRaw(
+      regexp, subject, previous_index, output_registers, required_registers);
   if (res == RE_SUCCESS) {
-    int capture_register_count =
-        (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
-    last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
-    AssertNoAllocation no_gc;
-    int* register_vector = registers.vector();
-    FixedArray* array = FixedArray::cast(last_match_info->elements());
-    for (int i = 0; i < capture_register_count; i += 2) {
-      SetCapture(array, i, register_vector[i]);
-      SetCapture(array, i + 1, register_vector[i + 1]);
-    }
-    SetLastCaptureCount(array, capture_register_count);
-    SetLastSubject(array, *subject);
-    SetLastInput(array, *subject);
-    return last_match_info;
+    int capture_count =
+        IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
+    return SetLastMatchInfo(
+        last_match_info, subject, capture_count, output_registers);
   }
   if (res == RE_EXCEPTION) {
     ASSERT(isolate->has_pending_exception());
@@ -666,6 +682,145 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
 }
 
 
+Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
+                                             Handle<String> subject,
+                                             int capture_count,
+                                             int32_t* match) {
+  int capture_register_count = (capture_count + 1) * 2;
+  last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
+  AssertNoAllocation no_gc;
+  FixedArray* array = FixedArray::cast(last_match_info->elements());
+  if (match != NULL) {
+    for (int i = 0; i < capture_register_count; i += 2) {
+      SetCapture(array, i, match[i]);
+      SetCapture(array, i + 1, match[i + 1]);
+    }
+  }
+  SetLastCaptureCount(array, capture_register_count);
+  SetLastSubject(array, *subject);
+  SetLastInput(array, *subject);
+  return last_match_info;
+}
+
+
+RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
+                                     Handle<String> subject,
+                                     bool is_global,
+                                     Isolate* isolate) {
+#ifdef V8_INTERPRETED_REGEXP
+  bool interpreted = true;
+#else
+  bool interpreted = false;
+#endif  // V8_INTERPRETED_REGEXP
+
+  regexp_ = regexp;
+  subject_ = subject;
+
+  if (regexp_->TypeTag() == JSRegExp::ATOM) {
+    static const int kAtomRegistersPerMatch = 2;
+    registers_per_match_ = kAtomRegistersPerMatch;
+    // There is no distinction between interpreted and native for atom regexps.
+    interpreted = false;
+  } else {
+    registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
+    if (registers_per_match_ < 0) {
+      num_matches_ = -1;  // Signal exception.
+      return;
+    }
+  }
+
+  if (is_global && !interpreted) {
+    register_array_size_ =
+        Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
+    max_matches_ = register_array_size_ / registers_per_match_;
+  } else {
+    // Global loop in interpreted regexp is not implemented.  We choose
+    // the size of the offsets vector so that it can only store one match.
+    register_array_size_ = registers_per_match_;
+    max_matches_ = 1;
+  }
+
+  if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
+    register_array_ = NewArray<int32_t>(register_array_size_);
+  } else {
+    register_array_ = isolate->jsregexp_static_offsets_vector();
+  }
+
+  // Set state so that fetching the results the first time triggers a call
+  // to the compiled regexp.
+  current_match_index_ = max_matches_ - 1;
+  num_matches_ = max_matches_;
+  ASSERT(registers_per_match_ >= 2);  // Each match has at least one capture.
+  ASSERT_GE(register_array_size_, registers_per_match_);
+  int32_t* last_match =
+      &register_array_[current_match_index_ * registers_per_match_];
+  last_match[0] = -1;
+  last_match[1] = 0;
+}
+
+
+RegExpImpl::GlobalCache::~GlobalCache() {
+  // Deallocate the register array if we allocated it in the constructor
+  // (as opposed to using the existing jsregexp_static_offsets_vector).
+  if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
+    DeleteArray(register_array_);
+  }
+}
+
+
+int32_t* RegExpImpl::GlobalCache::FetchNext() {
+  current_match_index_++;
+  if (current_match_index_ >= num_matches_) {
+    // Current batch of results exhausted.
+    // Fail if last batch was not even fully filled.
+    if (num_matches_ < max_matches_) {
+      num_matches_ = 0;  // Signal failed match.
+      return NULL;
+    }
+
+    int32_t* last_match =
+        &register_array_[(current_match_index_ - 1) * registers_per_match_];
+    int last_end_index = last_match[1];
+
+    if (regexp_->TypeTag() == JSRegExp::ATOM) {
+      num_matches_ = RegExpImpl::AtomExecRaw(regexp_,
+                                             subject_,
+                                             last_end_index,
+                                             register_array_,
+                                             register_array_size_);
+    } else {
+      int last_start_index = last_match[0];
+      if (last_start_index == last_end_index) last_end_index++;
+      if (last_end_index > subject_->length()) {
+        num_matches_ = 0;  // Signal failed match.
+        return NULL;
+      }
+      num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_,
+                                                 subject_,
+                                                 last_end_index,
+                                                 register_array_,
+                                                 register_array_size_);
+    }
+
+    if (num_matches_ <= 0) return NULL;
+    current_match_index_ = 0;
+    return register_array_;
+  } else {
+    return &register_array_[current_match_index_ * registers_per_match_];
+  }
+}
+
+
+int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() {
+  int index = current_match_index_ * registers_per_match_;
+  if (num_matches_ == 0) {
+    // After a failed match we shift back by one result.
+    index -= registers_per_match_;
+  }
+  return &register_array_[index];
+}
+
+
 // -------------------------------------------------------------------
 // Implementation of the Irregexp regular expression engine.
 //
diff --git a/src/jsregexp.h b/src/jsregexp.h
index 9a84237..96825ce 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -93,6 +93,14 @@ class RegExpImpl {
                           JSRegExp::Flags flags,
                           Handle<String> match_pattern);
 
+
+  static int AtomExecRaw(Handle<JSRegExp> regexp,
+                         Handle<String> subject,
+                         int index,
+                         int32_t* output,
+                         int output_size);
+
+
   static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
                                  Handle<String> subject,
                                  int index,
@@ -105,17 +113,11 @@ class RegExpImpl {
   // This ensures that the regexp is compiled for the subject, and that
   // the subject is flat.
   // Returns the number of integer spaces required by IrregexpExecOnce
-  // as its "registers" argument. If the regexp cannot be compiled,
+  // as its "registers" argument.  If the regexp cannot be compiled,
   // an exception is set as pending, and this function returns negative.
   static int IrregexpPrepare(Handle<JSRegExp> regexp,
                              Handle<String> subject);
 
-  // Calculate the size of offsets vector for the case of global regexp
-  // and the number of matches this vector is able to store.
-  static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
-                                     int registers_per_match,
-                                     int* max_matches);
-
   // Execute a regular expression on the subject, starting from index.
   // If matching succeeds, return the number of matches.  This can be larger
   // than one in the case of global regular expressions.
@@ -125,17 +127,57 @@ class RegExpImpl {
   static int IrregexpExecRaw(Handle<JSRegExp> regexp,
                              Handle<String> subject,
                              int index,
-                             Vector<int> registers);
+                             int32_t* output,
+                             int output_size);
 
   // Execute an Irregexp bytecode pattern.
   // On a successful match, the result is a JSArray containing
-  // captured positions. On a failure, the result is the null value.
+  // captured positions.  On a failure, the result is the null value.
   // Returns an empty handle in case of an exception.
   static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
                                      Handle<String> subject,
                                      int index,
                                      Handle<JSArray> lastMatchInfo);
 
+  // Set last match info.  If match is NULL, then setting captures is omitted.
+  static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info,
+                                          Handle<String> subject,
+                                          int capture_count,
+                                          int32_t* match);
+
+
+  class GlobalCache {
+   public:
+    GlobalCache(Handle<JSRegExp> regexp,
+                Handle<String> subject,
+                bool is_global,
+                Isolate* isolate);
+
+    ~GlobalCache();
+
+    // Fetch the next entry in the cache for global regexp match results.
+    // This does not set the last match info.  Upon failure, NULL is returned.
+    // The cause can be checked with HasException().  The previous
+    // result is still available in memory when a failure happens.
+    int32_t* FetchNext();
+
+    int32_t* LastSuccessfulMatch();
+
+    inline bool HasException() { return num_matches_ < 0; }
+
+   private:
+    int num_matches_;
+    int max_matches_;
+    int current_match_index_;
+    int registers_per_match_;
+    // Register storage for up to max_matches_ match results.
+    int32_t* register_array_;
+    int register_array_size_;
+    Handle<JSRegExp> regexp_;
+    Handle<String> subject_;
+  };
+
+
   // Array index in the lastMatchInfo array.
   static const int kLastCaptureCount = 0;
   static const int kLastSubject = 1;
@@ -195,30 +237,10 @@ class RegExpImpl {
   static const int kRegWxpCompiledLimit = 1 * MB;
 
  private:
-  static String* last_ascii_string_;
-  static String* two_byte_cached_string_;
-
   static bool CompileIrregexp(
       Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
   static inline bool EnsureCompiledIrregexp(
       Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
-
-
-  // Set the subject cache.  The previous string buffer is not deleted, so the
-  // caller should ensure that it doesn't leak.
-  static void SetSubjectCache(String* subject,
-                              char* utf8_subject,
-                              int uft8_length,
-                              int character_position,
-                              int utf8_position);
-
-  // A one element cache of the last utf8_subject string and its length.  The
-  // subject JS String object is cached in the heap.  We also cache a
-  // translation between position and utf8 position.
-  static char* utf8_subject_cache_;
-  static int utf8_length_cache_;
-  static int utf8_position_;
-  static int character_position_;
 };
 
 
@@ -1622,40 +1644,6 @@ class RegExpEngine: public AllStatic {
 };
 
 
-class OffsetsVector {
- public:
-  inline OffsetsVector(int num_registers, Isolate* isolate)
-      : offsets_vector_length_(num_registers) {
-    if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
-      vector_ = NewArray<int>(offsets_vector_length_);
-    } else {
-      vector_ = isolate->jsregexp_static_offsets_vector();
-    }
-  }
-  inline ~OffsetsVector() {
-    if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
-      DeleteArray(vector_);
-      vector_ = NULL;
-    }
-  }
-  inline int* vector() { return vector_; }
-  inline int length() { return offsets_vector_length_; }
-
-  static const int kStaticOffsetsVectorSize =
-      Isolate::kJSRegexpStaticOffsetsVectorSize;
-
- private:
-  static Address static_offsets_vector_address(Isolate* isolate) {
-    return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector());
-  }
-
-  int* vector_;
-  int offsets_vector_length_;
-
-  friend class ExternalReference;
-};
-
-
 } }  // namespace v8::internal
 
 #endif  // V8_JSREGEXP_H_
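diff --git a/src/mips/code-stubs-mips.cc b/src/mips/code-stubs-mips.cc
index abc82a7..a5c80b8 100644
--- a/src/mips/code-stubs-mips.cc
+++ b/src/mips/code-stubs-mips.cc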
@@ -4977,7 +4977,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
   STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
   __ Addu(a2, a2, Operand(2));  // a2 was a smi.
   // Check that the static offsets vector buffer is large enough.
-  __ Branch(&runtime, hi, a2, Operand(OffsetsVector::kStaticOffsetsVectorSize));
+  __ Branch(
+      &runtime, hi, a2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize));
 
   // a2: Number of capture registers
   // regexp_data: RegExp data (FixedArray)
diff --git a/src/runtime.cc b/src/runtime.cc
index 5b0bfc3..466ee32 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -2574,28 +2574,24 @@ class ReplacementStringBuilder {
 class CompiledReplacement {
  public:
   explicit CompiledReplacement(Zone* zone)
-      : parts_(1, zone), replacement_substrings_(0, zone),
-        simple_hint_(false),
-        zone_(zone) {}
+      : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
 
-  void Compile(Handle<String> replacement,
+  // Return whether the replacement is simple.
+  bool Compile(Handle<String> replacement,
                int capture_count,
                int subject_length);
 
+  // Use Apply only if Compile returned false.
   void Apply(ReplacementStringBuilder* builder,
              int match_from,
              int match_to,
-             Handle<JSArray> last_match_info);
+             int32_t* match);
 
   // Number of distinct parts of the replacement pattern.
   int parts() {
     return parts_.length();
   }
 
-  bool simple_hint() {
-    return simple_hint_;
-  }
-
   Zone* zone() const { return zone_; }
 
  private:
@@ -2656,11 +2652,11 @@ class CompiledReplacement {
   };
 
   template<typename Char>
-  static bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
-                                      Vector<Char> characters,
-                                      int capture_count,
-                                      int subject_length,
-                                      Zone* zone) {
+  bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
+                               Vector<Char> characters,
+                               int capture_count,
+                               int subject_length,
+                               Zone* zone) {
     int length = characters.length();
     int last = 0;
     for (int i = 0; i < length; i++) {
@@ -2754,7 +2750,7 @@ class CompiledReplacement {
     }
     if (length > last) {
       if (last == 0) {
-        parts->Add(ReplacementPart::ReplacementString(), zone);
+        // Replacement is simple.  Do not use Apply to do the replacement.
         return true;
       } else {
         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
@@ -2765,33 +2761,35 @@ class CompiledReplacement {
 
   ZoneList<ReplacementPart> parts_;
   ZoneList<Handle<String> > replacement_substrings_;
-  bool simple_hint_;
   Zone* zone_;
 };
 
 
-void CompiledReplacement::Compile(Handle<String> replacement,
+bool CompiledReplacement::Compile(Handle<String> replacement,
                                   int capture_count,
                                   int subject_length) {
   {
     AssertNoAllocation no_alloc;
     String::FlatContent content = replacement->GetFlatContent();
     ASSERT(content.IsFlat());
+    bool simple = false;
     if (content.IsAscii()) {
-      simple_hint_ = ParseReplacementPattern(&parts_,
-                                             content.ToAsciiVector(),
-                                             capture_count,
-                                             subject_length,
-                                             zone());
+      simple = ParseReplacementPattern(&parts_,
+                                       content.ToAsciiVector(),
+                                       capture_count,
+                                       subject_length,
+                                       zone());
     } else {
       ASSERT(content.IsTwoByte());
-      simple_hint_ = ParseReplacementPattern(&parts_,
-                                             content.ToUC16Vector(),
-                                             capture_count,
-                                             subject_length,
-                                             zone());
+      simple = ParseReplacementPattern(&parts_,
+                                       content.ToUC16Vector(),
+                                       capture_count,
+                                       subject_length,
+                                       zone());
     }
+    if (simple) return true;
   }
+
   Isolate* isolate = replacement->GetIsolate();
   // Find substrings of replacement string and create them as String objects.
   int substring_index = 0;
@@ -2811,13 +2809,15 @@ void CompiledReplacement::Compile(Handle<String> replacement,
       substring_index++;
     }
   }
+  return false;
 }
 
 
 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
                                 int match_from,
                                 int match_to,
-                                Handle<JSArray> last_match_info) {
+                                int32_t* match) {
+  ASSERT_LT(0, parts_.length());
   for (int i = 0, n = parts_.length(); i < n; i++) {
     ReplacementPart part = parts_[i];
     switch (part.tag) {
@@ -2833,9 +2833,8 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
       }
       case SUBJECT_CAPTURE: {
         int capture = part.data;
-        FixedArray* match_info = FixedArray::cast(last_match_info->elements());
-        int from = RegExpImpl::GetCapture(match_info, capture * 2);
-        int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1);
+        int from = match[capture * 2];
+        int to = match[capture * 2 + 1];
         if (from >= 0 && to > from) {
           builder->AddSubjectSlice(from, to);
         }
@@ -2957,85 +2956,19 @@ void FindStringIndicesDispatch(Isolate* isolate,
 }
 
 
-// Two smis before and after the match, for very long strings.
-const int kMaxBuilderEntriesPerRegExpMatch = 5;
-
-
-static void SetLastMatchInfoNoCaptures(Handle<String> subject,
-                                       Handle<JSArray> last_match_info,
-                                       int match_start,
-                                       int match_end) {
-  // Fill last_match_info with a single capture.
-  last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
-  AssertNoAllocation no_gc;
-  FixedArray* elements = FixedArray::cast(last_match_info->elements());
-  RegExpImpl::SetLastCaptureCount(elements, 2);
-  RegExpImpl::SetLastInput(elements, *subject);
-  RegExpImpl::SetLastSubject(elements, *subject);
-  RegExpImpl::SetCapture(elements, 0, match_start);
-  RegExpImpl::SetCapture(elements, 1, match_end);
-}
-
-
-template <typename SubjectChar, typename PatternChar>
-static bool SearchStringMultiple(Isolate* isolate,
-                                 Vector<const SubjectChar> subject,
-                                 Vector<const PatternChar> pattern,
-                                 String* pattern_string,
-                                 FixedArrayBuilder* builder,
-                                 int* match_pos) {
-  int pos = *match_pos;
-  int subject_length = subject.length();
-  int pattern_length = pattern.length();
-  int max_search_start = subject_length - pattern_length;
-  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
-  while (pos <= max_search_start) {
-    if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
-      *match_pos = pos;
-      return false;
-    }
-    // Position of end of previous match.
-    int match_end = pos + pattern_length;
-    int new_pos = search.Search(subject, match_end);
-    if (new_pos >= 0) {
-      // A match.
-      if (new_pos > match_end) {
-        ReplacementStringBuilder::AddSubjectSlice(builder,
-            match_end,
-            new_pos);
-      }
-      pos = new_pos;
-      builder->Add(pattern_string);
-    } else {
-      break;
-    }
-  }
-
-  if (pos < max_search_start) {
-    ReplacementStringBuilder::AddSubjectSlice(builder,
-                                              pos + pattern_length,
-                                              subject_length);
-  }
-  *match_pos = pos;
-  return true;
-}
-
-
-
-
 template<typename ResultSeqString>
 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
     Isolate* isolate,
     Handle<String> subject,
     Handle<JSRegExp> pattern_regexp,
     Handle<String> replacement,
-    Handle<JSArray> last_match_info,
-    Zone* zone) {
+    Handle<JSArray> last_match_info) {
   ASSERT(subject->IsFlat());
   ASSERT(replacement->IsFlat());
 
-  ZoneScope zone_space(isolate->runtime_zone(), DELETE_ON_EXIT);
-  ZoneList<int> indices(8, isolate->runtime_zone());
+  Zone* zone = isolate->runtime_zone();
+  ZoneScope zone_space(zone, DELETE_ON_EXIT);
+  ZoneList<int> indices(8, zone);
   ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
   String* pattern =
       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
@@ -3043,8 +2976,8 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
   int pattern_len = pattern->length();
   int replacement_len = replacement->length();
 
-  FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
-                            zone);
+  FindStringIndicesDispatch(
+      isolate, *subject, pattern, &indices, 0xffffffff, zone);
 
   int matches = indices.length();
   if (matches == 0) return *subject;
@@ -3099,10 +3032,9 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
                         subject_len);
   }
 
-  SetLastMatchInfoNoCaptures(subject,
-                             last_match_info,
-                             indices.at(matches - 1),
-                             indices.at(matches - 1) + pattern_len);
+  int32_t match_indices[] = { indices.at(matches - 1),
+                              indices.at(matches - 1) + pattern_len };
+  RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
 
   return *result;
 }
@@ -3110,139 +3042,101 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
 
 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
     Isolate* isolate,
-    String* subject,
-    JSRegExp* regexp,
-    String* replacement,
-    JSArray* last_match_info,
-    Zone* zone) {
+    Handle<String> subject,
+    Handle<JSRegExp> regexp,
+    Handle<String> replacement,
+    Handle<JSArray> last_match_info) {
   ASSERT(subject->IsFlat());
   ASSERT(replacement->IsFlat());
 
-  HandleScope handles(isolate);
-
-  int length = subject->length();
-  Handle<String> subject_handle(subject);
-  Handle<JSRegExp> regexp_handle(regexp);
-  Handle<String> replacement_handle(replacement);
-  Handle<JSArray> last_match_info_handle(last_match_info);
-  Handle<Object> match = RegExpImpl::Exec(regexp_handle,
-                                          subject_handle,
-                                          0,
-                                          last_match_info_handle);
-  if (match.is_null()) {
-    return Failure::Exception();
-  }
-  if (match->IsNull()) {
-    return *subject_handle;
-  }
-
-  int capture_count = regexp_handle->CaptureCount();
+  bool is_global = regexp->GetFlags().is_global();
+  int capture_count = regexp->CaptureCount();
+  int subject_length = subject->length();
 
   // CompiledReplacement uses zone allocation.
+  Zone* zone = isolate->runtime_zone();
   ZoneScope zonescope(zone, DELETE_ON_EXIT);
   CompiledReplacement compiled_replacement(zone);
-
-  compiled_replacement.Compile(replacement_handle,
-                               capture_count,
-                               length);
-
-  bool is_global = regexp_handle->GetFlags().is_global();
+  bool simple_replace = compiled_replacement.Compile(replacement,
+                                                     capture_count,
+                                                     subject_length);
 
   // Shortcut for simple non-regexp global replacements
   if (is_global &&
-          regexp_handle->TypeTag() == JSRegExp::ATOM &&
-           compiled_replacement.simple_hint()) {
-         if (subject_handle->HasOnlyAsciiChars() &&
-             replacement_handle->HasOnlyAsciiChars()) {
+      regexp->TypeTag() == JSRegExp::ATOM &&
+      simple_replace) {
+    if (subject->HasOnlyAsciiChars() && replacement->HasOnlyAsciiChars()) {
       return StringReplaceAtomRegExpWithString<SeqAsciiString>(
-                 isolate,
-                 subject_handle,
-                 regexp_handle,
-                 replacement_handle,
-                 last_match_info_handle,
-                 zone);
-         } else {
+          isolate, subject, regexp, replacement, last_match_info);
+    } else {
       return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
-                 isolate,
-                 subject_handle,
-                 regexp_handle,
-                 replacement_handle,
-                 last_match_info_handle,
-                 zone);
+          isolate, subject, regexp, replacement, last_match_info);
     }
   }
 
+  RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
+  if (global_cache.HasException()) return Failure::Exception();
+
+  int32_t* current_match = global_cache.FetchNext();
+  if (current_match == NULL) {
+    if (global_cache.HasException()) return Failure::Exception();
+    return *subject;
+  }
+
   // Guessing the number of parts that the final result string is built
   // from. Global regexps can match any number of times, so we guess
   // conservatively.
   int expected_parts =
       (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
   ReplacementStringBuilder builder(isolate->heap(),
-                                   subject_handle,
+                                   subject,
                                    expected_parts);
 
-  // Index of end of last match.
-  int prev = 0;
-
-
   // Number of parts added by compiled replacement plus preceeding
   // string and possibly suffix after last match.  It is possible for
   // all components to use two elements when encoded as two smis.
   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
-  bool matched = true;
+
+  int prev = 0;
+
   do {
-    ASSERT(last_match_info_handle->HasFastObjectElements());
-    // Increase the capacity of the builder before entering local handle-scope,
-    // so its internal buffer can safely allocate a new handle if it grows.
     builder.EnsureCapacity(parts_added_per_loop);
 
-    HandleScope loop_scope(isolate);
-    int start, end;
-    {
-      AssertNoAllocation match_info_array_is_not_in_a_handle;
-      FixedArray* match_info_array =
-          FixedArray::cast(last_match_info_handle->elements());
-
-      ASSERT_EQ(capture_count * 2 + 2,
-                RegExpImpl::GetLastCaptureCount(match_info_array));
-      start = RegExpImpl::GetCapture(match_info_array, 0);
-      end = RegExpImpl::GetCapture(match_info_array, 1);
-    }
+    int start = current_match[0];
+    int end = current_match[1];
 
     if (prev < start) {
       builder.AddSubjectSlice(prev, start);
     }
-    compiled_replacement.Apply(&builder,
-                               start,
-                               end,
-                               last_match_info_handle);
 
+    if (simple_replace) {
+      builder.AddString(replacement);
+    } else {
+      compiled_replacement.Apply(&builder,
+                                 start,
+                                 end,
+                                 current_match);
+    }
     prev = end;
 
     // Only continue checking for global regexps.
     if (!is_global) break;
 
-    // Continue from where the match ended, unless it was an empty match.
-    int next = end;
-    if (start == end) {
-      next = end + 1;
-      if (next > length) break;
-    }
+    current_match = global_cache.FetchNext();
+  } while (current_match != NULL);
 
-    match = RegExpImpl::Exec(regexp_handle,
-                             subject_handle,
-                             next,
-                             last_match_info_handle);
-    if (match.is_null()) {
-      return Failure::Exception();
-    }
-    matched = !match->IsNull();
-  } while (matched);
+  if (global_cache.HasException()) return Failure::Exception();
 
-  if (prev < length) {
-    builder.AddSubjectSlice(prev, length);
+  if (prev < subject_length) {
+    builder.EnsureCapacity(2);
+    builder.AddSubjectSlice(prev, subject_length);
   }
 
+  RegExpImpl::SetLastMatchInfo(last_match_info,
+                               subject,
+                               capture_count,
+                               global_cache.LastSuccessfulMatch());
+
   return *(builder.ToString());
 }
 
@@ -3250,69 +3144,51 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
 template <typename ResultSeqString>
 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
     Isolate* isolate,
-    String* subject,
-    JSRegExp* regexp,
-    JSArray* last_match_info,
-    Zone* zone) {
+    Handle<String> subject,
+    Handle<JSRegExp> regexp,
+    Handle<JSArray> last_match_info) {
   ASSERT(subject->IsFlat());
 
-  HandleScope handles(isolate);
-
-  Handle<String> subject_handle(subject);
-  Handle<JSRegExp> regexp_handle(regexp);
-  Handle<JSArray> last_match_info_handle(last_match_info);
+  bool is_global = regexp->GetFlags().is_global();
 
   // Shortcut for simple non-regexp global replacements
-  if (regexp_handle->GetFlags().is_global() &&
-      regexp_handle->TypeTag() == JSRegExp::ATOM) {
-    Handle<String> empty_string_handle(HEAP->empty_string());
-    if (subject_handle->HasOnlyAsciiChars()) {
+  if (is_global &&
+      regexp->TypeTag() == JSRegExp::ATOM) {
+    Handle<String> empty_string(HEAP->empty_string());
+    if (subject->HasOnlyAsciiChars()) {
       return StringReplaceAtomRegExpWithString<SeqAsciiString>(
           isolate,
-          subject_handle,
-          regexp_handle,
-          empty_string_handle,
-          last_match_info_handle,
-          zone);
+          subject,
+          regexp,
+          empty_string,
+          last_match_info);
     } else {
       return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
           isolate,
-          subject_handle,
-          regexp_handle,
-          empty_string_handle,
-          last_match_info_handle,
-          zone);
+          subject,
+          regexp,
+          empty_string,
+          last_match_info);
     }
   }
 
-  Handle<Object> match = RegExpImpl::Exec(regexp_handle,
-                                          subject_handle,
-                                          0,
-                                          last_match_info_handle);
-  if (match.is_null()) return Failure::Exception();
-  if (match->IsNull()) return *subject_handle;
-
-  ASSERT(last_match_info_handle->HasFastObjectElements());
+  RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
+  if (global_cache.HasException()) return Failure::Exception();
 
-  int start, end;
-  {
-    AssertNoAllocation match_info_array_is_not_in_a_handle;
-    FixedArray* match_info_array =
-        FixedArray::cast(last_match_info_handle->elements());
-
-    start = RegExpImpl::GetCapture(match_info_array, 0);
-    end = RegExpImpl::GetCapture(match_info_array, 1);
+  int32_t* current_match = global_cache.FetchNext();
+  if (current_match == NULL) {
+    if (global_cache.HasException()) return Failure::Exception();
+    return *subject;
   }
 
-  bool global = regexp_handle->GetFlags().is_global();
+  int start = current_match[0];
+  int end = current_match[1];
+  int capture_count = regexp->CaptureCount();
+  int subject_length = subject->length();
 
-  if (start == end && !global) return *subject_handle;
+  int new_length = subject_length - (end - start);
+  if (new_length == 0) return isolate->heap()->empty_string();
 
-  int length = subject_handle->length();
-  int new_length = length - (end - start);
-  if (new_length == 0) {
-    return isolate->heap()->empty_string();
-  }
   Handle<ResultSeqString> answer;
   if (ResultSeqString::kHasAsciiEncoding) {
     answer = Handle<ResultSeqString>::cast(
@@ -3322,73 +3198,55 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
         isolate->factory()->NewRawTwoByteString(new_length));
   }
 
-  // If the regexp isn't global, only match once.
-  if (!global) {
-    if (start > 0) {
-      String::WriteToFlat(*subject_handle,
-                          answer->GetChars(),
-                          0,
-                          start);
-    }
-    if (end < length) {
-      String::WriteToFlat(*subject_handle,
-                          answer->GetChars() + start,
-                          end,
-                          length);
+  if (!is_global) {
+    RegExpImpl::SetLastMatchInfo(
+        last_match_info, subject, capture_count, current_match);
+    if (start == end) {
+      return *subject;
+    } else {
+      if (start > 0) {
+        String::WriteToFlat(*subject, answer->GetChars(), 0, start);
+      }
+      if (end < subject_length) {
+        String::WriteToFlat(
+            *subject, answer->GetChars() + start, end, subject_length);
+      }
+      return *answer;
     }
-    return *answer;
   }
 
-  int prev = 0;  // Index of end of last match.
-  int next = 0;  // Start of next search (prev unless last match was empty).
+  int prev = 0;
   int position = 0;
 
   do {
+    start = current_match[0];
+    end = current_match[1];
     if (prev < start) {
       // Add substring subject[prev;start] to answer string.
-      String::WriteToFlat(*subject_handle,
-                          answer->GetChars() + position,
-                          prev,
-                          start);
+      String::WriteToFlat(
+          *subject, answer->GetChars() + position, prev, start);
       position += start - prev;
     }
     prev = end;
-    next = end;
-    // Continue from where the match ended, unless it was an empty match.
-    if (start == end) {
-      next++;
-      if (next > length) break;
-    }
-    match = RegExpImpl::Exec(regexp_handle,
-                             subject_handle,
-                             next,
-                             last_match_info_handle);
-    if (match.is_null()) return Failure::Exception();
-    if (match->IsNull()) break;
-
-    ASSERT(last_match_info_handle->HasFastObjectElements());
-    HandleScope loop_scope(isolate);
-    {
-      AssertNoAllocation match_info_array_is_not_in_a_handle;
-      FixedArray* match_info_array =
-          FixedArray::cast(last_match_info_handle->elements());
-      start = RegExpImpl::GetCapture(match_info_array, 0);
-      end = RegExpImpl::GetCapture(match_info_array, 1);
-    }
-  } while (true);
 
-  if (prev < length) {
+    current_match = global_cache.FetchNext();
+  } while (current_match != NULL);
+
+  if (global_cache.HasException()) return Failure::Exception();
+
+  RegExpImpl::SetLastMatchInfo(last_match_info,
+                               subject,
+                               capture_count,
+                               global_cache.LastSuccessfulMatch());
+
+  if (prev < subject_length) {
     // Add substring subject[prev;length] to answer string.
-    String::WriteToFlat(*subject_handle,
-                        answer->GetChars() + position,
-                        prev,
-                        length);
-    position += length - prev;
+    String::WriteToFlat(
+        *subject, answer->GetChars() + position, prev, subject_length);
+    position += subject_length - prev;
   }
 
-  if (position == 0) {
-    return isolate->heap()->empty_string();
-  }
+  if (position == 0) return isolate->heap()->empty_string();
 
   // Shorten string and fill
   int string_size = ResultSeqString::SizeFor(position);
@@ -3411,50 +3269,31 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
   ASSERT(args.length() == 4);
 
-  CONVERT_ARG_CHECKED(String, subject, 0);
-  if (!subject->IsFlat()) {
-    Object* flat_subject;
-    { MaybeObject* maybe_flat_subject = subject->TryFlatten();
-      if (!maybe_flat_subject->ToObject(&flat_subject)) {
-        return maybe_flat_subject;
-      }
-    }
-    subject = String::cast(flat_subject);
-  }
+  HandleScope scope(isolate);
 
-  CONVERT_ARG_CHECKED(String, replacement, 2);
-  if (!replacement->IsFlat()) {
-    Object* flat_replacement;
-    { MaybeObject* maybe_flat_replacement = replacement->TryFlatten();
-      if (!maybe_flat_replacement->ToObject(&flat_replacement)) {
-        return maybe_flat_replacement;
-      }
-    }
-    replacement = String::cast(flat_replacement);
-  }
+  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
+  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
+  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
+  CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
 
-  CONVERT_ARG_CHECKED(JSRegExp, regexp, 1);
-  CONVERT_ARG_CHECKED(JSArray, last_match_info, 3);
+  if (!subject->IsFlat()) subject = FlattenGetString(subject);
+
+  if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
 
   ASSERT(last_match_info->HasFastObjectElements());
 
-  Zone* zone = isolate->runtime_zone();
   if (replacement->length() == 0) {
     if (subject->HasOnlyAsciiChars()) {
       return StringReplaceRegExpWithEmptyString<SeqAsciiString>(
-          isolate, subject, regexp, last_match_info, zone);
+          isolate, subject, regexp, last_match_info);
     } else {
       return StringReplaceRegExpWithEmptyString<SeqTwoByteString>(
-          isolate, subject, regexp, last_match_info, zone);
+          isolate, subject, regexp, last_match_info);
     }
   }
 
-  return StringReplaceRegExpWithString(isolate,
-                                       subject,
-                                       regexp,
-                                       replacement,
-                                       last_match_info,
-                                       zone);
+  return StringReplaceRegExpWithString(
+      isolate, subject, regexp, replacement, last_match_info);
 }
 
 
@@ -3777,46 +3616,45 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
   CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2);
   HandleScope handles;
 
-  Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info);
+  RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
+  if (global_cache.HasException()) return Failure::Exception();
 
-  if (match.is_null()) {
-    return Failure::Exception();
-  }
-  if (match->IsNull()) {
-    return isolate->heap()->null_value();
-  }
-  int length = subject->length();
+  int capture_count = regexp->CaptureCount();
 
   Zone* zone = isolate->runtime_zone();
   ZoneScope zone_space(zone, DELETE_ON_EXIT);
   ZoneList<int> offsets(8, zone);
-  int start;
-  int end;
-  do {
-    {
-      AssertNoAllocation no_alloc;
-      FixedArray* elements = FixedArray::cast(regexp_info->elements());
-      start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value();
-      end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value();
-    }
-    offsets.Add(start, zone);
-    offsets.Add(end, zone);
-    if (start == end) if (++end > length) break;
-    match = RegExpImpl::Exec(regexp, subject, end, regexp_info);
-    if (match.is_null()) {
-      return Failure::Exception();
-    }
-  } while (!match->IsNull());
+
+  while (true) {
+    int32_t* match = global_cache.FetchNext();
+    if (match == NULL) break;
+    offsets.Add(match[0], zone);  // start
+    offsets.Add(match[1], zone);  // end
+  }
+
+  if (global_cache.HasException()) return Failure::Exception();
+
+  if (offsets.length() == 0) {
+    // Not a single match.
+    return isolate->heap()->null_value();
+  }
+
+  RegExpImpl::SetLastMatchInfo(regexp_info,
+                               subject,
+                               capture_count,
+                               global_cache.LastSuccessfulMatch());
+
   int matches = offsets.length() / 2;
   Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches);
-  Handle<String> substring = isolate->factory()->
-    NewSubString(subject, offsets.at(0), offsets.at(1));
+  Handle<String> substring =
+      isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1));
   elements->set(0, *substring);
-  for (int i = 1; i < matches ; i++) {
+  for (int i = 1; i < matches; i++) {
+    HandleScope temp_scope(isolate);
     int from = offsets.at(i * 2);
     int to = offsets.at(i * 2 + 1);
-    Handle<String> substring = isolate->factory()->
-        NewProperSubString(subject, from, to);
+    Handle<String> substring =
+        isolate->factory()->NewProperSubString(subject, from, to);
     elements->set(i, *substring);
   }
   Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements);
@@ -3825,294 +3663,104 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
 }
 
 
-static bool SearchStringMultiple(Isolate* isolate,
-                                 Handle<String> subject,
-                                 Handle<String> pattern,
-                                 Handle<JSArray> last_match_info,
-                                 FixedArrayBuilder* builder) {
-  ASSERT(subject->IsFlat());
-  ASSERT(pattern->IsFlat());
-
-  // Treating as if a previous match was before first character.
-  int match_pos = -pattern->length();
-
-  for (;;) {  // Break when search complete.
-    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
-    AssertNoAllocation no_gc;
-    String::FlatContent subject_content = subject->GetFlatContent();
-    String::FlatContent pattern_content = pattern->GetFlatContent();
-    if (subject_content.IsAscii()) {
-      Vector<const char> subject_vector = subject_content.ToAsciiVector();
-      if (pattern_content.IsAscii()) {
-        if (SearchStringMultiple(isolate,
-                                 subject_vector,
-                                 pattern_content.ToAsciiVector(),
-                                 *pattern,
-                                 builder,
-                                 &match_pos)) break;
-      } else {
-        if (SearchStringMultiple(isolate,
-                                 subject_vector,
-                                 pattern_content.ToUC16Vector(),
-                                 *pattern,
-                                 builder,
-                                 &match_pos)) break;
-      }
-    } else {
-      Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
-      if (pattern_content.IsAscii()) {
-        if (SearchStringMultiple(isolate,
-                                 subject_vector,
-                                 pattern_content.ToAsciiVector(),
-                                 *pattern,
-                                 builder,
-                                 &match_pos)) break;
-      } else {
-        if (SearchStringMultiple(isolate,
-                                 subject_vector,
-                                 pattern_content.ToUC16Vector(),
-                                 *pattern,
-                                 builder,
-                                 &match_pos)) break;
-      }
-    }
-  }
-
-  if (match_pos >= 0) {
-    SetLastMatchInfoNoCaptures(subject,
-                               last_match_info,
-                               match_pos,
-                               match_pos + pattern->length());
-    return true;
-  }
-  return false;  // No matches at all.
-}
-
-
-static int SearchRegExpNoCaptureMultiple(
-    Isolate* isolate,
-    Handle<String> subject,
-    Handle<JSRegExp> regexp,
-    Handle<JSArray> last_match_array,
-    FixedArrayBuilder* builder) {
-  ASSERT(subject->IsFlat());
-  ASSERT(regexp->CaptureCount() == 0);
-  int match_start = -1;
-  int match_end = 0;
-  int pos = 0;
-  int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
-  if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
-
-  int max_matches;
-  int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
-                                                          registers_per_match,
-                                                          &max_matches);
-  OffsetsVector registers(num_registers, isolate);
-  Vector<int32_t> register_vector(registers.vector(), registers.length());
-  int subject_length = subject->length();
-  bool first = true;
-  for (;;) {  // Break on failure, return on exception.
-    int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
-                                                  subject,
-                                                  pos,
-                                                  register_vector);
-    if (num_matches > 0) {
-      for (int match_index = 0; match_index < num_matches; match_index++) {
-        int32_t* current_match = &register_vector[match_index * 2];
-        match_start = current_match[0];
-        builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
-        if (match_end < match_start) {
-          ReplacementStringBuilder::AddSubjectSlice(builder,
-                                                    match_end,
-                                                    match_start);
-        }
-        match_end = current_match[1];
-        HandleScope loop_scope(isolate);
-        if (!first) {
-          builder->Add(*isolate->factory()->NewProperSubString(subject,
-                                                               match_start,
-                                                               match_end));
-        } else {
-          builder->Add(*isolate->factory()->NewSubString(subject,
-                                                         match_start,
-                                                         match_end));
-          first = false;
-        }
-      }
-
-      // If we did not get the maximum number of matches, we can stop here
-      // since there are no matches left.
-      if (num_matches < max_matches) break;
-
-      if (match_start != match_end) {
-        pos = match_end;
-      } else {
-        pos = match_end + 1;
-        if (pos > subject_length) break;
-      }
-    } else if (num_matches == 0) {
-      break;
-    } else {
-      ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION);
-      return RegExpImpl::RE_EXCEPTION;
-    }
-  }
-
-  if (match_start >= 0) {
-    if (match_end < subject_length) {
-      ReplacementStringBuilder::AddSubjectSlice(builder,
-                                                match_end,
-                                                subject_length);
-    }
-    SetLastMatchInfoNoCaptures(subject,
-                               last_match_array,
-                               match_start,
-                               match_end);
-    return RegExpImpl::RE_SUCCESS;
-  } else {
-    return RegExpImpl::RE_FAILURE;  // No matches at all.
-  }
-}
-
-
 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
 // separate last match info.  See comment on that function.
+template<bool has_capture>
 static int SearchRegExpMultiple(
     Isolate* isolate,
     Handle<String> subject,
     Handle<JSRegExp> regexp,
     Handle<JSArray> last_match_array,
-    FixedArrayBuilder* builder,
-    Zone* zone) {
-
+    FixedArrayBuilder* builder) {
   ASSERT(subject->IsFlat());
-  int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
-  if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
+  ASSERT_NE(has_capture, regexp->CaptureCount() == 0);
 
-  int max_matches;
-  int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
-                                                          registers_per_match,
-                                                          &max_matches);
-  OffsetsVector registers(num_registers, isolate);
-  Vector<int32_t> register_vector(registers.vector(), registers.length());
-
-  int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
-                                                subject,
-                                                0,
-                                                register_vector);
+  RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
+  if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
 
   int capture_count = regexp->CaptureCount();
   int subject_length = subject->length();
 
   // Position to search from.
-  int pos = 0;
-  // End of previous match. Differs from pos if match was empty.
+  int match_start = -1;
   int match_end = 0;
   bool first = true;
 
-  if (num_matches > 0) {
-    do {
-      int match_start = 0;
-      for (int match_index = 0; match_index < num_matches; match_index++) {
-        int32_t* current_match =
-            &register_vector[match_index * registers_per_match];
-        match_start = current_match[0];
-        builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
-        if (match_end < match_start) {
-          ReplacementStringBuilder::AddSubjectSlice(builder,
-                                                    match_end,
-                                                    match_start);
-        }
-        match_end = current_match[1];
-
-        {
-          // Avoid accumulating new handles inside loop.
-          HandleScope temp_scope(isolate);
-          // Arguments array to replace function is match, captures, index and
-          // subject, i.e., 3 + capture count in total.
-          Handle<FixedArray> elements =
-              isolate->factory()->NewFixedArray(3 + capture_count);
-          Handle<String> match;
-          if (!first) {
-            match = isolate->factory()->NewProperSubString(subject,
-                                                           match_start,
-                                                           match_end);
-          } else {
-            match = isolate->factory()->NewSubString(subject,
-                                                     match_start,
-                                                     match_end);
-          }
-          elements->set(0, *match);
-          for (int i = 1; i <= capture_count; i++) {
-            int start = current_match[i * 2];
-            if (start >= 0) {
-              int end = current_match[i * 2 + 1];
-              ASSERT(start <= end);
-              Handle<String> substring;
-              if (!first) {
-                substring =
-                    isolate->factory()->NewProperSubString(subject, start, end);
-              } else {
-                substring =
-                    isolate->factory()->NewSubString(subject, start, end);
-              }
-              elements->set(i, *substring);
-            } else {
-              ASSERT(current_match[i * 2 + 1] < 0);
-              elements->set(i, isolate->heap()->undefined_value());
-            }
-          }
-          elements->set(capture_count + 1, Smi::FromInt(match_start));
-          elements->set(capture_count + 2, *subject);
-          builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
-        }
+  // Two smis before and after the match, for very long strings.
+  static const int kMaxBuilderEntriesPerRegExpMatch = 5;
+
+  while (true) {
+    int32_t* current_match = global_cache.FetchNext();
+    if (current_match == NULL) break;
+    match_start = current_match[0];
+    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+    if (match_end < match_start) {
+      ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                match_end,
+                                                match_start);
+    }
+    match_end = current_match[1];
+    {
+      // Avoid accumulating new handles inside loop.
+      HandleScope temp_scope(isolate);
+      Handle<String> match;
+      if (!first) {
+        match = isolate->factory()->NewProperSubString(subject,
+                                                       match_start,
+                                                       match_end);
+      } else {
+        match = isolate->factory()->NewSubString(subject,
+                                                 match_start,
+                                                 match_end);
         first = false;
       }
 
-      // If we did not get the maximum number of matches, we can stop here
-      // since there are no matches left.
-      if (num_matches < max_matches) break;
-
-      if (match_end > match_start) {
-        pos = match_end;
-      } else {
-        pos = match_end + 1;
-        if (pos > subject_length) {
-          break;
+      if (has_capture) {
+        // Arguments array to replace function is match, captures, index and
+        // subject, i.e., 3 + capture count in total.
+        Handle<FixedArray> elements =
+            isolate->factory()->NewFixedArray(3 + capture_count);
+
+        elements->set(0, *match);
+        for (int i = 1; i <= capture_count; i++) {
+          int start = current_match[i * 2];
+          if (start >= 0) {
+            int end = current_match[i * 2 + 1];
+            ASSERT(start <= end);
+            Handle<String> substring =
+                isolate->factory()->NewSubString(subject, start, end);
+            elements->set(i, *substring);
+          } else {
+            ASSERT(current_match[i * 2 + 1] < 0);
+            elements->set(i, isolate->heap()->undefined_value());
+          }
         }
+        elements->set(capture_count + 1, Smi::FromInt(match_start));
+        elements->set(capture_count + 2, *subject);
+        builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
+      } else {
+        builder->Add(*match);
       }
+    }
+  }
 
-      num_matches = RegExpImpl::IrregexpExecRaw(regexp,
-                                                subject,
-                                                pos,
-                                                register_vector);
-    } while (num_matches > 0);
-
-    if (num_matches != RegExpImpl::RE_EXCEPTION) {
-      // Finished matching, with at least one match.
-      if (match_end < subject_length) {
-        ReplacementStringBuilder::AddSubjectSlice(builder,
-                                                  match_end,
-                                                  subject_length);
-      }
+  if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
 
-      int last_match_capture_count = (capture_count + 1) * 2;
-      int last_match_array_size =
-          last_match_capture_count + RegExpImpl::kLastMatchOverhead;
-      last_match_array->EnsureSize(last_match_array_size);
-      AssertNoAllocation no_gc;
-      FixedArray* elements = FixedArray::cast(last_match_array->elements());
-      // We have to set this even though the rest of the last match array is
-      // ignored.
-      RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
-      // These are also read without consulting the override.
-      RegExpImpl::SetLastSubject(elements, *subject);
-      RegExpImpl::SetLastInput(elements, *subject);
-      return RegExpImpl::RE_SUCCESS;
+  if (match_start >= 0) {
+    // Finished matching, with at least one match.
+    if (match_end < subject_length) {
+      ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                match_end,
+                                                subject_length);
     }
+
+    RegExpImpl::SetLastMatchInfo(
+        last_match_array, subject, capture_count, NULL);
+
+    return RegExpImpl::RE_SUCCESS;
+  } else {
+    return RegExpImpl::RE_FAILURE;  // No matches at all.
   }
-  // No matches at all, return failure or exception result directly.
-  return num_matches;
 }
 
 
@@ -4141,34 +3789,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
   }
   FixedArrayBuilder builder(result_elements);
 
-  if (regexp->TypeTag() == JSRegExp::ATOM) {
-    Handle<String> pattern(
-        String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
-    ASSERT(pattern->IsFlat());
-    if (SearchStringMultiple(isolate, subject, pattern,
-                             last_match_info, &builder)) {
-      return *builder.ToJSArray(result_array);
-    }
-    return isolate->heap()->null_value();
-  }
-
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
-
   int result;
   if (regexp->CaptureCount() == 0) {
-    result = SearchRegExpNoCaptureMultiple(isolate,
-                                           subject,
-                                           regexp,
-                                           last_match_info,
-                                           &builder);
+    result = SearchRegExpMultiple<false>(
+        isolate, subject, regexp, last_match_info, &builder);
   } else {
-    result = SearchRegExpMultiple(isolate,
-                                  subject,
-                                  regexp,
-                                  last_match_info,
-                                  &builder,
-                                  isolate->runtime_zone());
+    result = SearchRegExpMultiple<true>(
+        isolate, subject, regexp, last_match_info, &builder);
   }
+
   if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
   if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value();
   ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
index 9c0ebf9..ec9c69f 100644 (file)
@@ -29,6 +29,7 @@
 #define V8_UNICODE_INL_H_
 
 #include "unicode.h"
+#include "checks.h"
 
 namespace unibrow {
 
@@ -144,6 +145,7 @@ uchar CharacterStream::GetNext() {
   } else {
     remaining_--;
   }
+  ASSERT(BoundsCheck(cursor_));
   return result;
 }
 
index 94ab1b4..91b16c9 100644 (file)
@@ -201,6 +201,7 @@ class CharacterStream {
 
  protected:
   virtual void FillBuffer() = 0;
+  virtual bool BoundsCheck(unsigned offset) = 0;
   // The number of characters left in the current buffer
   unsigned remaining_;
   // The current offset within the buffer
@@ -228,6 +229,9 @@ class InputBuffer : public CharacterStream {
   InputBuffer() { }
   explicit InputBuffer(Input input) { Reset(input); }
   virtual void FillBuffer();
+  virtual bool BoundsCheck(unsigned offset) {
+    return (buffer_ != util_buffer_) || (offset < kSize);
+  }
 
   // A custom offset that can be used by the string implementation to
   // mark progress within the encoded string.
index 832616e..3fa93b2 100644 (file)
@@ -2791,7 +2791,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
   // Calculate number of capture registers (number_of_captures + 1) * 2.
   __ leal(rdx, Operand(rdx, rdx, times_1, 2));
   // Check that the static offsets vector buffer is large enough.
-  __ cmpl(rdx, Immediate(OffsetsVector::kStaticOffsetsVectorSize));
+  __ cmpl(rdx, Immediate(Isolate::kJSRegexpStaticOffsetsVectorSize));
   __ j(above, &runtime);
 
   // rax: RegExp data (FixedArray)
index 50356e7..e433b92 100644 (file)
@@ -267,6 +267,7 @@ TEST(Parser) {
   CHECK_PARSE_EQ("\\u003z", "'u003z'");
   CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
 
+  CHECK_SIMPLE("", false);
   CHECK_SIMPLE("a", true);
   CHECK_SIMPLE("a|b", false);
   CHECK_SIMPLE("a\\n", false);
@@ -1349,7 +1350,7 @@ TEST(MacroAssembler) {
   V8::Initialize(NULL);
   byte codes[1024];
   RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024),
-                                 Isolate::Current()->zone());
+                                 Isolate::Current()->runtime_zone());
   // ^f(o)o.
   Label fail, fail2, start;
   uc16 foo_chars[3];
index c4f72f4..4557100 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
 
 // Check that we can traverse very deep stacks of ConsStrings using
 // StringInputBuffer.  Check that Get(int) works on very deep stacks
@@ -691,3 +691,20 @@ TEST(RegExpOverflow) {
   CHECK(result.IsEmpty());
   CHECK(context->HasOutOfMemoryException());
 }
+
+
+TEST(StringReplaceAtomTwoByteResult) {
+  InitializeVM();
+  HandleScope scope;
+  LocalContext context;
+  v8::Local<v8::Value> result = CompileRun(
+      "var subject = 'ascii~only~string~'; "
+      "var replace = '\x80';            "
+      "subject.replace(/~/g, replace);  ");
+  CHECK(result->IsString());
+  Handle<String> string = v8::Utils::OpenHandle(v8::String::Cast(*result));
+  CHECK(string->IsSeqTwoByteString());
+
+  v8::Local<v8::String> expected = v8_str("ascii\x80only\x80string\x80");
+  CHECK(expected->Equals(result));
+}
index 1652774..093dba1 100644 (file)
@@ -239,4 +239,16 @@ for (var m = 0; m < 200; m++) {
 
   // Test 3a: String.match.
   test_match(test_3_expectation, subject, /a1/g);
-}
\ No newline at end of file
+}
+
+
+// Test String hashing (compiling regular expression includes hashing).
+var crosscheck = "\x80";
+for (var i = 0; i < 12; i++) crosscheck += crosscheck;
+new RegExp(crosscheck);
+
+var subject = "ascii~only~string~here~";
+var replacement = "\x80";
+var result = subject.replace(/~/g, replacement);
+for (var i = 0; i < 5; i++) result += result;
+new RegExp(result);