Irregexp is specialized on subject character type.
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 8 Dec 2008 12:43:01 +0000 (12:43 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 8 Dec 2008 12:43:01 +0000 (12:43 +0000)
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@937 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/jsregexp.cc
src/jsregexp.h
src/objects.h
src/regexp-macro-assembler-ia32.cc
test/cctest/test-regexp.cc

index d289eaae9ba2e6777803893e54f404f3bc2576d0..3d1930ea9ea2d6b59550099b5dda935deeb2ea22 100644 (file)
@@ -201,6 +201,50 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re,
 }
 
 
+// Generic RegExp methods. Dispatches to implementation specific methods.
+
+
+class OffsetsVector {
+ public:
+  inline OffsetsVector(int num_registers)
+      : offsets_vector_length_(num_registers) {
+    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
+      vector_ = NewArray<int>(offsets_vector_length_);
+    } else {
+      vector_ = static_offsets_vector_;
+    }
+  }
+
+
+  inline ~OffsetsVector() {
+    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
+      DeleteArray(vector_);
+      vector_ = NULL;
+    }
+  }
+
+
+  inline int* vector() {
+    return vector_;
+  }
+
+
+  inline int length() {
+    return offsets_vector_length_;
+  }
+
+ private:
+  int* vector_;
+  int offsets_vector_length_;
+  static const int kStaticOffsetsVectorSize = 50;
+  static int static_offsets_vector_[kStaticOffsetsVectorSize];
+};
+
+
+int OffsetsVector::static_offsets_vector_[
+    OffsetsVector::kStaticOffsetsVectorSize];
+
+
 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
                                    Handle<String> pattern,
                                    Handle<String> flag_str) {
@@ -224,7 +268,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
                            pattern,
                            parse_result.error,
                            "malformed_regexp");
-      return Handle<Object>();
+      return Handle<Object>::null();
     }
     RegExpAtom* atom = parse_result.tree->AsAtom();
     if (atom != NULL && !flags.is_ignore_case()) {
@@ -237,20 +281,10 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
         result = AtomCompile(re, pattern, flags, pattern);
       }
     } else {
-      RegExpNode* node = NULL;
-      Handle<FixedArray> irregexp_data =
-          RegExpEngine::Compile(&parse_result,
-                                &node,
-                                flags.is_ignore_case(),
-                                flags.is_multiline(),
-                                pattern);
-      if (irregexp_data.is_null()) {
-        if (FLAG_disable_jscre) {
-          UNIMPLEMENTED();
-        }
-        result = JscrePrepare(re, pattern, flags);
+      if (FLAG_irregexp) {
+        result = IrregexpPrepare(re, pattern, flags);
       } else {
-        result = IrregexpPrepare(re, pattern, flags, irregexp_data);
+        result = JscrePrepare(re, pattern, flags);
       }
     }
     Object* data = re->data();
@@ -270,18 +304,30 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
                                 Handle<String> subject,
                                 Handle<Object> index) {
   switch (regexp->TypeTag()) {
+    case JSRegExp::ATOM:
+      return AtomExec(regexp, subject, index);
+    case JSRegExp::IRREGEXP: {
+      Handle<Object> result = IrregexpExec(regexp, subject, index);
+      if (!result.is_null()) {
+        return result;
+      }
+      // We couldn't handle the regexp using Irregexp, so fall back
+      // on JSCRE. We rejoice at the thought of the day when this is
+      // no longer needed.
+      // Reset the JSRegExp to use JSCRE.
+      JscrePrepare(regexp,
+                   Handle<String>(regexp->Pattern()),
+                   regexp->GetFlags());
+      // Fall-through to JSCRE.
+    }
     case JSRegExp::JSCRE:
       if (FLAG_disable_jscre) {
         UNIMPLEMENTED();
       }
       return JscreExec(regexp, subject, index);
-    case JSRegExp::ATOM:
-      return AtomExec(regexp, subject, index);
-    case JSRegExp::IRREGEXP:
-      return IrregexpExec(regexp, subject, index);
     default:
       UNREACHABLE();
-      return Handle<Object>();
+      return Handle<Object>::null();
   }
 }
 
@@ -289,22 +335,37 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
                                 Handle<String> subject) {
   switch (regexp->TypeTag()) {
+    case JSRegExp::ATOM:
+      return AtomExecGlobal(regexp, subject);
+    case JSRegExp::IRREGEXP: {
+      Handle<Object> result = IrregexpExecGlobal(regexp, subject);
+      if (!result.is_null()) {
+        return result;
+      }
+      // We couldn't handle the regexp using Irregexp, so fall back
+      // on JSCRE. We rejoice at the thought of the day when this is
+      // no longer needed.
+      // Reset the JSRegExp to use JSCRE.
+      JscrePrepare(regexp,
+                   Handle<String>(regexp->Pattern()),
+                   regexp->GetFlags());
+      // Fall-through to JSCRE.
+    }
     case JSRegExp::JSCRE:
       if (FLAG_disable_jscre) {
         UNIMPLEMENTED();
       }
       return JscreExecGlobal(regexp, subject);
-    case JSRegExp::ATOM:
-      return AtomExecGlobal(regexp, subject);
-    case JSRegExp::IRREGEXP:
-      return IrregexpExecGlobal(regexp, subject);
     default:
       UNREACHABLE();
-      return Handle<Object>();
+      return Handle<Object>::null();
   }
 }
 
 
+// RegExp Atom implementation: Simple string search using indexOf.
+
+
 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
                                        Handle<String> pattern,
                                        JSRegExp::Flags flags,
@@ -366,6 +427,21 @@ Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
 }
 
 
+// JSCRE implementation.
+
+
+int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
+  FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
+  return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
+}
+
+
+ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
+  FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
+  return ByteArray::cast(value->get(kJscreInternalIndex));
+}
+
+
 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
                                        Handle<String> pattern,
                                        JSRegExp::Flags flags) {
@@ -375,20 +451,11 @@ Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
 }
 
 
-Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
-                                          Handle<String> pattern,
-                                          JSRegExp::Flags flags,
-                                          Handle<FixedArray> irregexp_data) {
-  Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, irregexp_data);
-  return re;
-}
-
-
-static inline Object* DoCompile(String* pattern,
-                                JSRegExp::Flags flags,
-                                unsigned* number_of_captures,
-                                const char** error_message,
-                                v8::jscre::JscreRegExp** code) {
+static inline Object* JscreDoCompile(String* pattern,
+                                     JSRegExp::Flags flags,
+                                     unsigned* number_of_captures,
+                                     const char** error_message,
+                                     v8::jscre::JscreRegExp** code) {
   v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
     ? v8::jscre::JSRegExpIgnoreCase
     : v8::jscre::JSRegExpDoNotIgnoreCase;
@@ -417,16 +484,16 @@ static inline Object* DoCompile(String* pattern,
 }
 
 
-void CompileWithRetryAfterGC(Handle<String> pattern,
-                             JSRegExp::Flags flags,
-                             unsigned* number_of_captures,
-                             const char** error_message,
-                             v8::jscre::JscreRegExp** code) {
-  CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,
-                                    flags,
-                                    number_of_captures,
-                                    error_message,
-                                    code));
+static void JscreCompileWithRetryAfterGC(Handle<String> pattern,
+                                         JSRegExp::Flags flags,
+                                         unsigned* number_of_captures,
+                                         const char** error_message,
+                                         v8::jscre::JscreRegExp** code) {
+  CALL_HEAP_FUNCTION_VOID(JscreDoCompile(*pattern,
+                                         flags,
+                                         number_of_captures,
+                                         error_message,
+                                         code));
 }
 
 
@@ -445,11 +512,11 @@ Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
   v8::jscre::JscreRegExp* code = NULL;
   FlattenString(pattern);
 
-  CompileWithRetryAfterGC(two_byte_pattern,
-                          flags,
-                          &number_of_captures,
-                          &error_message,
-                          &code);
+  JscreCompileWithRetryAfterGC(two_byte_pattern,
+                               flags,
+                               &number_of_captures,
+                               &error_message,
+                               &code);
 
   if (code == NULL) {
     // Throw an exception.
@@ -476,92 +543,31 @@ Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
 }
 
 
-Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
-                                            int num_captures,
-                                            Handle<String> two_byte_subject,
-                                            int previous_index,
-                                            int* offsets_vector,
-                                            int offsets_vector_length) {
-#ifdef DEBUG
-  if (FLAG_trace_regexp_bytecodes) {
-    String* pattern = regexp->Pattern();
-    PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
-    PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString()));
-  }
-#endif
-  ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
-  ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
-  bool rc;
-
-  for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
-    offsets_vector[i] = -1;
+Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
+                                     Handle<String> subject,
+                                     Handle<Object> index) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
+  if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
+    Handle<Object> compile_result = JscreCompile(regexp);
+    if (compile_result.is_null()) return compile_result;
   }
+  ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
 
-  LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));
-
-  FixedArray* irregexp =
-      FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
-  int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
+  int num_captures = JscreNumberOfCaptures(regexp);
 
-  switch (tag) {
-    case RegExpMacroAssembler::kIA32Implementation: {
-#ifndef ARM
-      Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));
-      Address start_addr =
-          Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress();
-      int string_offset =
-          start_addr - reinterpret_cast<Address>(*two_byte_subject);
-      int start_offset = string_offset + previous_index * sizeof(uc16);
-      int end_offset =
-          string_offset + two_byte_subject->length() * sizeof(uc16);
-      rc = RegExpMacroAssemblerIA32::Execute(code,
-                                             two_byte_subject.location(),
-                                             start_offset,
-                                             end_offset,
-                                             offsets_vector,
-                                             previous_index == 0);
-      if (rc) {
-        // Capture values are relative to start_offset only.
-        for (int i = 0; i < offsets_vector_length; i++) {
-          if (offsets_vector[i] >= 0) {
-            offsets_vector[i] += previous_index;
-          }
-        }
-      }
-      break;
-#else
-      UNIMPLEMENTED();
-      rc = false;
-      break;
-#endif
-    }
-    case RegExpMacroAssembler::kBytecodeImplementation: {
-      Handle<ByteArray> byte_codes = IrregexpCode(regexp);
+  OffsetsVector offsets((num_captures + 1) * 3);
 
-      rc = IrregexpInterpreter::Match(byte_codes,
-                                      two_byte_subject,
-                                      offsets_vector,
-                                      previous_index);
-      break;
-    }
-    case RegExpMacroAssembler::kARMImplementation:
-    default:
-      UNREACHABLE();
-      rc = false;
-      break;
-  }
+  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
 
-  if (!rc) {
-    return Factory::null_value();
-  }
+  Handle<String> subject16 = CachedStringToTwoByte(subject);
 
-  Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
-  // The captures come in (start, end+1) pairs.
-  for (int i = 0; i < 2 * (num_captures+1); i += 2) {
-    array->set(i, Smi::FromInt(offsets_vector[i]));
-    array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
-  }
-  return Factory::NewJSArrayWithElements(array);
+  return JscreExecOnce(regexp,
+                       num_captures,
+                       subject,
+                       previous_index,
+                       subject16->GetTwoByteData(),
+                       offsets.vector(),
+                       offsets.length());
 }
 
 
@@ -617,76 +623,8 @@ Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
 }
 
 
-class OffsetsVector {
- public:
-  inline OffsetsVector(int num_registers)
-      : offsets_vector_length_(num_registers) {
-    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
-      vector_ = NewArray<int>(offsets_vector_length_);
-    } else {
-      vector_ = static_offsets_vector_;
-    }
-  }
-
-
-  inline ~OffsetsVector() {
-    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
-      DeleteArray(vector_);
-      vector_ = NULL;
-    }
-  }
-
-
-  inline int* vector() {
-    return vector_;
-  }
-
-
-  inline int length() {
-    return offsets_vector_length_;
-  }
-
- private:
-  int* vector_;
-  int offsets_vector_length_;
-  static const int kStaticOffsetsVectorSize = 50;
-  static int static_offsets_vector_[kStaticOffsetsVectorSize];
-};
-
-
-int OffsetsVector::static_offsets_vector_[
-    OffsetsVector::kStaticOffsetsVectorSize];
-
-
-Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
-                                        Handle<String> subject,
-                                        Handle<Object> index) {
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
-  ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
-
-  // Prepare space for the return values.
-  int number_of_registers = IrregexpNumberOfRegisters(regexp);
-  OffsetsVector offsets(number_of_registers);
-
-  int num_captures = IrregexpNumberOfCaptures(regexp);
-
-  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
-
-  Handle<String> subject16 = CachedStringToTwoByte(subject);
-
-  Handle<Object> result(IrregexpExecOnce(regexp,
-                                         num_captures,
-                                         subject16,
-                                         previous_index,
-                                         offsets.vector(),
-                                         offsets.length()));
-  return result;
-}
-
-
-Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
-                                     Handle<String> subject,
-                                     Handle<Object> index) {
+Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
+                                           Handle<String> subject) {
   ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
   if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
     Handle<Object> compile_result = JscreCompile(regexp);
@@ -694,35 +632,11 @@ Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
   }
   ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
 
+  // Prepare space for the return values.
   int num_captures = JscreNumberOfCaptures(regexp);
 
   OffsetsVector offsets((num_captures + 1) * 3);
 
-  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
-
-  Handle<String> subject16 = CachedStringToTwoByte(subject);
-
-  Handle<Object> result(JscreExecOnce(regexp,
-                                      num_captures,
-                                      subject,
-                                      previous_index,
-                                      subject16->GetTwoByteData(),
-                                      offsets.vector(),
-                                      offsets.length()));
-
-  return result;
-}
-
-
-Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
-                                              Handle<String> subject) {
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
-  ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
-
-  // Prepare space for the return values.
-  int number_of_registers = IrregexpNumberOfRegisters(regexp);
-  OffsetsVector offsets(number_of_registers);
-
   int previous_index = 0;
 
   Handle<JSArray> result = Factory::NewJSArray(0);
@@ -737,12 +651,13 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
       // string length, there is no match.
       matches = Factory::null_value();
     } else {
-      matches = IrregexpExecOnce(regexp,
-                                 IrregexpNumberOfCaptures(regexp),
-                                 subject16,
-                                 previous_index,
-                                 offsets.vector(),
-                                 offsets.length());
+      matches = JscreExecOnce(regexp,
+                              num_captures,
+                              subject,
+                              previous_index,
+                              subject16->GetTwoByteData(),
+                              offsets.vector(),
+                              offsets.length());
 
       if (matches->IsJSArray()) {
         SetElement(result, i, matches);
@@ -766,19 +681,146 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
 }
 
 
-Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
-                                           Handle<String> subject) {
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
-  if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
-    Handle<Object> compile_result = JscreCompile(regexp);
-    if (compile_result.is_null()) return compile_result;
+// Irregexp implementation.
+
+
+static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
+                                              bool is_ascii) {
+  ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
+  Handle<FixedArray> alternatives(
+      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
+  ASSERT_EQ(2, alternatives->length());
+
+  int index = is_ascii ? 0 : 1;
+  Object* entry = alternatives->get(index);
+  if (!entry->IsNull()) {
+    return Handle<FixedArray>(FixedArray::cast(entry));
+  }
+
+  // Compile the RegExp.
+  ZoneScope zone_scope(DELETE_ON_EXIT);
+
+  JSRegExp::Flags flags = re->GetFlags();
+
+  Handle<String> pattern(re->Pattern());
+  StringShape shape(*pattern);
+  if (!pattern->IsFlat(shape)) {
+    pattern->Flatten(shape);
+  }
+
+  RegExpParseResult parse_result;
+  FlatStringReader reader(pattern);
+  if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
+    // Throw an exception if we fail to parse the pattern.
+    // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
+    ThrowRegExpException(re,
+                         pattern,
+                         parse_result.error,
+                         "malformed_regexp");
+    return Handle<FixedArray>::null();
+  }
+  Handle<FixedArray> compiled_entry =
+      RegExpEngine::Compile(&parse_result,
+                            NULL,
+                            flags.is_ignore_case(),
+                            flags.is_multiline(),
+                            pattern,
+                            is_ascii);
+  if (!compiled_entry.is_null()) {
+    alternatives->set(index, *compiled_entry);
+  }
+  return compiled_entry;
+}
+
+
+int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
+  return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value();
+}
+
+
+int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
+  return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value();
+}
+
+
+Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
+  ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
+      == RegExpMacroAssembler::kBytecodeImplementation);
+  return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
+}
+
+
+Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
+  ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
+      != RegExpMacroAssembler::kBytecodeImplementation);
+  return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
+}
+
+
+Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
+                                          Handle<String> pattern,
+                                          JSRegExp::Flags flags) {
+  // Make space for ASCII and UC16 versions.
+  Handle<FixedArray> alternatives = Factory::NewFixedArray(2);
+  alternatives->set_null(0);
+  alternatives->set_null(1);
+  Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives);
+  return re;
+}
+
+
+Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
+                                        Handle<String> subject,
+                                        Handle<Object> index) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+  ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
+
+  bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
+  Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
+  if (irregexp.is_null()) {
+    // We can't handle the RegExp with Irregexp.
+    return Handle<Object>::null();
   }
-  ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
 
   // Prepare space for the return values.
-  int num_captures = JscreNumberOfCaptures(regexp);
+  int number_of_registers = IrregexpNumberOfRegisters(irregexp);
+  OffsetsVector offsets(number_of_registers);
 
-  OffsetsVector offsets((num_captures + 1) * 3);
+  int num_captures = IrregexpNumberOfCaptures(irregexp);
+
+  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
+
+#ifdef DEBUG
+  if (FLAG_trace_regexp_bytecodes) {
+    String* pattern = regexp->Pattern();
+    PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
+    PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
+  }
+#endif
+  LOG(RegExpExecEvent(regexp, previous_index, subject));
+  return IrregexpExecOnce(irregexp,
+                          num_captures,
+                          subject,
+                          previous_index,
+                          offsets.vector(),
+                          offsets.length());
+}
+
+
+Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
+                                              Handle<String> subject) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+
+  StringShape shape(*subject);
+  bool is_ascii = shape.IsAsciiRepresentation();
+  Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
+  if (irregexp.is_null()) {
+    return Handle<Object>::null();
+  }
+
+  // Prepare space for the return values.
+  int number_of_registers = IrregexpNumberOfRegisters(irregexp);
+  OffsetsVector offsets(number_of_registers);
 
   int previous_index = 0;
 
@@ -786,7 +828,9 @@ Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
   int i = 0;
   Handle<Object> matches;
 
-  Handle<String> subject16 = CachedStringToTwoByte(subject);
+  if (!subject->IsFlat(shape)) {
+    subject->Flatten(shape);
+  }
 
   do {
     if (previous_index > subject->length() || previous_index < 0) {
@@ -794,13 +838,20 @@ Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
       // string length, there is no match.
       matches = Factory::null_value();
     } else {
-      matches = JscreExecOnce(regexp,
-                              num_captures,
-                              subject,
-                              previous_index,
-                              subject16->GetTwoByteData(),
-                              offsets.vector(),
-                              offsets.length());
+#ifdef DEBUG
+      if (FLAG_trace_regexp_bytecodes) {
+        String* pattern = regexp->Pattern();
+        PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
+        PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
+      }
+#endif
+      LOG(RegExpExecEvent(regexp, previous_index, subject));
+      matches = IrregexpExecOnce(irregexp,
+                                 IrregexpNumberOfCaptures(irregexp),
+                                 subject,
+                                 previous_index,
+                                 offsets.vector(),
+                                 offsets.length());
 
       if (matches->IsJSArray()) {
         SetElement(result, i, matches);
@@ -824,36 +875,120 @@ Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
 }
 
 
-int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
-  FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
-  return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
-}
+Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
+                                            int num_captures,
+                                            Handle<String> subject,
+                                            int previous_index,
+                                            int* offsets_vector,
+                                            int offsets_vector_length) {
+  bool rc;
 
+  int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
 
-ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
-  FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
-  return ByteArray::cast(value->get(kJscreInternalIndex));
-}
+  switch (tag) {
+    case RegExpMacroAssembler::kIA32Implementation: {
+#ifndef ARM
+      if (!subject->IsFlat(StringShape(*subject))) {
+        FlattenString(subject);
+      }
+      Handle<Code> code = IrregexpNativeCode(irregexp);
 
+      StringShape shape(*subject);
 
-int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) {
-  FixedArray* value =
-      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
-  return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value();
-}
+      // Character offsets into string.
+      int start_offset = previous_index;
+      int end_offset = subject->length(shape);
 
+      if (shape.IsCons()) {
+        subject = Handle<String>(ConsString::cast(*subject)->first());
+      } else if (shape.IsSliced()) {
+        SlicedString* slice = SlicedString::cast(*subject);
+        start_offset += slice->start();
+        end_offset += slice->start();
+        subject = Handle<String>(slice->buffer());
+      }
 
-int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) {
-  FixedArray* value =
-      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
-  return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value();
-}
+      // String is now either Sequential or External
+      StringShape flatshape(*subject);
+      bool is_ascii = flatshape.IsAsciiRepresentation();
+      int char_size = is_ascii ? sizeof(char) : sizeof(uc16);  // NOLINT
 
+      if (flatshape.IsExternal()) {
+        const byte* address;
+        if (is_ascii) {
+          ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
+          address = reinterpret_cast<const byte*>(ext->resource()->data());
+        } else {
+          ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
+          address = reinterpret_cast<const byte*>(ext->resource()->data());
+        }
+        rc = RegExpMacroAssemblerIA32::Execute(
+            *code,
+            &address,
+            start_offset * char_size,
+            end_offset * char_size,
+            offsets_vector,
+            previous_index == 0);
+      } else {  // Sequential string
+        int byte_offset =
+            is_ascii ? SeqAsciiString::kHeaderSize - kHeapObjectTag:
+                       SeqTwoByteString::kHeaderSize - kHeapObjectTag;
+        rc = RegExpMacroAssemblerIA32::Execute(
+            *code,
+            subject.location(),
+            byte_offset + start_offset * char_size,
+            byte_offset + end_offset * char_size,
+            offsets_vector,
+            previous_index == 0);
+      }
 
-Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
-  FixedArray* value =
-      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
-  return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));
+      if (rc) {
+        // Capture values are relative to start_offset only.
+        for (int i = 0; i < offsets_vector_length; i++) {
+          if (offsets_vector[i] >= 0) {
+            offsets_vector[i] += previous_index;
+          }
+        }
+      }
+      break;
+#else
+      UNIMPLEMENTED();
+      rc = false;
+      break;
+#endif
+    }
+    case RegExpMacroAssembler::kBytecodeImplementation: {
+      for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
+        offsets_vector[i] = -1;
+      }
+      Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
+
+      Handle<String> two_byte_subject = CachedStringToTwoByte(subject);
+
+      rc = IrregexpInterpreter::Match(byte_codes,
+                                      two_byte_subject,
+                                      offsets_vector,
+                                      previous_index);
+      break;
+    }
+    case RegExpMacroAssembler::kARMImplementation:
+    default:
+      UNREACHABLE();
+      rc = false;
+      break;
+  }
+
+  if (!rc) {
+    return Factory::null_value();
+  }
+
+  Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
+  // The captures come in (start, end+1) pairs.
+  for (int i = 0; i < 2 * (num_captures+1); i += 2) {
+    array->set(i, Smi::FromInt(offsets_vector[i]));
+    array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
+  }
+  return Factory::NewJSArrayWithElements(array);
 }
 
 
@@ -3475,7 +3610,8 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
                                          RegExpNode** node_return,
                                          bool ignore_case,
                                          bool is_multiline,
-                                         Handle<String> pattern) {
+                                         Handle<String> pattern,
+                                         bool is_ascii) {
   RegExpCompiler compiler(input->capture_count, ignore_case);
   // Wrap the body of the regexp in capture #0.
   RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
@@ -3500,10 +3636,6 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
   NodeInfo info = *node->info();
   node = node->EnsureExpanded(&info);
 
-  if (!FLAG_irregexp) {
-    return Handle<FixedArray>::null();
-  }
-
   if (is_multiline && !FLAG_attempt_multiline_irregexp) {
     return Handle<FixedArray>::null();
   }
@@ -3512,7 +3644,13 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
 #ifdef ARM
     // Unimplemented, fall-through to bytecode implementation.
 #else  // IA32
-    RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16,
+    RegExpMacroAssemblerIA32::Mode mode;
+    if (is_ascii) {
+      mode = RegExpMacroAssemblerIA32::ASCII;
+    } else {
+      mode = RegExpMacroAssemblerIA32::UC16;
+    }
+    RegExpMacroAssemblerIA32 macro_assembler(mode,
                                              (input->capture_count + 1) * 2);
     return compiler.Assemble(&macro_assembler,
                              node,
index 53c81701fd368bd7c6111a96314ea153676eabd3..3dc2bc1d115bf34f5dbb1deea42523d6887ad08f 100644 (file)
@@ -48,6 +48,9 @@ class RegExpImpl {
   // This function calls the garbage collector if necessary.
   static Handle<String> ToString(Handle<Object> value);
 
+  // Parses the RegExp pattern and prepares the JSRegExp object with
+  // generic data and choice of implementation - as well as what
+  // the implementation wants to store in the data field.
   static Handle<Object> Compile(Handle<JSRegExp> re,
                                 Handle<String> pattern,
                                 Handle<String> flags);
@@ -71,12 +74,10 @@ class RegExpImpl {
                                      Handle<String> pattern,
                                      JSRegExp::Flags flags);
 
-  // Stores a compiled RegExp pattern in the JSRegExp object.
-  // The pattern is compiled by Irregexp.
+  // Prepares a JSRegExp object with Irregexp-specific data.
   static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
                                         Handle<String> pattern,
-                                        JSRegExp::Flags flags,
-                                        Handle<FixedArray> irregexp_data);
+                                        JSRegExp::Flags flags);
 
 
   // Compile the pattern using JSCRE and store the result in the
@@ -140,9 +141,10 @@ class RegExpImpl {
   static int JscreNumberOfCaptures(Handle<JSRegExp> re);
   static ByteArray* JscreInternal(Handle<JSRegExp> re);
 
-  static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
-  static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
-  static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
+  static int IrregexpNumberOfCaptures(Handle<FixedArray> re);
+  static int IrregexpNumberOfRegisters(Handle<FixedArray> re);
+  static Handle<ByteArray> IrregexpByteCode(Handle<FixedArray> re);
+  static Handle<Code> IrregexpNativeCode(Handle<FixedArray> re);
 
   // Call jsRegExpExecute once
   static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
@@ -153,7 +155,7 @@ class RegExpImpl {
                                       int* ovector,
                                       int ovector_length);
 
-  static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
+  static Handle<Object> IrregexpExecOnce(Handle<FixedArray> regexp,
                                          int num_captures,
                                          Handle<String> subject16,
                                          int previous_index,
@@ -1082,7 +1084,9 @@ class RegExpEngine: public AllStatic {
                                     RegExpNode** node_return,
                                     bool ignore_case,
                                     bool multiline,
-                                    Handle<String> pattern);
+                                    Handle<String> pattern,
+                                    bool is_ascii);
+
   static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
 };
 
index c3494c889d1806c4370f0ce9a87792205278fe81..1d2ba896eb57dd82deb4e281b15a2905f520d7c9 100644 (file)
@@ -2924,7 +2924,7 @@ class JSRegExp: public JSObject {
   // ATOM: A simple string to match against using an indexOf operation.
   // IRREGEXP: Compiled with Irregexp.
   // IRREGEXP_NATIVE: Compiled to native code with Irregexp.
-  enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP, IRREGEXP_NATIVE };
+  enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP };
   enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };
 
   class Flags {
index a6b2f026c7fdbd35e8c8499317edee4e670a4c03..21993b7e7627c5e8cde3c3c68c4e9c1eb41ae0aa 100644 (file)
@@ -111,9 +111,10 @@ RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() {
 
 
 void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
-  ASSERT(by > 0);
-  Label inside_string;
-  __ add(Operand(edi), Immediate(by * char_size()));
+  if (by != 0) {
+    Label inside_string;
+    __ add(Operand(edi), Immediate(by * char_size()));
+  }
 }
 
 
@@ -138,7 +139,7 @@ void RegExpMacroAssemblerIA32::Bind(Label* label) {
 void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
                                            Label* bitmap,
                                            Label* on_zero) {
-  UNREACHABLE();
+  UNIMPLEMENTED();
   __ mov(eax, current_character());
   __ sub(Operand(eax), Immediate(start));
   __ cmp(eax, 64);  // FIXME: 64 = length_of_bitmap_in_bits.
@@ -683,6 +684,8 @@ int RegExpMacroAssemblerIA32::CaseInsensitiveCompareUC16(uc16** buffer,
                                                          int byte_offset1,
                                                          int byte_offset2,
                                                          size_t byte_length) {
+  // This function MUST NOT cause a garbage collection. A GC might move
+  // the calling generated code and invalidate the stacked return address.
   ASSERT(byte_length % 2 == 0);
   Address buffer_address = reinterpret_cast<Address>(*buffer);
   uc16* substring1 = reinterpret_cast<uc16*>(buffer_address + byte_offset1);
index 4bd3222c11e3d6d7a36c45e023dcd181c5cb903c..b3e54f38f3f19d29abdda1c8516f4b13771bb48b 100644 (file)
@@ -355,7 +355,7 @@ TEST(CharacterClassEscapes) {
 }
 
 
-static RegExpNode* Compile(const char* input, bool multiline) {
+static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
   V8::Initialize(NULL);
   FlatStringReader reader(CStrVector(input));
   RegExpParseResult result;
@@ -363,17 +363,18 @@ static RegExpNode* Compile(const char* input, bool multiline) {
     return NULL;
   RegExpNode* node = NULL;
   Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
-  RegExpEngine::Compile(&result, &node, false, multiline, pattern);
+  RegExpEngine::Compile(&result, &node, false, multiline, pattern, is_ascii);
   return node;
 }
 
 
 static void Execute(const char* input,
                     bool multiline,
+                    bool is_ascii,
                     bool dot_output = false) {
   v8::HandleScope scope;
   ZoneScope zone_scope(DELETE_ON_EXIT);
-  RegExpNode* node = Compile(input, multiline);
+  RegExpNode* node = Compile(input, multiline, is_ascii);
   USE(node);
 #ifdef DEBUG
   if (dot_output) {
@@ -1130,7 +1131,7 @@ TEST(LatinCanonicalize) {
 TEST(SimplePropagation) {
   v8::HandleScope scope;
   ZoneScope zone_scope(DELETE_ON_EXIT);
-  RegExpNode* node = Compile("(a|^b|c)", false);
+  RegExpNode* node = Compile("(a|^b|c)", false, true);
   CHECK(node->info()->follows_start_interest);
 }
 
@@ -1300,5 +1301,5 @@ TEST(CharClassDifference) {
 
 TEST(Graph) {
   V8::Initialize(NULL);
-  Execute("(?=[d#.])", false, true);
+  Execute("(?=[d#.])", false, true, true);
 }