Maintain the invariant that the pattern string in an atom regexp is always
author sandholm@chromium.org <sandholm@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 30 Nov 2010 13:16:36 +0000 (13:16 +0000)
committer sandholm@chromium.org <sandholm@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 30 Nov 2010 13:16:36 +0000 (13:16 +0000)
a flat non-cons string.
Review URL: http://codereview.chromium.org/5270006

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5907 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/handles.cc
src/jsregexp.cc
src/runtime.cc

index 37a5011..4430926 100644 (file)
@@ -224,13 +224,7 @@ void FlattenString(Handle<String> string) {
 
 
 Handle<String> FlattenGetString(Handle<String> string) {
-  Handle<String> result;
-  CALL_AND_RETRY(string->TryFlatten(),
-                 { result = Handle<String>(String::cast(__object__));
-                   break; },
-                 return Handle<String>());
-  ASSERT(string->IsFlat());
-  return result;
+  CALL_HEAP_FUNCTION(string->TryFlatten(), String);
 }
 
 
index 8cd13bc..82c1b20 100644 (file)
@@ -33,6 +33,7 @@
 #include "factory.h"
 #include "jsregexp.h"
 #include "platform.h"
+#include "string-search.h"
 #include "runtime.h"
 #include "top.h"
 #include "compilation-cache.h"
@@ -120,7 +121,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
     re->set_data(*cached);
     return re;
   }
-  FlattenString(pattern);
+  pattern = FlattenGetString(pattern);
   CompilationZoneScope zone_scope(DELETE_ON_EXIT);
   PostponeInterruptsScope postpone;
   RegExpCompileData parse_result;
@@ -205,23 +206,61 @@ static void SetAtomLastCapture(FixedArray* array,
   RegExpImpl::SetCapture(array, 1, to);
 }
 
+  /* template <typename SubjectChar>, typename PatternChar>
+static int ReStringMatch(Vector<const SubjectChar> sub_vector,
+                         Vector<const PatternChar> pat_vector,
+                         int start_index) {
 
+  int pattern_length = pat_vector.length();
+  if (pattern_length == 0) return start_index;
+
+  int subject_length = sub_vector.length();
+  if (start_index + pattern_length > subject_length) return -1;
+  return SearchString(sub_vector, pat_vector, start_index);
+}
+  */
 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
                                     Handle<String> subject,
                                     int index,
                                     Handle<JSArray> last_match_info) {
-  Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
-
-  uint32_t start_index = index;
+  ASSERT(0 <= index);
+  ASSERT(index <= subject->length());
 
-  int value = Runtime::StringMatch(subject, needle, start_index);
-  if (value == -1) return Factory::null_value();
+  if (!subject->IsFlat()) FlattenString(subject);
+  AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
+  // Extract flattened substrings of cons strings before determining asciiness.
+  String* seq_sub = *subject;
+  if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
+
+  String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
+  int needle_len = needle->length();
+
+  if (needle_len != 0) {
+    if (index + needle_len > subject->length()) return Factory::null_value();
+    // dispatch on type of strings
+    index = (needle->IsAsciiRepresentation()
+             ? (seq_sub->IsAsciiRepresentation()
+                ? SearchString(seq_sub->ToAsciiVector(),
+                               needle->ToAsciiVector(),
+                               index)
+                : SearchString(seq_sub->ToUC16Vector(),
+                               needle->ToAsciiVector(),
+                               index))
+             : (seq_sub->IsAsciiRepresentation()
+                ? SearchString(seq_sub->ToAsciiVector(),
+                               needle->ToUC16Vector(),
+                               index)
+                : SearchString(seq_sub->ToUC16Vector(),
+                               needle->ToUC16Vector(),
+                               index)));
+    if (index == -1) return Factory::null_value();
+  }
   ASSERT(last_match_info->HasFastElements());
 
   {
     NoHandleAllocation no_handles;
     FixedArray* array = FixedArray::cast(last_match_info->elements());
-    SetAtomLastCapture(array, *subject, value, value + needle->length());
+    SetAtomLastCapture(array, *subject, index, index + needle_len);
   }
   return last_match_info;
 }
@@ -364,7 +403,7 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
     AssertNoAllocation no_gc;
     String* sequential_string = *subject;
     if (subject->IsConsString()) {
-      sequential_string =  ConsString::cast(*subject)->first();
+      sequential_string = ConsString::cast(*subject)->first();
     }
     is_ascii = sequential_string->IsAsciiRepresentation();
   }
index 8faed90..6c66850 100644 (file)
@@ -2019,10 +2019,7 @@ class ReplacementStringBuilder {
   }
 
   Handle<JSArray> GetParts() {
-    Handle<JSArray> result =
-        Factory::NewJSArrayWithElements(array_builder_.array());
-    result->set_length(Smi::FromInt(array_builder_.length()));
-    return result;
+    return array_builder_.ToJSArray();
   }
 
  private:
@@ -2597,7 +2594,7 @@ static MaybeObject* Runtime_StringReplaceRegExpWithString(Arguments args) {
 
 // Perform string match of pattern on subject, starting at start index.
 // Caller must ensure that 0 <= start_index <= sub->length(),
-// and should check that pat->length() + start_index <= sub->length()
+// and should check that pat->length() + start_index <= sub->length().
 int Runtime::StringMatch(Handle<String> sub,
                          Handle<String> pat,
                          int start_index) {
@@ -3196,7 +3193,7 @@ static MaybeObject* Runtime_RegExpExecMultiple(Arguments args) {
   if (regexp->TypeTag() == JSRegExp::ATOM) {
     Handle<String> pattern(
         String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
-    if (!pattern->IsFlat()) FlattenString(pattern);
+    ASSERT(pattern->IsFlat());
     if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) {
       return *builder.ToJSArray(result_array);
     }