Simplify code by removing special-casing for single-character patterns
authorsandholm@chromium.org <sandholm@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 31 Aug 2010 09:22:53 +0000 (09:22 +0000)
committersandholm@chromium.org <sandholm@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 31 Aug 2010 09:22:53 +0000 (09:22 +0000)
Review URL: http://codereview.chromium.org/3276004

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5380 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/runtime.cc

index 70065be..6b40a9b 100644 (file)
@@ -2818,40 +2818,6 @@ static int BoyerMooreIndexOf(Vector<const schar> subject,
 }
 
 
-template <typename schar>
-static inline int SingleCharIndexOf(Vector<const schar> string,
-                                    schar pattern_char,
-                                    int start_index) {
-  if (sizeof(schar) == 1) {
-    const schar* pos = reinterpret_cast<const schar*>(
-        memchr(string.start() + start_index,
-               pattern_char,
-               string.length() - start_index));
-    if (pos == NULL) return -1;
-    return static_cast<int>(pos - string.start());
-  }
-  for (int i = start_index, n = string.length(); i < n; i++) {
-    if (pattern_char == string[i]) {
-      return i;
-    }
-  }
-  return -1;
-}
-
-
-template <typename schar>
-static int SingleCharLastIndexOf(Vector<const schar> string,
-                                 schar pattern_char,
-                                 int start_index) {
-  for (int i = start_index; i >= 0; i--) {
-    if (pattern_char == string[i]) {
-      return i;
-    }
-  }
-  return -1;
-}
-
-
 // Trivial string search for shorter strings.
 // On return, if "complete" is set to true, the return value is the
 // final result of searching for the patter in the subject.
@@ -2863,6 +2829,7 @@ static int SimpleIndexOf(Vector<const schar> subject,
                          Vector<const pchar> pattern,
                          int idx,
                          bool* complete) {
+  ASSERT(pattern.length() > 1);
   // Badness is a count of how much work we have done.  When we have
   // done enough work we decide it's probably worth switching to a better
   // algorithm.
@@ -2925,12 +2892,12 @@ static int SimpleIndexOf(Vector<const schar> subject,
       if (subject[i] != pattern_first_char) continue;
     }
     int j = 1;
-    do {
+    while (j < pattern.length()) {
       if (pattern[j] != subject[i+j]) {
         break;
       }
       j++;
-    } while (j < pattern.length());
+    }
     if (j == pattern.length()) {
       return i;
     }
@@ -3032,54 +2999,15 @@ int Runtime::StringMatch(Handle<String> sub,
   int subject_length = sub->length();
   if (start_index + pattern_length > subject_length) return -1;
 
-  if (!sub->IsFlat()) {
-    FlattenString(sub);
-  }
-
-  // Searching for one specific character is common.  For one
-  // character patterns linear search is necessary, so any smart
-  // algorithm is unnecessary overhead.
-  if (pattern_length == 1) {
-    AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
-    String* seq_sub = *sub;
-    if (seq_sub->IsConsString()) {
-      seq_sub = ConsString::cast(seq_sub)->first();
-    }
-    if (seq_sub->IsAsciiRepresentation()) {
-      uc16 pchar = pat->Get(0);
-      if (pchar > String::kMaxAsciiCharCode) {
-        return -1;
-      }
-      Vector<const char> ascii_vector =
-          seq_sub->ToAsciiVector().SubVector(start_index, subject_length);
-      const void* pos = memchr(ascii_vector.start(),
-                               static_cast<const char>(pchar),
-                               static_cast<size_t>(ascii_vector.length()));
-      if (pos == NULL) {
-        return -1;
-      }
-      return static_cast<int>(reinterpret_cast<const char*>(pos)
-          - ascii_vector.start() + start_index);
-    }
-    return SingleCharIndexOf(seq_sub->ToUC16Vector(),
-                             pat->Get(0),
-                             start_index);
-  }
-
-  if (!pat->IsFlat()) {
-    FlattenString(pat);
-  }
+  if (!sub->IsFlat()) FlattenString(sub);
+  if (!pat->IsFlat()) FlattenString(pat);
 
   AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
   // Extract flattened substrings of cons strings before determining asciiness.
   String* seq_sub = *sub;
-  if (seq_sub->IsConsString()) {
-    seq_sub = ConsString::cast(seq_sub)->first();
-  }
+  if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
   String* seq_pat = *pat;
-  if (seq_pat->IsConsString()) {
-    seq_pat = ConsString::cast(seq_pat)->first();
-  }
+  if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first();
 
   // dispatch on type of strings
   if (seq_pat->IsAsciiRepresentation()) {
@@ -3169,30 +3097,8 @@ static Object* Runtime_StringLastIndexOf(Arguments args) {
     return Smi::FromInt(start_index);
   }
 
-  if (!sub->IsFlat()) {
-    FlattenString(sub);
-  }
-
-  if (pat_length == 1) {
-    AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
-    if (sub->IsAsciiRepresentation()) {
-      uc16 pchar = pat->Get(0);
-      if (pchar > String::kMaxAsciiCharCode) {
-        return Smi::FromInt(-1);
-      }
-      return Smi::FromInt(SingleCharLastIndexOf(sub->ToAsciiVector(),
-                                                static_cast<char>(pat->Get(0)),
-                                                start_index));
-    } else {
-      return Smi::FromInt(SingleCharLastIndexOf(sub->ToUC16Vector(),
-                                                pat->Get(0),
-                                                start_index));
-    }
-  }
-
-  if (!pat->IsFlat()) {
-    FlattenString(pat);
-  }
+  if (!sub->IsFlat()) FlattenString(sub);
+  if (!pat->IsFlat()) FlattenString(pat);
 
   AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
 
@@ -3370,88 +3276,6 @@ static void SetLastMatchInfoNoCaptures(Handle<String> subject,
 }
 
 
-template <typename schar>
-static bool SearchCharMultiple(Vector<schar> subject,
-                               String* pattern,
-                               schar pattern_char,
-                               FixedArrayBuilder* builder,
-                               int* match_pos) {
-  // Position of last match.
-  int pos = *match_pos;
-  int subject_length = subject.length();
-  while (pos < subject_length) {
-    int match_end = pos + 1;
-    if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
-      *match_pos = pos;
-      return false;
-    }
-    int new_pos = SingleCharIndexOf(subject, pattern_char, match_end);
-    if (new_pos >= 0) {
-      // Match has been found.
-      if (new_pos > match_end) {
-        ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos);
-      }
-      pos = new_pos;
-      builder->Add(pattern);
-    } else {
-      break;
-    }
-  }
-  if (pos + 1 < subject_length) {
-    ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length);
-  }
-  *match_pos = pos;
-  return true;
-}
-
-
-static bool SearchCharMultiple(Handle<String> subject,
-                               Handle<String> pattern,
-                               Handle<JSArray> last_match_info,
-                               FixedArrayBuilder* builder) {
-  ASSERT(subject->IsFlat());
-  ASSERT_EQ(1, pattern->length());
-  uc16 pattern_char = pattern->Get(0);
-  // Treating position before first as initial "previous match position".
-  int match_pos = -1;
-
-  for (;;) {  // Break when search complete.
-    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
-    AssertNoAllocation no_gc;
-    if (subject->IsAsciiRepresentation()) {
-      if (pattern_char > String::kMaxAsciiCharCode) {
-        break;
-      }
-      Vector<const char> subject_vector = subject->ToAsciiVector();
-      char pattern_ascii_char = static_cast<char>(pattern_char);
-      bool complete = SearchCharMultiple<const char>(subject_vector,
-                                                     *pattern,
-                                                     pattern_ascii_char,
-                                                     builder,
-                                                     &match_pos);
-      if (complete) break;
-    } else {
-      Vector<const uc16> subject_vector = subject->ToUC16Vector();
-      bool complete = SearchCharMultiple<const uc16>(subject_vector,
-                                                     *pattern,
-                                                     pattern_char,
-                                                     builder,
-                                                     &match_pos);
-      if (complete) break;
-    }
-  }
-
-  if (match_pos >= 0) {
-    SetLastMatchInfoNoCaptures(subject,
-                               last_match_info,
-                               match_pos,
-                               match_pos + 1);
-    return true;
-  }
-  return false;  // No matches at all.
-}
-
-
 template <typename schar, typename pchar>
 static bool SearchStringMultiple(Vector<schar> subject,
                                  String* pattern,
@@ -3529,7 +3353,6 @@ static bool SearchStringMultiple(Handle<String> subject,
                                  FixedArrayBuilder* builder) {
   ASSERT(subject->IsFlat());
   ASSERT(pattern->IsFlat());
-  ASSERT(pattern->length() > 1);
 
   // Treating as if a previous match was before first character.
   int match_pos = -pattern->length();
@@ -3787,14 +3610,6 @@ static Object* Runtime_RegExpExecMultiple(Arguments args) {
   if (regexp->TypeTag() == JSRegExp::ATOM) {
     Handle<String> pattern(
         String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
-    int pattern_length = pattern->length();
-    if (pattern_length == 1) {
-      if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) {
-        return *builder.ToJSArray(result_array);
-      }
-      return Heap::null_value();
-    }
-
     if (!pattern->IsFlat()) FlattenString(pattern);
     if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) {
       return *builder.ToJSArray(result_array);
@@ -5396,23 +5211,6 @@ void FindStringIndices(Vector<const schar> subject,
   }
 }
 
-template <typename schar>
-inline void FindCharIndices(Vector<const schar> subject,
-                            const schar pattern_char,
-                            ZoneList<int>* indices,
-                            unsigned int limit) {
-  // Collect indices of pattern_char in subject, and the end-of-string index.
-  // Stop after finding at most limit values.
-  int index = 0;
-  while (limit > 0) {
-    index = SingleCharIndexOf(subject, pattern_char, index);
-    if (index < 0) return;
-    indices->Add(index);
-    index++;
-    limit--;
-  }
-}
-
 
 static Object* Runtime_StringSplit(Arguments args) {
   ASSERT(args.length() == 3);
@@ -5438,49 +5236,33 @@ static Object* Runtime_StringSplit(Arguments args) {
   // Find (up to limit) indices of separator and end-of-string in subject
   int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
   ZoneList<int> indices(initial_capacity);
-  if (pattern_length == 1) {
-    // Special case, go directly to fast single-character split.
-    AssertNoAllocation nogc;
-    uc16 pattern_char = pattern->Get(0);
-    if (subject->IsTwoByteRepresentation()) {
-      FindCharIndices(subject->ToUC16Vector(), pattern_char,
-                      &indices,
-                      limit);
-    } else if (pattern_char <= String::kMaxAsciiCharCode) {
-      FindCharIndices(subject->ToAsciiVector(),
-                      static_cast<char>(pattern_char),
-                      &indices,
-                      limit);
+  if (!pattern->IsFlat()) FlattenString(pattern);
+  AssertNoAllocation nogc;
+  if (subject->IsAsciiRepresentation()) {
+    Vector<const char> subject_vector = subject->ToAsciiVector();
+    if (pattern->IsAsciiRepresentation()) {
+      FindStringIndices(subject_vector,
+                        pattern->ToAsciiVector(),
+                        &indices,
+                        limit);
+    } else {
+      FindStringIndices(subject_vector,
+                        pattern->ToUC16Vector(),
+                        &indices,
+                        limit);
     }
   } else {
-    if (!pattern->IsFlat()) FlattenString(pattern);
-    AssertNoAllocation nogc;
-    if (subject->IsAsciiRepresentation()) {
-      Vector<const char> subject_vector = subject->ToAsciiVector();
-      if (pattern->IsAsciiRepresentation()) {
-        FindStringIndices(subject_vector,
-                          pattern->ToAsciiVector(),
-                          &indices,
-                          limit);
-      } else {
-        FindStringIndices(subject_vector,
-                          pattern->ToUC16Vector(),
-                          &indices,
-                          limit);
-      }
+    Vector<const uc16> subject_vector = subject->ToUC16Vector();
+    if (pattern->IsAsciiRepresentation()) {
+      FindStringIndices(subject_vector,
+                        pattern->ToAsciiVector(),
+                        &indices,
+                        limit);
     } else {
-      Vector<const uc16> subject_vector = subject->ToUC16Vector();
-      if (pattern->IsAsciiRepresentation()) {
-        FindStringIndices(subject_vector,
-                          pattern->ToAsciiVector(),
-                          &indices,
-                          limit);
-      } else {
-        FindStringIndices(subject_vector,
-                          pattern->ToUC16Vector(),
-                          &indices,
-                          limit);
-      }
+      FindStringIndices(subject_vector,
+                        pattern->ToUC16Vector(),
+                        &indices,
+                        limit);
     }
   }
   if (static_cast<uint32_t>(indices.length()) < limit) {