Refactor implementation for String.prototype.replace.
authoryangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 27 Feb 2013 14:14:45 +0000 (14:14 +0000)
committeryangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 27 Feb 2013 14:14:45 +0000 (14:14 +0000)
R=ulan@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/12177015

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13761 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/jsregexp.cc
src/runtime.cc
src/runtime.h
src/string.js
test/mjsunit/string-replace.js

index 61ab2db..9b82da1 100644 (file)
@@ -687,7 +687,7 @@ Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
                                              Handle<String> subject,
                                              int capture_count,
                                              int32_t* match) {
-  CHECK(last_match_info->HasFastObjectElements());
+  ASSERT(last_match_info->HasFastObjectElements());
   int capture_register_count = (capture_count + 1) * 2;
   last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
   AssertNoAllocation no_gc;
index b64f461..f712e54 100644 (file)
@@ -2898,7 +2898,7 @@ void FindStringIndicesDispatch(Isolate* isolate,
 
 
 template<typename ResultSeqString>
-MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
+MUST_USE_RESULT static MaybeObject* StringReplaceGlobalAtomRegExpWithString(
     Isolate* isolate,
     Handle<String> subject,
     Handle<JSRegExp> pattern_regexp,
@@ -2921,9 +2921,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
       isolate, *subject, pattern, &indices, 0xffffffff, zone);
 
   int matches = indices.length();
-  if (matches == 0) {
-    return isolate->heap()->undefined_value();
-  }
+  if (matches == 0) return *subject;
 
   // Detect integer overflow.
   int64_t result_len_64 =
@@ -2983,7 +2981,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
 }
 
 
-MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
+MUST_USE_RESULT static MaybeObject* StringReplaceGlobalRegExpWithString(
     Isolate* isolate,
     Handle<String> subject,
     Handle<JSRegExp> regexp,
@@ -2992,7 +2990,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
   ASSERT(subject->IsFlat());
   ASSERT(replacement->IsFlat());
 
-  bool is_global = regexp->GetFlags().is_global();
   int capture_count = regexp->CaptureCount();
   int subject_length = subject->length();
 
@@ -3005,33 +3002,30 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
                                                      subject_length);
 
   // Shortcut for simple non-regexp global replacements
-  if (is_global &&
-      regexp->TypeTag() == JSRegExp::ATOM &&
-      simple_replace) {
+  if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
     if (subject->IsOneByteConvertible() &&
         replacement->IsOneByteConvertible()) {
-      return StringReplaceAtomRegExpWithString<SeqOneByteString>(
+      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
           isolate, subject, regexp, replacement, last_match_info);
     } else {
-      return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
+      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
           isolate, subject, regexp, replacement, last_match_info);
     }
   }
 
-  RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
+  RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
   if (global_cache.HasException()) return Failure::Exception();
 
   int32_t* current_match = global_cache.FetchNext();
   if (current_match == NULL) {
     if (global_cache.HasException()) return Failure::Exception();
-    return isolate->heap()->undefined_value();
+    return *subject;
   }
 
   // Guessing the number of parts that the final result string is built
   // from. Global regexps can match any number of times, so we guess
   // conservatively.
-  int expected_parts =
-      (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
+  int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
   ReplacementStringBuilder builder(isolate->heap(),
                                    subject,
                                    expected_parts);
@@ -3063,9 +3057,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
     }
     prev = end;
 
-    // Only continue checking for global regexps.
-    if (!is_global) break;
-
     current_match = global_cache.FetchNext();
   } while (current_match != NULL);
 
@@ -3086,43 +3077,32 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
 
 
 template <typename ResultSeqString>
-MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
+MUST_USE_RESULT static MaybeObject* StringReplaceGlobalRegExpWithEmptyString(
     Isolate* isolate,
     Handle<String> subject,
     Handle<JSRegExp> regexp,
     Handle<JSArray> last_match_info) {
   ASSERT(subject->IsFlat());
 
-  bool is_global = regexp->GetFlags().is_global();
-
   // Shortcut for simple non-regexp global replacements
-  if (is_global &&
-      regexp->TypeTag() == JSRegExp::ATOM) {
+  if (regexp->TypeTag() == JSRegExp::ATOM) {
     Handle<String> empty_string = isolate->factory()->empty_string();
     if (subject->IsOneByteRepresentation()) {
-      return StringReplaceAtomRegExpWithString<SeqOneByteString>(
-          isolate,
-          subject,
-          regexp,
-          empty_string,
-          last_match_info);
+      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
+          isolate, subject, regexp, empty_string, last_match_info);
     } else {
-      return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
-          isolate,
-          subject,
-          regexp,
-          empty_string,
-          last_match_info);
+      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
+          isolate, subject, regexp, empty_string, last_match_info);
     }
   }
 
-  RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
+  RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
   if (global_cache.HasException()) return Failure::Exception();
 
   int32_t* current_match = global_cache.FetchNext();
   if (current_match == NULL) {
     if (global_cache.HasException()) return Failure::Exception();
-    return isolate->heap()->undefined_value();
+    return *subject;
   }
 
   int start = current_match[0];
@@ -3142,23 +3122,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
         isolate->factory()->NewRawTwoByteString(new_length));
   }
 
-  if (!is_global) {
-    RegExpImpl::SetLastMatchInfo(
-        last_match_info, subject, capture_count, current_match);
-    if (start == end) {
-      return *subject;
-    } else {
-      if (start > 0) {
-        String::WriteToFlat(*subject, answer->GetChars(), 0, start);
-      }
-      if (end < subject_length) {
-        String::WriteToFlat(
-            *subject, answer->GetChars() + start, end, subject_length);
-      }
-      return *answer;
-    }
-  }
-
   int prev = 0;
   int position = 0;
 
@@ -3167,8 +3130,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
     end = current_match[1];
     if (prev < start) {
       // Add substring subject[prev;start] to answer string.
-      String::WriteToFlat(
-          *subject, answer->GetChars() + position, prev, start);
+      String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
       position += start - prev;
     }
     prev = end;
@@ -3210,7 +3172,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
 }
 
 
-RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
+RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceGlobalRegExpWithString) {
   ASSERT(args.length() == 4);
 
   HandleScope scope(isolate);
@@ -3220,21 +3182,23 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
 
-  if (!subject->IsFlat()) subject = FlattenGetString(subject);
+  ASSERT(regexp->GetFlags().is_global());
 
-  if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
+  if (!subject->IsFlat()) subject = FlattenGetString(subject);
 
   if (replacement->length() == 0) {
     if (subject->IsOneByteConvertible()) {
-      return StringReplaceRegExpWithEmptyString<SeqOneByteString>(
+      return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
           isolate, subject, regexp, last_match_info);
     } else {
-      return StringReplaceRegExpWithEmptyString<SeqTwoByteString>(
+      return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
           isolate, subject, regexp, last_match_info);
     }
   }
 
-  return StringReplaceRegExpWithString(
+  if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
+
+  return StringReplaceGlobalRegExpWithString(
       isolate, subject, regexp, replacement, last_match_info);
 }
 
index 2625c69..3e51d14 100644 (file)
@@ -202,7 +202,7 @@ namespace internal {
   F(StringLastIndexOf, 3, 1) \
   F(StringLocaleCompare, 2, 1) \
   F(SubString, 3, 1) \
-  F(StringReplaceRegExpWithString, 4, 1) \
+  F(StringReplaceGlobalRegExpWithString, 4, 1) \
   F(StringReplaceOneCharWithString, 3, 1) \
   F(StringMatch, 3, 1) \
   F(StringTrim, 3, 1) \
index 0349837..6054e90 100644 (file)
@@ -219,60 +219,79 @@ function StringReplace(search, replace) {
   }
   var subject = TO_STRING_INLINE(this);
 
-  // Delegate to one of the regular expression variants if necessary.
+  // Decision tree for dispatch
+  // .. regexp search
+  // .... string replace
+  // ...... non-global search
+  // ........ empty string replace
+  // ........ non-empty string replace (with $-expansion)
+  // ...... global search
+  // ........ no need to circumvent last match info override
+  // ........ need to circument last match info override
+  // .... function replace
+  // ...... global search
+  // ...... non-global search
+  // .. string search
+  // .... special case that replaces with one single character
+  // ...... function replace
+  // ...... string replace (with $-expansion)
+
   if (IS_REGEXP(search)) {
-    // Emulate RegExp.prototype.exec's side effect in step 5, even though
+    // Emulate RegExp.prototype.exec's side effect in step 5, even if
     // value is discarded.
     ToInteger(search.lastIndex);
     %_Log('regexp', 'regexp-replace,%0r,%1S', [search, subject]);
-    if (IS_SPEC_FUNCTION(replace)) {
-      if (search.global) {
-        return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
-      } else {
-        return StringReplaceNonGlobalRegExpWithFunction(subject,
-                                                        search,
-                                                        replace);
-      }
-    } else {
-      if (lastMatchInfoOverride == null) {
-        var answer = %StringReplaceRegExpWithString(subject,
-                                                    search,
-                                                    TO_STRING_INLINE(replace),
-                                                    lastMatchInfo);
-        if (IS_UNDEFINED(answer)) {  // No match.  Return subject string.
-          search.lastIndex = 0;
+
+    if (!IS_SPEC_FUNCTION(replace)) {
+      if (!search.global) {
+        // Non-global regexp search, string replace.
+        var match = DoRegExpExec(search, subject, 0);
+        if (match == null) {
+          search.lastIndex = 0
           return subject;
         }
-        if (search.global) search.lastIndex = 0;
-        return answer;
+        replace = TO_STRING_INLINE(replace);
+        if (replace.length == 0) {
+          return %_SubString(subject, 0, match[CAPTURE0]) +
+                 %_SubString(subject, match[CAPTURE1], subject.length)
+        }
+        return ExpandReplacement(replace, subject, lastMatchInfo,
+                                 %_SubString(subject, 0, match[CAPTURE0])) +
+               %_SubString(subject, match[CAPTURE1], subject.length);
+      }
+
+      // Global regexp search, string replace.
+      search.lastIndex = 0;
+      if (lastMatchInfoOverride == null) {
+        return %StringReplaceGlobalRegExpWithString(
+            subject, search, replace, lastMatchInfo);
       } else {
         // We use this hack to detect whether StringReplaceRegExpWithString
         // found at least one hit. In that case we need to remove any
         // override.
         var saved_subject = lastMatchInfo[LAST_SUBJECT_INDEX];
         lastMatchInfo[LAST_SUBJECT_INDEX] = 0;
-        var answer = %StringReplaceRegExpWithString(subject,
-                                                    search,
-                                                    TO_STRING_INLINE(replace),
-                                                    lastMatchInfo);
-        if (IS_UNDEFINED(answer)) {  // No match.  Return subject string.
-          search.lastIndex = 0;
-          lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
-          return subject;
-        }
+        var answer = %StringReplaceGlobalRegExpWithString(
+            subject, search, replace, lastMatchInfo);
         if (%_IsSmi(lastMatchInfo[LAST_SUBJECT_INDEX])) {
           lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
         } else {
           lastMatchInfoOverride = null;
         }
-        if (search.global) search.lastIndex = 0;
         return answer;
       }
     }
+
+    if (search.global) {
+      // Global regexp search, function replace.
+      return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
+    }
+    // Non-global regexp search, function replace.
+    return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace);
   }
 
-  // Convert the search argument to a string and search for it.
   search = TO_STRING_INLINE(search);
+
   if (search.length == 1 &&
       subject.length > 0xFF &&
       IS_STRING(replace) &&
@@ -295,8 +314,10 @@ function StringReplace(search, replace) {
   } else {
     reusableMatchInfo[CAPTURE0] = start;
     reusableMatchInfo[CAPTURE1] = end;
-    replace = TO_STRING_INLINE(replace);
-    result = ExpandReplacement(replace, subject, reusableMatchInfo, result);
+    result = ExpandReplacement(TO_STRING_INLINE(replace),
+                               subject,
+                               reusableMatchInfo,
+                               result);
   }
 
   return result + %_SubString(subject, end, subject.length);
@@ -333,6 +354,31 @@ function ExpandReplacement(string, subject, matchInfo, result) {
       } else if (peek == 39) {  // $' - suffix
         ++position;
         result += %_SubString(subject, matchInfo[CAPTURE1], subject.length);
+      } else if (peek >= 48 && peek <= 57) {
+        // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
+        var scaled_index = (peek - 48) << 1;
+        var advance = 1;
+        var number_of_captures = NUMBER_OF_CAPTURES(matchInfo);
+        if (position + 1 < string.length) {
+          var next = %_StringCharCodeAt(string, position + 1);
+          if (next >= 48 && next <= 57) {
+            var new_scaled_index = scaled_index * 10 + ((next - 48) << 1);
+            if (new_scaled_index < number_of_captures) {
+              scaled_index = new_scaled_index;
+              advance = 2;
+            }
+          }
+        }
+        if (scaled_index != 0 && scaled_index < number_of_captures) {
+          var start = matchInfo[CAPTURE(scaled_index)];
+          if (start >= 0) {
+            result +=
+              %_SubString(subject, start, matchInfo[CAPTURE(scaled_index + 1)]);
+          }
+          position += advance;
+        } else {
+          result += '$';
+        }
       } else {
         result += '$';
       }
index 6b022df..502a7a4 100644 (file)
@@ -212,3 +212,56 @@ var str = 'She sells seashells by the seashore.';
 var re = /sh/g;
 assertEquals('She sells sea$schells by the sea$schore.',
              str.replace(re,"$$" + 'sch'))
+
+
+var replace_obj = { length: 0, toString: function() { return "x"; }};
+assertEquals("axc", "abc".replace(/b/, replace_obj));
+
+var search_obj = { length: 1, toString: function() { return "b"; }};
+assertEquals("axc", "abc".replace(search_obj, function() { return "x"; }));
+
+var regexp99pattern = "";
+var subject = "";
+for (var i = 0; i < 99; i++) {
+  regexp99pattern += "(.)";
+  subject += String.fromCharCode(i + 24);
+}
+
+function testIndices99(re) {
+  // Test $1 .. $99
+  for (var i = 1; i < 100; i++) {
+    assertEquals(String.fromCharCode(i + 23),
+                 subject.replace(re, "$" + i));
+  }
+
+  // Test $01 .. $09
+  for (var i = 1; i < 10; i++) {
+    assertEquals(String.fromCharCode(i + 23),
+                 subject.replace(re, "$0" + i));
+  }
+
+  assertEquals("$0", subject.replace(re, "$0"));
+  assertEquals("$00", subject.replace(re, "$00"));
+  assertEquals(String.fromCharCode(10 + 23) + "0",
+               subject.replace(re, "$100"));
+}
+
+testIndices99(new RegExp(regexp99pattern));
+testIndices99(new RegExp(regexp99pattern, "g"));
+
+var regexp59pattern = "";
+for (var i = 0; i < 59; i++) regexp59pattern += "(.)";
+
+function testIndices59(re) {
+  // Test $60 .. $99.  Captures reach up to 59.  Per spec, how to deal
+  // with this is implementation-dependent. We interpret $60 as $6
+  // followed by "0", $61 as $6, followed by "1" and so on.
+  var tail = subject.substr(59);
+  for (var i = 60; i < 100; i++) {
+    assertEquals(String.fromCharCode(i / 10 + 23) + (i % 10) + tail,
+                 subject.replace(re, "$" + i));
+  }
+}
+
+testIndices59(new RegExp(regexp59pattern));
+testIndices59(new RegExp(regexp59pattern, "g"));