From 4cbe7100e657c33b6f60fd45d7059685f79872cf Mon Sep 17 00:00:00 2001 From: "yangguo@chromium.org" Date: Wed, 27 Feb 2013 14:14:45 +0000 Subject: [PATCH] Refactor implementation for String.prototype.replace. R=ulan@chromium.org BUG= Review URL: https://chromiumcodereview.appspot.com/12177015 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13761 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/jsregexp.cc | 2 +- src/runtime.cc | 88 ++++++++++---------------------- src/runtime.h | 2 +- src/string.js | 112 +++++++++++++++++++++++++++++------------ test/mjsunit/string-replace.js | 53 +++++++++++++++++++ 5 files changed, 160 insertions(+), 97 deletions(-) diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 61ab2db..9b82da1 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -687,7 +687,7 @@ Handle RegExpImpl::SetLastMatchInfo(Handle last_match_info, Handle subject, int capture_count, int32_t* match) { - CHECK(last_match_info->HasFastObjectElements()); + ASSERT(last_match_info->HasFastObjectElements()); int capture_register_count = (capture_count + 1) * 2; last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); AssertNoAllocation no_gc; diff --git a/src/runtime.cc b/src/runtime.cc index b64f461..f712e54 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2898,7 +2898,7 @@ void FindStringIndicesDispatch(Isolate* isolate, template -MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( +MUST_USE_RESULT static MaybeObject* StringReplaceGlobalAtomRegExpWithString( Isolate* isolate, Handle subject, Handle pattern_regexp, @@ -2921,9 +2921,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( isolate, *subject, pattern, &indices, 0xffffffff, zone); int matches = indices.length(); - if (matches == 0) { - return isolate->heap()->undefined_value(); - } + if (matches == 0) return *subject; // Detect integer overflow. int64_t result_len_64 = @@ -2983,7 +2981,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( } -MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( +MUST_USE_RESULT static MaybeObject* StringReplaceGlobalRegExpWithString( Isolate* isolate, Handle subject, Handle regexp, @@ -2992,7 +2990,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( ASSERT(subject->IsFlat()); ASSERT(replacement->IsFlat()); - bool is_global = regexp->GetFlags().is_global(); int capture_count = regexp->CaptureCount(); int subject_length = subject->length(); @@ -3005,33 +3002,30 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( subject_length); // Shortcut for simple non-regexp global replacements - if (is_global && - regexp->TypeTag() == JSRegExp::ATOM && - simple_replace) { + if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { if (subject->IsOneByteConvertible() && replacement->IsOneByteConvertible()) { - return StringReplaceAtomRegExpWithString( + return StringReplaceGlobalAtomRegExpWithString( isolate, subject, regexp, replacement, last_match_info); } else { - return StringReplaceAtomRegExpWithString( + return StringReplaceGlobalAtomRegExpWithString( isolate, subject, regexp, replacement, last_match_info); } } - RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate); + RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); if (global_cache.HasException()) return Failure::Exception(); int32_t* current_match = global_cache.FetchNext(); if (current_match == NULL) { if (global_cache.HasException()) return Failure::Exception(); - return isolate->heap()->undefined_value(); + return *subject; } // Guessing the number of parts that the final result string is built // from. Global regexps can match any number of times, so we guess // conservatively. - int expected_parts = - (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; + int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); @@ -3063,9 +3057,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( } prev = end; - // Only continue checking for global regexps. - if (!is_global) break; - current_match = global_cache.FetchNext(); } while (current_match != NULL); @@ -3086,43 +3077,32 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( template -MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( +MUST_USE_RESULT static MaybeObject* StringReplaceGlobalRegExpWithEmptyString( Isolate* isolate, Handle subject, Handle regexp, Handle last_match_info) { ASSERT(subject->IsFlat()); - bool is_global = regexp->GetFlags().is_global(); - // Shortcut for simple non-regexp global replacements - if (is_global && - regexp->TypeTag() == JSRegExp::ATOM) { + if (regexp->TypeTag() == JSRegExp::ATOM) { Handle empty_string = isolate->factory()->empty_string(); if (subject->IsOneByteRepresentation()) { - return StringReplaceAtomRegExpWithString( - isolate, - subject, - regexp, - empty_string, - last_match_info); + return StringReplaceGlobalAtomRegExpWithString( + isolate, subject, regexp, empty_string, last_match_info); } else { - return StringReplaceAtomRegExpWithString( - isolate, - subject, - regexp, - empty_string, - last_match_info); + return StringReplaceGlobalAtomRegExpWithString( + isolate, subject, regexp, empty_string, last_match_info); } } - RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate); + RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); if (global_cache.HasException()) return Failure::Exception(); int32_t* current_match = global_cache.FetchNext(); if (current_match == NULL) { if (global_cache.HasException()) return Failure::Exception(); - return isolate->heap()->undefined_value(); + return *subject; } int start = current_match[0]; @@ -3142,23 +3122,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( isolate->factory()->NewRawTwoByteString(new_length)); } - if (!is_global) { - RegExpImpl::SetLastMatchInfo( - last_match_info, subject, capture_count, current_match); - if (start == end) { - return *subject; - } else { - if (start > 0) { - String::WriteToFlat(*subject, answer->GetChars(), 0, start); - } - if (end < subject_length) { - String::WriteToFlat( - *subject, answer->GetChars() + start, end, subject_length); - } - return *answer; - } - } - int prev = 0; int position = 0; @@ -3167,8 +3130,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( end = current_match[1]; if (prev < start) { // Add substring subject[prev;start] to answer string. - String::WriteToFlat( - *subject, answer->GetChars() + position, prev, start); + String::WriteToFlat(*subject, answer->GetChars() + position, prev, start); position += start - prev; } prev = end; @@ -3210,7 +3172,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( } -RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) { +RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceGlobalRegExpWithString) { ASSERT(args.length() == 4); HandleScope scope(isolate); @@ -3220,21 +3182,23 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) { CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); - if (!subject->IsFlat()) subject = FlattenGetString(subject); + ASSERT(regexp->GetFlags().is_global()); - if (!replacement->IsFlat()) replacement = FlattenGetString(replacement); + if (!subject->IsFlat()) subject = FlattenGetString(subject); if (replacement->length() == 0) { if (subject->IsOneByteConvertible()) { - return StringReplaceRegExpWithEmptyString( + return StringReplaceGlobalRegExpWithEmptyString( isolate, subject, regexp, last_match_info); } else { - return StringReplaceRegExpWithEmptyString( + return StringReplaceGlobalRegExpWithEmptyString( isolate, subject, regexp, last_match_info); } } - return StringReplaceRegExpWithString( + if (!replacement->IsFlat()) replacement = FlattenGetString(replacement); + + return StringReplaceGlobalRegExpWithString( isolate, subject, regexp, replacement, last_match_info); } diff --git a/src/runtime.h b/src/runtime.h index 2625c69..3e51d14 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -202,7 +202,7 @@ namespace internal { F(StringLastIndexOf, 3, 1) \ F(StringLocaleCompare, 2, 1) \ F(SubString, 3, 1) \ - F(StringReplaceRegExpWithString, 4, 1) \ + F(StringReplaceGlobalRegExpWithString, 4, 1) \ F(StringReplaceOneCharWithString, 3, 1) \ F(StringMatch, 3, 1) \ F(StringTrim, 3, 1) \ diff --git a/src/string.js b/src/string.js index 0349837..6054e90 100644 --- a/src/string.js +++ b/src/string.js @@ -219,60 +219,79 @@ function StringReplace(search, replace) { } var subject = TO_STRING_INLINE(this); - // Delegate to one of the regular expression variants if necessary. + // Decision tree for dispatch + // .. regexp search + // .... string replace + // ...... non-global search + // ........ empty string replace + // ........ non-empty string replace (with $-expansion) + // ...... global search + // ........ no need to circumvent last match info override + // ........ need to circument last match info override + // .... function replace + // ...... global search + // ...... non-global search + // .. string search + // .... special case that replaces with one single character + // ...... function replace + // ...... string replace (with $-expansion) + if (IS_REGEXP(search)) { - // Emulate RegExp.prototype.exec's side effect in step 5, even though + // Emulate RegExp.prototype.exec's side effect in step 5, even if // value is discarded. ToInteger(search.lastIndex); %_Log('regexp', 'regexp-replace,%0r,%1S', [search, subject]); - if (IS_SPEC_FUNCTION(replace)) { - if (search.global) { - return StringReplaceGlobalRegExpWithFunction(subject, search, replace); - } else { - return StringReplaceNonGlobalRegExpWithFunction(subject, - search, - replace); - } - } else { - if (lastMatchInfoOverride == null) { - var answer = %StringReplaceRegExpWithString(subject, - search, - TO_STRING_INLINE(replace), - lastMatchInfo); - if (IS_UNDEFINED(answer)) { // No match. Return subject string. - search.lastIndex = 0; + + if (!IS_SPEC_FUNCTION(replace)) { + if (!search.global) { + // Non-global regexp search, string replace. + var match = DoRegExpExec(search, subject, 0); + if (match == null) { + search.lastIndex = 0 return subject; } - if (search.global) search.lastIndex = 0; - return answer; + replace = TO_STRING_INLINE(replace); + if (replace.length == 0) { + return %_SubString(subject, 0, match[CAPTURE0]) + + %_SubString(subject, match[CAPTURE1], subject.length) + } + return ExpandReplacement(replace, subject, lastMatchInfo, + %_SubString(subject, 0, match[CAPTURE0])) + + %_SubString(subject, match[CAPTURE1], subject.length); + } + + // Global regexp search, string replace. + search.lastIndex = 0; + if (lastMatchInfoOverride == null) { + return %StringReplaceGlobalRegExpWithString( + subject, search, replace, lastMatchInfo); } else { // We use this hack to detect whether StringReplaceRegExpWithString // found at least one hit. In that case we need to remove any // override. var saved_subject = lastMatchInfo[LAST_SUBJECT_INDEX]; lastMatchInfo[LAST_SUBJECT_INDEX] = 0; - var answer = %StringReplaceRegExpWithString(subject, - search, - TO_STRING_INLINE(replace), - lastMatchInfo); - if (IS_UNDEFINED(answer)) { // No match. Return subject string. - search.lastIndex = 0; - lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject; - return subject; - } + var answer = %StringReplaceGlobalRegExpWithString( + subject, search, replace, lastMatchInfo); if (%_IsSmi(lastMatchInfo[LAST_SUBJECT_INDEX])) { lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject; } else { lastMatchInfoOverride = null; } - if (search.global) search.lastIndex = 0; return answer; } } + + if (search.global) { + // Global regexp search, function replace. + return StringReplaceGlobalRegExpWithFunction(subject, search, replace); + } + // Non-global regexp search, function replace. + return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace); } - // Convert the search argument to a string and search for it. search = TO_STRING_INLINE(search); + if (search.length == 1 && subject.length > 0xFF && IS_STRING(replace) && @@ -295,8 +314,10 @@ function StringReplace(search, replace) { } else { reusableMatchInfo[CAPTURE0] = start; reusableMatchInfo[CAPTURE1] = end; - replace = TO_STRING_INLINE(replace); - result = ExpandReplacement(replace, subject, reusableMatchInfo, result); + result = ExpandReplacement(TO_STRING_INLINE(replace), + subject, + reusableMatchInfo, + result); } return result + %_SubString(subject, end, subject.length); @@ -333,6 +354,31 @@ function ExpandReplacement(string, subject, matchInfo, result) { } else if (peek == 39) { // $' - suffix ++position; result += %_SubString(subject, matchInfo[CAPTURE1], subject.length); + } else if (peek >= 48 && peek <= 57) { + // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99 + var scaled_index = (peek - 48) << 1; + var advance = 1; + var number_of_captures = NUMBER_OF_CAPTURES(matchInfo); + if (position + 1 < string.length) { + var next = %_StringCharCodeAt(string, position + 1); + if (next >= 48 && next <= 57) { + var new_scaled_index = scaled_index * 10 + ((next - 48) << 1); + if (new_scaled_index < number_of_captures) { + scaled_index = new_scaled_index; + advance = 2; + } + } + } + if (scaled_index != 0 && scaled_index < number_of_captures) { + var start = matchInfo[CAPTURE(scaled_index)]; + if (start >= 0) { + result += + %_SubString(subject, start, matchInfo[CAPTURE(scaled_index + 1)]); + } + position += advance; + } else { + result += '$'; + } } else { result += '$'; } diff --git a/test/mjsunit/string-replace.js b/test/mjsunit/string-replace.js index 6b022df..502a7a4 100644 --- a/test/mjsunit/string-replace.js +++ b/test/mjsunit/string-replace.js @@ -212,3 +212,56 @@ var str = 'She sells seashells by the seashore.'; var re = /sh/g; assertEquals('She sells sea$schells by the sea$schore.', str.replace(re,"$$" + 'sch')) + + +var replace_obj = { length: 0, toString: function() { return "x"; }}; +assertEquals("axc", "abc".replace(/b/, replace_obj)); + +var search_obj = { length: 1, toString: function() { return "b"; }}; +assertEquals("axc", "abc".replace(search_obj, function() { return "x"; })); + +var regexp99pattern = ""; +var subject = ""; +for (var i = 0; i < 99; i++) { + regexp99pattern += "(.)"; + subject += String.fromCharCode(i + 24); +} + +function testIndices99(re) { + // Test $1 .. $99 + for (var i = 1; i < 100; i++) { + assertEquals(String.fromCharCode(i + 23), + subject.replace(re, "$" + i)); + } + + // Test $01 .. $09 + for (var i = 1; i < 10; i++) { + assertEquals(String.fromCharCode(i + 23), + subject.replace(re, "$0" + i)); + } + + assertEquals("$0", subject.replace(re, "$0")); + assertEquals("$00", subject.replace(re, "$00")); + assertEquals(String.fromCharCode(10 + 23) + "0", + subject.replace(re, "$100")); +} + +testIndices99(new RegExp(regexp99pattern)); +testIndices99(new RegExp(regexp99pattern, "g")); + +var regexp59pattern = ""; +for (var i = 0; i < 59; i++) regexp59pattern += "(.)"; + +function testIndices59(re) { + // Test $60 .. $99. Captures reach up to 59. Per spec, how to deal + // with this is implementation-dependent. We interpret $60 as $6 + // followed by "0", $61 as $6, followed by "1" and so on. + var tail = subject.substr(59); + for (var i = 60; i < 100; i++) { + assertEquals(String.fromCharCode(i / 10 + 23) + (i % 10) + tail, + subject.replace(re, "$" + i)); + } +} + +testIndices59(new RegExp(regexp59pattern)); +testIndices59(new RegExp(regexp59pattern, "g")); -- 2.7.4