From eb656c723b6b7375d07dfe8c7cdb820b4c058709 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Tue, 17 Mar 2009 12:44:20 +0000 Subject: [PATCH] Moved subject and index before matches in RegExp lastMatchInfo. Some minor changes, and removed the new handlescope in the inner loop of replace. Only really affects replaces on extremely long strings. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1524 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/heap.cc | 1 - src/jsregexp.cc | 38 ++++++++++++++++++-------------------- src/jsregexp.h | 8 +++----- src/macros.py | 12 ++++++------ src/regexp-delay.js | 9 ++++----- src/string.js | 2 +- src/v8natives.js | 2 +- test/cctest/test-regexp.cc | 16 ++++++++++------ 8 files changed, 43 insertions(+), 45 deletions(-) diff --git a/src/heap.cc b/src/heap.cc index daefaaf..b9ba68d 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -34,7 +34,6 @@ #include "compilation-cache.h" #include "debug.h" #include "global-handles.h" -#include "jsregexp.h" #include "mark-compact.h" #include "natives.h" #include "scanner.h" diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 7acf639..068b0a7 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -307,9 +307,10 @@ Handle RegExpImpl::AtomExecGlobal(Handle re, // that is attached to the global RegExp object. We will be returning // an array of these. Handle array = Factory::NewFixedArray(kFirstCapture + 2); + SetLastCaptureCount(*array, 2); + // Ignore subject and input fields. SetCapture(*array, 0, value); SetCapture(*array, 1, end); - SetLastCaptureCount(*array, 2); Handle pair = Factory::NewJSArrayWithElements(array); SetElement(result, match_count, pair); match_count++; @@ -561,22 +562,22 @@ Handle RegExpImpl::IrregexpExecGlobal(Handle regexp, // Create an array that looks like the static last_match_info array // that is attached to the global RegExp object. We will be returning // an array of these. - Handle matches_array(JSArray::cast(*matches)->elements()); + int match_length = kFirstCapture + number_of_capture_registers; Handle latest_match = - Factory::NewJSArray(kFirstCapture + number_of_capture_registers); - Handle latest_match_array(latest_match->elements()); - - for (int i = 0; i < number_of_capture_registers; i++) { - SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); - } - SetLastCaptureCount(*latest_match_array, number_of_capture_registers); - + Factory::NewJSArray(match_length); + + AssertNoAllocation no_allocation; + FixedArray* match_array = JSArray::cast(*matches)->elements(); + match_array->CopyTo(0, + latest_match->elements(), + 0, + match_length); SetElement(result, result_length, latest_match); result_length++; - previous_index = GetCapture(*matches_array, 1); - if (GetCapture(*matches_array, 0) == previous_index) + previous_index = GetCapture(match_array, 1); + if (GetCapture(match_array, 0) == previous_index) { previous_index++; - + } } else { ASSERT(matches->IsNull()); return result; @@ -636,18 +637,15 @@ Handle RegExpImpl::IrregexpExecOnce(Handle regexp, } FixedArray* array = last_match_info->elements(); - // Clear previous input/string values to avoid potential memory leak. - SetLastSubject(array, Heap::empty_string()); - SetLastInput(array, Heap::empty_string()); ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); // The captures come in (start, end+1) pairs. - for (int i = 0; i < number_of_capture_registers; i += 2) { - SetCapture(array, i, offsets_vector[i]); - SetCapture(array, i + 1, offsets_vector[i + 1]); - } SetLastCaptureCount(array, number_of_capture_registers); SetLastSubject(array, *original_subject); SetLastInput(array, *original_subject); + for (int i = 0; i < number_of_capture_registers; i+=2) { + SetCapture(array, i, offsets_vector[i]); + SetCapture(array, i + 1, offsets_vector[i + 1]); + } return last_match_info; } diff --git a/src/jsregexp.h b/src/jsregexp.h index f6e2052..2393f7b 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -112,7 +112,7 @@ class RegExpImpl { static const int kLastCaptureCount = 0; static const int kLastSubject = 1; static const int kLastInput = 2; - static const int kFirstCapture = 1; + static const int kFirstCapture = 3; static const int kLastMatchOverhead = 3; // Used to access the lastMatchInfo array. @@ -125,13 +125,11 @@ class RegExpImpl { } static void SetLastSubject(FixedArray* array, String* to) { - int capture_count = GetLastCaptureCount(array); - array->set(capture_count + kLastSubject, to); + array->set(kLastSubject, to); } static void SetLastInput(FixedArray* array, String* to) { - int capture_count = GetLastCaptureCount(array); - array->set(capture_count + kLastInput, to); + array->set(kLastInput, to); } static void SetCapture(FixedArray* array, int index, int to) { diff --git a/src/macros.py b/src/macros.py index a3db5f9..d78ecd9 100644 --- a/src/macros.py +++ b/src/macros.py @@ -102,7 +102,7 @@ const ORIGINAL_DATE = (global.Date, $Date); # Constants used on an array to implement the properties of the RegExp object. const REGEXP_NUMBER_OF_CAPTURES = 0; -const REGEXP_FIRST_CAPTURE = 1; +const REGEXP_FIRST_CAPTURE = 3; # We can't put macros in macros so we use constants here. # REGEXP_NUMBER_OF_CAPTURES @@ -111,10 +111,10 @@ macro NUMBER_OF_CAPTURES(array) = ((array)[0]); # Last input and last subject are after the captures so we can omit them on # results returned from global searches. Beware - these evaluate their # arguments twice. -macro LAST_SUBJECT(array) = ((array)[(array)[0] + 1]); -macro LAST_INPUT(array) = ((array)[(array)[0] + 2]); +macro LAST_SUBJECT(array) = ((array)[1]); +macro LAST_INPUT(array) = ((array)[2]); # REGEXP_FIRST_CAPTURE -macro CAPTURE(index) = (1 + (index)); -const CAPTURE0 = 1; -const CAPTURE1 = 2; +macro CAPTURE(index) = (3 + (index)); +const CAPTURE0 = 3; +const CAPTURE1 = 4; diff --git a/src/regexp-delay.js b/src/regexp-delay.js index e80c39f..3362e88 100644 --- a/src/regexp-delay.js +++ b/src/regexp-delay.js @@ -300,7 +300,7 @@ function RegExpGetRightContext() { // The properties $1..$9 are the first nine capturing substrings of the last // successful match, or ''. The function RegExpMakeCaptureGetter will be -// called with indeces from 1 to 9. +// called with indices from 1 to 9. function RegExpMakeCaptureGetter(n) { return function() { var index = n * 2; @@ -321,10 +321,10 @@ function RegExpMakeCaptureGetter(n) { // the subject string for the last successful match. var lastMatchInfo = [ 2, // REGEXP_NUMBER_OF_CAPTURES - 0, // REGEXP_FIRST_CAPTURE + 0 - 0, // REGEXP_FIRST_CAPTURE + 1 "", // Last subject. void 0, // Last input - settable with RegExpSetInput. + 0, // REGEXP_FIRST_CAPTURE + 0 + 0, // REGEXP_FIRST_CAPTURE + 1 ]; // ------------------------------------------------------------------- @@ -353,8 +353,7 @@ function SetupRegExp() { return IS_UNDEFINED(regExpInput) ? "" : regExpInput; } function RegExpSetInput(string) { - lastMatchInfo[lastMatchInfo[REGEXP_NUMBER_OF_CAPTURES] + 2] = - ToString(string); + LAST_INPUT(lastMatchInfo) = ToString(string); }; // All these accessors are set with the 'never_used' flag set to true. diff --git a/src/string.js b/src/string.js index de35902..18ef3d8 100644 --- a/src/string.js +++ b/src/string.js @@ -236,7 +236,7 @@ function StringReplace(search, replace) { // needle is a string rather than a regexp. In this case we can't update // lastMatchArray without erroneously affecting the properties on the global // RegExp object. -var reusableMatchInfo = [2, -1, -1, "", ""]; +var reusableMatchInfo = [2, "", "", -1, -1]; // Helper function for regular expressions in String.prototype.replace. diff --git a/src/v8natives.js b/src/v8natives.js index 094f79c..9772e2f 100644 --- a/src/v8natives.js +++ b/src/v8natives.js @@ -74,7 +74,6 @@ function GlobalIsFinite(number) { // ECMA-262 - 15.1.2.2 function GlobalParseInt(string, radix) { if (radix === void 0) { - radix = 0; // Some people use parseInt instead of Math.floor. This // optimization makes parseInt on a Smi 12 times faster (60ns // vs 800ns). The following optimization makes parseInt on a @@ -87,6 +86,7 @@ function GlobalParseInt(string, radix) { // Truncate number. return string | 0; } + radix = 0; } else { radix = TO_INT32(radix); if (!(radix == 0 || (2 <= radix && radix <= 36))) diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc index 50fe862..11055c7 100644 --- a/test/cctest/test-regexp.cc +++ b/test/cctest/test-regexp.cc @@ -641,18 +641,22 @@ TEST(MacroAssembler) { Handle array = Handle::cast(m.GetCode(source)); int captures[5]; - Handle f1 = - Factory::NewStringFromAscii(CStrVector("foobar")); - CHECK(IrregexpInterpreter::Match(array, f1, captures, 0)); + const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'}; + Handle f1_16 = + Factory::NewStringFromTwoByte(Vector(str1, 6)); + + CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0)); CHECK_EQ(0, captures[0]); CHECK_EQ(3, captures[1]); CHECK_EQ(1, captures[2]); CHECK_EQ(2, captures[3]); CHECK_EQ(84, captures[4]); - Handle f2 = - Factory::NewStringFromAscii(CStrVector("barfoo")); - CHECK(!IrregexpInterpreter::Match(array, f2, captures, 0)); + const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'}; + Handle f2_16 = + Factory::NewStringFromTwoByte(Vector(str2, 6)); + + CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0)); CHECK_EQ(42, captures[0]); } -- 2.7.4