From d5ac565882c6df2600677ffbc4a8f27d51f09cd5 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Thu, 14 Oct 2010 12:54:00 +0000 Subject: [PATCH] Restructure RegExp exec cache code. Review URL: http://codereview.chromium.org/3778004 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5622 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/regexp.js | 150 ++++++++++++++++++++++++++++--------------------- src/string.js | 5 +- test/mjsunit/regexp.js | 87 ++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 66 deletions(-) diff --git a/src/regexp.js b/src/regexp.js index 59c4040..f87a5ee 100644 --- a/src/regexp.js +++ b/src/regexp.js @@ -126,11 +126,11 @@ function RegExpCache() { this.regExp = 0; this.subject = 0; this.replaceString = 0; - this.lastIndex = 0; // Also used for splitLimit when type is "split" this.answer = 0; // answerSaved marks whether the contents of answer is valid for a cache // hit in RegExpExec, StringMatch and StringSplit. this.answerSaved = false; + this.splitLimit = 0; // Used only when type is "split". } @@ -181,22 +181,30 @@ function RegExpExec(string) { var cache = regExpCache; var saveAnswer = false; + var lastIndex = this.lastIndex; + + // Since cache.subject is always a string, a matching input can not + // cause visible side-effects when converted to a string, so we can omit + // the conversion required by the specification. + // Likewise, the regexp.lastIndex and regexp.global properties are value + // properties that are not configurable, so reading them can also not cause + // any side effects (converting lastIndex to a number can, though). if (%_ObjectEquals(cache.type, 'exec') && - %_ObjectEquals(cache.lastIndex, this.lastIndex) && + %_ObjectEquals(0, lastIndex) && %_IsRegExpEquivalent(cache.regExp, this) && %_ObjectEquals(cache.subject, string)) { if (cache.answerSaved) { - // If this regexp is global, cache.lastIndex is zero, so we only get - // here if this.lastIndex is zero, and resulting this.lastIndex - // must be zero too, so no change is necessary. - if (!this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; + // The regexp.lastIndex value must be 0 for non-global RegExps, and for + // global RegExps we only cache negative results, which gives a lastIndex + // of zero as well. + this.lastIndex = 0; return %_RegExpCloneResult(cache.answer); } else { saveAnswer = true; } } - if (%_ArgumentsLength() == 0) { + if (%_ArgumentsLength() === 0) { var regExpInput = LAST_INPUT(lastMatchInfo); if (IS_UNDEFINED(regExpInput)) { throw MakeError('no_input_to_regexp', [this]); @@ -209,41 +217,48 @@ function RegExpExec(string) { } else { s = ToString(string); } - var lastIndex = this.lastIndex; - - var i = this.global ? TO_INTEGER(lastIndex) : 0; + var global = this.global; - if (i < 0 || i > s.length) { - this.lastIndex = 0; - return null; + // Conversion is required by the ES5 specification (RegExp.prototype.exec + // algorithm, step 5) even if the value is discarded for non-global RegExps. + var i = TO_INTEGER(lastIndex); + if (global) { + if (i < 0 || i > s.length) { + this.lastIndex = 0; + return null; + } + } else { + i = 0; } %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); // matchIndices is either null or the lastMatchInfo array. var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo); - if (matchIndices == null) { - if (this.global) { + if (matchIndices === null) { + if (global) { + // Cache negative result only if initial lastIndex was zero. this.lastIndex = 0; - if (lastIndex != 0) return matchIndices; + if (lastIndex !== 0) return matchIndices; } - cache.lastIndex = lastIndex; cache.regExp = this; - cache.subject = s; - cache.answer = matchIndices; // Null. + cache.subject = s; // Always a string. + cache.answer = null; cache.answerSaved = true; // Safe since no cloning is needed. cache.type = 'exec'; return matchIndices; // No match. } + + // Successful match. lastMatchInfoOverride = null; var result = BuildResultFromMatchInfo(matchIndices, s); - if (this.global) { + if (global) { + // Don't cache positive results for global regexps. this.lastIndex = lastMatchInfo[CAPTURE1]; } else { cache.regExp = this; cache.subject = s; - cache.lastIndex = lastIndex; if (saveAnswer) cache.answer = %_RegExpCloneResult(result); cache.answerSaved = saveAnswer; cache.type = 'exec'; @@ -273,32 +288,49 @@ function RegExpTest(string) { } string = regExpInput; } - var s; - if (IS_STRING(string)) { - s = string; - } else { - s = ToString(string); - } var lastIndex = this.lastIndex; + var cache = regExpCache; if (%_ObjectEquals(cache.type, 'test') && %_IsRegExpEquivalent(cache.regExp, this) && %_ObjectEquals(cache.subject, string) && - %_ObjectEquals(cache.lastIndex, lastIndex)) { - // If this regexp is not global, cache.lastIndex is zero, so we only get - // here if this.lastIndex is zero, and resulting this.lastIndex - // must be zero too, so no change is necessary. - if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; + %_ObjectEquals(0, lastIndex)) { + // The regexp.lastIndex value must be 0 for non-global RegExps, and for + // global RegExps we only cache negative results, which gives a resulting + // lastIndex of zero as well. + if (global) this.lastIndex = 0; return cache.answer; } + var s; + if (IS_STRING(string)) { + s = string; + } else { + s = ToString(string); + } + var length = s.length; + + // Conversion is required by the ES5 specification (RegExp.prototype.exec + // algorithm, step 5) even if the value is discarded for non-global RegExps. + var i = TO_INTEGER(lastIndex); + if (global) { + if (i < 0 || i > length) { + this.lastIndex = 0; + return false; + } + } else { + i = 0; + } + + var global = this.global; + // Remove irrelevant preceeding '.*' in a test regexp. The expression // checks whether this.source starts with '.*' and that the third // char is not a '?' - if (%_StringCharCodeAt(this.source,0) == 46 && // '.' - %_StringCharCodeAt(this.source,1) == 42 && // '*' - %_StringCharCodeAt(this.source,2) != 63) { // '?' + if (%_StringCharCodeAt(this.source, 0) == 46 && // '.' + %_StringCharCodeAt(this.source, 1) == 42 && // '*' + %_StringCharCodeAt(this.source, 2) != 63) { // '?' if (!%_ObjectEquals(regexp_key, this)) { regexp_key = this; regexp_val = new $RegExp(this.source.substring(2, this.source.length), @@ -309,33 +341,28 @@ function RegExpTest(string) { if (!regexp_val.test(s)) return false; } - var length = s.length; - var i = this.global ? TO_INTEGER(lastIndex) : 0; - - cache.type = 'test'; - cache.regExp = this; - cache.subject = s; - cache.lastIndex = i; - - if (i < 0 || i > length) { - this.lastIndex = 0; - cache.answer = false; - return false; - } - %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); // matchIndices is either null or the lastMatchInfo array. var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo); - if (matchIndices == null) { - if (this.global) this.lastIndex = 0; - cache.answer = false; - return false; + var result = (matchIndices !== null); + if (result) { + lastMatchInfoOverride = null; } - lastMatchInfoOverride = null; - if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; - cache.answer = true; - return true; + if (global) { + if (result) { + this.lastIndex = lastMatchInfo[CAPTURE1]; + return true; + } else { + this.lastIndex = 0; + if (lastIndex !== 0) return false; + } + } + cache.type = 'test'; + cache.regExp = this; + cache.subject = s; + cache.answer = result; + return result; } @@ -345,12 +372,9 @@ function RegExpToString() { // ecma_2/RegExp/properties-001.js. var src = this.source ? this.source : '(?:)'; var result = '/' + src + '/'; - if (this.global) - result += 'g'; - if (this.ignoreCase) - result += 'i'; - if (this.multiline) - result += 'm'; + if (this.global) result += 'g'; + if (this.ignoreCase) result += 'i'; + if (this.multiline) result += 'm'; return result; } diff --git a/src/string.js b/src/string.js index 30eedb3..d97f632 100644 --- a/src/string.js +++ b/src/string.js @@ -611,7 +611,7 @@ function StringSplit(separator, limit) { if (%_ObjectEquals(cache.type, 'split') && %_IsRegExpEquivalent(cache.regExp, separator) && %_ObjectEquals(cache.subject, subject) && - %_ObjectEquals(cache.lastIndex, limit)) { + %_ObjectEquals(cache.splitLimit, limit)) { if (cache.answerSaved) { return CloneDenseArray(cache.answer); } else { @@ -622,8 +622,7 @@ function StringSplit(separator, limit) { cache.type = 'split'; cache.regExp = separator; cache.subject = subject; - // Reuse lastIndex field for split limit when type is "split". - cache.lastIndex = limit; + cache.splitLimit = limit; %_Log('regexp', 'regexp-split,%0S,%1r', [subject, separator]); diff --git a/test/mjsunit/regexp.js b/test/mjsunit/regexp.js index db8b133..5836ddb 100644 --- a/test/mjsunit/regexp.js +++ b/test/mjsunit/regexp.js @@ -502,3 +502,90 @@ for (var i = 0; i < 100; i++) { res[3] = "Glopglyf"; assertEquals("Arglebargle", res.foobar); } + +// Test that we perform the spec required conversions in the correct order. +var log; +var string = "the string"; +var fakeLastIndex = { + valueOf: function() { + log.push("li"); + return 0; + } + }; +var fakeString = { + toString: function() { + log.push("ts"); + return string; + }, + length: 0 + }; + +var re = /str/; +log = []; +re.lastIndex = fakeLastIndex; +var result = re.exec(fakeString); +assertEquals(["str"], result); +assertEquals(["ts", "li"], log); + +// Again, to check if caching interferes. +log = []; +re.lastIndex = fakeLastIndex; +result = re.exec(fakeString); +assertEquals(["str"], result); +assertEquals(["ts", "li"], log); + +// And one more time, just to be certain. +log = []; +re.lastIndex = fakeLastIndex; +result = re.exec(fakeString); +assertEquals(["str"], result); +assertEquals(["ts", "li"], log); + +// Now with a global regexp, where lastIndex is actually used. +re = /str/g; +log = []; +re.lastIndex = fakeLastIndex; +var result = re.exec(fakeString); +assertEquals(["str"], result); +assertEquals(["ts", "li"], log); + +// Again, to check if caching interferes. +log = []; +re.lastIndex = fakeLastIndex; +result = re.exec(fakeString); +assertEquals(["str"], result); +assertEquals(["ts", "li"], log); + +// And one more time, just to be certain. +log = []; +re.lastIndex = fakeLastIndex; +result = re.exec(fakeString); +assertEquals(["str"], result); +assertEquals(["ts", "li"], log); + + +// Check that properties of RegExp have the correct permissions. +var re = /x/g; +var desc = Object.getOwnPropertyDescriptor(re, "global"); +assertEquals(true, desc.value); +assertEquals(false, desc.configurable); +assertEquals(false, desc.enumerable); +assertEquals(false, desc.writable); + +desc = Object.getOwnPropertyDescriptor(re, "multiline"); +assertEquals(false, desc.value); +assertEquals(false, desc.configurable); +assertEquals(false, desc.enumerable); +assertEquals(false, desc.writable); + +desc = Object.getOwnPropertyDescriptor(re, "ignoreCase"); +assertEquals(false, desc.value); +assertEquals(false, desc.configurable); +assertEquals(false, desc.enumerable); +assertEquals(false, desc.writable); + +desc = Object.getOwnPropertyDescriptor(re, "lastIndex"); +assertEquals(0, desc.value); +assertEquals(false, desc.configurable); +assertEquals(false, desc.enumerable); +assertEquals(true, desc.writable); -- 2.7.4