From: erik.corry@gmail.com Date: Fri, 17 Jun 2011 08:01:12 +0000 (+0000) Subject: Refix issue 1472. The previous fix worked for the example in the bug X-Git-Tag: upstream/4.7.83~19134 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c95ecb1fcdb6dd50b8d829adbeb6e2d70b5dd04e;p=platform%2Fupstream%2Fv8.git Refix issue 1472. The previous fix worked for the example in the bug report, but was not general enough to catch all cases. This is a new approach. Includes regression test! Review URL: http://codereview.chromium.org/7193007 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8318 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/ast.cc b/src/ast.cc index 9f83d16..03df229 100644 --- a/src/ast.cc +++ b/src/ast.cc @@ -925,26 +925,6 @@ bool RegExpCapture::IsAnchoredAtEnd() { } -bool RegExpDisjunction::ContainsExpandedQuantifier() { - if (contains_expanded_quantifier_) return true; - int len = alternatives_->length(); - for (int i = 0; i < len; i++) { - if (alternatives_->at(i)->ContainsExpandedQuantifier()) return true; - } - return false; -} - - -bool RegExpAlternative::ContainsExpandedQuantifier() { - if (contains_expanded_quantifier_) return true; - int len = nodes_->length(); - for (int i = 0; i < len; i++) { - if (nodes_->at(i)->ContainsExpandedQuantifier()) return true; - } - return false; -} - - // Convert regular expression trees to a simple sexp representation. // This representation should be different from the input grammar // in as many cases as possible, to make it more difficult for incorrect diff --git a/src/ast.h b/src/ast.h index 5823f92..0101249 100644 --- a/src/ast.h +++ b/src/ast.h @@ -1751,7 +1751,6 @@ class RegExpVisitor BASE_EMBEDDED { class RegExpTree: public ZoneObject { public: static const int kInfinity = kMaxInt; - RegExpTree() : contains_expanded_quantifier_(false) { } virtual ~RegExpTree() { } virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; virtual RegExpNode* ToNode(RegExpCompiler* compiler, @@ -1761,12 +1760,6 @@ class RegExpTree: public ZoneObject { virtual bool IsAnchoredAtEnd() { return false; } virtual int min_match() = 0; virtual int max_match() = 0; - virtual bool ContainsExpandedQuantifier() { - return contains_expanded_quantifier_; - } - void set_contains_expanded_quantifier(bool value) { - contains_expanded_quantifier_ = value; - } // Returns the interval of registers used for captures within this // expression. virtual Interval CaptureRegisters() { return Interval::Empty(); } @@ -1777,9 +1770,6 @@ class RegExpTree: public ZoneObject { virtual bool Is##Name(); FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) #undef MAKE_ASTYPE - - protected: - bool contains_expanded_quantifier_; }; @@ -1796,7 +1786,6 @@ class RegExpDisjunction: public RegExpTree { virtual bool IsAnchoredAtEnd(); virtual int min_match() { return min_match_; } virtual int max_match() { return max_match_; } - virtual bool ContainsExpandedQuantifier(); ZoneList* alternatives() { return alternatives_; } private: ZoneList* alternatives_; @@ -1818,7 +1807,6 @@ class RegExpAlternative: public RegExpTree { virtual bool IsAnchoredAtEnd(); virtual int min_match() { return min_match_; } virtual int max_match() { return max_match_; } - virtual bool ContainsExpandedQuantifier(); ZoneList* nodes() { return nodes_; } private: ZoneList* nodes_; @@ -1968,8 +1956,7 @@ class RegExpQuantifier: public RegExpTree { min_(min), max_(max), min_match_(min * body->min_match()), - type_(type), - contains_expanded_quantifier_(false) { + type_(type) { if (max > 0 && body->max_match() > kInfinity / max) { max_match_ = kInfinity; } else { @@ -1991,9 +1978,6 @@ class RegExpQuantifier: public RegExpTree { virtual bool IsQuantifier(); virtual int min_match() { return min_match_; } virtual int max_match() { return max_match_; } - virtual bool ContainsExpandedQuantifier() { - return contains_expanded_quantifier_ || body_->ContainsExpandedQuantifier(); - } int min() { return min_; } int max() { return max_; } bool is_possessive() { return type_ == POSSESSIVE; } @@ -2008,7 +1992,6 @@ class RegExpQuantifier: public RegExpTree { int min_match_; int max_match_; Type type_; - bool contains_expanded_quantifier_; }; @@ -2030,9 +2013,6 @@ class RegExpCapture: public RegExpTree { virtual bool IsCapture(); virtual int min_match() { return body_->min_match(); } virtual int max_match() { return body_->max_match(); } - virtual bool ContainsExpandedQuantifier() { - return contains_expanded_quantifier_ || body_->ContainsExpandedQuantifier(); - } RegExpTree* body() { return body_; } int index() { return index_; } static int StartRegister(int index) { return index * 2; } @@ -2064,9 +2044,6 @@ class RegExpLookahead: public RegExpTree { virtual bool IsAnchoredAtStart(); virtual int min_match() { return 0; } virtual int max_match() { return 0; } - virtual bool ContainsExpandedQuantifier() { - return contains_expanded_quantifier_ || body_->ContainsExpandedQuantifier(); - } RegExpTree* body() { return body_; } bool is_positive() { return is_positive_; } int capture_count() { return capture_count_; } diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 1ce6791..73dbdb0 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -810,6 +810,11 @@ class RegExpCompiler { inline bool ignore_case() { return ignore_case_; } inline bool ascii() { return ascii_; } + int current_expansion_factor() { return current_expansion_factor_; } + void set_current_expansion_factor(int value) { + current_expansion_factor_ = value; + } + static const int kNoRegister = -1; private: @@ -821,6 +826,7 @@ class RegExpCompiler { bool ignore_case_; bool ascii_; bool reg_exp_too_big_; + int current_expansion_factor_; }; @@ -848,7 +854,8 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) recursion_depth_(0), ignore_case_(ignore_case), ascii_(ascii), - reg_exp_too_big_(false) { + reg_exp_too_big_(false), + current_expansion_factor_(1) { accept_ = new EndNode(EndNode::ACCEPT); ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); } @@ -3730,6 +3737,44 @@ RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, } +// Scoped object to keep track of how much we unroll quantifier loops in the +// regexp graph generator. +class RegExpExpansionLimiter { + public: + static const int kMaxExpansionFactor = 6; + RegExpExpansionLimiter(RegExpCompiler* compiler, int factor) + : compiler_(compiler), + saved_expansion_factor_(compiler->current_expansion_factor()), + ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { + ASSERT(factor > 0); + if (ok_to_expand_) { + if (factor > kMaxExpansionFactor) { + // Avoid integer overflow of the current expansion factor. + ok_to_expand_ = false; + compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); + } else { + int new_factor = saved_expansion_factor_ * factor; + ok_to_expand_ = (new_factor <= kMaxExpansionFactor); + compiler->set_current_expansion_factor(new_factor); + } + } + } + + ~RegExpExpansionLimiter() { + compiler_->set_current_expansion_factor(saved_expansion_factor_); + } + + bool ok_to_expand() { return ok_to_expand_; } + + private: + RegExpCompiler* compiler_; + int saved_expansion_factor_; + bool ok_to_expand_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); +}; + + RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy, @@ -3766,45 +3811,49 @@ RegExpNode* RegExpQuantifier::ToNode(int min, bool needs_capture_clearing = !capture_registers.is_empty(); if (body_can_be_empty) { body_start_reg = compiler->AllocateRegister(); - } else if (FLAG_regexp_optimization && - !body->ContainsExpandedQuantifier() && - !needs_capture_clearing) { + } else if (FLAG_regexp_optimization && !needs_capture_clearing) { // Only unroll if there are no captures and the body can't be // empty. - if (min > 0 && min <= kMaxUnrolledMinMatches) { - int new_max = (max == kInfinity) ? max : max - min; - // Recurse once to get the loop or optional matches after the fixed ones. - RegExpNode* answer = ToNode( - 0, new_max, is_greedy, body, compiler, on_success, true); - // Unroll the forced matches from 0 to min. This can cause chains of - // TextNodes (which the parser does not generate). These should be - // combined if it turns out they hinder good code generation. - for (int i = 0; i < min; i++) { - answer = body->ToNode(compiler, answer); + { + RegExpExpansionLimiter limiter( + compiler, min + ((max != min) ? 1 : 0)); + if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { + int new_max = (max == kInfinity) ? max : max - min; + // Recurse once to get the loop or optional matches after the fixed + // ones. + RegExpNode* answer = ToNode( + 0, new_max, is_greedy, body, compiler, on_success, true); + // Unroll the forced matches from 0 to min. This can cause chains of + // TextNodes (which the parser does not generate). These should be + // combined if it turns out they hinder good code generation. + for (int i = 0; i < min; i++) { + answer = body->ToNode(compiler, answer); + } + return answer; } - if (min > 1) body->set_contains_expanded_quantifier(true); - return answer; } - if (max <= kMaxUnrolledMaxMatches) { - ASSERT(min == 0); - // Unroll the optional matches up to max. - RegExpNode* answer = on_success; - for (int i = 0; i < max; i++) { - ChoiceNode* alternation = new ChoiceNode(2); - if (is_greedy) { - alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, - answer))); - alternation->AddAlternative(GuardedAlternative(on_success)); - } else { - alternation->AddAlternative(GuardedAlternative(on_success)); - alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, - answer))); + if (max <= kMaxUnrolledMaxMatches && min == 0) { + ASSERT(max > 0); // Due to the 'if' above. + RegExpExpansionLimiter limiter(compiler, max); + if (limiter.ok_to_expand()) { + // Unroll the optional matches up to max. + RegExpNode* answer = on_success; + for (int i = 0; i < max; i++) { + ChoiceNode* alternation = new ChoiceNode(2); + if (is_greedy) { + alternation->AddAlternative( + GuardedAlternative(body->ToNode(compiler, answer))); + alternation->AddAlternative(GuardedAlternative(on_success)); + } else { + alternation->AddAlternative(GuardedAlternative(on_success)); + alternation->AddAlternative( + GuardedAlternative(body->ToNode(compiler, answer))); + } + answer = alternation; + if (not_at_start) alternation->set_not_at_start(); } - answer = alternation; - if (not_at_start) alternation->set_not_at_start(); + return answer; } - if (max > 1) body->set_contains_expanded_quantifier(true); - return answer; } } bool has_min = min > 0; @@ -4130,12 +4179,6 @@ void CharacterRange::Split(ZoneList* base, } -static void AddUncanonicals(Isolate* isolate, - ZoneList* ranges, - int bottom, - int top); - - void CharacterRange::AddCaseEquivalents(ZoneList* ranges, bool is_ascii) { Isolate* isolate = Isolate::Current(); @@ -4296,101 +4339,6 @@ SetRelation CharacterRange::WordCharacterRelation( } -static void AddUncanonicals(Isolate* isolate, - ZoneList* ranges, - int bottom, - int top) { - unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; - // Zones with no case mappings. There is a DEBUG-mode loop to assert that - // this table is correct. - // 0x0600 - 0x0fff - // 0x1100 - 0x1cff - // 0x2000 - 0x20ff - // 0x2200 - 0x23ff - // 0x2500 - 0x2bff - // 0x2e00 - 0xa5ff - // 0xa800 - 0xfaff - // 0xfc00 - 0xfeff - const int boundary_count = 18; - int boundaries[] = { - 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500, - 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00}; - - // Special ASCII rule from spec can save us some work here. - if (bottom == 0x80 && top == 0xffff) return; - - if (top <= boundaries[0]) { - CharacterRange range(bottom, top); - range.AddCaseEquivalents(ranges, false); - return; - } - - // Split up very large ranges. This helps remove ranges where there are no - // case mappings. - for (int i = 0; i < boundary_count; i++) { - if (bottom < boundaries[i] && top >= boundaries[i]) { - AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); - AddUncanonicals(isolate, ranges, boundaries[i], top); - return; - } - } - - // If we are completely in a zone with no case mappings then we are done. - for (int i = 0; i < boundary_count; i += 2) { - if (bottom >= boundaries[i] && top < boundaries[i + 1]) { -#ifdef DEBUG - for (int j = bottom; j <= top; j++) { - unsigned current_char = j; - int length = isolate->jsregexp_uncanonicalize()->get(current_char, - '\0', chars); - for (int k = 0; k < length; k++) { - ASSERT(chars[k] == current_char); - } - } -#endif - return; - } - } - - // Step through the range finding equivalent characters. - ZoneList *characters = new ZoneList(100); - for (int i = bottom; i <= top; i++) { - int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); - for (int j = 0; j < length; j++) { - uc32 chr = chars[j]; - if (chr != i && (chr < bottom || chr > top)) { - characters->Add(chr); - } - } - } - - // Step through the equivalent characters finding simple ranges and - // adding ranges to the character class. - if (characters->length() > 0) { - int new_from = characters->at(0); - int new_to = new_from; - for (int i = 1; i < characters->length(); i++) { - int chr = characters->at(i); - if (chr == new_to + 1) { - new_to++; - } else { - if (new_to == new_from) { - ranges->Add(CharacterRange::Singleton(new_from)); - } else { - ranges->Add(CharacterRange(new_from, new_to)); - } - new_from = new_to = chr; - } - } - if (new_to == new_from) { - ranges->Add(CharacterRange::Singleton(new_from)); - } else { - ranges->Add(CharacterRange(new_from, new_to)); - } - } -} - - ZoneList* CharacterSet::ranges() { if (ranges_ == NULL) { ranges_ = new ZoneList(2); diff --git a/test/mjsunit/regress/regress-1472.js b/test/mjsunit/regress/regress-1472.js new file mode 100644 index 0000000..b2a30d2 --- /dev/null +++ b/test/mjsunit/regress/regress-1472.js @@ -0,0 +1,40 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Avoid excessive expansions of regexp repetitions inside regexp repetitions. +// Some of these caused stack overflows, others cause out-of-memory. +var r1 = /(?:a(?:b(?:c(?:d(?:e(?:f(?:g(?:h(?:i(?:j(?:k(?:l(?:m(?:n(?:o(?:p(?:q(?:r(?:s(?:t(?:u(?:v(?:w(?:x(?:y(?:z(?:FooBar)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)/; +"xxx".match(r1); + +var r2 = /(?:a(?:b(?:c(?:d(?:e(?:f(?:g(?:h(?:i(?:j(?:k(?:l(?:FooBar){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}/; +"xxx".match(r2); + +var r3 = /(?:a(?:b(?:c(?:d(?:e(?:f(?:g(?:h(?:i(?:j(?:k(?:l(?:FooBar){2}){2}){2}){2}){2}){2}){2}){2}){2}){2}){2}){2}){2}/; +"xxx".match(r3); + +var r4 = /(?:a(?:b(?:c(?:d(?:e(?:f(?:g(?:h(?:i(?:FooBar){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}/; +"xxx".match(r4);