From ae4fcd970295ccafe7e641c43277937b27e8d1d4 Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Fri, 1 Jun 2012 11:28:52 +0000 Subject: [PATCH] Limit work done analyzing regexps with very large fanout. BUG=128821 Review URL: https://chromiumcodereview.appspot.com/10448117 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11696 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/jsregexp.cc | 27 ++++++++++++++++++++------- src/jsregexp.h | 19 ++++++++++++++++--- test/mjsunit/regexp-capture-3.js | 1 - test/mjsunit/regexp-capture.js | 2 ++ 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 5168368..2c8376a 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -2192,12 +2192,14 @@ int ActionNode::EatsAtLeast(int still_to_find, void ActionNode::FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { if (type_ == BEGIN_SUBMATCH) { bm->SetRest(offset); } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { - on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); + on_success()->FillInBMInfo( + offset, recursion_depth + 1, budget - 1, bm, not_at_start); } SaveBMInfo(bm, not_at_start, offset); } @@ -2221,11 +2223,13 @@ int AssertionNode::EatsAtLeast(int still_to_find, void AssertionNode::FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { // Match the behaviour of EatsAtLeast on this node. if (type() == AT_START && not_at_start) return; - on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); + on_success()->FillInBMInfo( + offset, recursion_depth + 1, budget - 1, bm, not_at_start); SaveBMInfo(bm, not_at_start, offset); } @@ -2808,15 +2812,18 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, void LoopChoiceNode::FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { if (body_can_be_zero_length_ || - recursion_depth > RegExpCompiler::kMaxRecursion) { + recursion_depth > RegExpCompiler::kMaxRecursion || + budget <= 0) { bm->SetRest(offset); SaveBMInfo(bm, not_at_start, offset); return; } - ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); + ChoiceNode::FillInBMInfo( + offset, recursion_depth + 1, budget - 1, bm, not_at_start); SaveBMInfo(bm, not_at_start, offset); } @@ -2918,7 +2925,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { if (eats_at_least >= 1) { BoyerMooreLookahead* bm = new BoyerMooreLookahead(eats_at_least, compiler); - FillInBMInfo(0, 0, bm, not_at_start); + FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start); if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; } @@ -3856,7 +3863,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { BoyerMooreLookahead* bm = new BoyerMooreLookahead(eats_at_least, compiler); GuardedAlternative alt0 = alternatives_->at(0); - alt0.node()->FillInBMInfo(0, 0, bm, not_at_start); + alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start); skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); } } else { @@ -5597,6 +5604,7 @@ void Analysis::VisitAssertion(AssertionNode* that) { void BackReferenceNode::FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { // Working out the set of characters that a backreference can match is too @@ -5612,9 +5620,11 @@ STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == void ChoiceNode::FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { ZoneList* alts = alternatives(); + budget = (budget - 1) / alts->length(); for (int i = 0; i < alts->length(); i++) { GuardedAlternative& alt = alts->at(i); if (alt.guards() != NULL && alt.guards()->length() != 0) { @@ -5622,7 +5632,8 @@ void ChoiceNode::FillInBMInfo(int offset, SaveBMInfo(bm, not_at_start, offset); return; } - alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); + alt.node()->FillInBMInfo( + offset, recursion_depth + 1, budget, bm, not_at_start); } SaveBMInfo(bm, not_at_start, offset); } @@ -5630,6 +5641,7 @@ void ChoiceNode::FillInBMInfo(int offset, void TextNode::FillInBMInfo(int initial_offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { if (initial_offset >= bm->length()) return; @@ -5686,6 +5698,7 @@ void TextNode::FillInBMInfo(int initial_offset, } on_success()->FillInBMInfo(offset, recursion_depth + 1, + budget - 1, bm, true); // Not at start after a text node. if (initial_offset == 0) set_bm_info(not_at_start, bm); diff --git a/src/jsregexp.h b/src/jsregexp.h index e438eab..3601c1a 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -580,9 +580,12 @@ class RegExpNode: public ZoneObject { // Collects information on the possible code units (mod 128) that can match if // we look forward. This is used for a Boyer-Moore-like string searching // implementation. TODO(erikcorry): This should share more code with - // EatsAtLeast, GetQuickCheckDetails. + // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit + // the number of nodes we are willing to look at in order to create this data. + static const int kFillInBMBudget = 200; virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { UNREACHABLE(); @@ -685,9 +688,11 @@ class SeqRegExpNode: public RegExpNode { virtual RegExpNode* FilterASCII(int depth); virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { - on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); + on_success_->FillInBMInfo( + offset, recursion_depth + 1, budget - 1, bm, not_at_start); if (offset == 0) set_bm_info(not_at_start, bm); } @@ -742,6 +747,7 @@ class ActionNode: public SeqRegExpNode { } virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start); Type type() { return type_; } @@ -813,6 +819,7 @@ class TextNode: public SeqRegExpNode { RegExpCompiler* compiler); virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start); void CalculateOffsets(); @@ -875,6 +882,7 @@ class AssertionNode: public SeqRegExpNode { bool not_at_start); virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start); AssertionNodeType type() { return type_; } @@ -915,6 +923,7 @@ class BackReferenceNode: public SeqRegExpNode { } virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start); @@ -942,6 +951,7 @@ class EndNode: public RegExpNode { } virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { // Returning 0 from EatsAtLeast should ensure we never get here. @@ -1034,6 +1044,7 @@ class ChoiceNode: public RegExpNode { bool not_at_start); virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start); @@ -1086,10 +1097,11 @@ class NegativeLookaheadChoiceNode: public ChoiceNode { bool not_at_start); virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start) { alternatives_->at(1).node()->FillInBMInfo( - offset, recursion_depth + 1, bm, not_at_start); + offset, recursion_depth + 1, budget - 1, bm, not_at_start); if (offset == 0) set_bm_info(not_at_start, bm); } // For a negative lookahead we don't emit the quick check for the @@ -1121,6 +1133,7 @@ class LoopChoiceNode: public ChoiceNode { bool not_at_start); virtual void FillInBMInfo(int offset, int recursion_depth, + int budget, BoyerMooreLookahead* bm, bool not_at_start); RegExpNode* loop_node() { return loop_node_; } diff --git a/test/mjsunit/regexp-capture-3.js b/test/mjsunit/regexp-capture-3.js index 06e07e7..b676f01 100755 --- a/test/mjsunit/regexp-capture-3.js +++ b/test/mjsunit/regexp-capture-3.js @@ -162,7 +162,6 @@ assertEquals("*foo * baz", a); // string we can test that the relevant node is removed by verifying that // there is no hang. function NoHang(re) { - print(re); "This is an ASCII string that could take forever".match(re); } diff --git a/test/mjsunit/regexp-capture.js b/test/mjsunit/regexp-capture.js index 8aae717..3073094 100755 --- a/test/mjsunit/regexp-capture.js +++ b/test/mjsunit/regexp-capture.js @@ -56,3 +56,5 @@ assertEquals(["bbc", "b"], /^(b+|a){1,2}?bc/.exec("bbc")); assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec("bbaababbabaaaaabbaaaabba")); +// From crbug.com/128821 - don't hang: +"".match(/((a|i|A|I|u|o|U|O)(s|c|b|c|d|f|g|h|j|k|l|m|n|p|q|r|s|t|v|w|x|y|z|B|C|D|F|G|H|J|K|L|M|N|P|Q|R|S|T|V|W|X|Y|Z)*) de\/da([.,!?\s]|$)/); -- 2.7.4