Avoid overdeep recursion in regexp where a guarded expression with a
authorerik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Thu, 31 May 2012 11:59:04 +0000 (11:59 +0000)
committererik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Thu, 31 May 2012 11:59:04 +0000 (11:59 +0000)
minimum repetition count is inside another quantifier.
Bug=129926
Review URL: https://chromiumcodereview.appspot.com/10451092

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11686 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/jsregexp.cc
src/jsregexp.h
test/mjsunit/regexp-capture-3.js

index c5e73ed65302784338aff0981e5a9c235a0d4561..51683682fdb3159dca2daa548947c6735e48c576 100644 (file)
@@ -2191,12 +2191,13 @@ int ActionNode::EatsAtLeast(int still_to_find,
 
 
 void ActionNode::FillInBMInfo(int offset,
+                              int recursion_depth,
                               BoyerMooreLookahead* bm,
                               bool not_at_start) {
   if (type_ == BEGIN_SUBMATCH) {
     bm->SetRest(offset);
   } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
-    on_success()->FillInBMInfo(offset, bm, not_at_start);
+    on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
   }
   SaveBMInfo(bm, not_at_start, offset);
 }
@@ -2218,11 +2219,13 @@ int AssertionNode::EatsAtLeast(int still_to_find,
 }
 
 
-void AssertionNode::FillInBMInfo(
-    int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+void AssertionNode::FillInBMInfo(int offset,
+                                 int recursion_depth,
+                                 BoyerMooreLookahead* bm,
+                                 bool not_at_start) {
   // Match the behaviour of EatsAtLeast on this node.
   if (type() == AT_START && not_at_start) return;
-  on_success()->FillInBMInfo(offset, bm, not_at_start);
+  on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
   SaveBMInfo(bm, not_at_start, offset);
 }
 
@@ -2803,14 +2806,17 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
 }
 
 
-void LoopChoiceNode::FillInBMInfo(
-    int offset, BoyerMooreLookahead* bm, bool not_at_start) {
-  if (body_can_be_zero_length_) {
+void LoopChoiceNode::FillInBMInfo(int offset,
+                                  int recursion_depth,
+                                  BoyerMooreLookahead* bm,
+                                  bool not_at_start) {
+  if (body_can_be_zero_length_ ||
+      recursion_depth > RegExpCompiler::kMaxRecursion) {
     bm->SetRest(offset);
     SaveBMInfo(bm, not_at_start, offset);
     return;
   }
-  ChoiceNode::FillInBMInfo(offset, bm, not_at_start);
+  ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
   SaveBMInfo(bm, not_at_start, offset);
 }
 
@@ -2912,7 +2918,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
     if (eats_at_least >= 1) {
       BoyerMooreLookahead* bm =
           new BoyerMooreLookahead(eats_at_least, compiler);
-      FillInBMInfo(0, bm, not_at_start);
+      FillInBMInfo(0, 0, bm, not_at_start);
       if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
       if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
     }
@@ -3850,7 +3856,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
             BoyerMooreLookahead* bm =
                 new BoyerMooreLookahead(eats_at_least, compiler);
             GuardedAlternative alt0 = alternatives_->at(0);
-            alt0.node()->FillInBMInfo(0, bm, not_at_start);
+            alt0.node()->FillInBMInfo(0, 0, bm, not_at_start);
             skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
           }
         } else {
@@ -5589,8 +5595,10 @@ void Analysis::VisitAssertion(AssertionNode* that) {
 }
 
 
-void BackReferenceNode::FillInBMInfo(
-    int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+void BackReferenceNode::FillInBMInfo(int offset,
+                                     int recursion_depth,
+                                     BoyerMooreLookahead* bm,
+                                     bool not_at_start) {
   // Working out the set of characters that a backreference can match is too
   // hard, so we just say that any character can match.
   bm->SetRest(offset);
@@ -5602,8 +5610,10 @@ STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
               RegExpMacroAssembler::kTableSize);
 
 
-void ChoiceNode::FillInBMInfo(
-    int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+void ChoiceNode::FillInBMInfo(int offset,
+                              int recursion_depth,
+                              BoyerMooreLookahead* bm,
+                              bool not_at_start) {
   ZoneList<GuardedAlternative>* alts = alternatives();
   for (int i = 0; i < alts->length(); i++) {
     GuardedAlternative& alt = alts->at(i);
@@ -5612,14 +5622,16 @@ void ChoiceNode::FillInBMInfo(
       SaveBMInfo(bm, not_at_start, offset);
       return;
     }
-    alt.node()->FillInBMInfo(offset, bm, not_at_start);
+    alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
   }
   SaveBMInfo(bm, not_at_start, offset);
 }
 
 
-void TextNode::FillInBMInfo(
-    int initial_offset, BoyerMooreLookahead* bm, bool not_at_start) {
+void TextNode::FillInBMInfo(int initial_offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start) {
   if (initial_offset >= bm->length()) return;
   int offset = initial_offset;
   int max_char = bm->max_char();
@@ -5673,6 +5685,7 @@ void TextNode::FillInBMInfo(
     return;
   }
   on_success()->FillInBMInfo(offset,
+                             recursion_depth + 1,
                              bm,
                              true);  // Not at start after a text node.
   if (initial_offset == 0) set_bm_info(not_at_start, bm);
index 2e90e9ad5af2865884d1675ad58cfba1dff77e0a..e438eab91578d30249c56d5802703b760366900e 100644 (file)
@@ -581,8 +581,10 @@ class RegExpNode: public ZoneObject {
   // we look forward.  This is used for a Boyer-Moore-like string searching
   // implementation.  TODO(erikcorry):  This should share more code with
   // EatsAtLeast, GetQuickCheckDetails.
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start) {
     UNREACHABLE();
   }
 
@@ -681,9 +683,11 @@ class SeqRegExpNode: public RegExpNode {
   RegExpNode* on_success() { return on_success_; }
   void set_on_success(RegExpNode* node) { on_success_ = node; }
   virtual RegExpNode* FilterASCII(int depth);
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start) {
-    on_success_->FillInBMInfo(offset, bm, not_at_start);
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start) {
+    on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
     if (offset == 0) set_bm_info(not_at_start, bm);
   }
 
@@ -736,8 +740,10 @@ class ActionNode: public SeqRegExpNode {
     return on_success()->GetQuickCheckDetails(
         details, compiler, filled_in, not_at_start);
   }
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start);
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start);
   Type type() { return type_; }
   // TODO(erikcorry): We should allow some action nodes in greedy loops.
   virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
@@ -805,8 +811,10 @@ class TextNode: public SeqRegExpNode {
   virtual int GreedyLoopTextLength();
   virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
       RegExpCompiler* compiler);
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start);
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start);
   void CalculateOffsets();
   virtual RegExpNode* FilterASCII(int depth);
 
@@ -865,8 +873,10 @@ class AssertionNode: public SeqRegExpNode {
                                     RegExpCompiler* compiler,
                                     int filled_in,
                                     bool not_at_start);
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start);
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start);
   AssertionNodeType type() { return type_; }
   void set_type(AssertionNodeType type) { type_ = type; }
 
@@ -903,8 +913,10 @@ class BackReferenceNode: public SeqRegExpNode {
                                     bool not_at_start) {
     return;
   }
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start);
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start);
 
  private:
   int start_reg_;
@@ -928,8 +940,10 @@ class EndNode: public RegExpNode {
     // Returning 0 from EatsAtLeast should ensure we never get here.
     UNREACHABLE();
   }
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start) {
     // Returning 0 from EatsAtLeast should ensure we never get here.
     UNREACHABLE();
   }
@@ -1018,8 +1032,10 @@ class ChoiceNode: public RegExpNode {
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,
                                     bool not_at_start);
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start);
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start);
 
   bool being_calculated() { return being_calculated_; }
   bool not_at_start() { return not_at_start_; }
@@ -1068,9 +1084,12 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,
                                     bool not_at_start);
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start) {
-    alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start);
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start) {
+    alternatives_->at(1).node()->FillInBMInfo(
+        offset, recursion_depth + 1, bm, not_at_start);
     if (offset == 0) set_bm_info(not_at_start, bm);
   }
   // For a negative lookahead we don't emit the quick check for the
@@ -1100,8 +1119,10 @@ class LoopChoiceNode: public ChoiceNode {
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,
                                     bool not_at_start);
-  virtual void FillInBMInfo(
-      int offset, BoyerMooreLookahead* bm, bool not_at_start);
+  virtual void FillInBMInfo(int offset,
+                            int recursion_depth,
+                            BoyerMooreLookahead* bm,
+                            bool not_at_start);
   RegExpNode* loop_node() { return loop_node_; }
   RegExpNode* continue_node() { return continue_node_; }
   bool body_can_be_zero_length() { return body_can_be_zero_length_; }
index 9bdd6005995172b2f2caf92d5a1de385e7505c5c..06e07e7488a1ed9b620589183a2a7448420cdf13 100755 (executable)
@@ -216,3 +216,5 @@ regex10.exec(input0);
 var regex11 = /^(?:[^\u0000-\u0080]|[0-9a-z?,.!&\s#()])+$/i;
 regex11.exec(input0);
 
+var regex12 = /u(\xf0{8}?\D*?|( ? !)$h??(|)*?(||)+?\6((?:\W\B|--\d-*-|)?$){0, }?|^Y( ? !1)\d+)+a/;
+regex12.exec("");