Fix assert triggered in fast/regex/pcre-test-4.html.  We were not filtering out
author erik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Thu, 3 May 2012 08:22:12 +0000 (08:22 +0000)
committer erik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Thu, 3 May 2012 08:22:12 +0000 (08:22 +0000)
all the nodes that had non-ASCII characters.  That has been fixed, but because
of the protection against over-deep recursion when filtering, it is wrong to
assert that all nodes were filtered.  This change therefore also makes sure we
can cope with non-filtered nodes by adding back some code removed in
https://chromiumcodereview.appspot.com/10174017/
Review URL: https://chromiumcodereview.appspot.com/10358008

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11487 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/jsregexp.cc
test/mjsunit/regexp-capture-3.js

index f91e20594a7a9287bcada5c1730e62c5fd21737e..54f681856d70938464996b3618dea7095e7ebf98 100644 (file)
@@ -2426,9 +2426,15 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
         QuickCheckDetails::Position* pos =
             details->positions(characters_filled_in);
         uc16 c = quarks[i];
-        // We should already have filtered out nodes that have non-ASCII
-        // characters if we are matching against an ASCII string.
-        ASSERT(c <= char_mask);
+        if (c > char_mask) {
+          // If we expect a non-ASCII character from an ASCII string,
+          // there is no way we can match. Not even case independent
+          // matching can turn an ASCII character into non-ASCII or
+          // vice versa.
+          details->set_cannot_match();
+          pos->determines_perfectly = false;
+          return;
+        }
         if (compiler->ignore_case()) {
           unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
           int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
@@ -2490,9 +2496,11 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
         int first_range = 0;
         while (ranges->at(first_range).from() > char_mask) {
           first_range++;
-          // We should already have filtered out nodes that cannot match
-          // so the first range should be a valid range.
-          ASSERT(first_range != ranges->length());
+          if (first_range == ranges->length()) {
+            details->set_cannot_match();
+            pos->determines_perfectly = false;
+            return;
+          }
         }
         CharacterRange range = ranges->at(first_range);
         uc16 from = range.from();
@@ -2540,10 +2548,12 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
     }
   }
   ASSERT(characters_filled_in != details->characters());
-  on_success()-> GetQuickCheckDetails(details,
-                                      compiler,
-                                      characters_filled_in,
-                                      true);
+  if (!details->cannot_match()) {
+    on_success()-> GetQuickCheckDetails(details,
+                                        compiler,
+                                        characters_filled_in,
+                                        true);
+  }
 }
 
 
@@ -2687,12 +2697,14 @@ RegExpNode* LoopChoiceNode::FilterASCII(int depth) {
   if (info()->replacement_calculated) return replacement();
   if (depth < 0) return this;
   if (info()->visited) return this;
-  VisitMarker marker(info());
+  {
+    VisitMarker marker(info());
 
-  RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
-  // If we can't continue after the loop then there is no sense in doing the
-  // loop.
-  if (continue_replacement == NULL) return set_replacement(NULL);
+    RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
+    // If we can't continue after the loop then there is no sense in doing the
+    // loop.
+    if (continue_replacement == NULL) return set_replacement(NULL);
+  }
 
   return ChoiceNode::FilterASCII(depth - 1);
 }
index 66a2e45e28fc95fd87028fc167b5b01e51e1d444..5cf7d8d689868d198554c025d096e0b8b093b5de 100755 (executable)
@@ -187,3 +187,28 @@ NoHang(/(a|(((.*)*)*x)ø|(((.*)*)*x)å)/);  // 2 out of 3 branches pruned.
 var s = "Don't prune based on a repetition of length 0";
 assertEquals(null, s.match(/å{1,1}prune/));
 assertEquals("prune", (s.match(/å{0,0}prune/)[0]));
+
+// Some very deep regexps where FilterASCII gives up in order not to make the
+// stack overflow.
+var regex6 = /a*\u0100*\w/;
+var input0 = "a";
+regex6.exec(input0);
+
+var re = "\u0100*\\w";
+
+for (var i = 0; i < 200; i++) re = "a*" + re;
+
+var regex7 = new RegExp(re);
+regex7.exec(input0);
+
+var regex8 = new RegExp(re, "i");
+regex8.exec(input0);
+
+re = "[\u0100]*\\w";
+for (var i = 0; i < 200; i++) re = "a*" + re;
+
+var regex9 = new RegExp(re);
+regex9.exec(input0);
+
+var regex10 = new RegExp(re, "i");
+regex10.exec(input0);