From baf7ebd6dad86d1fdd8758fb30d119d36f9b1c3a Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Thu, 3 May 2012 08:22:12 +0000 Subject: [PATCH] Fix assert triggered in fast/regex/pcre-test-4.html We were not filtering out all the nodes that had non-ASCII characters. That has been fixed, but because of the protection against over-deep recursion when filtering it is wrong to assert that all nodes were filtered. This change therefore also makes sure we can cope with non-filtered nodes by adding back some code removed in https://chromiumcodereview.appspot.com/10174017/ Review URL: https://chromiumcodereview.appspot.com/10358008 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11487 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/jsregexp.cc | 42 ++++++++++++++++++++------------ test/mjsunit/regexp-capture-3.js | 25 +++++++++++++++++++ 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/src/jsregexp.cc b/src/jsregexp.cc index f91e20594..54f681856 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -2426,9 +2426,15 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, QuickCheckDetails::Position* pos = details->positions(characters_filled_in); uc16 c = quarks[i]; - // We should already have filtered out nodes that have non-ASCII - // characters if we are matching against an ASCII string. - ASSERT(c <= char_mask); + if (c > char_mask) { + // If we expect a non-ASCII character from an ASCII string, + // there is no way we can match. Not even case independent + // matching can turn an ASCII character into non-ASCII or + // vice versa. + details->set_cannot_match(); + pos->determines_perfectly = false; + return; + } if (compiler->ignore_case()) { unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), @@ -2490,9 +2496,11 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, int first_range = 0; while (ranges->at(first_range).from() > char_mask) { first_range++; - // We should already have filtered out nodes that cannot match - // so the first range should be a valid range. - ASSERT(first_range != ranges->length()); + if (first_range == ranges->length()) { + details->set_cannot_match(); + pos->determines_perfectly = false; + return; + } } CharacterRange range = ranges->at(first_range); uc16 from = range.from(); @@ -2540,10 +2548,12 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, } } ASSERT(characters_filled_in != details->characters()); - on_success()-> GetQuickCheckDetails(details, - compiler, - characters_filled_in, - true); + if (!details->cannot_match()) { + on_success()-> GetQuickCheckDetails(details, + compiler, + characters_filled_in, + true); + } } @@ -2687,12 +2697,14 @@ RegExpNode* LoopChoiceNode::FilterASCII(int depth) { if (info()->replacement_calculated) return replacement(); if (depth < 0) return this; if (info()->visited) return this; - VisitMarker marker(info()); + { + VisitMarker marker(info()); - RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1); - // If we can't continue after the loop then there is no sense in doing the - // loop. - if (continue_replacement == NULL) return set_replacement(NULL); + RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1); + // If we can't continue after the loop then there is no sense in doing the + // loop. + if (continue_replacement == NULL) return set_replacement(NULL); + } return ChoiceNode::FilterASCII(depth - 1); } diff --git a/test/mjsunit/regexp-capture-3.js b/test/mjsunit/regexp-capture-3.js index 66a2e45e2..5cf7d8d68 100755 --- a/test/mjsunit/regexp-capture-3.js +++ b/test/mjsunit/regexp-capture-3.js @@ -187,3 +187,28 @@ NoHang(/(a|(((.*)*)*x)ø|(((.*)*)*x)å)/); // 2 out of 3 branches pruned. var s = "Don't prune based on a repetition of length 0"; assertEquals(null, s.match(/å{1,1}prune/)); assertEquals("prune", (s.match(/å{0,0}prune/)[0])); + +// Some very deep regexps where FilterASCII gives up in order not to make the +// stack overflow. +var regex6 = /a*\u0100*\w/; +var input0 = "a"; +regex6.exec(input0); + +var re = "\u0100*\\w"; + +for (var i = 0; i < 200; i++) re = "a*" + re; + +var regex7 = new RegExp(re); +regex7.exec(input0); + +var regex8 = new RegExp(re, "i"); +regex8.exec(input0); + +re = "[\u0100]*\\w"; +for (var i = 0; i < 200; i++) re = "a*" + re; + +var regex9 = new RegExp(re); +regex9.exec(input0); + +var regex10 = new RegExp(re, "i"); +regex10.exec(input0); -- 2.34.1