From 585e36b40ea30acac8af431a6740791e69bc8842 Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Tue, 20 Jan 2009 11:36:28 +0000 Subject: [PATCH] Optimization: The quick check should ignore the negative lookahead instead of insisting that it should match. Review URL: http://codereview.chromium.org/18360 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1106 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/jsregexp.cc | 38 ++++++++++++++++++++++++++++---------- src/jsregexp.h | 22 ++++++++++++++++++++++ src/regexp-macro-assembler-ia32.cc | 2 +- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/src/jsregexp.cc b/src/jsregexp.cc index e2d1d02..befaa38 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -1980,7 +1980,6 @@ int BackReferenceNode::EatsAtLeast(int recursion_depth) { } - int TextNode::EatsAtLeast(int recursion_depth) { int answer = Length(); if (answer >= 4) return answer; @@ -1989,6 +1988,26 @@ int TextNode::EatsAtLeast(int recursion_depth) { } +int NegativeLookaheadChoiceNode:: EatsAtLeast(int recursion_depth) { + if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; + // Alternative 0 is the negative lookahead, alternative 1 is what comes + // afterwards. + RegExpNode* node = alternatives_->at(1).node(); + return node->EatsAtLeast(recursion_depth + 1); +} + + +void NegativeLookaheadChoiceNode::GetQuickCheckDetails( + QuickCheckDetails* details, + RegExpCompiler* compiler, + int filled_in) { + // Alternative 0 is the negative lookahead, alternative 1 is what comes + // afterwards. + RegExpNode* node = alternatives_->at(1).node(); + return node->GetQuickCheckDetails(details, compiler, filled_in); +} + + int ChoiceNode::EatsAtLeastHelper(int recursion_depth, RegExpNode* ignore_this_node) { if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; @@ -3024,7 +3043,8 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { new_trace.quick_check_performed()->Clear(); alt_gen->expects_preload = preload_is_current; bool generate_full_check_inline = false; - if (alternative.node()->EmitQuickCheck(compiler, + if (try_to_emit_quick_check_for_alternative(i) && + alternative.node()->EmitQuickCheck(compiler, &new_trace, preload_has_checked_bounds, &alt_gen->possible_success, @@ -3957,19 +3977,17 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, // NegativeSubmatchSuccess will unwind the stack including everything the // choice node set up and backtrack. If the first alternative fails then // the second alternative is tried, which is exactly the desired result - // for a negative lookahead. In the case where the dispatch table - // determines that the first alternative cannot match we will save time - // by not trying it. Things are not quite so well-optimized if the - // dispatch table determines that the second alternative cannot match. - // In this case we could optimize by immediately backtracking. - ChoiceNode* choice_node = new ChoiceNode(2); + // for a negative lookahead. The NegativeLookaheadChoiceNode is a special + // ChoiceNode that knows to ignore the first exit when calculating quick + // checks. GuardedAlternative body_alt( body()->ToNode( compiler, success = new NegativeSubmatchSuccess(stack_pointer_register, position_register))); - choice_node->AddAlternative(body_alt); - choice_node->AddAlternative(GuardedAlternative(on_success)); + ChoiceNode* choice_node = + new NegativeLookaheadChoiceNode(body_alt, + GuardedAlternative(on_success)); return ActionNode::BeginSubmatch(stack_pointer_register, position_register, choice_node); diff --git a/src/jsregexp.h b/src/jsregexp.h index 1693713..a41a951 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -980,6 +980,7 @@ class ChoiceNode: public RegExpNode { bool being_calculated() { return being_calculated_; } void set_being_calculated(bool b) { being_calculated_ = b; } + virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } protected: int GreedyLoopTextLength(GuardedAlternative *alternative); @@ -1003,6 +1004,27 @@ class ChoiceNode: public RegExpNode { }; +class NegativeLookaheadChoiceNode: public ChoiceNode { + public: + explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, + GuardedAlternative then_do_this) + : ChoiceNode(2) { + AddAlternative(this_must_fail); + AddAlternative(then_do_this); + } + virtual int EatsAtLeast(int recursion_depth); + virtual void GetQuickCheckDetails(QuickCheckDetails* details, + RegExpCompiler* compiler, + int characters_filled_in); + // For a negative lookahead we don't emit the quick check for the + // alternative that is expected to fail. This is because quick check code + // starts by loading enough characters for the alternative that takes fewest + // characters, but on a negative lookahead the negative branch did not take + // part in that calculation (EatsAtLeast) so the assumptions don't hold. + virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } +}; + + class LoopChoiceNode: public ChoiceNode { public: explicit LoopChoiceNode(bool body_can_be_zero_length) diff --git a/src/regexp-macro-assembler-ia32.cc b/src/regexp-macro-assembler-ia32.cc index 6f11b86..a3998ef 100644 --- a/src/regexp-macro-assembler-ia32.cc +++ b/src/regexp-macro-assembler-ia32.cc @@ -877,7 +877,7 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, bool check_bounds, int characters) { - ASSERT(cp_offset >= 0); + ASSERT(cp_offset >= -1); // ^ and \b can look behind one character. ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) CheckPosition(cp_offset + characters - 1, on_end_of_input); LoadCurrentCharacterUnchecked(cp_offset, characters); -- 2.7.4