Irregexp: Preload more characters when we are not at the
authorerik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 3 Dec 2010 09:54:06 +0000 (09:54 +0000)
committererik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 3 Dec 2010 09:54:06 +0000 (09:54 +0000)
start of the input and some alternations in the disjunction
are anchored.
Review URL: http://codereview.chromium.org/5524006

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5915 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/jsregexp.cc
src/jsregexp.h

index 82c1b20..e0f2e62 100644 (file)
@@ -1650,41 +1650,64 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
 }
 
 
-int ActionNode::EatsAtLeast(int still_to_find, int recursion_depth) {
+int ActionNode::EatsAtLeast(int still_to_find,
+                            int recursion_depth,
+                            bool not_at_start) {
   if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
   if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0;  // Rewinds input!
-  return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
+  return on_success()->EatsAtLeast(still_to_find,
+                                   recursion_depth + 1,
+                                   not_at_start);
 }
 
 
-int AssertionNode::EatsAtLeast(int still_to_find, int recursion_depth) {
+int AssertionNode::EatsAtLeast(int still_to_find,
+                               int recursion_depth,
+                               bool not_at_start) {
   if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
-  return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
+  // If we know we are not at the start and we are asked "how many characters
+  // will you match if you succeed?" then we can answer anything since false
+  // implies false.  So lets just return the max answer (still_to_find) since
+  // that won't prevent us from preloading a lot of characters for the other
+  // branches in the node graph.
+  if (type() == AT_START && not_at_start) return still_to_find;
+  return on_success()->EatsAtLeast(still_to_find,
+                                   recursion_depth + 1,
+                                   not_at_start);
 }
 
 
-int BackReferenceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
+int BackReferenceNode::EatsAtLeast(int still_to_find,
+                                   int recursion_depth,
+                                   bool not_at_start) {
   if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
-  return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
+  return on_success()->EatsAtLeast(still_to_find,
+                                   recursion_depth + 1,
+                                   not_at_start);
 }
 
 
-int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) {
+int TextNode::EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start) {
   int answer = Length();
   if (answer >= still_to_find) return answer;
   if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
+  // We are not at start after this node so we set the last argument to 'true'.
   return answer + on_success()->EatsAtLeast(still_to_find - answer,
-                                            recursion_depth + 1);
+                                            recursion_depth + 1,
+                                            true);
 }
 
 
 int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,
-                                             int recursion_depth) {
+                                             int recursion_depth,
+                                             bool not_at_start) {
   if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
   // Alternative 0 is the negative lookahead, alternative 1 is what comes
   // afterwards.
   RegExpNode* node = alternatives_->at(1).node();
-  return node->EatsAtLeast(still_to_find, recursion_depth + 1);
+  return node->EatsAtLeast(still_to_find, recursion_depth + 1, not_at_start);
 }
 
 
@@ -1702,7 +1725,8 @@ void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
 
 int ChoiceNode::EatsAtLeastHelper(int still_to_find,
                                   int recursion_depth,
-                                  RegExpNode* ignore_this_node) {
+                                  RegExpNode* ignore_this_node,
+                                  bool not_at_start) {
   if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
   int min = 100;
   int choice_count = alternatives_->length();
@@ -1710,20 +1734,31 @@ int ChoiceNode::EatsAtLeastHelper(int still_to_find,
     RegExpNode* node = alternatives_->at(i).node();
     if (node == ignore_this_node) continue;
     int node_eats_at_least = node->EatsAtLeast(still_to_find,
-                                               recursion_depth + 1);
+                                               recursion_depth + 1,
+                                               not_at_start);
     if (node_eats_at_least < min) min = node_eats_at_least;
   }
   return min;
 }
 
 
-int LoopChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
-  return EatsAtLeastHelper(still_to_find, recursion_depth, loop_node_);
+int LoopChoiceNode::EatsAtLeast(int still_to_find,
+                                int recursion_depth,
+                                bool not_at_start) {
+  return EatsAtLeastHelper(still_to_find,
+                           recursion_depth,
+                           loop_node_,
+                           not_at_start);
 }
 
 
-int ChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
-  return EatsAtLeastHelper(still_to_find, recursion_depth, NULL);
+int ChoiceNode::EatsAtLeast(int still_to_find,
+                            int recursion_depth,
+                            bool not_at_start) {
+  return EatsAtLeastHelper(still_to_find,
+                           recursion_depth,
+                           NULL,
+                           not_at_start);
 }
 
 
@@ -2630,8 +2665,9 @@ void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
 }
 
 
-int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler) {
-  int preload_characters = EatsAtLeast(4, 0);
+int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
+                                           bool not_at_start) {
+  int preload_characters = EatsAtLeast(4, 0, not_at_start);
   if (compiler->macro_assembler()->CanReadUnaligned()) {
     bool ascii = compiler->ascii();
     if (ascii) {
@@ -2839,7 +2875,9 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
 
   int first_normal_choice = greedy_loop ? 1 : 0;
 
-  int preload_characters = CalculatePreloadCharacters(compiler);
+  int preload_characters =
+      CalculatePreloadCharacters(compiler,
+                                 current_trace->at_start() == Trace::FALSE);
   bool preload_is_current =
       (current_trace->characters_preloaded() == preload_characters);
   bool preload_has_checked_bounds = preload_is_current;
index 87adf55..6f04be3 100644 (file)
@@ -596,8 +596,13 @@ class RegExpNode: public ZoneObject {
   // How many characters must this node consume at a minimum in order to
   // succeed.  If we have found at least 'still_to_find' characters that
   // must be consumed there is no need to ask any following nodes whether
-  // they are sure to eat any more characters.
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0;
+  // they are sure to eat any more characters.  The not_at_start argument is
+  // used to indicate that we know we are not at the start of the input.  In
+  // this case anchored branches will always fail and can be ignored when
+  // determining how many characters are consumed on success.
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start) = 0;
   // Emits some quick code that checks whether the preloaded characters match.
   // Falls through on certain failure, jumps to the label on possible success.
   // If the node cannot make a quick check it does nothing and returns false.
@@ -765,7 +770,9 @@ class ActionNode: public SeqRegExpNode {
                                      RegExpNode* on_success);
   virtual void Accept(NodeVisitor* visitor);
   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start);
   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                     RegExpCompiler* compiler,
                                     int filled_in,
@@ -829,7 +836,9 @@ class TextNode: public SeqRegExpNode {
   }
   virtual void Accept(NodeVisitor* visitor);
   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start);
   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,
@@ -897,7 +906,9 @@ class AssertionNode: public SeqRegExpNode {
   }
   virtual void Accept(NodeVisitor* visitor);
   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start);
   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                     RegExpCompiler* compiler,
                                     int filled_in,
@@ -925,7 +936,9 @@ class BackReferenceNode: public SeqRegExpNode {
   int start_register() { return start_reg_; }
   int end_register() { return end_reg_; }
   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start);
   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,
@@ -946,7 +959,9 @@ class EndNode: public RegExpNode {
   explicit EndNode(Action action) : action_(action) { }
   virtual void Accept(NodeVisitor* visitor);
   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; }
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start) { return 0; }
   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,
@@ -1028,10 +1043,13 @@ class ChoiceNode: public RegExpNode {
   ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
   DispatchTable* GetTable(bool ignore_case);
   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start);
   int EatsAtLeastHelper(int still_to_find,
                         int recursion_depth,
-                        RegExpNode* ignore_this_node);
+                        RegExpNode* ignore_this_node,
+                        bool not_at_start);
   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,
@@ -1054,7 +1072,7 @@ class ChoiceNode: public RegExpNode {
   void GenerateGuard(RegExpMacroAssembler* macro_assembler,
                      Guard* guard,
                      Trace* trace);
-  int CalculatePreloadCharacters(RegExpCompiler* compiler);
+  int CalculatePreloadCharacters(RegExpCompiler* compiler, bool not_at_start);
   void EmitOutOfLineContinuation(RegExpCompiler* compiler,
                                  Trace* trace,
                                  GuardedAlternative alternative,
@@ -1077,7 +1095,9 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
     AddAlternative(this_must_fail);
     AddAlternative(then_do_this);
   }
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start);
   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,
@@ -1102,7 +1122,9 @@ class LoopChoiceNode: public ChoiceNode {
   void AddLoopAlternative(GuardedAlternative alt);
   void AddContinueAlternative(GuardedAlternative alt);
   virtual void Emit(RegExpCompiler* compiler, Trace* trace);
-  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
+  virtual int EatsAtLeast(int still_to_find,
+                          int recursion_depth,
+                          bool not_at_start);
   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                     RegExpCompiler* compiler,
                                     int characters_filled_in,