Clears captures of look-aheads on backtrack.
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 23 Jan 2009 13:34:51 +0000 (13:34 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 23 Jan 2009 13:34:51 +0000 (13:34 +0000)
Reduces number of pushes when flushing a trace. Some are converted to clears
in the undo-code instead, and some just ignored if they have no value worth restoring.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1136 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

13 files changed:
src/ast.h
src/jsregexp.cc
src/jsregexp.h
src/parser.cc
src/regexp-macro-assembler-ia32.cc
src/regexp-macro-assembler-ia32.h
src/regexp-macro-assembler-irregexp.cc
src/regexp-macro-assembler-irregexp.h
src/regexp-macro-assembler-tracer.cc
src/regexp-macro-assembler-tracer.h
src/regexp-macro-assembler.h
test/mjsunit/regexp-lookahead.js [new file with mode: 0644]
test/mjsunit/regress/regress-187.js [moved from test/mjsunit/bugs/bug-187.js with 100% similarity]

index 00bfddb..ea4c947 100644 (file)
--- a/src/ast.h
+++ b/src/ast.h
@@ -1521,9 +1521,15 @@ class RegExpCapture: public RegExpTree {
 
 class RegExpLookahead: public RegExpTree {
  public:
-  RegExpLookahead(RegExpTree* body, bool is_positive)
+  RegExpLookahead(RegExpTree* body,
+                  bool is_positive,
+                  int capture_count,
+                  int capture_from)
       : body_(body),
-        is_positive_(is_positive) { }
+        is_positive_(is_positive),
+        capture_count_(capture_count),
+        capture_from_(capture_from) { }
+
   virtual void* Accept(RegExpVisitor* visitor, void* data);
   virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                              RegExpNode* on_success);
@@ -1535,9 +1541,13 @@ class RegExpLookahead: public RegExpTree {
   virtual int max_match() { return 0; }
   RegExpTree* body() { return body_; }
   bool is_positive() { return is_positive_; }
+  int capture_count() { return capture_count_; }
+  int capture_from() { return capture_from_; }
  private:
   RegExpTree* body_;
   bool is_positive_;
+  int capture_count_;
+  int capture_from_;
 };
 
 
index 5a782f8..f56e3a7 100644 (file)
@@ -1360,41 +1360,44 @@ int Trace::FindAffectedRegisters(OutSet* affected_registers) {
 }
 
 
-void Trace::PushAffectedRegisters(RegExpMacroAssembler* assembler,
-                                  int max_register,
-                                  OutSet& affected_registers) {
-  // Stay safe and check every half times the limit.
-  // (Round up in case the limit is 1).
-  int push_limit = (assembler->stack_limit_slack() + 1) / 2;
-  for (int reg = 0, pushes = 0; reg <= max_register; reg++) {
-    if (affected_registers.Get(reg)) {
-      pushes++;
-      RegExpMacroAssembler::StackCheckFlag check_stack_limit =
-          (pushes % push_limit) == 0 ?
-                RegExpMacroAssembler::kCheckStackLimit :
-                RegExpMacroAssembler::kNoStackLimitCheck;
-      assembler->PushRegister(reg, check_stack_limit);
-    }
-  }
-}
-
-
 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
                                      int max_register,
-                                     OutSet& affected_registers) {
+                                     OutSet& registers_to_pop,
+                                     OutSet& registers_to_clear) {
   for (int reg = max_register; reg >= 0; reg--) {
-    if (affected_registers.Get(reg)) assembler->PopRegister(reg);
+    if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
+    else if (registers_to_clear.Get(reg)) {
+      int clear_to = reg;
+      while (reg > 0 && registers_to_pop.Get(reg - 1)) {
+        reg--;
+      }
+      assembler->ClearRegisters(reg, clear_to);
+    }
   }
 }
 
 
 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
                                    int max_register,
-                                   OutSet& affected_registers) {
+                                   OutSet& affected_registers,
+                                   OutSet* registers_to_pop,
+                                   OutSet* registers_to_clear) {
+  // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
+  const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
+
   for (int reg = 0; reg <= max_register; reg++) {
     if (!affected_registers.Get(reg)) {
       continue;
     }
+    // Count pushes performed to force a stack limit check occasionally.
+    int pushes = 0;
+
+    // The chronologically first deferred action in the trace
+    // is used to infer the action needed to restore a register
+    // to its previous state (or not, if it's safe to ignore it).
+    enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
+    DeferredActionUndoType undo_action = IGNORE;
+
     int value = 0;
     bool absolute = false;
     bool clear = false;
@@ -1409,8 +1412,16 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
           case ActionNode::SET_REGISTER: {
             Trace::DeferredSetRegister* psr =
                 static_cast<Trace::DeferredSetRegister*>(action);
-            value += psr->value();
-            absolute = true;
+            if (!absolute) {
+              value += psr->value();
+              absolute = true;
+            }
+            // SET_REGISTER is currently only used for newly introduced loop
+            // counters. They can have a significant previous value if they
+            // occur in a loop. TODO(lrn): Propagate this information, so
+            // we can set undo_action to IGNORE if we know there is no value to
+            // restore.
+            undo_action = RESTORE;
             ASSERT_EQ(store_position, -1);
             ASSERT(!clear);
             break;
@@ -1421,6 +1432,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
             }
             ASSERT_EQ(store_position, -1);
             ASSERT(!clear);
+            undo_action = RESTORE;
             break;
           case ActionNode::STORE_POSITION: {
             Trace::DeferredCapture* pc =
@@ -1428,6 +1440,19 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
             if (!clear && store_position == -1) {
               store_position = pc->cp_offset();
             }
+
+            // For captures we know that stores and clears alternate.
+            // Other registers are never cleared, and if they occur
+            // inside a loop, they might be assigned more than once.
+            if (reg <= 1) {
+              // Registers zero and one, aka "capture zero", are
+              // always set correctly if we succeed. There is no
+              // need to undo a setting on backtrack, because we
+              // will set it again or fail.
+              undo_action = IGNORE;
+            } else {
+              undo_action = pc->is_capture() ? CLEAR : RESTORE;
+            }
             ASSERT(!absolute);
             ASSERT_EQ(value, 0);
             break;
@@ -1436,8 +1461,10 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
             // Since we're scanning in reverse order, if we've already
             // set the position we have to ignore historically earlier
             // clearing operations.
-            if (store_position == -1)
+            if (store_position == -1) {
               clear = true;
+            }
+            undo_action = RESTORE;
             ASSERT(!absolute);
             ASSERT_EQ(value, 0);
             break;
@@ -1448,10 +1475,27 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
         }
       }
     }
+    // Prepare for the undo-action (e.g., push if it's going to be popped).
+    if (undo_action == RESTORE) {
+      pushes++;
+      RegExpMacroAssembler::StackCheckFlag stack_check =
+          RegExpMacroAssembler::kNoStackLimitCheck;
+      if (pushes == push_limit) {
+        stack_check = RegExpMacroAssembler::kCheckStackLimit;
+        pushes = 0;
+      }
+
+      assembler->PushRegister(reg, stack_check);
+      registers_to_pop->Set(reg);
+    } else if (undo_action == CLEAR) {
+      registers_to_clear->Set(reg);
+    }
+    // Perform the chronologically last action (or accumulated increment)
+    // for the register.
     if (store_position != -1) {
       assembler->WriteCurrentPositionToRegister(reg, store_position);
     } else if (clear) {
-      assembler->ClearRegister(reg);
+      assembler->ClearRegisters(reg, reg);
     } else if (absolute) {
       assembler->SetRegister(reg, value);
     } else if (value != 0) {
@@ -1486,9 +1530,15 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
 
   // Generate deferred actions here along with code to undo them again.
   OutSet affected_registers;
+
   int max_register = FindAffectedRegisters(&affected_registers);
-  PushAffectedRegisters(assembler, max_register, affected_registers);
-  PerformDeferredActions(assembler, max_register, affected_registers);
+  OutSet registers_to_pop;
+  OutSet registers_to_clear;
+  PerformDeferredActions(assembler,
+                         max_register,
+                         affected_registers,
+                         &registers_to_pop,
+                         &registers_to_clear);
   if (backtrack() != NULL) {
     // Here we have a concrete backtrack location.  These are set up by choice
     // nodes and so they indicate that we have a deferred save of the current
@@ -1511,7 +1561,10 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
   if (backtrack() != NULL) {
     assembler->PopCurrentPosition();
   }
-  RestoreAffectedRegisters(assembler, max_register, affected_registers);
+  RestoreAffectedRegisters(assembler,
+                           max_register,
+                           registers_to_pop,
+                           registers_to_clear);
   if (backtrack() == NULL) {
     assembler->Backtrack();
   } else {
@@ -1523,15 +1576,26 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
 
 
 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
-  if (!trace->is_trivial()) {
-    return trace->Flush(compiler, this);
-  }
   RegExpMacroAssembler* assembler = compiler->macro_assembler();
+
+  // Omit flushing the trace. We discard the entire stack frame anyway.
+
   if (!label()->is_bound()) {
+    // We are completely independent of the trace, since we ignore it,
+    // so this code can be used as the generic version.
     assembler->Bind(label());
   }
+
+  // Throw away everything on the backtrack stack since the start
+  // of the negative submatch and restore the character position.
   assembler->ReadCurrentPositionFromRegister(current_position_register_);
   assembler->ReadStackPointerFromRegister(stack_pointer_register_);
+  if (clear_capture_count_ > 0) {
+    // Clear any captures that might have been performed during the success
+    // of the body of the negative look-ahead.
+    int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
+    assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
+  }
   // Now that we have unwound the stack we find at the top of the stack the
   // backtrack that the BeginSubmatch node got.
   assembler->Backtrack();
@@ -1587,9 +1651,12 @@ ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
 }
 
 
-ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
+ActionNode* ActionNode::StorePosition(int reg,
+                                      bool is_capture,
+                                      RegExpNode* on_success) {
   ActionNode* result = new ActionNode(STORE_POSITION, on_success);
   result->data_.u_position_register.reg = reg;
+  result->data_.u_position_register.is_capture = is_capture;
   return result;
 }
 
@@ -1615,10 +1682,14 @@ ActionNode* ActionNode::BeginSubmatch(int stack_reg,
 
 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
                                                 int position_reg,
+                                                int clear_register_count,
+                                                int clear_register_from,
                                                 RegExpNode* on_success) {
   ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
   result->data_.u_submatch.stack_pointer_register = stack_reg;
   result->data_.u_submatch.current_position_register = position_reg;
+  result->data_.u_submatch.clear_register_count = clear_register_count;
+  result->data_.u_submatch.clear_register_from = clear_register_from;
   return result;
 }
 
@@ -3170,7 +3241,9 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
   switch (type_) {
     case STORE_POSITION: {
       Trace::DeferredCapture
-          new_capture(data_.u_position_register.reg, trace);
+          new_capture(data_.u_position_register.reg,
+                      data_.u_position_register.is_capture,
+                      trace);
       Trace new_trace = *trace;
       new_trace.add_action(&new_capture);
       return on_success()->Emit(compiler, &new_trace);
@@ -3235,13 +3308,31 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
       assembler->Bind(&skip_empty_check);
       return on_success()->Emit(compiler, trace);
     }
-    case POSITIVE_SUBMATCH_SUCCESS:
+    case POSITIVE_SUBMATCH_SUCCESS: {
       if (!trace->is_trivial()) return trace->Flush(compiler, this);
       assembler->ReadCurrentPositionFromRegister(
           data_.u_submatch.current_position_register);
       assembler->ReadStackPointerFromRegister(
           data_.u_submatch.stack_pointer_register);
-      return on_success()->Emit(compiler, trace);
+      int clear_register_count = data_.u_submatch.clear_register_count;
+      if (clear_register_count == 0) {
+        return on_success()->Emit(compiler, trace);
+      }
+      int clear_registers_from = data_.u_submatch.clear_register_from;
+      Label clear_registers_backtrack;
+      Trace new_trace = *trace;
+      new_trace.set_backtrack(&clear_registers_backtrack);
+      bool ok = on_success()->Emit(compiler, &new_trace);
+      if (!ok) { return false; }
+
+      assembler->Bind(&clear_registers_backtrack);
+      int clear_registers_to = clear_registers_from + clear_register_count - 1;
+      assembler->ClearRegisters(clear_registers_from, clear_registers_to);
+
+      ASSERT(trace->backtrack() == NULL);
+      assembler->Backtrack();
+      return true;
+    }
     default:
       UNREACHABLE();
       return false;
@@ -3859,7 +3950,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
   if (body_can_be_empty) {
     // If the body can be empty we need to store the start position
     // so we can bail out if it was empty.
-    body_node = ActionNode::StorePosition(body_start_reg, body_node);
+    body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
   }
   if (needs_capture_clearing) {
     // Before entering the body of this loop we need to clear captures.
@@ -3921,6 +4012,8 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
          newline_atom,
          ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
                                              position_register,
+                                             0,  // No captures inside.
+                                             -1,  // Ignored if no captures.
                                              on_success));
       // Create an end-of-input matcher.
       RegExpNode* end_of_line = ActionNode::BeginSubmatch(
@@ -3959,16 +4052,26 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
                                     RegExpNode* on_success) {
   int stack_pointer_register = compiler->AllocateRegister();
   int position_register = compiler->AllocateRegister();
+
+  const int registers_per_capture = 2;
+  const int register_of_first_capture = 2;
+  int register_count = capture_count_ * registers_per_capture;
+  int register_start =
+    register_of_first_capture + capture_from_ * registers_per_capture;
+
   RegExpNode* success;
   if (is_positive()) {
-    return ActionNode::BeginSubmatch(
+    RegExpNode* node = ActionNode::BeginSubmatch(
         stack_pointer_register,
         position_register,
         body()->ToNode(
             compiler,
             ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
                                                 position_register,
+                                                register_count,
+                                                register_start,
                                                 on_success)));
+    return node;
   } else {
     // We use a ChoiceNode for a negative lookahead because it has most of
     // the characteristics we need.  It has the body of the lookahead as its
@@ -3984,7 +4087,9 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
         body()->ToNode(
             compiler,
             success = new NegativeSubmatchSuccess(stack_pointer_register,
-                                                  position_register)));
+                                                  position_register,
+                                                  register_count,
+                                                  register_start)));
     ChoiceNode* choice_node =
         new NegativeLookaheadChoiceNode(body_alt,
                                         GuardedAlternative(on_success));
@@ -4007,9 +4112,9 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
                                   RegExpNode* on_success) {
   int start_reg = RegExpCapture::StartRegister(index);
   int end_reg = RegExpCapture::EndRegister(index);
-  RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
+  RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
   RegExpNode* body_node = body->ToNode(compiler, store_end);
-  return ActionNode::StorePosition(start_reg, body_node);
+  return ActionNode::StorePosition(start_reg, true, body_node);
 }
 
 
index a41a951..959eddd 100644 (file)
@@ -719,13 +719,17 @@ class ActionNode: public SeqRegExpNode {
   };
   static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
   static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
-  static ActionNode* StorePosition(int reg, RegExpNode* on_success);
+  static ActionNode* StorePosition(int reg,
+                                   bool is_capture,
+                                   RegExpNode* on_success);
   static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
   static ActionNode* BeginSubmatch(int stack_pointer_reg,
                                    int position_reg,
                                    RegExpNode* on_success);
   static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
                                              int restore_reg,
+                                             int clear_capture_count,
+                                             int clear_capture_from,
                                              RegExpNode* on_success);
   static ActionNode* EmptyMatchCheck(int start_register,
                                      int repetition_register,
@@ -755,10 +759,13 @@ class ActionNode: public SeqRegExpNode {
     } u_increment_register;
     struct {
       int reg;
+      bool is_capture;
     } u_position_register;
     struct {
       int stack_pointer_register;
       int current_position_register;
+      int clear_register_count;
+      int clear_register_from;
     } u_submatch;
     struct {
       int start_register;
@@ -913,15 +920,22 @@ class EndNode: public RegExpNode {
 
 class NegativeSubmatchSuccess: public EndNode {
  public:
-  NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg)
+  NegativeSubmatchSuccess(int stack_pointer_reg,
+                          int position_reg,
+                          int clear_capture_count,
+                          int clear_capture_start)
       : EndNode(NEGATIVE_SUBMATCH_SUCCESS),
         stack_pointer_register_(stack_pointer_reg),
-        current_position_register_(position_reg) { }
+        current_position_register_(position_reg),
+        clear_capture_count_(clear_capture_count),
+        clear_capture_start_(clear_capture_start) { }
   virtual bool Emit(RegExpCompiler* compiler, Trace* trace);
 
  private:
   int stack_pointer_register_;
   int current_position_register_;
+  int clear_capture_count_;
+  int clear_capture_start_;
 };
 
 
@@ -1087,18 +1101,20 @@ class Trace {
     friend class Trace;
   };
 
-  class DeferredCapture: public DeferredAction {
+  class DeferredCapture : public DeferredAction {
    public:
-    DeferredCapture(int reg, Trace* trace)
+    DeferredCapture(int reg, bool is_capture, Trace* trace)
         : DeferredAction(ActionNode::STORE_POSITION, reg),
           cp_offset_(trace->cp_offset()) { }
     int cp_offset() { return cp_offset_; }
+    bool is_capture() { return is_capture_; }
    private:
     int cp_offset_;
+    bool is_capture_;
     void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
   };
 
-  class DeferredSetRegister :public DeferredAction {
+  class DeferredSetRegister : public DeferredAction {
    public:
     DeferredSetRegister(int reg, int value)
         : DeferredAction(ActionNode::SET_REGISTER, reg),
@@ -1118,7 +1134,7 @@ class Trace {
     Interval range_;
   };
 
-  class DeferredIncrementRegister: public DeferredAction {
+  class DeferredIncrementRegister : public DeferredAction {
    public:
     explicit DeferredIncrementRegister(int reg)
         : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
@@ -1189,13 +1205,13 @@ class Trace {
   int FindAffectedRegisters(OutSet* affected_registers);
   void PerformDeferredActions(RegExpMacroAssembler* macro,
                                int max_register,
-                               OutSet& affected_registers);
+                               OutSet& affected_registers,
+                               OutSet* registers_to_pop,
+                               OutSet* registers_to_clear);
   void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
                                 int max_register,
-                                OutSet& affected_registers);
-  void PushAffectedRegisters(RegExpMacroAssembler* macro,
-                             int max_register,
-                             OutSet& affected_registers);
+                                OutSet& registers_to_pop,
+                                OutSet& registers_to_clear);
   int cp_offset_;
   DeferredAction* actions_;
   Label* backtrack_;
index 9e7b4b3..6b92429 100644 (file)
@@ -4149,7 +4149,10 @@ RegExpTree* RegExpParser::ParseGroup() {
   } else {
     ASSERT(type == '=' || type == '!');
     bool is_positive = (type == '=');
-    return new RegExpLookahead(body, is_positive);
+    return new RegExpLookahead(body,
+                               is_positive,
+                               end_capture_index - capture_index,
+                               capture_index);
   }
 }
 
index ea54542..e58177c 100644 (file)
@@ -332,15 +332,29 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
     __ push(ebx);
     const int four_arguments = 4;
     FrameAlign(four_arguments, ecx);
-    // Put arguments into allocated stack area.
+    // Put arguments into allocated stack area, last argument highest on stack.
+    // Parameters are
+    //   UC16** buffer - really the String** of the input string
+    //   int byte_offset1 - byte offset from *buffer of start of capture
+    //   int byte_offset2 - byte offset from *buffer of current position
+    //   size_t byte_length - length of capture in bytes(!)
+
+    // Set byte_length.
     __ mov(Operand(esp, 3 * kPointerSize), ebx);
+    // Set byte_offset2.
+    // Found by adding negative string-end offset of current position (edi)
+    // to String** offset of end of string.
     __ mov(ecx, Operand(ebp, kInputEndOffset));
     __ add(edi, Operand(ecx));
     __ mov(Operand(esp, 2 * kPointerSize), edi);
+    // Set byte_offset1.
+    // Start of capture, where eax already holds string-end negative offset.
     __ add(eax, Operand(ecx));
     __ mov(Operand(esp, 1 * kPointerSize), eax);
+    // Set buffer. Original String** parameter to regexp code.
     __ mov(eax, Operand(ebp, kInputBuffer));
     __ mov(Operand(esp, 0 * kPointerSize), eax);
+
     Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
     CallCFunction(function_address, four_arguments);
     // Pop original values before reacting on result value.
@@ -946,9 +960,12 @@ void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
 }
 
 
-void RegExpMacroAssemblerIA32::ClearRegister(int reg) {
+void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) {
+  ASSERT(reg_from <= reg_to);
   __ mov(eax, Operand(ebp, kInputStartMinusOne));
-  __ mov(register_location(reg), eax);
+  for (int reg = reg_from; reg <= reg_to; reg++) {
+    __ mov(register_location(reg), eax);
+  }
 }
 
 
@@ -987,8 +1004,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Execute(
                             stack_top);
 
   if (result < 0 && !Top::has_pending_exception()) {
-    // We detected a stack overflow in RegExp code, but haven't created
-    // the exception yet.
+    // We detected a stack overflow (on the backtrack stack) in RegExp code,
+    // but haven't created the exception yet.
     Top::StackOverflow();
   }
   return (result < 0) ? EXCEPTION : (result ? SUCCESS : FAILURE);
@@ -1170,6 +1187,9 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
 
 
 void RegExpMacroAssemblerIA32::FrameAlign(int num_arguments, Register scratch) {
+  // TODO(lrn): Since we no longer use the system stack arbitrarily, we
+  // know the current stack alignment - esp points to the last regexp register.
+  // We can do this simpler then.
   int frameAlignment = OS::ActivationFrameAlignment();
   if (frameAlignment != 0) {
     // Make stack end at alignment and make room for num_arguments words
index dd74c65..93f31b3 100644 (file)
@@ -107,7 +107,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
   virtual void SetRegister(int register_index, int to);
   virtual void Succeed();
   virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
-  virtual void ClearRegister(int reg);
+  virtual void ClearRegisters(int reg_from, int reg_to);
   virtual void WriteStackPointerToRegister(int reg);
 
   static Result Execute(Code* code,
index 89d2482..05b08f3 100644 (file)
@@ -104,8 +104,11 @@ void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
 }
 
 
-void RegExpMacroAssemblerIrregexp::ClearRegister(int reg) {
-  SetRegister(reg, -1);
+void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) {
+  ASSERT(reg_from <= reg_to);
+  for (int reg = reg_from; reg <= reg_to; reg++) {
+    SetRegister(reg, -1);
+  }
 }
 
 
index 9e9784d..0d5999f 100644 (file)
@@ -66,7 +66,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
   virtual void AdvanceRegister(int reg, int by);  // r[reg] += by.
   virtual void SetRegister(int register_index, int to);
   virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
-  virtual void ClearRegister(int reg);
+  virtual void ClearRegisters(int reg_from, int reg_to);
   virtual void ReadCurrentPositionFromRegister(int reg);
   virtual void WriteStackPointerToRegister(int reg);
   virtual void ReadStackPointerFromRegister(int reg);
index 541f909..74345d8 100644 (file)
@@ -150,9 +150,9 @@ void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
 }
 
 
-void RegExpMacroAssemblerTracer::ClearRegister(int reg) {
-  PrintF(" ClearRegister(register=%d);\n", reg);
-  assembler_->ClearRegister(reg);
+void RegExpMacroAssemblerTracer::ClearRegisters(int reg_from, int reg_to) {
+  PrintF(" ClearRegister(from=%d, to=%d);\n", reg_from, reg_to);
+  assembler_->ClearRegisters(reg_from, reg_to);
 }
 
 
index 49576e0..d3aeff7 100644 (file)
@@ -107,7 +107,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
   virtual void SetRegister(int register_index, int to);
   virtual void Succeed();
   virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
-  virtual void ClearRegister(int reg);
+  virtual void ClearRegisters(int reg_from, int reg_to);
   virtual void WriteStackPointerToRegister(int reg);
  private:
   RegExpMacroAssembler* assembler_;
index 74133d5..cf17697 100644 (file)
@@ -168,7 +168,7 @@ class RegExpMacroAssembler {
   virtual void SetRegister(int register_index, int to) = 0;
   virtual void Succeed() = 0;
   virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
-  virtual void ClearRegister(int reg) = 0;
+  virtual void ClearRegisters(int reg_from, int reg_to) = 0;
   virtual void WriteStackPointerToRegister(int reg) = 0;
 
  private:
diff --git a/test/mjsunit/regexp-lookahead.js b/test/mjsunit/regexp-lookahead.js
new file mode 100644 (file)
index 0000000..1188b56
--- /dev/null
@@ -0,0 +1,166 @@
+// Copyright 2009 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Tests captures in positive and negative look-ahead in regular expressions.
+
+function stringEscape(string) {
+  // Converts string to source literal.
+  return '"' + string.replace(/["\\]/g, "\\$1") + '"';
+}
+
+function testRE(re, input, expected_result) {
+  var testName = re + ".test(" + stringEscape(input) +")";
+  if (expected_result) {
+    assertTrue(re.test(input), testName);
+  } else {
+    assertFalse(re.test(input), testName);
+  }
+}
+
+function execRE(re, input, expected_result) {
+  var testName = re + ".exec('" + stringEscape(input) +"')";
+  assertEquals(expected_result, re.exec(input), testName);
+}
+
+// Test of simple positive lookahead.
+
+var re = /^(?=a)/;
+testRE(re, "a", true);
+testRE(re, "b", false);
+execRE(re, "a", [""]);
+
+re = /^(?=\woo)f\w/;
+testRE(re, "foo", true);
+testRE(re, "boo", false);
+testRE(re, "fao", false);
+testRE(re, "foa", false);
+execRE(re, "foo", ["fo"]);
+
+re = /(?=\w).(?=\W)/;
+testRE(re, ".a! ", true);
+testRE(re, ".! ", false);
+testRE(re, ".ab! ", true);
+execRE(re, ".ab! ", ["b"]);
+
+re = /(?=f(?=[^f]o))../;
+testRE(re, ", foo!", true);
+testRE(re, ", fo!", false);
+testRE(re, ", ffo", false);
+execRE(re, ", foo!", ["fo"]);
+
+// Positive lookahead with captures.
+re = /^[^\'\"]*(?=([\'\"])).*\1(\w+)\1/;
+testRE(re, "  'foo' ", true);
+testRE(re, '  "foo" ', true);
+testRE(re, " \" 'foo' ", false);
+testRE(re, " ' \"foo\" ", false);
+testRE(re, "  'foo\" ", false);
+testRE(re, "  \"foo' ", false);
+execRE(re, "  'foo' ", ["  'foo'", "'", "foo"]);
+execRE(re, '  "foo" ', ['  "foo"', '"', 'foo']);
+
+// Captures are cleared on backtrack past the look-ahead.
+re = /^(?:(?=(.))a|b)\1$/;
+testRE(re, "aa", true);
+testRE(re, "b", true);
+testRE(re, "bb", false);
+testRE(re, "a", false);
+execRE(re, "aa", ["aa", "a"]);
+execRE(re, "b", ["b", undefined]);
+
+re = /^(?=(.)(?=(.)\1\2)\2\1)\1\2/;
+testRE(re, "abab", true);
+testRE(re, "ababxxxxxxxx", true);
+testRE(re, "aba", false);
+execRE(re, "abab", ["ab", "a", "b"]);
+
+re = /^(?:(?=(.))a|b|c)$/;
+testRE(re, "a", true);
+testRE(re, "b", true);
+testRE(re, "c", true);
+testRE(re, "d", false);
+execRE(re, "a", ["a", "a"]);
+execRE(re, "b", ["b", undefined]);
+execRE(re, "c", ["c", undefined]);
+
+execRE(/^(?=(b))b/, "b", ["b", "b"]);
+execRE(/^(?:(?=(b))|a)b/, "ab", ["ab", undefined]);
+execRE(/^(?:(?=(b)(?:(?=(c))|d))|)bd/, "bd", ["bd", "b", undefined]);
+
+
+
+// Test of Negative Look-Ahead.
+
+re = /(?!x)./;
+testRE(re, "y", true);
+testRE(re, "x", false);
+execRE(re, "y", ["y"]);
+
+re = /(?!(\d))|\d/;
+testRE(re, "4", true);
+execRE(re, "4", ["4", undefined]);
+execRE(re, "x", ["", undefined]);
+
+
+// Test mixed nested look-ahead with captures.
+
+re = /^(?=(x)(?=(y)))/;
+testRE(re, "xy", true);
+testRE(re, "xz", false);
+execRE(re, "xy", ["", "x", "y"]);
+
+re = /^(?!(x)(?!(y)))/;
+testRE(re, "xy", true);
+testRE(re, "xz", false);
+execRE(re, "xy", ["", undefined, undefined]);
+
+re = /^(?=(x)(?!(y)))/;
+testRE(re, "xz", true);
+testRE(re, "xy", false)
+execRE(re, "xz", ["", "x", undefined]);
+
+re = /^(?!(x)(?=(y)))/;
+testRE(re, "xz", true);
+testRE(re, "xy", false);
+execRE(re, "xz", ["", undefined, undefined]);
+
+re = /^(?=(x)(?!(y)(?=(z))))/;
+testRE(re, "xaz", true);
+testRE(re, "xya", true);
+testRE(re, "xyz", false);
+testRE(re, "a", false);
+execRE(re, "xaz", ["", "x", undefined, undefined]);
+execRE(re, "xya", ["", "x", undefined, undefined]);
+
+re = /^(?!(x)(?=(y)(?!(z))))/;
+testRE(re, "a", true);
+testRE(re, "xa", true);
+testRE(re, "xyz", true);
+testRE(re, "xya", false);
+execRE(re, "a", ["", undefined, undefined, undefined]);
+execRE(re, "xa", ["", undefined, undefined, undefined]);
+execRE(re, "xyz", ["", undefined, undefined, undefined]);