From: lrn@chromium.org Date: Fri, 23 Jan 2009 13:34:51 +0000 (+0000) Subject: Clears captures of look-aheads on backtrack. X-Git-Tag: upstream/4.7.83~24760 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=18c2d3ef4eafaef32106284fea8aeae550c3fdff;p=platform%2Fupstream%2Fv8.git Clears captures of look-aheads on backtrack. Reduces number of pushes when flushing a trace. Some are converted to clears in the undo-code instead, and some just ignored if they have no value worth restoring. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1136 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/ast.h b/src/ast.h index 00bfddb..ea4c947 100644 --- a/src/ast.h +++ b/src/ast.h @@ -1521,9 +1521,15 @@ class RegExpCapture: public RegExpTree { class RegExpLookahead: public RegExpTree { public: - RegExpLookahead(RegExpTree* body, bool is_positive) + RegExpLookahead(RegExpTree* body, + bool is_positive, + int capture_count, + int capture_from) : body_(body), - is_positive_(is_positive) { } + is_positive_(is_positive), + capture_count_(capture_count), + capture_from_(capture_from) { } + virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); @@ -1535,9 +1541,13 @@ class RegExpLookahead: public RegExpTree { virtual int max_match() { return 0; } RegExpTree* body() { return body_; } bool is_positive() { return is_positive_; } + int capture_count() { return capture_count_; } + int capture_from() { return capture_from_; } private: RegExpTree* body_; bool is_positive_; + int capture_count_; + int capture_from_; }; diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 5a782f8..f56e3a7 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -1360,41 +1360,44 @@ int Trace::FindAffectedRegisters(OutSet* affected_registers) { } -void Trace::PushAffectedRegisters(RegExpMacroAssembler* assembler, - int max_register, - OutSet& affected_registers) { - // Stay safe and check every half times 
the limit. - // (Round up in case the limit is 1). - int push_limit = (assembler->stack_limit_slack() + 1) / 2; - for (int reg = 0, pushes = 0; reg <= max_register; reg++) { - if (affected_registers.Get(reg)) { - pushes++; - RegExpMacroAssembler::StackCheckFlag check_stack_limit = - (pushes % push_limit) == 0 ? - RegExpMacroAssembler::kCheckStackLimit : - RegExpMacroAssembler::kNoStackLimitCheck; - assembler->PushRegister(reg, check_stack_limit); - } - } -} - - void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, int max_register, - OutSet& affected_registers) { + OutSet& registers_to_pop, + OutSet& registers_to_clear) { for (int reg = max_register; reg >= 0; reg--) { - if (affected_registers.Get(reg)) assembler->PopRegister(reg); + if (registers_to_pop.Get(reg)) assembler->PopRegister(reg); + else if (registers_to_clear.Get(reg)) { + int clear_to = reg; + while (reg > 0 && registers_to_pop.Get(reg - 1)) { + reg--; + } + assembler->ClearRegisters(reg, clear_to); + } } } void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, int max_register, - OutSet& affected_registers) { + OutSet& affected_registers, + OutSet* registers_to_pop, + OutSet* registers_to_clear) { + // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. + const int push_limit = (assembler->stack_limit_slack() + 1) / 2; + for (int reg = 0; reg <= max_register; reg++) { if (!affected_registers.Get(reg)) { continue; } + // Count pushes performed to force a stack limit check occasionally. + int pushes = 0; + + // The chronologically first deferred action in the trace + // is used to infer the action needed to restore a register + // to its previous state (or not, if it's safe to ignore it). 
+ enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; + DeferredActionUndoType undo_action = IGNORE; + int value = 0; bool absolute = false; bool clear = false; @@ -1409,8 +1412,16 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, case ActionNode::SET_REGISTER: { Trace::DeferredSetRegister* psr = static_cast<Trace::DeferredSetRegister*>(action); - value += psr->value(); - absolute = true; + if (!absolute) { + value += psr->value(); + absolute = true; + } + // SET_REGISTER is currently only used for newly introduced loop + // counters. They can have a significant previous value if they + // occur in a loop. TODO(lrn): Propagate this information, so + // we can set undo_action to IGNORE if we know there is no value to + // restore. + undo_action = RESTORE; ASSERT_EQ(store_position, -1); ASSERT(!clear); break; @@ -1421,6 +1432,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, } ASSERT_EQ(store_position, -1); ASSERT(!clear); + undo_action = RESTORE; break; case ActionNode::STORE_POSITION: { Trace::DeferredCapture* pc = @@ -1428,6 +1440,19 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, if (!clear && store_position == -1) { store_position = pc->cp_offset(); } + + // For captures we know that stores and clears alternate. + // Other registers are never cleared, and if they occur + // inside a loop, they might be assigned more than once. + if (reg <= 1) { + // Registers zero and one, aka "capture zero", are + // always set correctly if we succeed. There is no + // need to undo a setting on backtrack, because we + // will set it again or fail. + undo_action = IGNORE; + } else { + undo_action = pc->is_capture() ? CLEAR : RESTORE; + } ASSERT(!absolute); ASSERT_EQ(value, 0); break; @@ -1436,8 +1461,10 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, // Since we're scanning in reverse order, if we've already // set the position we have to ignore historically earlier // clearing operations. 
- if (store_position == -1) + if (store_position == -1) { clear = true; + } + undo_action = RESTORE; ASSERT(!absolute); ASSERT_EQ(value, 0); break; @@ -1448,10 +1475,27 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, } } } + // Prepare for the undo-action (e.g., push if it's going to be popped). + if (undo_action == RESTORE) { + pushes++; + RegExpMacroAssembler::StackCheckFlag stack_check = + RegExpMacroAssembler::kNoStackLimitCheck; + if (pushes == push_limit) { + stack_check = RegExpMacroAssembler::kCheckStackLimit; + pushes = 0; + } + + assembler->PushRegister(reg, stack_check); + registers_to_pop->Set(reg); + } else if (undo_action == CLEAR) { + registers_to_clear->Set(reg); + } + // Perform the chronologically last action (or accumulated increment) + // for the register. if (store_position != -1) { assembler->WriteCurrentPositionToRegister(reg, store_position); } else if (clear) { - assembler->ClearRegister(reg); + assembler->ClearRegisters(reg, reg); } else if (absolute) { assembler->SetRegister(reg, value); } else if (value != 0) { @@ -1486,9 +1530,15 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { // Generate deferred actions here along with code to undo them again. OutSet affected_registers; + int max_register = FindAffectedRegisters(&affected_registers); - PushAffectedRegisters(assembler, max_register, affected_registers); - PerformDeferredActions(assembler, max_register, affected_registers); + OutSet registers_to_pop; + OutSet registers_to_clear; + PerformDeferredActions(assembler, + max_register, + affected_registers, + &registers_to_pop, + &registers_to_clear); if (backtrack() != NULL) { // Here we have a concrete backtrack location. 
These are set up by choice // nodes and so they indicate that we have a deferred save of the current @@ -1511,7 +1561,10 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { if (backtrack() != NULL) { assembler->PopCurrentPosition(); } - RestoreAffectedRegisters(assembler, max_register, affected_registers); + RestoreAffectedRegisters(assembler, + max_register, + registers_to_pop, + registers_to_clear); if (backtrack() == NULL) { assembler->Backtrack(); } else { @@ -1523,15 +1576,26 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { - if (!trace->is_trivial()) { - return trace->Flush(compiler, this); - } RegExpMacroAssembler* assembler = compiler->macro_assembler(); + + // Omit flushing the trace. We discard the entire stack frame anyway. + if (!label()->is_bound()) { + // We are completely independent of the trace, since we ignore it, + // so this code can be used as the generic version. assembler->Bind(label()); } + + // Throw away everything on the backtrack stack since the start + // of the negative submatch and restore the character position. assembler->ReadCurrentPositionFromRegister(current_position_register_); assembler->ReadStackPointerFromRegister(stack_pointer_register_); + if (clear_capture_count_ > 0) { + // Clear any captures that might have been performed during the success + // of the body of the negative look-ahead. + int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; + assembler->ClearRegisters(clear_capture_start_, clear_capture_end); + } // Now that we have unwound the stack we find at the top of the stack the // backtrack that the BeginSubmatch node got. 
assembler->Backtrack(); @@ -1587,9 +1651,12 @@ ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { } -ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { +ActionNode* ActionNode::StorePosition(int reg, + bool is_capture, + RegExpNode* on_success) { ActionNode* result = new ActionNode(STORE_POSITION, on_success); result->data_.u_position_register.reg = reg; + result->data_.u_position_register.is_capture = is_capture; return result; } @@ -1615,10 +1682,14 @@ ActionNode* ActionNode::BeginSubmatch(int stack_reg, ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, int position_reg, + int clear_register_count, + int clear_register_from, RegExpNode* on_success) { ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); result->data_.u_submatch.stack_pointer_register = stack_reg; result->data_.u_submatch.current_position_register = position_reg; + result->data_.u_submatch.clear_register_count = clear_register_count; + result->data_.u_submatch.clear_register_from = clear_register_from; return result; } @@ -3170,7 +3241,9 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { switch (type_) { case STORE_POSITION: { Trace::DeferredCapture - new_capture(data_.u_position_register.reg, trace); + new_capture(data_.u_position_register.reg, + data_.u_position_register.is_capture, + trace); Trace new_trace = *trace; new_trace.add_action(&new_capture); return on_success()->Emit(compiler, &new_trace); @@ -3235,13 +3308,31 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { assembler->Bind(&skip_empty_check); return on_success()->Emit(compiler, trace); } - case POSITIVE_SUBMATCH_SUCCESS: + case POSITIVE_SUBMATCH_SUCCESS: { if (!trace->is_trivial()) return trace->Flush(compiler, this); assembler->ReadCurrentPositionFromRegister( data_.u_submatch.current_position_register); assembler->ReadStackPointerFromRegister( data_.u_submatch.stack_pointer_register); - return 
on_success()->Emit(compiler, trace); + int clear_register_count = data_.u_submatch.clear_register_count; + if (clear_register_count == 0) { + return on_success()->Emit(compiler, trace); + } + int clear_registers_from = data_.u_submatch.clear_register_from; + Label clear_registers_backtrack; + Trace new_trace = *trace; + new_trace.set_backtrack(&clear_registers_backtrack); + bool ok = on_success()->Emit(compiler, &new_trace); + if (!ok) { return false; } + + assembler->Bind(&clear_registers_backtrack); + int clear_registers_to = clear_registers_from + clear_register_count - 1; + assembler->ClearRegisters(clear_registers_from, clear_registers_to); + + ASSERT(trace->backtrack() == NULL); + assembler->Backtrack(); + return true; + } default: UNREACHABLE(); return false; @@ -3859,7 +3950,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min, if (body_can_be_empty) { // If the body can be empty we need to store the start position // so we can bail out if it was empty. - body_node = ActionNode::StorePosition(body_start_reg, body_node); + body_node = ActionNode::StorePosition(body_start_reg, false, body_node); } if (needs_capture_clearing) { // Before entering the body of this loop we need to clear captures. @@ -3921,6 +4012,8 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, newline_atom, ActionNode::PositiveSubmatchSuccess(stack_pointer_register, position_register, + 0, // No captures inside. + -1, // Ignored if no captures. on_success)); // Create an end-of-input matcher. 
RegExpNode* end_of_line = ActionNode::BeginSubmatch( @@ -3959,16 +4052,26 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { int stack_pointer_register = compiler->AllocateRegister(); int position_register = compiler->AllocateRegister(); + + const int registers_per_capture = 2; + const int register_of_first_capture = 2; + int register_count = capture_count_ * registers_per_capture; + int register_start = + register_of_first_capture + capture_from_ * registers_per_capture; + RegExpNode* success; if (is_positive()) { - return ActionNode::BeginSubmatch( + RegExpNode* node = ActionNode::BeginSubmatch( stack_pointer_register, position_register, body()->ToNode( compiler, ActionNode::PositiveSubmatchSuccess(stack_pointer_register, position_register, + register_count, + register_start, on_success))); + return node; } else { // We use a ChoiceNode for a negative lookahead because it has most of // the characteristics we need. It has the body of the lookahead as its @@ -3984,7 +4087,9 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, body()->ToNode( compiler, success = new NegativeSubmatchSuccess(stack_pointer_register, - position_register))); + position_register, + register_count, + register_start))); ChoiceNode* choice_node = new NegativeLookaheadChoiceNode(body_alt, GuardedAlternative(on_success)); @@ -4007,9 +4112,9 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body, RegExpNode* on_success) { int start_reg = RegExpCapture::StartRegister(index); int end_reg = RegExpCapture::EndRegister(index); - RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); + RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); RegExpNode* body_node = body->ToNode(compiler, store_end); - return ActionNode::StorePosition(start_reg, body_node); + return ActionNode::StorePosition(start_reg, true, body_node); } diff --git a/src/jsregexp.h b/src/jsregexp.h index a41a951..959eddd 100644 --- 
a/src/jsregexp.h +++ b/src/jsregexp.h @@ -719,13 +719,17 @@ class ActionNode: public SeqRegExpNode { }; static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success); static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); - static ActionNode* StorePosition(int reg, RegExpNode* on_success); + static ActionNode* StorePosition(int reg, + bool is_capture, + RegExpNode* on_success); static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); static ActionNode* BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode* on_success); static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg, int restore_reg, + int clear_capture_count, + int clear_capture_from, RegExpNode* on_success); static ActionNode* EmptyMatchCheck(int start_register, int repetition_register, @@ -755,10 +759,13 @@ class ActionNode: public SeqRegExpNode { } u_increment_register; struct { int reg; + bool is_capture; } u_position_register; struct { int stack_pointer_register; int current_position_register; + int clear_register_count; + int clear_register_from; } u_submatch; struct { int start_register; @@ -913,15 +920,22 @@ class EndNode: public RegExpNode { class NegativeSubmatchSuccess: public EndNode { public: - NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg) + NegativeSubmatchSuccess(int stack_pointer_reg, + int position_reg, + int clear_capture_count, + int clear_capture_start) : EndNode(NEGATIVE_SUBMATCH_SUCCESS), stack_pointer_register_(stack_pointer_reg), - current_position_register_(position_reg) { } + current_position_register_(position_reg), + clear_capture_count_(clear_capture_count), + clear_capture_start_(clear_capture_start) { } virtual bool Emit(RegExpCompiler* compiler, Trace* trace); private: int stack_pointer_register_; int current_position_register_; + int clear_capture_count_; + int clear_capture_start_; }; @@ -1087,18 +1101,20 @@ class Trace { friend class Trace; }; - class DeferredCapture: public DeferredAction 
{ + class DeferredCapture : public DeferredAction { public: - DeferredCapture(int reg, Trace* trace) + DeferredCapture(int reg, bool is_capture, Trace* trace) : DeferredAction(ActionNode::STORE_POSITION, reg), cp_offset_(trace->cp_offset()) { } int cp_offset() { return cp_offset_; } + bool is_capture() { return is_capture_; } private: int cp_offset_; + bool is_capture_; void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } }; - class DeferredSetRegister :public DeferredAction { + class DeferredSetRegister : public DeferredAction { public: DeferredSetRegister(int reg, int value) : DeferredAction(ActionNode::SET_REGISTER, reg), @@ -1118,7 +1134,7 @@ class Trace { Interval range_; }; - class DeferredIncrementRegister: public DeferredAction { + class DeferredIncrementRegister : public DeferredAction { public: explicit DeferredIncrementRegister(int reg) : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { } @@ -1189,13 +1205,13 @@ class Trace { int FindAffectedRegisters(OutSet* affected_registers); void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register, - OutSet& affected_registers); + OutSet& affected_registers, + OutSet* registers_to_pop, + OutSet* registers_to_clear); void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register, - OutSet& affected_registers); - void PushAffectedRegisters(RegExpMacroAssembler* macro, - int max_register, - OutSet& affected_registers); + OutSet& registers_to_pop, + OutSet& registers_to_clear); int cp_offset_; DeferredAction* actions_; Label* backtrack_; diff --git a/src/parser.cc b/src/parser.cc index 9e7b4b3..6b92429 100644 --- a/src/parser.cc +++ b/src/parser.cc @@ -4149,7 +4149,10 @@ RegExpTree* RegExpParser::ParseGroup() { } else { ASSERT(type == '=' || type == '!'); bool is_positive = (type == '='); - return new RegExpLookahead(body, is_positive); + return new RegExpLookahead(body, + is_positive, + end_capture_index - capture_index, + capture_index); } } diff --git 
a/src/regexp-macro-assembler-ia32.cc b/src/regexp-macro-assembler-ia32.cc index ea54542..e58177c 100644 --- a/src/regexp-macro-assembler-ia32.cc +++ b/src/regexp-macro-assembler-ia32.cc @@ -332,15 +332,29 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase( __ push(ebx); const int four_arguments = 4; FrameAlign(four_arguments, ecx); - // Put arguments into allocated stack area. + // Put arguments into allocated stack area, last argument highest on stack. + // Parameters are + // UC16** buffer - really the String** of the input string + // int byte_offset1 - byte offset from *buffer of start of capture + // int byte_offset2 - byte offset from *buffer of current position + // size_t byte_length - length of capture in bytes(!) + + // Set byte_length. __ mov(Operand(esp, 3 * kPointerSize), ebx); + // Set byte_offset2. + // Found by adding negative string-end offset of current position (edi) + // to String** offset of end of string. __ mov(ecx, Operand(ebp, kInputEndOffset)); __ add(edi, Operand(ecx)); __ mov(Operand(esp, 2 * kPointerSize), edi); + // Set byte_offset1. + // Start of capture, where eax already holds string-end negative offset. __ add(eax, Operand(ecx)); __ mov(Operand(esp, 1 * kPointerSize), eax); + // Set buffer. Original String** parameter to regexp code. __ mov(eax, Operand(ebp, kInputBuffer)); __ mov(Operand(esp, 0 * kPointerSize), eax); + Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16); CallCFunction(function_address, four_arguments); // Pop original values before reacting on result value. 
@@ -946,9 +960,12 @@ void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg, } -void RegExpMacroAssemblerIA32::ClearRegister(int reg) { +void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) { + ASSERT(reg_from <= reg_to); __ mov(eax, Operand(ebp, kInputStartMinusOne)); - __ mov(register_location(reg), eax); + for (int reg = reg_from; reg <= reg_to; reg++) { + __ mov(register_location(reg), eax); + } } @@ -987,8 +1004,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Execute( stack_top); if (result < 0 && !Top::has_pending_exception()) { - // We detected a stack overflow in RegExp code, but haven't created - // the exception yet. + // We detected a stack overflow (on the backtrack stack) in RegExp code, + // but haven't created the exception yet. Top::StackOverflow(); } return (result < 0) ? EXCEPTION : (result ? SUCCESS : FAILURE); @@ -1170,6 +1187,9 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() { void RegExpMacroAssemblerIA32::FrameAlign(int num_arguments, Register scratch) { + // TODO(lrn): Since we no longer use the system stack arbitrarily, we + // know the current stack alignment - esp points to the last regexp register. + // We can do this simpler then. 
int frameAlignment = OS::ActivationFrameAlignment(); if (frameAlignment != 0) { // Make stack end at alignment and make room for num_arguments words diff --git a/src/regexp-macro-assembler-ia32.h b/src/regexp-macro-assembler-ia32.h index dd74c65..93f31b3 100644 --- a/src/regexp-macro-assembler-ia32.h +++ b/src/regexp-macro-assembler-ia32.h @@ -107,7 +107,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { virtual void SetRegister(int register_index, int to); virtual void Succeed(); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset); - virtual void ClearRegister(int reg); + virtual void ClearRegisters(int reg_from, int reg_to); virtual void WriteStackPointerToRegister(int reg); static Result Execute(Code* code, diff --git a/src/regexp-macro-assembler-irregexp.cc b/src/regexp-macro-assembler-irregexp.cc index 89d2482..05b08f385 100644 --- a/src/regexp-macro-assembler-irregexp.cc +++ b/src/regexp-macro-assembler-irregexp.cc @@ -104,8 +104,11 @@ void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( } -void RegExpMacroAssemblerIrregexp::ClearRegister(int reg) { - SetRegister(reg, -1); +void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) { + ASSERT(reg_from <= reg_to); + for (int reg = reg_from; reg <= reg_to; reg++) { + SetRegister(reg, -1); + } } diff --git a/src/regexp-macro-assembler-irregexp.h b/src/regexp-macro-assembler-irregexp.h index 9e9784d..0d5999f 100644 --- a/src/regexp-macro-assembler-irregexp.h +++ b/src/regexp-macro-assembler-irregexp.h @@ -66,7 +66,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { virtual void AdvanceRegister(int reg, int by); // r[reg] += by. 
virtual void SetRegister(int register_index, int to); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset); - virtual void ClearRegister(int reg); + virtual void ClearRegisters(int reg_from, int reg_to); virtual void ReadCurrentPositionFromRegister(int reg); virtual void WriteStackPointerToRegister(int reg); virtual void ReadStackPointerFromRegister(int reg); diff --git a/src/regexp-macro-assembler-tracer.cc b/src/regexp-macro-assembler-tracer.cc index 541f909..74345d8 100644 --- a/src/regexp-macro-assembler-tracer.cc +++ b/src/regexp-macro-assembler-tracer.cc @@ -150,9 +150,9 @@ void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg, } -void RegExpMacroAssemblerTracer::ClearRegister(int reg) { - PrintF(" ClearRegister(register=%d);\n", reg); - assembler_->ClearRegister(reg); +void RegExpMacroAssemblerTracer::ClearRegisters(int reg_from, int reg_to) { + PrintF(" ClearRegister(from=%d, to=%d);\n", reg_from, reg_to); + assembler_->ClearRegisters(reg_from, reg_to); } diff --git a/src/regexp-macro-assembler-tracer.h b/src/regexp-macro-assembler-tracer.h index 49576e0..d3aeff7 100644 --- a/src/regexp-macro-assembler-tracer.h +++ b/src/regexp-macro-assembler-tracer.h @@ -107,7 +107,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { virtual void SetRegister(int register_index, int to); virtual void Succeed(); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset); - virtual void ClearRegister(int reg); + virtual void ClearRegisters(int reg_from, int reg_to); virtual void WriteStackPointerToRegister(int reg); private: RegExpMacroAssembler* assembler_; diff --git a/src/regexp-macro-assembler.h b/src/regexp-macro-assembler.h index 74133d5..cf17697 100644 --- a/src/regexp-macro-assembler.h +++ b/src/regexp-macro-assembler.h @@ -168,7 +168,7 @@ class RegExpMacroAssembler { virtual void SetRegister(int register_index, int to) = 0; virtual void Succeed() = 0; virtual void WriteCurrentPositionToRegister(int reg, int 
cp_offset) = 0; - virtual void ClearRegister(int reg) = 0; + virtual void ClearRegisters(int reg_from, int reg_to) = 0; virtual void WriteStackPointerToRegister(int reg) = 0; private: diff --git a/test/mjsunit/regexp-lookahead.js b/test/mjsunit/regexp-lookahead.js new file mode 100644 index 0000000..1188b56 --- /dev/null +++ b/test/mjsunit/regexp-lookahead.js @@ -0,0 +1,166 @@ +// Copyright 2009 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Tests captures in positive and negative look-ahead in regular expressions. + +function stringEscape(string) { + // Converts string to source literal. + return '"' + string.replace(/["\\]/g, "\\$1") + '"'; +} + +function testRE(re, input, expected_result) { + var testName = re + ".test(" + stringEscape(input) +")"; + if (expected_result) { + assertTrue(re.test(input), testName); + } else { + assertFalse(re.test(input), testName); + } +} + +function execRE(re, input, expected_result) { + var testName = re + ".exec('" + stringEscape(input) +"')"; + assertEquals(expected_result, re.exec(input), testName); +} + +// Test of simple positive lookahead. + +var re = /^(?=a)/; +testRE(re, "a", true); +testRE(re, "b", false); +execRE(re, "a", [""]); + +re = /^(?=\woo)f\w/; +testRE(re, "foo", true); +testRE(re, "boo", false); +testRE(re, "fao", false); +testRE(re, "foa", false); +execRE(re, "foo", ["fo"]); + +re = /(?=\w).(?=\W)/; +testRE(re, ".a! ", true); +testRE(re, ".! ", false); +testRE(re, ".ab! ", true); +execRE(re, ".ab! ", ["b"]); + +re = /(?=f(?=[^f]o))../; +testRE(re, ", foo!", true); +testRE(re, ", fo!", false); +testRE(re, ", ffo", false); +execRE(re, ", foo!", ["fo"]); + +// Positive lookahead with captures. +re = /^[^\'\"]*(?=([\'\"])).*\1(\w+)\1/; +testRE(re, " 'foo' ", true); +testRE(re, ' "foo" ', true); +testRE(re, " \" 'foo' ", false); +testRE(re, " ' \"foo\" ", false); +testRE(re, " 'foo\" ", false); +testRE(re, " \"foo' ", false); +execRE(re, " 'foo' ", [" 'foo'", "'", "foo"]); +execRE(re, ' "foo" ', [' "foo"', '"', 'foo']); + +// Captures are cleared on backtrack past the look-ahead. 
+re = /^(?:(?=(.))a|b)\1$/; +testRE(re, "aa", true); +testRE(re, "b", true); +testRE(re, "bb", false); +testRE(re, "a", false); +execRE(re, "aa", ["aa", "a"]); +execRE(re, "b", ["b", undefined]); + +re = /^(?=(.)(?=(.)\1\2)\2\1)\1\2/; +testRE(re, "abab", true); +testRE(re, "ababxxxxxxxx", true); +testRE(re, "aba", false); +execRE(re, "abab", ["ab", "a", "b"]); + +re = /^(?:(?=(.))a|b|c)$/; +testRE(re, "a", true); +testRE(re, "b", true); +testRE(re, "c", true); +testRE(re, "d", false); +execRE(re, "a", ["a", "a"]); +execRE(re, "b", ["b", undefined]); +execRE(re, "c", ["c", undefined]); + +execRE(/^(?=(b))b/, "b", ["b", "b"]); +execRE(/^(?:(?=(b))|a)b/, "ab", ["ab", undefined]); +execRE(/^(?:(?=(b)(?:(?=(c))|d))|)bd/, "bd", ["bd", "b", undefined]); + + + +// Test of Negative Look-Ahead. + +re = /(?!x)./; +testRE(re, "y", true); +testRE(re, "x", false); +execRE(re, "y", ["y"]); + +re = /(?!(\d))|\d/; +testRE(re, "4", true); +execRE(re, "4", ["4", undefined]); +execRE(re, "x", ["", undefined]); + + +// Test mixed nested look-ahead with captures. 
+ +re = /^(?=(x)(?=(y)))/; +testRE(re, "xy", true); +testRE(re, "xz", false); +execRE(re, "xy", ["", "x", "y"]); + +re = /^(?!(x)(?!(y)))/; +testRE(re, "xy", true); +testRE(re, "xz", false); +execRE(re, "xy", ["", undefined, undefined]); + +re = /^(?=(x)(?!(y)))/; +testRE(re, "xz", true); +testRE(re, "xy", false) +execRE(re, "xz", ["", "x", undefined]); + +re = /^(?!(x)(?=(y)))/; +testRE(re, "xz", true); +testRE(re, "xy", false); +execRE(re, "xz", ["", undefined, undefined]); + +re = /^(?=(x)(?!(y)(?=(z))))/; +testRE(re, "xaz", true); +testRE(re, "xya", true); +testRE(re, "xyz", false); +testRE(re, "a", false); +execRE(re, "xaz", ["", "x", undefined, undefined]); +execRE(re, "xya", ["", "x", undefined, undefined]); + +re = /^(?!(x)(?=(y)(?!(z))))/; +testRE(re, "a", true); +testRE(re, "xa", true); +testRE(re, "xyz", true); +testRE(re, "xya", false); +execRE(re, "a", ["", undefined, undefined, undefined]); +execRE(re, "xa", ["", undefined, undefined, undefined]); +execRE(re, "xyz", ["", undefined, undefined, undefined]); diff --git a/test/mjsunit/bugs/bug-187.js b/test/mjsunit/regress/regress-187.js similarity index 100% rename from test/mjsunit/bugs/bug-187.js rename to test/mjsunit/regress/regress-187.js