}
+// Emits a CHECK_NOT_REGS_EQUAL instruction: the opcode, then the two register
+// operands in the order reg1, reg2, and finally the on_not_equal label, which
+// is written through EmitOrLink.
+void IrregexpAssembler::CheckNotRegistersEqual(int reg1,
+ int reg2,
+ Label* on_not_equal) {
+ Emit(BC_CHECK_NOT_REGS_EQUAL);
+ Emit(reg1);
+ Emit(reg2);
+ EmitOrLink(on_not_equal);
+}
+
+
void IrregexpAssembler::CheckRegister(int byte_code,
int reg_index,
uint16_t vs,
// on_mismatch label will never be called.
void CheckNotBackReference(int capture_index, Label* on_mismatch);
void CheckNotBackReferenceNoCase(int capture_index, Label* on_mismatch);
+ void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
// Checks a register for strictly-less-than or greater-than-or-equal.
void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than);
V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 25, 6) /* check_not_back_ref_no_case captu... */ \
-V(LOOKUP_MAP1, 26, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
-V(LOOKUP_MAP2, 27, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
-V(LOOKUP_MAP8, 28, 99) /* l_map8 start16 byte_map addr32* */ \
-V(LOOKUP_HI_MAP8, 29, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
-V(CHECK_REGISTER_LT, 30, 8) /* check_reg_lt register_index value16 addr32 */ \
-V(CHECK_REGISTER_GE, 31, 8) /* check_reg_ge register_index value16 addr32 */
+V(CHECK_NOT_REGS_EQUAL, 26, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
+V(LOOKUP_MAP1, 27, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
+V(LOOKUP_MAP2, 28, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
+V(LOOKUP_MAP8, 29, 99) /* l_map8 start16 byte_map addr32* */ \
+V(LOOKUP_HI_MAP8, 30, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
+V(CHECK_REGISTER_LT, 31, 8) /* check_reg_lt register_index value16 addr32 */ \
+V(CHECK_REGISTER_GE, 32, 8) /* check_reg_ge register_index value16 addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(irregexp_native, false, "use native code Irregexp implementation (IA32 only)")
DEFINE_bool(disable_jscre, false, "abort if JSCRE is used. Only useful with --irregexp")
+DEFINE_bool(attempt_multiline_irregexp, false, "attempt to use Irregexp for multiline regexps")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_bool(testing_bool_flag, true, "testing_bool_flag")
pc = code_base + Load32(new_pc);
break;
}
+ BYTECODE(CHECK_NOT_REGS_EQUAL) {
+   // Operands: pc[1] and pc[2] are the two register indices; a 32-bit branch
+   // target follows at pc + 3.
+   if (registers[pc[1]] == registers[pc[2]]) {
+     // Registers hold the same value: continue with the next instruction.
+     pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
+   } else {
+     // Registers differ: branch to the target encoded in the instruction.
+     pc = code_base + Load32(pc + 3);
+   }
+   break;
+ }
BYTECODE(CHECK_NOT_BACK_REF) {
int from = registers[pc[1]];
int len = registers[pc[1] + 1] - from;
// TODO(erikcorry): Implement support.
if (info_.follows_word_interest ||
info_.follows_newline_interest ||
- info_.follows_start_interest ||
- info_.at_end) {
+ info_.follows_start_interest) {
return false;
}
if (label_.is_bound()) {
}
+// EndNodes are special. Because they can be very common and they are very
+// short we normally inline them. That is, if we are asked to emit a GoTo
+// we just emit the entire node. Since they don't have successors this
+// works.
+// Returns false without emitting anything when any of follows_word_interest,
+// follows_newline_interest or follows_start_interest is set on info();
+// otherwise returns the result of Emit().
bool EndNode::GoTo(RegExpCompiler* compiler) {
if (info()->follows_word_interest ||
info()->follows_newline_interest ||
- info()->follows_start_interest ||
- info()->at_end) {
+ info()->follows_start_interest) {
return false;
}
- if (!label()->is_bound()) {
- Bind(compiler->macro_assembler());
- }
- switch (action_) {
- case ACCEPT:
- compiler->macro_assembler()->Succeed();
- break;
- case BACKTRACK:
- compiler->macro_assembler()->Backtrack();
- break;
- }
- return true;
+ // Inline the node: emit its complete code here rather than a jump to it.
+ return Emit(compiler);
}
RegExpMacroAssembler* macro = compiler->macro_assembler();
switch (action_) {
case ACCEPT:
- Bind(macro);
+ if (!label()->is_bound()) Bind(macro);
+ if (info()->at_end) {
+ Label succeed;
+ // LoadCurrentCharacter will go to the label if we are at the end of the
+ // input string.
+ macro->LoadCurrentCharacter(0, &succeed);
+ macro->Backtrack();
+ macro->Bind(&succeed);
+ }
macro->Succeed();
return true;
case BACKTRACK:
- Bind(macro);
+ if (!label()->is_bound()) Bind(macro);
+ ASSERT(!info()->at_end);
macro->Backtrack();
return true;
}
}
-ActionNode* ActionNode::SavePosition(int reg, RegExpNode* on_success) {
- ActionNode* result = new ActionNode(SAVE_POSITION, on_success);
- result->data_.u_position_register.reg = reg;
- return result;
-}
-
-
ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
result->data_.u_position_register.reg = reg;
}
-ActionNode* ActionNode::BeginSubmatch(int reg, RegExpNode* on_success) {
+// Creates a BEGIN_SUBMATCH action node that continues at on_success. Both
+// register numbers are stored in data_.u_submatch: stack_reg as the stack
+// pointer register and position_reg as the current position register.
+ActionNode* ActionNode::BeginSubmatch(int stack_reg,
+ int position_reg,
+ RegExpNode* on_success) {
ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
- result->data_.u_submatch_stack_pointer_register.reg = reg;
+ result->data_.u_submatch.stack_pointer_register = stack_reg;
+ result->data_.u_submatch.current_position_register = position_reg;
return result;
}
-ActionNode* ActionNode::EscapeSubmatch(int reg, RegExpNode* on_success) {
+// Creates an ESCAPE_SUBMATCH action node that continues at on_success.
+// stack_reg is always stored as the stack pointer register. position_reg is
+// stored only when restore_position is true; otherwise it is ignored and -1
+// is stored in its place.
+ActionNode* ActionNode::EscapeSubmatch(int stack_reg,
+ bool restore_position,
+ int position_reg,
+ RegExpNode* on_success) {
ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success);
- result->data_.u_submatch_stack_pointer_register.reg = reg;
+ result->data_.u_submatch.stack_pointer_register = stack_reg;
+ if (restore_position) {
+ result->data_.u_submatch.current_position_register = position_reg;
+ } else {
+ // -1 marks "no position register"; the emitter compares against -1 before
+ // reading the position back.
+ result->data_.u_submatch.current_position_register = -1;
+ }
return result;
}
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
Bind(macro_assembler);
int element_count = elms_->length();
+ ASSERT(element_count != 0);
int cp_offset = 0;
+ if (info()->at_end) {
+ macro_assembler->Backtrack();
+ return true;
+ }
// First, handle straight character matches.
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
macro->Backtrack();
break;
}
- case SAVE_POSITION:
- macro->WriteCurrentPositionToRegister(
- data_.u_position_register.reg);
- break;
case RESTORE_POSITION:
macro->ReadCurrentPositionFromRegister(
data_.u_position_register.reg);
break;
case BEGIN_SUBMATCH:
+ macro->WriteCurrentPositionToRegister(
+ data_.u_submatch.current_position_register);
macro->WriteStackPointerToRegister(
- data_.u_submatch_stack_pointer_register.reg);
+ data_.u_submatch.stack_pointer_register);
break;
case ESCAPE_SUBMATCH:
+ if (info()->at_end) {
+ Label at_end;
+ // LoadCurrentCharacter jumps to the label if we are beyond the string
+ // end.
+ macro->LoadCurrentCharacter(0, &at_end);
+ macro->Backtrack();
+ macro->Bind(&at_end);
+ }
+ if (data_.u_submatch.current_position_register != -1) {
+ macro->ReadCurrentPositionFromRegister(
+ data_.u_submatch.current_position_register);
+ }
macro->ReadStackPointerFromRegister(
- data_.u_submatch_stack_pointer_register.reg);
+ data_.u_submatch.stack_pointer_register);
break;
default:
UNREACHABLE();
macro->IfRegisterLT(start_reg_, 0, on_success()->label());
macro->IfRegisterLT(end_reg_, 0, on_success()->label());
ASSERT_EQ(start_reg_ + 1, end_reg_);
- if (compiler->ignore_case()) {
- macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
+ if (info()->at_end) {
+ // If we are constrained to match at the end of the input then succeed
+ // iff the back reference is empty.
+ macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label());
} else {
- macro->CheckNotBackReference(start_reg_, on_failure_->label());
+ if (compiler->ignore_case()) {
+ macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
+ } else {
+ macro->CheckNotBackReference(start_reg_, on_failure_->label());
+ }
}
return on_success()->GoTo(compiler);
}
stream()->Add("label=\"$%i:=$pos\", shape=octagon",
that->data_.u_position_register.reg);
break;
- case ActionNode::SAVE_POSITION:
- stream()->Add("label=\"$%i:=$pos\", shape=octagon",
- that->data_.u_position_register.reg);
- break;
case ActionNode::RESTORE_POSITION:
stream()->Add("label=\"$pos:=$%i\", shape=octagon",
that->data_.u_position_register.reg);
break;
case ActionNode::BEGIN_SUBMATCH:
- stream()->Add("label=\"begin\", shape=septagon");
+ stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
+ that->data_.u_submatch.current_position_register);
break;
case ActionNode::ESCAPE_SUBMATCH:
stream()->Add("label=\"escape\", shape=septagon");
// fail
return ActionNode::BeginSubmatch(
stack_pointer_register,
- ActionNode::SavePosition(
- position_register,
- body()->ToNode(
- compiler,
- ActionNode::RestorePosition(
- position_register,
- ActionNode::EscapeSubmatch(stack_pointer_register,
- on_success)),
- on_failure)));
+ position_register,
+ body()->ToNode(
+ compiler,
+ ActionNode::EscapeSubmatch(
+ stack_pointer_register,
+ true, // Also restore input position.
+ position_register,
+ on_success),
+ on_failure));
} else {
// begin submatch scope
// try
on_success));
RegExpNode* body_node = body()->ToNode(
compiler,
- ActionNode::EscapeSubmatch(stack_pointer_register, on_failure),
+ ActionNode::EscapeSubmatch(stack_pointer_register,
+ false, // Don't also restore position
+ 0, // Unused arguments.
+ on_failure),
compiler->backtrack());
GuardedAlternative body_alt(body_node);
try_node->AddAlternative(body_alt);
return ActionNode::BeginSubmatch(stack_pointer_register,
- ActionNode::SavePosition(
- position_register,
- try_node));
+ position_register,
+ try_node);
}
}
ActionNode* action = new ActionNode(*this);
action->info()->AddFromPreceding(&full_info);
AddSibling(action);
- action->set_on_success(action->on_success()->PropagateForward(info));
+ if (type_ != ESCAPE_SUBMATCH) {
+ action->set_on_success(action->on_success()->PropagateForward(info));
+ }
return action;
}
alternative.set_node(alternative.node()->PropagateForward(info));
choice->alternatives()->Add(alternative);
}
+ if (!choice->on_failure_->IsBacktrack()) {
+ choice->on_failure_ = choice->on_failure_->PropagateForward(info);
+ }
return choice;
}
RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
- return PropagateToEndpoint(this, info);
+ // Start from this node's own info and add what arrives from the preceding
+ // node.
+ NodeInfo full_info(*this->info());
+ full_info.AddFromPreceding(info);
+ // If GetSibling finds a node for the combined info, return it instead of
+ // creating another copy.
+ RegExpNode* sibling = GetSibling(&full_info);
+ if (sibling != NULL) return sibling;
+ EnsureSiblings();
+ // Otherwise copy this node, add the combined info to the copy and register
+ // the copy as a sibling.
+ BackReferenceNode* back_ref = new BackReferenceNode(*this);
+ back_ref->info()->AddFromPreceding(&full_info);
+ AddSibling(back_ref);
+ // TODO(erikcorry): A back reference has to have two successors (by default
+ // the same node). The first is used if the back reference matches a non-
+ // empty back reference, the second if it matches an empty one. This doesn't
+ // matter for at_end, which is the only one implemented right now, but it will
+ // matter for other pieces of info.
+ // Pass the incoming info on to the copy's successor.
+ back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
+ return back_ref;
}
return Handle<FixedArray>::null();
}
+ if (is_multiline && !FLAG_attempt_multiline_irregexp) {
+ return Handle<FixedArray>::null();
+ }
+
if (FLAG_irregexp_native) {
#ifdef ARM
UNIMPLEMENTED();
STORE_REGISTER,
INCREMENT_REGISTER,
STORE_POSITION,
- SAVE_POSITION,
RESTORE_POSITION,
BEGIN_SUBMATCH,
ESCAPE_SUBMATCH
static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg, RegExpNode* on_success);
- static ActionNode* SavePosition(int reg, RegExpNode* on_success);
static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
- static ActionNode* BeginSubmatch(int reg, RegExpNode* on_success);
- static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success);
+ static ActionNode* BeginSubmatch(int stack_pointer_reg,
+ int position_reg,
+ RegExpNode* on_success);
+ static ActionNode* EscapeSubmatch(int stack_pointer_reg,
+ bool and_restore_position,
+ int restore_reg,
+ RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info);
int reg;
} u_position_register;
struct {
- int reg;
- } u_submatch_stack_pointer_register;
+ int stack_pointer_register;
+ int current_position_register;
+ } u_submatch;
} data_;
ActionNode(Type type, RegExpNode* on_success)
: SeqRegExpNode(on_success),
}
+// Loads the contents of register_location(reg1) into eax and of
+// register_location(reg2) into ecx (both are overwritten), compares them, and
+// passes the not_equal condition together with on_not_equal to
+// BranchOrBacktrack.
+void RegExpMacroAssemblerIA32::CheckNotRegistersEqual(int reg1,
+ int reg2,
+ Label* on_not_equal) {
+ __ mov(eax, register_location(reg1));
+ __ mov(ecx, register_location(reg2));
+ __ cmp(ecx, Operand(eax));
+ BranchOrBacktrack(not_equal, on_not_equal);
+}
+
+
void RegExpMacroAssemblerIA32::CheckNotCharacter(uc16 c, Label* on_not_equal) {
__ cmp(edx, c);
BranchOrBacktrack(not_equal, on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
+ virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
}
+// Forwards to assembler_->CheckNotRegistersEqual with the same arguments.
+void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1,
+ int reg2,
+ Label* on_not_equal) {
+ assembler_->CheckNotRegistersEqual(reg1, reg2, on_not_equal);
+}
+
+
void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
+ virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
assembler_->CheckNotBackReferenceIgnoreCase(start_reg, on_no_match);
}
+
+// Prints a trace line naming both registers and the target label, then
+// forwards the call with the same arguments to assembler_.
+void RegExpMacroAssemblerTracer::CheckNotRegistersEqual(int reg1,
+                                                        int reg2,
+                                                        Label* on_not_equal) {
+  // The label is a pointer, so it is printed with %p (which takes a void*).
+  // Passing a pointer to an unsigned-int conversion such as %x is a
+  // format/argument mismatch and truncates the value where pointers are
+  // wider than int.
+  PrintF(" CheckNotRegistersEqual(reg1=%d, reg2=%d, label[%p]);\n",
+         reg1,
+         reg2,
+         static_cast<void*>(on_not_equal));
+  assembler_->CheckNotRegistersEqual(reg1, reg2, on_not_equal);
+}
+
+
void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure) {
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
+ virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c,
uc16 or_with,
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 minus_then_or_with,
Label* on_not_equal) = 0;
+ virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal) = 0;
// Dispatch after looking the current character up in a byte map. The
// destinations vector has up to 256 labels.
virtual void DispatchByteMap(
TEST(Graph) {
V8::Initialize(NULL);
- Execute("(?:foo|bar$)", false, true);
+ // Run a pattern that combines an end-of-input assertion ($) with a negative
+ // lookahead.
+ Execute("foo$(?!bar)", false, true);
}
assertEquals("bar$000", "foox".replace(re, "bar$000"), "$000");
assertEquals("barx", "foox".replace(re, "bar$01"), "$01 2");
assertEquals("barx5", "foox".replace(re, "bar$15"), "$15");
+
+// '$' followed by a back reference to an empty capture or by a lookahead:
+// the match must fail while input remains after "foo" ("football") and
+// succeed when "foo" ends the input.
+assertFalse(/()foo$\1/.test("football"), "football1");
+assertFalse(/foo$(?=ball)/.test("football"), "football2");
+assertFalse(/foo$(?!bar)/.test("football"), "football3");
+assertTrue(/()foo$\1/.test("foo"), "football4");
+assertTrue(/foo$(?=(ball)?)/.test("foo"), "football5");
+assertTrue(/()foo$(?!bar)/.test("foo"), "football6");
+// Same checks with an optional-character capture (x?) feeding the back
+// reference.
+// NOTE(review): football8, football9 and football11 repeat the patterns of
+// football2, football3 and football5 unchanged.
+assertFalse(/(x?)foo$\1/.test("football"), "football7");
+assertFalse(/foo$(?=ball)/.test("football"), "football8");
+assertFalse(/foo$(?!bar)/.test("football"), "football9");
+assertTrue(/(x?)foo$\1/.test("foo"), "football10");
+assertTrue(/foo$(?=(ball)?)/.test("foo"), "football11");
+assertTrue(/foo$(?!bar)/.test("foo"), "football12");
+
+// Check that the back reference has two successors. See
+// BackReferenceNode::PropagateForward.
+// In 'foo' there is no word boundary between the two o's, so \b must fail
+// there and \B must succeed before the back reference is tried.
+assertFalse(/f(o)\b\1/.test('foo'));
+assertTrue(/f(o)\B\1/.test('foo'));
+
+// '$' sits before the back reference and is not at the end of 'foo', so the
+// match must fail.
+assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");