Matching a back-reference must handle unbound start-register (but can assume that...
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 3 Dec 2008 13:24:34 +0000 (13:24 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 3 Dec 2008 13:24:34 +0000 (13:24 +0000)
After a back reference has been matched, the current character position is
advanced past the matched text.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@908 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/interpreter-irregexp.cc
src/jsregexp.cc
src/regexp-macro-assembler-ia32.cc
test/cctest/test-regexp.cc

index d5d1a89..f76c135 100644 (file)
@@ -333,6 +333,10 @@ static bool RawMatch(const byte* code_base,
       BYTECODE(CHECK_NOT_BACK_REF) {
         int from = registers[pc[1]];
         int len = registers[pc[1] + 1] - from;
+        if (from < 0 || len <= 0) {
+          pc += BC_CHECK_NOT_BACK_REF_LENGTH;
+          break;
+        }
         if (current + len > subject.length()) {
           pc = code_base + Load32(pc + 2);
           break;
@@ -353,6 +357,10 @@ static bool RawMatch(const byte* code_base,
       BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
         int from = registers[pc[1]];
         int len = registers[pc[1] + 1] - from;
+        if (from < 0 || len <= 0) {
+          pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
+          break;
+        }
         if (current + len > subject.length()) {
           pc = code_base + Load32(pc + 2);
           break;
index 05b8aa0..3046b96 100644 (file)
@@ -1541,10 +1541,6 @@ bool ActionNode::Emit(RegExpCompiler* compiler) {
 bool BackReferenceNode::Emit(RegExpCompiler* compiler) {
   RegExpMacroAssembler* macro = compiler->macro_assembler();
   Bind(macro);
-  // Check whether the registers are uninitialized and always
-  // succeed if they are.
-  macro->IfRegisterLT(start_reg_, 0, on_success()->label());
-  macro->IfRegisterLT(end_reg_, 0, on_success()->label());
   ASSERT_EQ(start_reg_ + 1, end_reg_);
   if (info()->at_end) {
     // If we are constrained to match at the end of the input then succeed
index e58323f..76b4bcf 100644 (file)
@@ -260,20 +260,27 @@ void RegExpMacroAssemblerIA32::CheckNotBackReference(
   __ mov(eax, register_location(start_reg));
   __ mov(ecx, register_location(start_reg + 1));
   __ sub(ecx, Operand(eax));  // Length to check.
-  __ j(less, on_no_match);
+  BranchOrBacktrack(less, on_no_match);
   __ j(equal, &fallthrough);
-  // check that there are sufficient characters left in the input
+  // Check that there are sufficient characters left in the input.
   __ mov(ebx, edi);
   __ add(ebx, Operand(ecx));
-  __ j(greater, on_no_match);
-  __ mov(ebx, Operand(edi));
-  __ push(esi);
+  BranchOrBacktrack(greater, on_no_match);
+
+  __ mov(ebx, edi);
+  __ mov(edx, esi);
   __ add(edi, Operand(esi));
   __ add(esi, Operand(eax));
   __ rep_cmpsb();
-  __ pop(esi);
-  __ mov(edi, Operand(ebx));
-  BranchOrBacktrack(not_equal, on_no_match);
+  __ mov(esi, edx);
+  Label success;
+  __ j(equal, &success);
+  __ mov(edi, ebx);
+  BranchOrBacktrack(no_condition, on_no_match);
+
+  __ bind(&success);
+  __ sub(edi, Operand(esi));
+
   __ bind(&fallthrough);
 }
 
@@ -629,7 +636,6 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
         ExternalReference::address_of_stack_guard_limit();
     __ cmp(esp, Operand::StaticVariable(stack_guard_limit));
     __ j(above, &no_preempt, taken);
-
     __ push(edi);  // Current position.
     __ push(edx);  // Current character.
     // Restore original edi, esi.
index 2212949..8325080 100644 (file)
@@ -768,6 +768,56 @@ TEST(MacroAssemblerIA32Backtrack) {
   CHECK(!success);
 }
 
+
+TEST(MacroAssemblerIA32BackReference) {
+  V8::Initialize(NULL);
+
+  // regexp-macro-assembler-ia32 needs a handle scope to allocate
+  // byte-arrays for constants.
+  v8::HandleScope scope;
+
+  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);
+
+  m.WriteCurrentPositionToRegister(0);
+  m.AdvanceCurrentPosition(2);
+  m.WriteCurrentPositionToRegister(1);
+  Label nomatch;
+  m.CheckNotBackReference(0, &nomatch);
+  m.Fail();
+  m.Bind(&nomatch);
+  m.AdvanceCurrentPosition(2);
+  Label missing_match;
+  m.CheckNotBackReference(0, &missing_match);
+  m.WriteCurrentPositionToRegister(2);
+  m.Succeed();
+  m.Bind(&missing_match);
+  m.Fail();
+
+  Handle<Object> code_object = m.GetCode();
+  Handle<Code> code = Handle<Code>::cast(code_object);
+
+  Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
+  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
+  Address start_adr = seq_input->GetCharsAddress();
+  int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
+  int end_offset = start_offset + seq_input->length();
+
+  int output[3];
+  bool success = RegExpMacroAssemblerIA32::Execute(*code,
+                                                   seq_input.location(),
+                                                   start_offset,
+                                                   end_offset,
+                                                   output,
+                                                   true);
+
+  CHECK(success);
+  CHECK_EQ(0, output[0]);
+  CHECK_EQ(2, output[1]);
+  CHECK_EQ(6, output[2]);
+}
+
+
+
 TEST(MacroAssemblerIA32AtStart) {
   V8::Initialize(NULL);