Makes irregexp-ia32 feature complete wrt. regexps.
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 5 Dec 2008 09:18:55 +0000 (09:18 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 5 Dec 2008 09:18:55 +0000 (09:18 +0000)
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@920 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/regexp-macro-assembler-ia32.cc
src/regexp-macro-assembler-ia32.h
test/cctest/test-regexp.cc
test/mjsunit/regexp.js

index 76b4bcff5dd27a6e369c3e623d5710ce74371fd0..b1aaaf5f0e954b6ac7069882859c6de3bd73b14e 100644 (file)
@@ -27,6 +27,7 @@
 
 #include <string.h>
 #include "v8.h"
+#include "unicode.h"
 #include "log.h"
 #include "ast.h"
 #include "macro-assembler.h"
@@ -240,22 +241,97 @@ void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
 
 
 void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
-    int start_reg, Label* on_no_match) {
+    int start_reg,
+    Label* on_no_match) {
   Label fallthrough;
   __ mov(eax, register_location(start_reg));
   __ mov(ecx, register_location(start_reg + 1));
   __ sub(ecx, Operand(eax));  // Length to check.
-  __ j(less, on_no_match);
+  BranchOrBacktrack(less, on_no_match);
   __ j(equal, &fallthrough);
 
-  UNIMPLEMENTED();  // TODO(lrn): Call runtime function to do test.
+  if (mode_ == ASCII) {
+    Label success;
+    Label fail;
+    __ push(esi);
+    __ push(edi);
+    __ add(edi, Operand(esi));
+    __ add(esi, Operand(eax));
+    Label loop;
+    __ bind(&loop);
+    __ rep_cmpsb();
+    __ j(equal, &success);
+    // Compare lower-case if letters.
+    __ movzx_b(eax, Operand(edi, -1));
+    __ or_(eax, 0x20);  // To-lower-case
+    __ lea(ebx, Operand(eax, -'a'));
+    __ cmp(ebx, static_cast<int32_t>('z' - 'a'));
+    __ j(above, &fail);
+    __ movzx_b(ebx, Operand(esi, -1));
+    __ or_(ebx, 0x20);  // To-lower-case
+    __ cmp(eax, Operand(ebx));
+    __ j(not_equal, &fail);
+    __ or_(ecx, Operand(ecx));
+    __ j(not_equal, &loop);
+    __ jmp(&success);
+
+    __ bind(&fail);
+    __ pop(edi);
+    __ pop(esi);
+    BranchOrBacktrack(no_condition, on_no_match);
 
+    __ bind(&success);
+    __ pop(eax);  // discard original value of edi
+    __ pop(esi);
+    __ sub(edi, Operand(esi));
+  } else {
+    // store state
+    __ push(esi);
+    __ push(edi);
+    __ push(ecx);
+    // align stack
+    int frameAlignment = OS::ActivationFrameAlignment();
+    if (frameAlignment != 0) {
+      __ mov(ebx, esp);
+      __ sub(Operand(esp), Immediate(5 * kPointerSize));  // args + esp.
+      ASSERT(IsPowerOf2(frameAlignment));
+      __ and_(esp, -frameAlignment);
+      __ mov(Operand(esp, 4 * kPointerSize), ebx);
+    } else {
+      __ sub(Operand(esp), Immediate(4 * kPointerSize));
+    }
+    // Put arguments on stack.
+    __ mov(Operand(esp, 3 * kPointerSize), ecx);
+    __ mov(ebx, Operand(ebp, kInputEndOffset));
+    __ add(edi, Operand(ebx));
+    __ mov(Operand(esp, 2 * kPointerSize), edi);
+    __ add(eax, Operand(ebx));
+    __ mov(Operand(esp, 1 * kPointerSize), eax);
+    __ mov(eax, Operand(ebp, kInputBuffer));
+    __ mov(Operand(esp, 0 * kPointerSize), eax);
+    Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
+    __ mov(Operand(eax),
+        Immediate(reinterpret_cast<int32_t>(function_address)));
+    __ call(Operand(eax));
+    if (frameAlignment != 0) {
+      __ mov(esp, Operand(esp, 4 * kPointerSize));
+    } else {
+      __ add(Operand(esp), Immediate(4 * sizeof(int32_t)));
+    }
+    __ pop(ecx);
+    __ pop(edi);
+    __ pop(esi);
+    __ or_(eax, Operand(eax));
+    BranchOrBacktrack(zero, on_no_match);
+    __ add(edi, Operand(ecx));
+  }
   __ bind(&fallthrough);
 }
 
 
 void RegExpMacroAssemblerIA32::CheckNotBackReference(
-    int start_reg, Label* on_no_match) {
+    int start_reg,
+    Label* on_no_match) {
   Label fallthrough;
   __ mov(eax, register_location(start_reg));
   __ mov(ecx, register_location(start_reg + 1));
@@ -586,6 +662,37 @@ void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
 
 // Private methods:
 
+
+static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
+
+
+int RegExpMacroAssemblerIA32::CaseInsensitiveCompareUC16(uc16** buffer,
+                                                         int byte_offset1,
+                                                         int byte_offset2,
+                                                         size_t byte_length) {
+  ASSERT(byte_length % 2 == 0);
+  Address buffer_address = reinterpret_cast<Address>(*buffer);
+  uc16* substring1 = reinterpret_cast<uc16*>(buffer_address + byte_offset1);
+  uc16* substring2 = reinterpret_cast<uc16*>(buffer_address + byte_offset2);
+  size_t length = byte_length >> 1;
+
+  for (size_t i = 0; i < length; i++) {
+    unibrow::uchar c1 = substring1[i];
+    unibrow::uchar c2 = substring2[i];
+    if (c1 != c2) {
+      canonicalize.get(c1, '\0', &c1);
+      if (c1 != c2) {
+        canonicalize.get(c2, '\0', &c2);
+        if (c1 != c2) {
+          return 0;
+        }
+      }
+    }
+  }
+  return 1;
+}
+
+
 Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
   ASSERT(register_index < (1<<30));
   if (num_registers_ <= register_index) {
index d303910f16ad065ff39c8a689c7a45ee73ad9f23..f8afe6fea7b41764059cb96d5435cbf7a2ae2ef5 100644 (file)
@@ -119,8 +119,12 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
   static const int kRegExpConstantsSize = 256;
   // Only unroll loops up to this length.
   static const int kMaxInlineStringTests = 8;
-  // Special "character" marking end of input.
-  static const uint32_t kEndOfInput = ~0;
+
+  // Compares two-byte strings case insensitively.
+  static int CaseInsensitiveCompareUC16(uc16** buffer,
+                                        int byte_offset1,
+                                        int byte_offset2,
+                                        size_t byte_length);
 
   // The ebp-relative location of a regexp register.
   Operand register_location(int register_index);
index 8325080b1061d2a22e0a785b70ad65f182b5ac04..cf0733efaafcc8ec922dfcb346245da76f92d51f 100644 (file)
@@ -817,7 +817,6 @@ TEST(MacroAssemblerIA32BackReference) {
 }
 
 
-
 TEST(MacroAssemblerIA32AtStart) {
   V8::Initialize(NULL);
 
@@ -882,6 +881,65 @@ TEST(MacroAssemblerIA32AtStart) {
 
 
 
+
+TEST(MacroAssemblerIA32BackRefNoCase) {
+  V8::Initialize(NULL);
+
+  // regexp-macro-assembler-ia32 needs a handle scope to allocate
+  // byte-arrays for constants.
+  v8::HandleScope scope;
+
+  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
+
+  Label fail, succ;
+
+  m.WriteCurrentPositionToRegister(0);
+  m.WriteCurrentPositionToRegister(2);
+  m.AdvanceCurrentPosition(3);
+  m.WriteCurrentPositionToRegister(3);
+  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "AbC".
+  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "ABC".
+  Label expected_fail;
+  m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
+  m.Bind(&fail);
+  m.Fail();
+
+  m.Bind(&expected_fail);
+  m.AdvanceCurrentPosition(3);  // Skip "xYz"
+  m.CheckNotBackReferenceIgnoreCase(2, &succ);
+  m.Fail();
+
+  m.Bind(&succ);
+  m.WriteCurrentPositionToRegister(1);
+  m.Succeed();
+
+  Handle<Object> code_object = m.GetCode();
+  Handle<Code> code = Handle<Code>::cast(code_object);
+
+  Handle<String> input =
+      Factory::NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
+  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
+  Address start_adr = seq_input->GetCharsAddress();
+  int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
+  int end_offset = start_offset + seq_input->length();
+
+  int output[4];
+  bool success = RegExpMacroAssemblerIA32::Execute(*code,
+                                                   seq_input.location(),
+                                                   start_offset,
+                                                   end_offset,
+                                                   output,
+                                                   true);
+
+  CHECK(success);
+  CHECK_EQ(0, output[0]);
+  CHECK_EQ(12, output[1]);
+  CHECK_EQ(0, output[2]);
+  CHECK_EQ(3, output[3]);
+}
+
+
+
 TEST(MacroAssemblerIA32Registers) {
   V8::Initialize(NULL);
 
index 5c8088c6655a2676a087feaa8416bc21cf17a3e7..bbf25dc409d506a80eb9436913141e081374fc4e 100644 (file)
@@ -263,4 +263,43 @@ assertTrue(/foo$(?!bar)/.test("foo"), "football12");
 assertFalse(/f(o)\b\1/.test('foo'));
 assertTrue(/f(o)\B\1/.test('foo'));
 
+// Back-reference, ignore case:
+// ASCII
+assertEquals("xaAx,a", String(/x(a)\1x/i.exec("xaAx")), "\\1 ASCII");
+assertFalse(/x(...)\1/i.test("xaaaaa"), "\\1 ASCII, string short");
+assertTrue(/x((?:))\1\1x/i.test("xx"), "\\1 empty, ASCII");
+assertTrue(/x(?:...|(...))\1x/i.test("xabcx"), "\\1 uncaptured, ASCII");
+assertTrue(/x(?:...|(...))\1x/i.test("xabcABCx"), "\\1 backtrack, ASCII");
+assertEquals("xaBcAbCABCx,aBc",
+             String(/x(...)\1\1x/i.exec("xaBcAbCABCx")),
+             "\\1\\1 ASCII");
+
+for (var i = 0; i < 128; i++) {
+  var testName = "(.)\\1 ~ " + i + "," + (i^0x20);
+  var test = /^(.)\1$/i.test(String.fromCharCode(i, i ^ 0x20))
+  var c = String.fromCharCode(i);
+  if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
+    assertTrue(test, testName);
+  } else {
+    assertFalse(test, testName);
+  }
+}
+
+// UC16
+// Characters used:
+// "\u03a3\u03c2\u03c3\u039b\u03bb" - Sigma, final sigma, sigma, Lambda, lambda
+assertEquals("x\u03a3\u03c3x,\u03a3",
+              String(/x(.)\1x/i.exec("x\u03a3\u03c3x")), "\\1 UC16");
+assertFalse(/x(...)\1/i.test("x\u03a3\u03c2\u03c3\u03c2\u03c3"),
+            "\\1 UC16, string short");
+assertTrue(/\u03a3((?:))\1\1x/i.test("\u03c2x"), "\\1 empty, UC16");
+assertTrue(/x(?:...|(...))\1x/i.test("x\u03a3\u03c2\u03c3x"),
+           "\\1 uncaptured, UC16");
+assertTrue(/x(?:...|(...))\1x/i.test("x\u03c2\u03c3\u039b\u03a3\u03c2\u03bbx"),
+           "\\1 backtrack, UC16");
+var longUC16String = "x\u03a3\u03c2\u039b\u03c2\u03c3\u03bb\u03c3\u03a3\u03bb";
+assertEquals(longUC16String + "," + longUC16String.substring(1,4),
+             String(/x(...)\1\1/i.exec(longUC16String)),
+             "\\1\\1 UC16");
+
 assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");