#include <string.h>
#include "v8.h"
+#include "unicode.h"
#include "log.h"
#include "ast.h"
#include "macro-assembler.h"
// Emits code that compares the input at the current position, ignoring
// case, with the text delimited by registers start_reg and start_reg + 1.
// Control goes to on_no_match (through BranchOrBacktrack) when the length
// is negative or the comparison fails; a zero-length range falls through
// without comparing anything. In ASCII mode the comparison is emitted
// inline; otherwise the emitted code calls CaseInsensitiveCompareUC16.
// NOTE(review): edi appears to hold the current position as an offset and
// esi a base pointer it is relative to -- confirm against the rest of the
// class, which is not visible here.
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
- int start_reg, Label* on_no_match) {
+ int start_reg,
+ Label* on_no_match) {
Label fallthrough;
__ mov(eax, register_location(start_reg));  // eax = value of register start_reg.
__ mov(ecx, register_location(start_reg + 1));  // ecx = value of register start_reg + 1.
__ sub(ecx, Operand(eax)); // Length to check.
- __ j(less, on_no_match);
+ BranchOrBacktrack(less, on_no_match);  // Negative length: no match.
__ j(equal, &fallthrough);  // Zero length: nothing to compare.
- UNIMPLEMENTED(); // TODO(lrn): Call runtime function to do test.
+ if (mode_ == ASCII) {
+ Label success;
+ Label fail;
+ __ push(esi);  // Saved here; restored on both the fail and success paths.
+ __ push(edi);
// NOTE(review): no check is visible here that ecx bytes remain in the
// input before the comparison starts -- confirm it is handled elsewhere.
+ __ add(edi, Operand(esi));  // edi = esi + edi.
+ __ add(esi, Operand(eax));  // esi = esi + eax.
+ Label loop;
+ __ bind(&loop);
+ __ rep_cmpsb();  // Byte compare through esi/edi, counted by ecx.
+ __ j(equal, &success);  // Ended with equal bytes: everything matched.
+ // Compare lower-case if letters.
+ __ movzx_b(eax, Operand(edi, -1));  // Byte just compared on the edi side.
+ __ or_(eax, 0x20); // To-lower-case
+ __ lea(ebx, Operand(eax, -'a'));
+ __ cmp(ebx, static_cast<int32_t>('z' - 'a'));
+ __ j(above, &fail);  // Unsigned range check: not in 'a'..'z' after folding.
+ __ movzx_b(ebx, Operand(esi, -1));  // Byte just compared on the esi side.
+ __ or_(ebx, 0x20); // To-lower-case
+ __ cmp(eax, Operand(ebx));
+ __ j(not_equal, &fail);  // Bytes differ even after case folding.
+ __ or_(ecx, Operand(ecx));  // Sets flags from ecx (remaining count).
+ __ j(not_equal, &loop);  // Bytes left: resume the comparison.
+ __ jmp(&success);
+
+ __ bind(&fail);
+ __ pop(edi);  // Restore both registers before leaving.
+ __ pop(esi);
+ BranchOrBacktrack(no_condition, on_no_match);
+ __ bind(&success);
+ __ pop(eax); // discard original value of edi
+ __ pop(esi);
+ __ sub(edi, Operand(esi));  // Undo the earlier add: edi is an offset from esi again.
+ } else {
+ // store state
+ __ push(esi);
+ __ push(edi);
+ __ push(ecx);
+ // align stack
+ int frameAlignment = OS::ActivationFrameAlignment();
+ if (frameAlignment != 0) {
+ __ mov(ebx, esp);  // Remember the unaligned esp.
+ __ sub(Operand(esp), Immediate(5 * kPointerSize)); // args + esp.
+ ASSERT(IsPowerOf2(frameAlignment));
+ __ and_(esp, -frameAlignment);  // Round esp down to the alignment.
+ __ mov(Operand(esp, 4 * kPointerSize), ebx);  // Old esp goes above the four argument slots.
+ } else {
+ __ sub(Operand(esp), Immediate(4 * kPointerSize));  // Four argument slots only.
+ }
+ // Put arguments on stack.
// Slots 0..3 are filled in the parameter order of
// CaseInsensitiveCompareUC16 (buffer, byte_offset1, byte_offset2,
// byte_length), presumably per the IA32 C calling convention.
+ __ mov(Operand(esp, 3 * kPointerSize), ecx);  // Slot 3: length from the subtraction above.
+ __ mov(ebx, Operand(ebp, kInputEndOffset));
+ __ add(edi, Operand(ebx));
+ __ mov(Operand(esp, 2 * kPointerSize), edi);  // Slot 2: edi + frame value at kInputEndOffset.
+ __ add(eax, Operand(ebx));
+ __ mov(Operand(esp, 1 * kPointerSize), eax);  // Slot 1: eax + frame value at kInputEndOffset.
+ __ mov(eax, Operand(ebp, kInputBuffer));
+ __ mov(Operand(esp, 0 * kPointerSize), eax);  // Slot 0: frame value at kInputBuffer.
+ Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
+ __ mov(Operand(eax),
+ Immediate(reinterpret_cast<int32_t>(function_address)));
+ __ call(Operand(eax));
+ if (frameAlignment != 0) {
+ __ mov(esp, Operand(esp, 4 * kPointerSize));  // Reload the esp saved before aligning.
+ } else {
// NOTE(review): releases the slots reserved above with 4 * kPointerSize;
// the two sizes agree only if kPointerSize == sizeof(int32_t) -- confirm.
+ __ add(Operand(esp), Immediate(4 * sizeof(int32_t)));
+ }
+ __ pop(ecx);  // Restore the state pushed before the call.
+ __ pop(edi);
+ __ pop(esi);
+ __ or_(eax, Operand(eax));  // Test the call's result.
+ BranchOrBacktrack(zero, on_no_match);  // Zero result: no match.
+ __ add(edi, Operand(ecx));  // Matched: advance edi by the compared length.
+ }
__ bind(&fallthrough);
}
void RegExpMacroAssemblerIA32::CheckNotBackReference(
- int start_reg, Label* on_no_match) {
+ int start_reg,
+ Label* on_no_match) {
Label fallthrough;
__ mov(eax, register_location(start_reg));
__ mov(ecx, register_location(start_reg + 1));
// Private methods:
+
+static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;  // File-local mapper used by the comparison below.
+
+
// Compares two equally long runs of uc16 code units for equality, treating
// code units as equal when they are identical or agree after being passed
// through the Ecma262Canonicalize mapping.
//   buffer        pointer to the base pointer; both offsets are applied to
//                 the address *buffer.
//   byte_offset1  byte offset of the first run from that address.
//   byte_offset2  byte offset of the second run from that address.
//   byte_length   length of each run in bytes; asserted to be even.
// Returns 1 when every pair of code units agrees, 0 otherwise.
+int RegExpMacroAssemblerIA32::CaseInsensitiveCompareUC16(uc16** buffer,
+ int byte_offset1,
+ int byte_offset2,
+ size_t byte_length) {
+ ASSERT(byte_length % 2 == 0);
+ Address buffer_address = reinterpret_cast<Address>(*buffer);
+ uc16* substring1 = reinterpret_cast<uc16*>(buffer_address + byte_offset1);
+ uc16* substring2 = reinterpret_cast<uc16*>(buffer_address + byte_offset2);
+ size_t length = byte_length >> 1;  // Length in uc16 units.
+
+ for (size_t i = 0; i < length; i++) {
+ unibrow::uchar c1 = substring1[i];
+ unibrow::uchar c2 = substring2[i];
+ if (c1 != c2) {  // Identical units need no mapping.
+ canonicalize.get(c1, '\0', &c1);  // Presumably rewrites c1 to its canonical form in place.
+ if (c1 != c2) {  // Mapping c1 alone was not enough; map c2 as well.
+ canonicalize.get(c2, '\0', &c2);
+ if (c1 != c2) {
+ return 0;
+ }
+ }
+ }
+ }
+ return 1;
+}
+
+
Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
ASSERT(register_index < (1<<30));
if (num_registers_ <= register_index) {
}
-
TEST(MacroAssemblerIA32AtStart) {
V8::Initialize(NULL);
+
// Exercises CheckNotBackReferenceIgnoreCase in ASCII mode: registers 2 and
// 3 delimit the first three input characters, and the generated program
// then checks the following text against that range four times -- two
// checks that must match, one that must not, and one near the end of the
// input that must not.
+TEST(MacroAssemblerIA32BackRefNoCase) {
+ V8::Initialize(NULL);
+
+ // regexp-macro-assembler-ia32 needs a handle scope to allocate
+ // byte-arrays for constants.
+ v8::HandleScope scope;
+
+ RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
+
+ Label fail, succ;
+
+ m.WriteCurrentPositionToRegister(0);
+ m.WriteCurrentPositionToRegister(2);  // Start of the reference range.
+ m.AdvanceCurrentPosition(3);  // Step over "aBc".
+ m.WriteCurrentPositionToRegister(3);  // End of the reference range.
+ m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
+ m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
+ Label expected_fail;
+ m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);  // "xYz" must not match "aBc".
+ m.Bind(&fail);
+ m.Fail();
+
+ m.Bind(&expected_fail);
+ m.AdvanceCurrentPosition(3); // Skip "xYz"
+ m.CheckNotBackReferenceIgnoreCase(2, &succ);  // Only "ab" remains, so no match is expected.
+ m.Fail();
+
+ m.Bind(&succ);
+ m.WriteCurrentPositionToRegister(1);  // Checked below to be 12.
+ m.Succeed();
+
+ Handle<Object> code_object = m.GetCode();
+ Handle<Code> code = Handle<Code>::cast(code_object);
+
+ Handle<String> input =
+ Factory::NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
+ Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
+ Address start_adr = seq_input->GetCharsAddress();
+ int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);  // Offset of the characters from the string object's address.
+ int end_offset = start_offset + seq_input->length();
+
+ int output[4];  // One slot per register requested in the constructor above.
+ bool success = RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ output,
+ true);
+
+ CHECK(success);
+ CHECK_EQ(0, output[0]);
+ CHECK_EQ(12, output[1]);
+ CHECK_EQ(0, output[2]);
+ CHECK_EQ(3, output[3]);
+}
+
+
+
TEST(MacroAssemblerIA32Registers) {
V8::Initialize(NULL);
assertFalse(/f(o)\b\1/.test('foo'));
assertTrue(/f(o)\B\1/.test('foo'));
+// Back-reference, ignore case:
+// ASCII
+assertEquals("xaAx,a", String(/x(a)\1x/i.exec("xaAx")), "\\1 ASCII");
+assertFalse(/x(...)\1/i.test("xaaaaa"), "\\1 ASCII, string short");  // Five characters follow "x"; capture plus back-reference need six.
+assertTrue(/x((?:))\1\1x/i.test("xx"), "\\1 empty, ASCII");  // Group 1 captures the empty string.
+assertTrue(/x(?:...|(...))\1x/i.test("xabcx"), "\\1 uncaptured, ASCII");  // First alternative matches, so group 1 never captures.
+assertTrue(/x(?:...|(...))\1x/i.test("xabcABCx"), "\\1 backtrack, ASCII");  // Only the capturing alternative can consume "ABC" via \1.
+assertEquals("xaBcAbCABCx,aBc",
+ String(/x(...)\1\1x/i.exec("xaBcAbCABCx")),
+ "\\1\\1 ASCII");
+
// For every code 0..127, pair the character with the one whose code differs
// only in bit 0x20. The case-insensitive back-reference is expected to match
// exactly when the character is an ASCII letter.
+for (var i = 0; i < 128; i++) {
+ var testName = "(.)\\1 ~ " + i + "," + (i^0x20);
+ var test = /^(.)\1$/i.test(String.fromCharCode(i, i ^ 0x20))
+ var c = String.fromCharCode(i);
+ if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
+ assertTrue(test, testName);
+ } else {
+ assertFalse(test, testName);
+ }
+}
+
+// UC16
+// Characters used:
+// "\u03a3\u03c2\u03c3\u039b\u03bb" - Sigma, final sigma, sigma, Lambda, lamda
+assertEquals("x\u03a3\u03c3x,\u03a3",
+ String(/x(.)\1x/i.exec("x\u03a3\u03c3x")), "\\1 UC16");
+assertFalse(/x(...)\1/i.test("x\u03a3\u03c2\u03c3\u03c2\u03c3"),
+ "\\1 ASCII, string short");
+assertTrue(/\u03a3((?:))\1\1x/i.test("\u03c2x"), "\\1 empty, UC16");
+assertTrue(/x(?:...|(...))\1x/i.test("x\u03a3\u03c2\u03c3x"),
+ "\\1 uncaptured, UC16");
+assertTrue(/x(?:...|(...))\1x/i.test("x\u03c2\u03c3\u039b\u03a3\u03c2\u03bbx"),
+ "\\1 backtrack, UC16");
+var longUC16String = "x\u03a3\u03c2\u039b\u03c2\u03c3\u03bb\u03c3\u03a3\u03bb";
+assertEquals(longUC16String + "," + longUC16String.substring(1,4),
+ String(/x(...)\1\1/i.exec(longUC16String)),
+ "\\1\\1 UC16");
+
assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");