From 263bac505bc6a867b9f3f81306c52c7232f946b0 Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Tue, 20 Jan 2009 18:50:01 +0000 Subject: [PATCH] * Irregexp: Move from a byte-oriented bytecode format to a 32-bit oriented bytecode format. This provides a nice speedup on Intel and probably an even better one on ARM. Also removes the 256-register limitation on the interpreter. Review URL: http://codereview.chromium.org/18363 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1111 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/bytecodes-irregexp.h | 95 +++++++------- src/interpreter-irregexp.cc | 206 ++++++++++++++++++------------ src/regexp-macro-assembler-irregexp-inl.h | 8 +- src/regexp-macro-assembler-irregexp.cc | 127 +++++++++--------- src/regexp-macro-assembler-irregexp.h | 2 +- 5 files changed, 245 insertions(+), 193 deletions(-) diff --git a/src/bytecodes-irregexp.h b/src/bytecodes-irregexp.h index 16d06ef..c7cb908 100644 --- a/src/bytecodes-irregexp.h +++ b/src/bytecodes-irregexp.h @@ -31,50 +31,59 @@ namespace v8 { namespace internal { + +static const int BYTECODE_MASK = 0xff; +static const unsigned int MAX_FIRST_ARG = 0x7fffffu; /* 23 bits: the interpreter decodes the packed arg with a signed >>, so bit 23 would sign-extend */ +static const int BYTECODE_SHIFT = 8; + #define BYTECODE_ITERATOR(V) \ -V(BREAK, 0, 1) /* break */ \ -V(PUSH_CP, 1, 5) /* push_cp offset32 */ \ -V(PUSH_BT, 2, 5) /* push_bt addr32 */ \ -V(PUSH_REGISTER, 3, 2) /* push_register register_index */ \ -V(SET_REGISTER_TO_CP, 4, 6) /* set_register_to_cp register_index offset32 */ \ -V(SET_CP_TO_REGISTER, 5, 2) /* set_cp_to_registger register_index */ \ -V(SET_REGISTER_TO_SP, 6, 2) /* set_register_to_sp register_index */ \ -V(SET_SP_TO_REGISTER, 7, 2) /* set_sp_to_registger register_index */ \ -V(SET_REGISTER, 8, 6) /* set_register register_index value32 */ \ -V(ADVANCE_REGISTER, 9, 6) /* advance_register register_index value32 */ \ -V(POP_CP, 10, 1) /* pop_cp */ \ -V(POP_BT, 11, 1) /* pop_bt */ \ -V(POP_REGISTER, 12, 2) /* pop_register register_index */ \ -V(FAIL, 13, 1) 
/* fail */ \ -V(SUCCEED, 14, 1) /* succeed */ \ -V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \ -V(GOTO, 16, 5) /* goto addr32 */ \ -V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \ -V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32 */ \ -V(LOAD_2_CURRENT_CHARS, 19, 9) /* load offset32 addr32 */ \ -V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 5) /* load offset32 */ \ -V(LOAD_4_CURRENT_CHARS, 21, 9) /* load offset32 addr32 */ \ -V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 5) /* load offset32 */ \ -V(CHECK_CHAR, 23, 9) /* check_char uint32 addr32 */ \ -V(CHECK_NOT_CHAR, 24, 9) /* check_not_char uint32 addr32 */ \ -V(AND_CHECK_CHAR, 25, 13) /* and_check_char uint32 uint32 addr32 */ \ -V(AND_CHECK_NOT_CHAR, 26, 13) /* and_check_not_char uint32 uint32 addr32 */ \ -V(MINUS_AND_CHECK_NOT_CHAR, 27, 11) /* minus_and_check_not_char uc16 uc16...*/ \ -V(CHECK_LT, 28, 7) /* check_lt uc16 addr32 */ \ -V(CHECK_GT, 29, 7) /* check_gr uc16 addr32 */ \ -V(CHECK_NOT_BACK_REF, 30, 6) /* check_not_back_ref capture_idx addr32 */ \ -V(CHECK_NOT_BACK_REF_NO_CASE, 31, 6) /* check_not_back_ref_no_case captu... 
*/ \ -V(CHECK_NOT_REGS_EQUAL, 32, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \ -V(LOOKUP_MAP1, 33, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \ -V(LOOKUP_MAP2, 34, 99) /* l_map2 start16 half_nibble_map_addr32* */ \ -V(LOOKUP_MAP8, 35, 99) /* l_map8 start16 byte_map addr32* */ \ -V(LOOKUP_HI_MAP8, 36, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \ -V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16 addr32 */ \ -V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16 addr32 */ \ -V(CHECK_REGISTER_EQ_POS, 39, 6) /* check_register_eq_pos index addr32 */ \ -V(CHECK_AT_START, 40, 5) /* check_at_start addr32 */ \ -V(CHECK_NOT_AT_START, 41, 5) /* check_not_at_start addr32 */ \ -V(CHECK_GREEDY, 42, 5) /* check_greedy addr32 */ +V(BREAK, 0, 4) /* bc8 */ \ +V(PUSH_CP, 1, 4) /* bc8 pad24 */ \ +V(PUSH_BT, 2, 8) /* bc8 pad24 offset32 */ \ +V(PUSH_REGISTER, 3, 4) /* bc8 reg_idx24 */ \ +V(SET_REGISTER_TO_CP, 4, 8) /* bc8 reg_idx24 offset32 */ \ +V(SET_CP_TO_REGISTER, 5, 4) /* bc8 reg_idx24 */ \ +V(SET_REGISTER_TO_SP, 6, 4) /* bc8 reg_idx24 */ \ +V(SET_SP_TO_REGISTER, 7, 4) /* bc8 reg_idx24 */ \ +V(SET_REGISTER, 8, 8) /* bc8 reg_idx24 value32 */ \ +V(ADVANCE_REGISTER, 9, 8) /* bc8 reg_idx24 value32 */ \ +V(POP_CP, 10, 4) /* bc8 pad24 */ \ +V(POP_BT, 11, 4) /* bc8 pad24 */ \ +V(POP_REGISTER, 12, 4) /* bc8 reg_idx24 */ \ +V(FAIL, 13, 4) /* bc8 pad24 */ \ +V(SUCCEED, 14, 4) /* bc8 pad24 */ \ +V(ADVANCE_CP, 15, 4) /* bc8 offset24 */ \ +V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \ +V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32 */ \ +V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \ +V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \ +V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \ +V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \ +V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \ +V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \ +V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \ 
+V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \ +V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \ +V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \ +V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ +V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \ +V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ +V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */ \ +V(CHECK_LT, 32, 8) /* bc8 pad8 uc16 addr32 */ \ +V(CHECK_GT, 33, 8) /* bc8 pad8 uc16 addr32 */ \ +V(CHECK_NOT_BACK_REF, 34, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_NOT_BACK_REF_NO_CASE, 35, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_NOT_REGS_EQUAL, 36, 12) /* bc8 reg_idx24 reg_idx32 addr32 */ \ +V(LOOKUP_MAP1, 37, 12) /* bc8 pad8 start16 bit_map_addr32 addr32 */ \ +V(LOOKUP_MAP2, 38, 96) /* bc8 pad8 start16 half_nibble_map_addr32* */ \ +V(LOOKUP_MAP8, 39, 96) /* bc8 pad8 start16 byte_map addr32* */ \ +V(LOOKUP_HI_MAP8, 40, 96) /* bc8 start24 byte_map_addr32 addr32* */ \ +V(CHECK_REGISTER_LT, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \ +V(CHECK_REGISTER_GE, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \ +V(CHECK_REGISTER_EQ_POS, 43, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_AT_START, 44, 8) /* bc8 pad24 addr32 */ \ +V(CHECK_NOT_AT_START, 45, 8) /* bc8 pad24 addr32 */ \ +V(CHECK_GREEDY, 46, 8) /* bc8 pad24 addr32 */ #define DECLARE_BYTECODES(name, code, length) \ static const int BC_##name = code; diff --git a/src/interpreter-irregexp.cc b/src/interpreter-irregexp.cc index 0ce0d33..0b28a95 100644 --- a/src/interpreter-irregexp.cc +++ b/src/interpreter-irregexp.cc @@ -97,7 +97,7 @@ static void TraceInterpreter(const byte* code_base, current_char, printable ? 
current_char : '.', bytecode_name); - for (int i = 1; i < bytecode_length; i++) { + for (int i = 0; i < bytecode_length; i++) { printf(", %02x", pc[i]); } printf(" "); @@ -129,6 +129,17 @@ static void TraceInterpreter(const byte* code_base, #endif +static int32_t Load32Aligned(const byte* pc) { + ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0); + return *reinterpret_cast<const int32_t*>(pc); +} + + +static int32_t Load16Aligned(const byte* pc) { + ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0); + return *reinterpret_cast<const uint16_t*>(pc); +} + template <typename Char> static bool RawMatch(const byte* code_base, @@ -147,7 +158,8 @@ static bool RawMatch(const byte* code_base, } #endif while (true) { - switch (*pc) { + int32_t insn = Load32Aligned(pc); + switch (insn & BYTECODE_MASK) { BYTECODE(BREAK) UNREACHABLE(); return false; @@ -155,45 +167,45 @@ static bool RawMatch(const byte* code_base, if (--backtrack_stack_space < 0) { return false; // No match on backtrack stack overflow. } - *backtrack_sp++ = current + Load32(pc + 1); + *backtrack_sp++ = current; pc += BC_PUSH_CP_LENGTH; break; BYTECODE(PUSH_BT) if (--backtrack_stack_space < 0) { return false; // No match on backtrack stack overflow. } - *backtrack_sp++ = Load32(pc + 1); + *backtrack_sp++ = Load32Aligned(pc + 4); pc += BC_PUSH_BT_LENGTH; break; BYTECODE(PUSH_REGISTER) if (--backtrack_stack_space < 0) { return false; // No match on backtrack stack overflow. 
} - *backtrack_sp++ = registers[pc[1]]; + *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; pc += BC_PUSH_REGISTER_LENGTH; break; BYTECODE(SET_REGISTER) - registers[pc[1]] = Load32(pc + 2); + registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); pc += BC_SET_REGISTER_LENGTH; break; BYTECODE(ADVANCE_REGISTER) - registers[pc[1]] += Load32(pc + 2); + registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); pc += BC_ADVANCE_REGISTER_LENGTH; break; BYTECODE(SET_REGISTER_TO_CP) - registers[pc[1]] = current + Load32(pc + 2); + registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); pc += BC_SET_REGISTER_TO_CP_LENGTH; break; BYTECODE(SET_CP_TO_REGISTER) - current = registers[pc[1]]; + current = registers[insn >> BYTECODE_SHIFT]; pc += BC_SET_CP_TO_REGISTER_LENGTH; break; BYTECODE(SET_REGISTER_TO_SP) - registers[pc[1]] = backtrack_sp - backtrack_stack; + registers[insn >> BYTECODE_SHIFT] = backtrack_sp - backtrack_stack; pc += BC_SET_REGISTER_TO_SP_LENGTH; break; BYTECODE(SET_SP_TO_REGISTER) - backtrack_sp = backtrack_stack + registers[pc[1]]; + backtrack_sp = backtrack_stack + registers[insn >> BYTECODE_SHIFT]; backtrack_stack_space = kBacktrackStackSize - (backtrack_sp - backtrack_stack); pc += BC_SET_SP_TO_REGISTER_LENGTH; @@ -212,7 +224,7 @@ static bool RawMatch(const byte* code_base, BYTECODE(POP_REGISTER) backtrack_stack_space++; --backtrack_sp; - registers[pc[1]] = *backtrack_sp; + registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; pc += BC_POP_REGISTER_LENGTH; break; BYTECODE(FAIL) @@ -220,25 +232,25 @@ static bool RawMatch(const byte* code_base, BYTECODE(SUCCEED) return true; BYTECODE(ADVANCE_CP) - current += Load32(pc + 1); + current += insn >> BYTECODE_SHIFT; pc += BC_ADVANCE_CP_LENGTH; break; BYTECODE(GOTO) - pc = code_base + Load32(pc + 1); + pc = code_base + Load32Aligned(pc + 4); break; BYTECODE(CHECK_GREEDY) if (current == backtrack_sp[-1]) { backtrack_sp--; backtrack_stack_space++; - pc = code_base + Load32(pc + 1); + pc = code_base 
+ Load32Aligned(pc + 4); } else { pc += BC_CHECK_GREEDY_LENGTH; } break; BYTECODE(LOAD_CURRENT_CHAR) { - int pos = current + Load32(pc + 1); + int pos = current + (insn >> BYTECODE_SHIFT); if (pos >= subject.length()) { - pc = code_base + Load32(pc + 5); + pc = code_base + Load32Aligned(pc + 4); } else { current_char = subject[pos]; pc += BC_LOAD_CURRENT_CHAR_LENGTH; @@ -246,15 +258,15 @@ static bool RawMatch(const byte* code_base, break; } BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { - int pos = current + Load32(pc + 1); + int pos = current + (insn >> BYTECODE_SHIFT); current_char = subject[pos]; pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH; break; } BYTECODE(LOAD_2_CURRENT_CHARS) { - int pos = current + Load32(pc + 1); + int pos = current + (insn >> BYTECODE_SHIFT); if (pos + 2 > subject.length()) { - pc = code_base + Load32(pc + 5); + pc = code_base + Load32Aligned(pc + 4); } else { Char next = subject[pos + 1]; current_char = @@ -264,7 +276,7 @@ static bool RawMatch(const byte* code_base, break; } BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { - int pos = current + Load32(pc + 1); + int pos = current + (insn >> BYTECODE_SHIFT); Char next = subject[pos + 1]; current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char)))); pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH; @@ -272,9 +284,9 @@ static bool RawMatch(const byte* code_base, } BYTECODE(LOAD_4_CURRENT_CHARS) { ASSERT(sizeof(Char) == 1); - int pos = current + Load32(pc + 1); + int pos = current + (insn >> BYTECODE_SHIFT); if (pos + 4 > subject.length()) { - pc = code_base + Load32(pc + 5); + pc = code_base + Load32Aligned(pc + 4); } else { Char next1 = subject[pos + 1]; Char next2 = subject[pos + 2]; @@ -289,7 +301,7 @@ static bool RawMatch(const byte* code_base, } BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) { ASSERT(sizeof(Char) == 1); - int pos = current + Load32(pc + 1); + int pos = current + (insn >> BYTECODE_SHIFT); Char next1 = subject[pos + 1]; Char next2 = subject[pos + 2]; Char next3 = subject[pos + 3]; 
@@ -300,100 +312,136 @@ static bool RawMatch(const byte* code_base, pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH; break; } + BYTECODE(CHECK_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c == current_char) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_CHECK_4_CHARS_LENGTH; + } + break; + } BYTECODE(CHECK_CHAR) { - uint32_t c = Load32(pc + 1); + uint32_t c = (insn >> BYTECODE_SHIFT); if (c == current_char) { - pc = code_base + Load32(pc + 5); + pc = code_base + Load32Aligned(pc + 4); } else { pc += BC_CHECK_CHAR_LENGTH; } break; } + BYTECODE(CHECK_NOT_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c != current_char) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_CHECK_NOT_4_CHARS_LENGTH; + } + break; + } BYTECODE(CHECK_NOT_CHAR) { - uint32_t c = Load32(pc + 1); + uint32_t c = (insn >> BYTECODE_SHIFT); if (c != current_char) { - pc = code_base + Load32(pc + 5); + pc = code_base + Load32Aligned(pc + 4); } else { pc += BC_CHECK_NOT_CHAR_LENGTH; } break; } + BYTECODE(AND_CHECK_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c == (current_char & Load32Aligned(pc + 8))) { + pc = code_base + Load32Aligned(pc + 12); + } else { + pc += BC_AND_CHECK_4_CHARS_LENGTH; + } + break; + } BYTECODE(AND_CHECK_CHAR) { - uint32_t c = Load32(pc + 1); - if (c == (current_char & Load32(pc + 5))) { - pc = code_base + Load32(pc + 9); + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c == (current_char & Load32Aligned(pc + 4))) { + pc = code_base + Load32Aligned(pc + 8); } else { pc += BC_AND_CHECK_CHAR_LENGTH; } break; } + BYTECODE(AND_CHECK_NOT_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c != (current_char & Load32Aligned(pc + 8))) { + pc = code_base + Load32Aligned(pc + 12); + } else { + pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH; + } + break; + } BYTECODE(AND_CHECK_NOT_CHAR) { - uint32_t c = Load32(pc + 1); - if (c != (current_char & Load32(pc + 5))) { - pc = code_base + Load32(pc + 9); + uint32_t c = (insn >> 
BYTECODE_SHIFT); + if (c != (current_char & Load32Aligned(pc + 4))) { + pc = code_base + Load32Aligned(pc + 8); } else { pc += BC_AND_CHECK_NOT_CHAR_LENGTH; } break; } BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { - uint32_t c = Load16(pc + 1); - uint32_t minus = Load16(pc + 3); - uint32_t mask = Load16(pc + 5); + uint32_t c = (insn >> BYTECODE_SHIFT); + uint32_t minus = Load16Aligned(pc + 4); + uint32_t mask = Load16Aligned(pc + 6); if (c != ((current_char - minus) & mask)) { - pc = code_base + Load32(pc + 7); + pc = code_base + Load32Aligned(pc + 8); } else { pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH; } break; } BYTECODE(CHECK_LT) { - uint32_t limit = Load16(pc + 1); + uint32_t limit = (insn >> BYTECODE_SHIFT); if (current_char < limit) { - pc = code_base + Load32(pc + 3); + pc = code_base + Load32Aligned(pc + 4); } else { pc += BC_CHECK_LT_LENGTH; } break; } BYTECODE(CHECK_GT) { - uint32_t limit = Load16(pc + 1); + uint32_t limit = (insn >> BYTECODE_SHIFT); if (current_char > limit) { - pc = code_base + Load32(pc + 3); + pc = code_base + Load32Aligned(pc + 4); } else { pc += BC_CHECK_GT_LENGTH; } break; } BYTECODE(CHECK_REGISTER_LT) - if (registers[pc[1]] < Load16(pc + 2)) { - pc = code_base + Load32(pc + 4); + if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) { + pc = code_base + Load32Aligned(pc + 8); } else { pc += BC_CHECK_REGISTER_LT_LENGTH; } break; BYTECODE(CHECK_REGISTER_GE) - if (registers[pc[1]] >= Load16(pc + 2)) { - pc = code_base + Load32(pc + 4); + if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) { + pc = code_base + Load32Aligned(pc + 8); } else { pc += BC_CHECK_REGISTER_GE_LENGTH; } break; BYTECODE(CHECK_REGISTER_EQ_POS) - if (registers[pc[1]] == current) { - pc = code_base + Load32(pc + 2); + if (registers[insn >> BYTECODE_SHIFT] == current) { + pc = code_base + Load32Aligned(pc + 4); } else { pc += BC_CHECK_REGISTER_EQ_POS_LENGTH; } break; BYTECODE(LOOKUP_MAP1) { // Look up character in a bitmap. 
If we find a 0, then jump to the - // location at pc + 7. Otherwise fall through! - int index = current_char - Load16(pc + 1); - byte map = code_base[Load32(pc + 3) + (index >> 3)]; + // location at pc + 8. Otherwise fall through! + int index = current_char - (insn >> BYTECODE_SHIFT); + byte map = code_base[Load32Aligned(pc + 4) + (index >> 3)]; map = ((map >> (index & 7)) & 1); if (map == 0) { - pc = code_base + Load32(pc + 7); + pc = code_base + Load32Aligned(pc + 8); } else { pc += BC_LOOKUP_MAP1_LENGTH; } @@ -401,22 +449,22 @@ static bool RawMatch(const byte* code_base, } BYTECODE(LOOKUP_MAP2) { // Look up character in a half-nibble map. If we find 00, then jump to - // the location at pc + 7. If we find 01 then jump to location at + // the location at pc + 8. If we find 01 then jump to location at // pc + 11, etc. - int index = (current_char - Load16(pc + 1)) << 1; - byte map = code_base[Load32(pc + 3) + (index >> 3)]; + int index = (current_char - (insn >> BYTECODE_SHIFT)) << 1; + byte map = code_base[Load32Aligned(pc + 4) + (index >> 3)]; map = ((map >> (index & 7)) & 3); if (map < 2) { if (map == 0) { - pc = code_base + Load32(pc + 7); + pc = code_base + Load32Aligned(pc + 8); } else { - pc = code_base + Load32(pc + 11); + pc = code_base + Load32Aligned(pc + 12); } } else { if (map == 2) { - pc = code_base + Load32(pc + 15); + pc = code_base + Load32Aligned(pc + 16); } else { - pc = code_base + Load32(pc + 19); + pc = code_base + Load32Aligned(pc + 20); } } break; @@ -424,43 +472,43 @@ static bool RawMatch(const byte* code_base, BYTECODE(LOOKUP_MAP8) { // Look up character in a byte map. Use the byte as an index into a // table that follows this instruction immediately. 
- int index = current_char - Load16(pc + 1); - byte map = code_base[Load32(pc + 3) + index]; - const byte* new_pc = code_base + Load32(pc + 7) + (map << 2); - pc = code_base + Load32(new_pc); + int index = current_char - (insn >> BYTECODE_SHIFT); + byte map = code_base[Load32Aligned(pc + 4) + index]; + const byte* new_pc = code_base + Load32Aligned(pc + 8) + (map << 2); + pc = code_base + Load32Aligned(new_pc); break; } BYTECODE(LOOKUP_HI_MAP8) { // Look up high byte of this character in a byte map. Use the byte as // an index into a table that follows this instruction immediately. - int index = (current_char >> 8) - pc[1]; - byte map = code_base[Load32(pc + 2) + index]; - const byte* new_pc = code_base + Load32(pc + 6) + (map << 2); - pc = code_base + Load32(new_pc); + int index = (current_char >> 8) - (insn >> BYTECODE_SHIFT); + byte map = code_base[Load32Aligned(pc + 4) + index]; + const byte* new_pc = code_base + Load32Aligned(pc + 8) + (map << 2); + pc = code_base + Load32Aligned(new_pc); break; } BYTECODE(CHECK_NOT_REGS_EQUAL) - if (registers[pc[1]] == registers[pc[2]]) { + if (registers[insn >> BYTECODE_SHIFT] == registers[Load32Aligned(pc + 4)]) { pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH; } else { - pc = code_base + Load32(pc + 3); + pc = code_base + Load32Aligned(pc + 8); } break; BYTECODE(CHECK_NOT_BACK_REF) { - int from = registers[pc[1]]; - int len = registers[pc[1] + 1] - from; + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; if (from < 0 || len <= 0) { pc += BC_CHECK_NOT_BACK_REF_LENGTH; break; } if (current + len > subject.length()) { - pc = code_base + Load32(pc + 2); + pc = code_base + Load32Aligned(pc + 4); break; } else { int i; for (i = 0; i < len; i++) { if (subject[from + i] != subject[current + i]) { - pc = code_base + Load32(pc + 2); + pc = code_base + Load32Aligned(pc + 4); break; } } @@ -471,28 +519,28 @@ static bool RawMatch(const byte* code_base, break; } 
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { - int from = registers[pc[1]]; - int len = registers[pc[1] + 1] - from; + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; if (from < 0 || len <= 0) { pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; break; } if (current + len > subject.length()) { - pc = code_base + Load32(pc + 2); + pc = code_base + Load32Aligned(pc + 4); break; } else { if (BackRefMatchesNoCase(from, current, len, subject)) { current += len; pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; } else { - pc = code_base + Load32(pc + 2); + pc = code_base + Load32Aligned(pc + 4); } } break; } BYTECODE(CHECK_AT_START) if (current == 0) { - pc = code_base + Load32(pc + 1); + pc = code_base + Load32Aligned(pc + 4); } else { pc += BC_CHECK_AT_START_LENGTH; } @@ -501,7 +549,7 @@ static bool RawMatch(const byte* code_base, if (current == 0) { pc += BC_CHECK_NOT_AT_START_LENGTH; } else { - pc = code_base + Load32(pc + 1); + pc = code_base + Load32Aligned(pc + 4); } break; default: diff --git a/src/regexp-macro-assembler-irregexp-inl.h b/src/regexp-macro-assembler-irregexp-inl.h index faf12da..d91aaa2 100644 --- a/src/regexp-macro-assembler-irregexp-inl.h +++ b/src/regexp-macro-assembler-irregexp-inl.h @@ -36,12 +36,14 @@ namespace v8 { namespace internal { -void RegExpMacroAssemblerIrregexp::Emit(uint32_t byte) { +void RegExpMacroAssemblerIrregexp::Emit(uint32_t byte, uint32_t twenty_four_bits) { + uint32_t word = ((twenty_four_bits << BYTECODE_SHIFT) | byte); ASSERT(pc_ <= buffer_.length()); - if (pc_ == buffer_.length()) { + if (pc_ + 3 >= buffer_.length()) { Expand(); } - buffer_[pc_++] = byte; + Store32(buffer_.start() + pc_, word); + pc_ += 4; } diff --git a/src/regexp-macro-assembler-irregexp.cc b/src/regexp-macro-assembler-irregexp.cc index deb5ada..d32a09d 100644 --- a/src/regexp-macro-assembler-irregexp.cc +++ b/src/regexp-macro-assembler-irregexp.cc @@ -84,8 +84,7 @@ void 
RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) { - Emit(BC_POP_REGISTER); - Emit(register_index); + Emit(BC_POP_REGISTER, register_index); } @@ -93,16 +92,14 @@ void RegExpMacroAssemblerIrregexp::PushRegister( int register_index, StackCheckFlag check_stack_limit) { ASSERT(register_index >= 0); - Emit(BC_PUSH_REGISTER); - Emit(register_index); + Emit(BC_PUSH_REGISTER, register_index); } void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( int register_index, int cp_offset) { ASSERT(register_index >= 0); - Emit(BC_SET_REGISTER_TO_CP); - Emit(register_index); + Emit(BC_SET_REGISTER_TO_CP, register_index); Emit32(cp_offset); // Current position offset. } @@ -115,90 +112,83 @@ void RegExpMacroAssemblerIrregexp::ClearRegister(int reg) { void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister( int register_index) { ASSERT(register_index >= 0); - Emit(BC_SET_CP_TO_REGISTER); - Emit(register_index); + Emit(BC_SET_CP_TO_REGISTER, register_index); } void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister( int register_index) { ASSERT(register_index >= 0); - Emit(BC_SET_REGISTER_TO_SP); - Emit(register_index); + Emit(BC_SET_REGISTER_TO_SP, register_index); } void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister( int register_index) { ASSERT(register_index >= 0); - Emit(BC_SET_SP_TO_REGISTER); - Emit(register_index); + Emit(BC_SET_SP_TO_REGISTER, register_index); } void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) { ASSERT(register_index >= 0); - Emit(BC_SET_REGISTER); - Emit(register_index); + Emit(BC_SET_REGISTER, register_index); Emit32(to); } void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) { ASSERT(register_index >= 0); - Emit(BC_ADVANCE_REGISTER); - Emit(register_index); + Emit(BC_ADVANCE_REGISTER, register_index); Emit32(by); } void RegExpMacroAssemblerIrregexp::PopCurrentPosition() { - Emit(BC_POP_CP); 
+ Emit(BC_POP_CP, 0); } void RegExpMacroAssemblerIrregexp::PushCurrentPosition() { - Emit(BC_PUSH_CP); - Emit32(0); // Current position offset. + Emit(BC_PUSH_CP, 0); } void RegExpMacroAssemblerIrregexp::Backtrack() { - Emit(BC_POP_BT); + Emit(BC_POP_BT, 0); } void RegExpMacroAssemblerIrregexp::GoTo(Label* l) { - Emit(BC_GOTO); + Emit(BC_GOTO, 0); EmitOrLink(l); } void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) { - Emit(BC_PUSH_BT); + Emit(BC_PUSH_BT, 0); EmitOrLink(l); } void RegExpMacroAssemblerIrregexp::Succeed() { - Emit(BC_SUCCEED); + Emit(BC_SUCCEED, 0); } void RegExpMacroAssemblerIrregexp::Fail() { - Emit(BC_FAIL); + Emit(BC_FAIL, 0); } void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { - Emit(BC_ADVANCE_CP); - Emit32(by); + Emit(BC_ADVANCE_CP, by); } void RegExpMacroAssemblerIrregexp::CheckGreedyLoop( Label* on_tos_equals_current_position) { - Emit(BC_CHECK_GREEDY); + Emit(BC_CHECK_GREEDY, 0); EmitOrLink(on_tos_equals_current_position); } @@ -227,51 +217,56 @@ void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; } } - Emit(bytecode); - Emit32(cp_offset); + Emit(bytecode, cp_offset); if (check_bounds) EmitOrLink(on_failure); } void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, Label* on_less) { - Emit(BC_CHECK_LT); - Emit16(limit); + Emit(BC_CHECK_LT, limit); EmitOrLink(on_less); } void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit, Label* on_greater) { - Emit(BC_CHECK_GT); - Emit16(limit); + Emit(BC_CHECK_GT, limit); EmitOrLink(on_greater); } void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) { - Emit(BC_CHECK_CHAR); - Emit32(c); + if (c > MAX_FIRST_ARG) { + Emit(BC_CHECK_4_CHARS, 0); + Emit32(c); + } else { + Emit(BC_CHECK_CHAR, c); + } EmitOrLink(on_equal); } void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) { - Emit(BC_CHECK_AT_START); + Emit(BC_CHECK_AT_START, 0); EmitOrLink(on_at_start); } void 
RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) { - Emit(BC_CHECK_NOT_AT_START); + Emit(BC_CHECK_NOT_AT_START, 0); EmitOrLink(on_not_at_start); } void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c, Label* on_not_equal) { - Emit(BC_CHECK_NOT_CHAR); - Emit32(c); + if (c > MAX_FIRST_ARG) { + Emit(BC_CHECK_NOT_4_CHARS, 0); + Emit32(c); + } else { + Emit(BC_CHECK_NOT_CHAR, c); + } EmitOrLink(on_not_equal); } @@ -280,8 +275,12 @@ void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd( uint32_t c, uint32_t mask, Label* on_equal) { - Emit(BC_AND_CHECK_CHAR); - Emit32(c); + if (c > MAX_FIRST_ARG) { + Emit(BC_AND_CHECK_4_CHARS, 0); + Emit32(c); + } else { + Emit(BC_AND_CHECK_CHAR, c); + } Emit32(mask); EmitOrLink(on_equal); } @@ -291,8 +290,12 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd( uint32_t c, uint32_t mask, Label* on_not_equal) { - Emit(BC_AND_CHECK_NOT_CHAR); - Emit32(c); + if (c > MAX_FIRST_ARG) { + Emit(BC_AND_CHECK_NOT_4_CHARS, 0); + Emit32(c); + } else { + Emit(BC_AND_CHECK_NOT_CHAR, c); + } Emit32(mask); EmitOrLink(on_not_equal); } @@ -303,8 +306,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd( uc16 minus, uc16 mask, Label* on_not_equal) { - Emit(BC_MINUS_AND_CHECK_NOT_CHAR); - Emit16(c); + Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); Emit16(minus); Emit16(mask); EmitOrLink(on_not_equal); @@ -313,8 +315,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd( void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, Label* on_not_equal) { - Emit(BC_CHECK_NOT_BACK_REF); - Emit(start_reg); + Emit(BC_CHECK_NOT_BACK_REF, start_reg); EmitOrLink(on_not_equal); } @@ -322,8 +323,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( int start_reg, Label* on_not_equal) { - Emit(BC_CHECK_NOT_BACK_REF_NO_CASE); - Emit(start_reg); + Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); 
EmitOrLink(on_not_equal); } @@ -331,9 +331,8 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal) { - Emit(BC_CHECK_NOT_REGS_EQUAL); - Emit(reg1); - Emit(reg2); + Emit(BC_CHECK_NOT_REGS_EQUAL, reg1); + Emit32(reg2); EmitOrLink(on_not_equal); } @@ -378,15 +377,12 @@ void RegExpMacroAssemblerIrregexp::CheckCharacters( // load below. for (int i = str.length() - 1; i >= 0; i--) { if (check_end_of_string && i == str.length() - 1) { - Emit(BC_LOAD_CURRENT_CHAR); - Emit32(cp_offset + i); + Emit(BC_LOAD_CURRENT_CHAR, cp_offset + i); EmitOrLink(on_failure); } else { - Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED); - Emit32(cp_offset + i); + Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED, cp_offset + i); } - Emit(BC_CHECK_NOT_CHAR); - Emit32(str[i]); + Emit(BC_CHECK_NOT_CHAR, str[i]); EmitOrLink(on_failure); } } @@ -396,9 +392,8 @@ void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index, int comparand, Label* on_less_than) { ASSERT(comparand >= 0 && comparand <= 65535); - Emit(BC_CHECK_REGISTER_LT); - Emit(register_index); - Emit16(comparand); + Emit(BC_CHECK_REGISTER_LT, register_index); + Emit32(comparand); EmitOrLink(on_less_than); } @@ -407,24 +402,22 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, int comparand, Label* on_greater_or_equal) { ASSERT(comparand >= 0 && comparand <= 65535); - Emit(BC_CHECK_REGISTER_GE); - Emit(register_index); - Emit16(comparand); + Emit(BC_CHECK_REGISTER_GE, register_index); + Emit32(comparand); EmitOrLink(on_greater_or_equal); } void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index, Label* on_eq) { - Emit(BC_CHECK_REGISTER_EQ_POS); - Emit(register_index); + Emit(BC_CHECK_REGISTER_EQ_POS, register_index); EmitOrLink(on_eq); } Handle RegExpMacroAssemblerIrregexp::GetCode(Handle source) { Bind(&backtrack_); - Emit(BC_POP_BT); + Emit(BC_POP_BT, 0); Handle array = Factory::NewByteArray(length()); 
Copy(array->GetDataStartAddress()); return array; diff --git a/src/regexp-macro-assembler-irregexp.h b/src/regexp-macro-assembler-irregexp.h index 96744c3..9e9784d 100644 --- a/src/regexp-macro-assembler-irregexp.h +++ b/src/regexp-macro-assembler-irregexp.h @@ -120,7 +120,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { // Code and bitmap emission. inline void Emit32(uint32_t x); inline void Emit16(uint32_t x); - inline void Emit(uint32_t x); + inline void Emit(uint32_t bc, uint32_t arg); // Bytecode buffer. int length(); void Copy(Address a); -- 2.7.4