}
-void RegExpMacroAssemblerARM::CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string) {
- if (on_failure == NULL) {
- // Instead of inlining a backtrack for each test, (re)use the global
- // backtrack target.
- on_failure = &backtrack_label_;
- }
-
- if (check_end_of_string) {
- // Is last character of required match inside string.
- CheckPosition(cp_offset + str.length() - 1, on_failure);
- }
-
- __ add(r0, end_of_input_address(), Operand(current_input_offset()));
- if (cp_offset != 0) {
- int byte_offset = cp_offset * char_size();
- __ add(r0, r0, Operand(byte_offset));
- }
-
- // r0 : Address of characters to match against str.
- int stored_high_byte = 0;
- for (int i = 0; i < str.length(); i++) {
- if (mode_ == ASCII) {
- __ ldrb(r1, MemOperand(r0, char_size(), PostIndex));
- ASSERT(str[i] <= String::kMaxOneByteCharCode);
- __ cmp(r1, Operand(str[i]));
- } else {
- __ ldrh(r1, MemOperand(r0, char_size(), PostIndex));
- uc16 match_char = str[i];
- int match_high_byte = (match_char >> 8);
- if (match_high_byte == 0) {
- __ cmp(r1, Operand(str[i]));
- } else {
- if (match_high_byte != stored_high_byte) {
- __ mov(r2, Operand(match_high_byte));
- stored_high_byte = match_high_byte;
- }
- __ add(r3, r2, Operand(match_char & 0xff));
- __ cmp(r1, r3);
- }
- }
- BranchOrBacktrack(ne, on_failure);
- }
-}
-
-
void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) {
__ ldr(r0, MemOperand(backtrack_stackpointer(), 0));
__ cmp(current_input_offset(), r0);
case 'd':
// Match ASCII digits ('0'..'9')
__ sub(r0, current_character(), Operand('0'));
- __ cmp(current_character(), Operand('9' - '0'));
+ __ cmp(r0, Operand('9' - '0'));
BranchOrBacktrack(hi, on_no_match);
return true;
case 'D':
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
- virtual void CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
}
-void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string) {
-#ifdef DEBUG
- // If input is ASCII, don't even bother calling here if the string to
- // match contains a non-ASCII character.
- if (mode_ == ASCII) {
- ASSERT(String::IsOneByte(str.start(), str.length()));
- }
-#endif
- int byte_length = str.length() * char_size();
- int byte_offset = cp_offset * char_size();
- if (check_end_of_string) {
- // Check that there are at least str.length() characters left in the input.
- __ cmp(edi, Immediate(-(byte_offset + byte_length)));
- BranchOrBacktrack(greater, on_failure);
- }
-
- if (on_failure == NULL) {
- // Instead of inlining a backtrack, (re)use the global backtrack target.
- on_failure = &backtrack_label_;
- }
-
- // Do one character test first to minimize loading for the case that
- // we don't match at all (loading more than one character introduces that
- // chance of reading unaligned and reading across cache boundaries).
- // If the first character matches, expect a larger chance of matching the
- // string, and start loading more characters at a time.
- if (mode_ == ASCII) {
- __ cmpb(Operand(esi, edi, times_1, byte_offset),
- static_cast<int8_t>(str[0]));
- } else {
- // Don't use 16-bit immediate. The size changing prefix throws off
- // pre-decoding.
- __ movzx_w(eax,
- Operand(esi, edi, times_1, byte_offset));
- __ cmp(eax, static_cast<int32_t>(str[0]));
- }
- BranchOrBacktrack(not_equal, on_failure);
-
- __ lea(ebx, Operand(esi, edi, times_1, 0));
- for (int i = 1, n = str.length(); i < n;) {
- if (mode_ == ASCII) {
- if (i <= n - 4) {
- int combined_chars =
- (static_cast<uint32_t>(str[i + 0]) << 0) |
- (static_cast<uint32_t>(str[i + 1]) << 8) |
- (static_cast<uint32_t>(str[i + 2]) << 16) |
- (static_cast<uint32_t>(str[i + 3]) << 24);
- __ cmp(Operand(ebx, byte_offset + i), Immediate(combined_chars));
- i += 4;
- } else {
- __ cmpb(Operand(ebx, byte_offset + i),
- static_cast<int8_t>(str[i]));
- i += 1;
- }
- } else {
- ASSERT(mode_ == UC16);
- if (i <= n - 2) {
- __ cmp(Operand(ebx, byte_offset + i * sizeof(uc16)),
- Immediate(*reinterpret_cast<const int*>(&str[i])));
- i += 2;
- } else {
- // Avoid a 16-bit immediate operation. It uses the length-changing
- // 0x66 prefix which causes pre-decoder misprediction and pipeline
- // stalls. See
- // "Intel(R) 64 and IA-32 Architectures Optimization Reference Manual"
- // (248966.pdf) section 3.4.2.3 "Length-Changing Prefixes (LCP)"
- __ movzx_w(eax,
- Operand(ebx, byte_offset + i * sizeof(uc16)));
- __ cmp(eax, static_cast<int32_t>(str[i]));
- i += 1;
- }
- }
- BranchOrBacktrack(not_equal, on_failure);
- }
-}
-
-
void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
Label fallthrough;
__ cmp(edi, Operand(backtrack_stackpointer(), 0));
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
- virtual void CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
}
-void RegExpMacroAssemblerMIPS::CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string) {
- if (on_failure == NULL) {
- // Instead of inlining a backtrack for each test, (re)use the global
- // backtrack target.
- on_failure = &backtrack_label_;
- }
-
- if (check_end_of_string) {
- // Is last character of required match inside string.
- CheckPosition(cp_offset + str.length() - 1, on_failure);
- }
-
- __ Addu(a0, end_of_input_address(), Operand(current_input_offset()));
- if (cp_offset != 0) {
- int byte_offset = cp_offset * char_size();
- __ Addu(a0, a0, Operand(byte_offset));
- }
-
- // a0 : Address of characters to match against str.
- int stored_high_byte = 0;
- for (int i = 0; i < str.length(); i++) {
- if (mode_ == ASCII) {
- __ lbu(a1, MemOperand(a0, 0));
- __ addiu(a0, a0, char_size());
- ASSERT(str[i] <= String::kMaxOneByteCharCode);
- BranchOrBacktrack(on_failure, ne, a1, Operand(str[i]));
- } else {
- __ lhu(a1, MemOperand(a0, 0));
- __ addiu(a0, a0, char_size());
- uc16 match_char = str[i];
- int match_high_byte = (match_char >> 8);
- if (match_high_byte == 0) {
- BranchOrBacktrack(on_failure, ne, a1, Operand(str[i]));
- } else {
- if (match_high_byte != stored_high_byte) {
- __ li(a2, Operand(match_high_byte));
- stored_high_byte = match_high_byte;
- }
- __ Addu(a3, a2, Operand(match_char & 0xff));
- BranchOrBacktrack(on_failure, ne, a1, Operand(a3));
- }
- }
- }
-}
-
-
void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
Label backtrack_non_equal;
__ lw(a0, MemOperand(backtrack_stackpointer(), 0));
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
- virtual void CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
}
-void RegExpMacroAssemblerIrregexp::CheckCharacters(
- Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string) {
- ASSERT(cp_offset >= kMinCPOffset);
- ASSERT(cp_offset + str.length() - 1 <= kMaxCPOffset);
- // It is vital that this loop is backwards due to the unchecked character
- // load below.
- for (int i = str.length() - 1; i >= 0; i--) {
- if (check_end_of_string && i == str.length() - 1) {
- Emit(BC_LOAD_CURRENT_CHAR, cp_offset + i);
- EmitOrLink(on_failure);
- } else {
- Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED, cp_offset + i);
- }
- Emit(BC_CHECK_NOT_CHAR, str[i]);
- EmitOrLink(on_failure);
- }
-}
-
-
void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index,
int comparand,
Label* on_less_than) {
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
- virtual void CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string);
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual void IfRegisterEqPos(int register_index, Label* if_eq);
}
-void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string) {
- PrintF(" %s(str=\"",
- check_end_of_string ? "CheckCharacters" : "CheckCharactersUnchecked");
- for (int i = 0; i < str.length(); i++) {
- PrintF("0x%04x", str[i]);
- }
- PrintF("\", cp_offset=%d, label[%08x])\n",
- cp_offset, LabelToInt(on_failure));
- assembler_->CheckCharacters(str, cp_offset, on_failure, check_end_of_string);
-}
-
-
bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
uc16 type,
Label* on_no_match) {
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
- virtual void CheckCharacters(
- Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
Label* on_equal) = 0;
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
- // Check the current character for a match with a literal string. If we
- // fail to match then goto the on_failure label. If check_eos is set then
- // the end of input always fails. If check_eos is clear then it is the
- // caller's responsibility to ensure that the end of string is not hit.
- // If the label is NULL then we should pop a backtrack address off
- // the stack and go to that.
- virtual void CheckCharacters(
- Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_eos) = 0;
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
}
-void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string) {
-#ifdef DEBUG
- // If input is ASCII, don't even bother calling here if the string to
- // match contains a non-ASCII character.
- if (mode_ == ASCII) {
- ASSERT(String::IsOneByte(str.start(), str.length()));
- }
-#endif
- int byte_length = str.length() * char_size();
- int byte_offset = cp_offset * char_size();
- if (check_end_of_string) {
- // Check that there are at least str.length() characters left in the input.
- __ cmpl(rdi, Immediate(-(byte_offset + byte_length)));
- BranchOrBacktrack(greater, on_failure);
- }
-
- if (on_failure == NULL) {
- // Instead of inlining a backtrack, (re)use the global backtrack target.
- on_failure = &backtrack_label_;
- }
-
- // Do one character test first to minimize loading for the case that
- // we don't match at all (loading more than one character introduces that
- // chance of reading unaligned and reading across cache boundaries).
- // If the first character matches, expect a larger chance of matching the
- // string, and start loading more characters at a time.
- if (mode_ == ASCII) {
- __ cmpb(Operand(rsi, rdi, times_1, byte_offset),
- Immediate(static_cast<int8_t>(str[0])));
- } else {
- // Don't use 16-bit immediate. The size changing prefix throws off
- // pre-decoding.
- __ movzxwl(rax,
- Operand(rsi, rdi, times_1, byte_offset));
- __ cmpl(rax, Immediate(static_cast<int32_t>(str[0])));
- }
- BranchOrBacktrack(not_equal, on_failure);
-
- __ lea(rbx, Operand(rsi, rdi, times_1, 0));
- for (int i = 1, n = str.length(); i < n; ) {
- if (mode_ == ASCII) {
- if (i + 8 <= n) {
- uint64_t combined_chars =
- (static_cast<uint64_t>(str[i + 0]) << 0) ||
- (static_cast<uint64_t>(str[i + 1]) << 8) ||
- (static_cast<uint64_t>(str[i + 2]) << 16) ||
- (static_cast<uint64_t>(str[i + 3]) << 24) ||
- (static_cast<uint64_t>(str[i + 4]) << 32) ||
- (static_cast<uint64_t>(str[i + 5]) << 40) ||
- (static_cast<uint64_t>(str[i + 6]) << 48) ||
- (static_cast<uint64_t>(str[i + 7]) << 56);
- __ movq(rax, combined_chars, RelocInfo::NONE64);
- __ cmpq(rax, Operand(rbx, byte_offset + i));
- i += 8;
- } else if (i + 4 <= n) {
- uint32_t combined_chars =
- (static_cast<uint32_t>(str[i + 0]) << 0) ||
- (static_cast<uint32_t>(str[i + 1]) << 8) ||
- (static_cast<uint32_t>(str[i + 2]) << 16) ||
- (static_cast<uint32_t>(str[i + 3]) << 24);
- __ cmpl(Operand(rbx, byte_offset + i), Immediate(combined_chars));
- i += 4;
- } else {
- __ cmpb(Operand(rbx, byte_offset + i),
- Immediate(static_cast<int8_t>(str[i])));
- i++;
- }
- } else {
- ASSERT(mode_ == UC16);
- if (i + 4 <= n) {
- uint64_t combined_chars = *reinterpret_cast<const uint64_t*>(&str[i]);
- __ movq(rax, combined_chars, RelocInfo::NONE64);
- __ cmpq(rax,
- Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)));
- i += 4;
- } else if (i + 2 <= n) {
- uint32_t combined_chars = *reinterpret_cast<const uint32_t*>(&str[i]);
- __ cmpl(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)),
- Immediate(combined_chars));
- i += 2;
- } else {
- __ movzxwl(rax,
- Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)));
- __ cmpl(rax, Immediate(str[i]));
- i++;
- }
- }
- BranchOrBacktrack(not_equal, on_failure);
- }
-}
-
-
void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
Label fallthrough;
__ cmpl(rdi, Operand(backtrack_stackpointer(), 0));
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
- virtual void CheckCharacters(Vector<const uc16> str,
- int cp_offset,
- Label* on_failure,
- bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
Isolate::Current()->runtime_zone());
- uc16 foo_chars[3] = {'f', 'o', 'o'};
- Vector<const uc16> foo(foo_chars, 3);
-
- Label fail;
- m.CheckCharacters(foo, 0, &fail, true);
+ Label fail, backtrack;
+ m.PushBacktrack(&fail);
+ m.CheckNotAtStart(NULL);
+ m.LoadCurrentCharacter(2, NULL);
+ m.CheckNotCharacter('o', NULL);
+ m.LoadCurrentCharacter(1, NULL, false);
+ m.CheckNotCharacter('o', NULL);
+ m.LoadCurrentCharacter(0, NULL, false);
+ m.CheckNotCharacter('f', NULL);
m.WriteCurrentPositionToRegister(0, 0);
+ m.WriteCurrentPositionToRegister(1, 3);
m.AdvanceCurrentPosition(3);
- m.WriteCurrentPositionToRegister(1, 0);
+ m.PushBacktrack(&backtrack);
m.Succeed();
+ m.Bind(&backtrack);
+ m.Backtrack();
m.Bind(&fail);
m.Fail();
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4,
Isolate::Current()->runtime_zone());
- uc16 foo_chars[3] = {'f', 'o', 'o'};
- Vector<const uc16> foo(foo_chars, 3);
-
- Label fail;
- m.CheckCharacters(foo, 0, &fail, true);
+ Label fail, backtrack;
+ m.PushBacktrack(&fail);
+ m.CheckNotAtStart(NULL);
+ m.LoadCurrentCharacter(2, NULL);
+ m.CheckNotCharacter('o', NULL);
+ m.LoadCurrentCharacter(1, NULL, false);
+ m.CheckNotCharacter('o', NULL);
+ m.LoadCurrentCharacter(0, NULL, false);
+ m.CheckNotCharacter('f', NULL);
m.WriteCurrentPositionToRegister(0, 0);
+ m.WriteCurrentPositionToRegister(1, 3);
m.AdvanceCurrentPosition(3);
- m.WriteCurrentPositionToRegister(1, 0);
+ m.PushBacktrack(&backtrack);
m.Succeed();
+ m.Bind(&backtrack);
+ m.Backtrack();
m.Bind(&fail);
m.Fail();
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024),
Isolate::Current()->runtime_zone());
// ^f(o)o.
- Label fail, fail2, start;
- uc16 foo_chars[3];
- foo_chars[0] = 'f';
- foo_chars[1] = 'o';
- foo_chars[2] = 'o';
- Vector<const uc16> foo(foo_chars, 3);
+ Label start, fail, backtrack;
+
m.SetRegister(4, 42);
m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
m.AdvanceRegister(4, 42);
m.GoTo(&start);
m.Fail();
m.Bind(&start);
- m.PushBacktrack(&fail2);
- m.CheckCharacters(foo, 0, &fail, true);
+ m.PushBacktrack(&fail);
+ m.CheckNotAtStart(NULL);
+ m.LoadCurrentCharacter(0, NULL);
+ m.CheckNotCharacter('f', NULL);
+ m.LoadCurrentCharacter(1, NULL);
+ m.CheckNotCharacter('o', NULL);
+ m.LoadCurrentCharacter(2, NULL);
+ m.CheckNotCharacter('o', NULL);
m.WriteCurrentPositionToRegister(0, 0);
- m.PushCurrentPosition();
+ m.WriteCurrentPositionToRegister(1, 3);
+ m.WriteCurrentPositionToRegister(2, 1);
+ m.WriteCurrentPositionToRegister(3, 2);
m.AdvanceCurrentPosition(3);
- m.WriteCurrentPositionToRegister(1, 0);
- m.PopCurrentPosition();
- m.AdvanceCurrentPosition(1);
- m.WriteCurrentPositionToRegister(2, 0);
- m.AdvanceCurrentPosition(1);
- m.WriteCurrentPositionToRegister(3, 0);
+ m.PushBacktrack(&backtrack);
m.Succeed();
-
- m.Bind(&fail);
+ m.Bind(&backtrack);
+ m.ClearRegisters(2, 3);
m.Backtrack();
- m.Succeed();
-
- m.Bind(&fail2);
+ m.Bind(&fail);
m.PopRegister(0);
m.Fail();