// Emits an add that moves the current input offset by `by` characters,
// scaled by char_size(). Nothing is emitted when `by` is zero.
void RegExpMacroAssemblerARM::AdvanceCurrentPosition(int by) {
if (by != 0) {
- Label inside_string;
__ add(current_input_offset(),
current_input_offset(), Operand(by * char_size()));
}
}
// Emits code that raises the current input offset to -by * char_size() when
// it is below that value. If the offset is already greater than or equal to
// it, the emitted branch jumps to after_position and nothing changes;
// otherwise the offset is overwritten and the character just before the new
// position is loaded.
+void RegExpMacroAssemblerARM::SetCurrentPositionFromEnd(int by) {
+ Label after_position;
+ __ cmp(current_input_offset(), Operand(-by * char_size()));
+ __ b(ge, &after_position);
+ __ mov(current_input_offset(), Operand(-by * char_size()));
+ // On RegExp code entry (where this operation is used), the character before
+ // the current position is expected to be already loaded.
+ // We have advanced the position, so it's safe to read backwards.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&after_position);
+}
+
+
void RegExpMacroAssemblerARM::SetRegister(int register_index, int to) {
ASSERT(register_index >= num_saved_registers_); // Reserved for positions!
__ mov(r0, Operand(to));
StackCheckFlag check_stack_limit);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
+ virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
}
// An assertion is start-anchored exactly when its type is START_OF_INPUT.
-bool RegExpAssertion::IsAnchored() {
+bool RegExpAssertion::IsAnchoredAtStart() {
return type() == RegExpAssertion::START_OF_INPUT;
}
-bool RegExpAlternative::IsAnchored() {
// An assertion is end-anchored exactly when its type is END_OF_INPUT.
+bool RegExpAssertion::IsAnchoredAtEnd() {
+ return type() == RegExpAssertion::END_OF_INPUT;
+}
+
+
// Walks the sequence from its first node to its last. Returns true as soon as
// a start-anchored node is found, and false as soon as a node that can match
// at least one character (max_match() > 0) is found first. Returns false if
// the end of the list is reached without meeting either.
+bool RegExpAlternative::IsAnchoredAtStart() {
ZoneList<RegExpTree*>* nodes = this->nodes();
for (int i = 0; i < nodes->length(); i++) {
RegExpTree* node = nodes->at(i);
- if (node->IsAnchored()) { return true; }
+ if (node->IsAnchoredAtStart()) { return true; }
+ if (node->max_match() > 0) { return false; }
+ }
+ return false;
+}
+
+
// Walks the sequence from its last node back to its first. Returns true as
// soon as an end-anchored node is found, and false as soon as a node that can
// match at least one character (max_match() > 0) is found first. Returns false
// if the start of the list is reached without meeting either.
+bool RegExpAlternative::IsAnchoredAtEnd() {
+ ZoneList<RegExpTree*>* nodes = this->nodes();
+ for (int i = nodes->length() - 1; i >= 0; i--) {
+ RegExpTree* node = nodes->at(i);
+ if (node->IsAnchoredAtEnd()) { return true; }
if (node->max_match() > 0) { return false; }
}
return false;
}
// A disjunction is start-anchored only if every one of its alternatives is;
// the first alternative that is not start-anchored makes the result false.
-bool RegExpDisjunction::IsAnchored() {
+bool RegExpDisjunction::IsAnchoredAtStart() {
ZoneList<RegExpTree*>* alternatives = this->alternatives();
for (int i = 0; i < alternatives->length(); i++) {
- if (!alternatives->at(i)->IsAnchored())
+ if (!alternatives->at(i)->IsAnchoredAtStart())
return false;
}
return true;
}
-bool RegExpLookahead::IsAnchored() {
- return is_positive() && body()->IsAnchored();
// A disjunction is end-anchored only if every one of its alternatives is;
// the first alternative that is not end-anchored makes the result false.
+bool RegExpDisjunction::IsAnchoredAtEnd() {
+ ZoneList<RegExpTree*>* alternatives = this->alternatives();
+ for (int i = 0; i < alternatives->length(); i++) {
+ if (!alternatives->at(i)->IsAnchoredAtEnd())
+ return false;
+ }
+ return true;
+}
+
+
// Only a positive lookahead whose body is start-anchored counts as
// start-anchored; a non-positive lookahead always yields false.
+bool RegExpLookahead::IsAnchoredAtStart() {
+ return is_positive() && body()->IsAnchoredAtStart();
+}
+
+
// A capture is start-anchored exactly when its body is.
+bool RegExpCapture::IsAnchoredAtStart() {
+ return body()->IsAnchoredAtStart();
}
-bool RegExpCapture::IsAnchored() {
- return body()->IsAnchored();
// A capture is end-anchored exactly when its body is.
+bool RegExpCapture::IsAnchoredAtEnd() {
+ return body()->IsAnchoredAtEnd();
}
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) = 0;
virtual bool IsTextElement() { return false; }
- virtual bool IsAnchored() { return false; }
+ virtual bool IsAnchoredAtStart() { return false; }
+ virtual bool IsAnchoredAtEnd() { return false; }
virtual int min_match() = 0;
virtual int max_match() = 0;
// Returns the interval of registers used for captures within this
virtual RegExpDisjunction* AsDisjunction();
virtual Interval CaptureRegisters();
virtual bool IsDisjunction();
- virtual bool IsAnchored();
+ virtual bool IsAnchoredAtStart();
+ virtual bool IsAnchoredAtEnd();
virtual int min_match() { return min_match_; }
virtual int max_match() { return max_match_; }
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
virtual RegExpAlternative* AsAlternative();
virtual Interval CaptureRegisters();
virtual bool IsAlternative();
- virtual bool IsAnchored();
+ virtual bool IsAnchoredAtStart();
+ virtual bool IsAnchoredAtEnd();
virtual int min_match() { return min_match_; }
virtual int max_match() { return max_match_; }
ZoneList<RegExpTree*>* nodes() { return nodes_; }
RegExpNode* on_success);
virtual RegExpAssertion* AsAssertion();
virtual bool IsAssertion();
- virtual bool IsAnchored();
+ virtual bool IsAnchoredAtStart();
+ virtual bool IsAnchoredAtEnd();
virtual int min_match() { return 0; }
virtual int max_match() { return 0; }
Type type() { return type_; }
RegExpCompiler* compiler,
RegExpNode* on_success);
virtual RegExpCapture* AsCapture();
- virtual bool IsAnchored();
+ virtual bool IsAnchoredAtStart();
+ virtual bool IsAnchoredAtEnd();
virtual Interval CaptureRegisters();
virtual bool IsCapture();
virtual int min_match() { return body_->min_match(); }
virtual RegExpLookahead* AsLookahead();
virtual Interval CaptureRegisters();
virtual bool IsLookahead();
- virtual bool IsAnchored();
+ virtual bool IsAnchoredAtStart();
virtual int min_match() { return 0; }
virtual int max_match() { return 0; }
RegExpTree* body() { return body_; }
V(CHECK_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
V(CHECK_GREEDY, 46, 8) /* bc8 pad24 addr32 */ \
-V(ADVANCE_CP_AND_GOTO, 47, 8) /* bc8 offset24 addr32 */
+V(ADVANCE_CP_AND_GOTO, 47, 8) /* bc8 offset24 addr32 */ \
+V(SET_CURRENT_POSITION_FROM_END, 48, 4) /* bc8 idx24 */
// Expands one bytecode entry into a constant named BC_<name> that holds the
// entry's numeric code. The length argument is not used by this macro.
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
// Emits an add that moves edi by `by` characters, scaled by char_size().
// Nothing is emitted when `by` is zero.
void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
if (by != 0) {
- Label inside_string;
__ add(Operand(edi), Immediate(by * char_size()));
}
}
__ add(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
}
// Emits code that raises edi to -by * char_size() when it is below that value.
// If edi is already greater than or equal to it, the emitted jump goes to
// after_position and nothing changes; otherwise edi is overwritten and the
// character just before the new position is loaded.
+void RegExpMacroAssemblerIA32::SetCurrentPositionFromEnd(int by) {
+ NearLabel after_position;
+ __ cmp(edi, -by * char_size());
+ __ j(greater_equal, &after_position);
+ __ mov(edi, -by * char_size());
+ // On RegExp code entry (where this operation is used), the character before
+ // the current position is expected to be already loaded.
+ // We have advanced the position, so it's safe to read backwards.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&after_position);
+}
void RegExpMacroAssemblerIA32::SetRegister(int register_index, int to) {
ASSERT(register_index >= num_saved_registers_); // Reserved for positions!
StackCheckFlag check_stack_limit);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
+ virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
pc = code_base + Load32Aligned(pc + 4);
}
break;
// Interpreter case for SET_CURRENT_POSITION_FROM_END. `by` is read from the
// instruction bits above BYTECODE_SHIFT. When more than `by` characters lie
// between `current` and the end of the subject, `current` is moved forward to
// `by` characters before the end and current_char is reloaded from the
// character just before the new position. Because the move only happens when
// it increases `current`, the index current - 1 is never negative.
+ BYTECODE(SET_CURRENT_POSITION_FROM_END) {
+ int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
+ if (subject.length() - current > by) {
+ current = subject.length() - by;
+ current_char = subject[current - 1];
+ }
+ pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
+ break;
+ }
default:
UNREACHABLE();
break;
&compiler,
compiler.accept());
RegExpNode* node = captured_body;
- if (!data->tree->IsAnchored()) {
+ bool is_end_anchored = data->tree->IsAnchoredAtEnd();
+ bool is_start_anchored = data->tree->IsAnchoredAtStart();
+ int max_length = data->tree->max_match();
+ if (!is_start_anchored) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning.
RegExpNode* loop_node =
RegExpMacroAssemblerIrregexp macro_assembler(codes);
#endif // V8_INTERPRETED_REGEXP
+ // Inserted here, instead of in Assembler, because it depends on information
+ // in the AST that isn't replicated in the Node structure.
+ static const int kMaxBacksearchLimit = 1024;
+ if (is_end_anchored &&
+ !is_start_anchored &&
+ max_length < kMaxBacksearchLimit) {
+ macro_assembler.SetCurrentPositionFromEnd(max_length);
+ }
+
return compiler.Assemble(&macro_assembler,
node,
data->capture_count,
}
// Emits the BC_SET_CURRENT_POSITION_FROM_END bytecode with `by` as its
// argument. `by` is asserted to satisfy is_uint24().
+void RegExpMacroAssemblerIrregexp::SetCurrentPositionFromEnd(int by) {
+ ASSERT(is_uint24(by));
+ Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
+}
+
+
void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) {
ASSERT(register_index >= 0);
ASSERT(register_index <= kMaxRegister);
virtual void PushRegister(int register_index,
StackCheckFlag check_stack_limit);
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
+ virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
}
// Prints a trace line containing `by`, then forwards the call unchanged to the
// wrapped assembler_.
+void RegExpMacroAssemblerTracer::SetCurrentPositionFromEnd(int by) {
+ PrintF(" SetCurrentPositionFromEnd(by=%d);\n", by);
+ assembler_->SetCurrentPositionFromEnd(by);
+}
+
+
void RegExpMacroAssemblerTracer::SetRegister(int register_index, int to) {
PrintF(" SetRegister(register=%d, to=%d);\n", register_index, to);
assembler_->SetRegister(register_index, to);
StackCheckFlag check_stack_limit);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
+ virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
StackCheckFlag check_stack_limit) = 0;
virtual void ReadCurrentPositionFromRegister(int reg) = 0;
virtual void ReadStackPointerFromRegister(int reg) = 0;
+ virtual void SetCurrentPositionFromEnd(int by) = 0;
virtual void SetRegister(int register_index, int to) = 0;
virtual void Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
// Emits an addq that moves rdi by `by` characters, scaled by char_size().
// Nothing is emitted when `by` is zero.
void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) {
if (by != 0) {
- Label inside_string;
__ addq(rdi, Immediate(by * char_size()));
}
}
}
// Emits code that raises rdi to -by * char_size() when it is below that value.
// If rdi is already greater than or equal to it, the emitted jump goes to
// after_position and nothing changes; otherwise rdi is overwritten and the
// character just before the new position is loaded.
+void RegExpMacroAssemblerX64::SetCurrentPositionFromEnd(int by) {
+ NearLabel after_position;
+ __ cmpq(rdi, Immediate(-by * char_size()));
+ __ j(greater_equal, &after_position);
+ __ movq(rdi, Immediate(-by * char_size()));
+ // On RegExp code entry (where this operation is used), the character before
+ // the current position is expected to be already loaded.
+ // We have advanced the position, so it's safe to read backwards.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&after_position);
+}
+
+
void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
ASSERT(register_index >= num_saved_registers_); // Reserved for positions!
__ movq(register_location(register_index), Immediate(to));
StackCheckFlag check_stack_limit);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
+ virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
assertEquals(false, desc.configurable);
assertEquals(false, desc.enumerable);
assertEquals(true, desc.writable);
+
+
+// Check that end-anchored regexps are optimized correctly.
// The pattern ends in `$`, so a match must finish at the end of the input.
// Its shortest possible match is two characters ("ag").
+var re = /(?:a|bc)g$/;
+assertTrue(re.test("ag"));
+assertTrue(re.test("bcg"));
+assertTrue(re.test("abcg"));
+assertTrue(re.test("zimbag"));
+assertTrue(re.test("zimbcg"));
+
// Inputs shorter than the shortest possible match must fail.
+assertFalse(re.test("g"));
+assertFalse(re.test(""));
+
+// Global regexp (non-zero start).
// With the `g` flag, test() begins searching at re.lastIndex.
+var re = /(?:a|bc)g$/g;
+assertTrue(re.test("ag"));
+re.lastIndex = 1; // Near start of string.
+assertTrue(re.test("zimbag"));
+re.lastIndex = 6; // At end of string.
+assertFalse(re.test("zimbag"));
// From index 5 only "g" remains, which is too short to match.
+re.lastIndex = 5; // Near end of string.
+assertFalse(re.test("zimbag"));
// From index 4 exactly "ag" remains, so the match succeeds.
+re.lastIndex = 4;
+assertTrue(re.test("zimbag"));
+
+// Anchored at both ends.
// With `^` as well, a search that starts at index 1 can never match.
+var re = /^(?:a|bc)g$/g;
+assertTrue(re.test("ag"));
+re.lastIndex = 1;
+assertFalse(re.test("ag"));
+re.lastIndex = 1;
+assertFalse(re.test("zag"));
+
+// Long max_length of RegExp.
// The longest possible match is 14 literal characters plus up to 1000 "!".
+var re = /VeryLongRegExp!{1,1000}$/;
+assertTrue(re.test("BahoolaVeryLongRegExp!!!!!!"));
+assertFalse(re.test("VeryLongRegExp"));
+assertFalse(re.test("!"));
+
+// End anchor inside disjunction.
// Every alternative carries its own `$`.
+var re = /(?:a$|bc$)/;
+assertTrue(re.test("a"));
+assertTrue(re.test("bc"));
+assertTrue(re.test("abc"));
+assertTrue(re.test("zimzamzumba"));
+assertTrue(re.test("zimzamzumbc"));
+assertFalse(re.test("c"));
+assertFalse(re.test(""));
+
+// Only partially anchored.
// Only the second alternative ends in `$`; a bare "a" may match anywhere,
// so the first "a" in the input (index 4 in "zimzamzumba") must be reported.
+var re = /(?:a|bc$)/;
+assertTrue(re.test("a"));
+assertTrue(re.test("bc"));
+assertEquals(["a"], re.exec("abc"));
+assertEquals(4, re.exec("zimzamzumba").index);
+assertEquals(["bc"], re.exec("zimzomzumbc"));
+assertFalse(re.test("c"));
+assertFalse(re.test(""));