};
// An array literal has a literals object that is used
-// used for minimizing the work when contructing it at runtime.
+// for minimizing the work when constructing it at runtime.
class ArrayLiteral: public Expression {
public:
ArrayLiteral(Handle<FixedArray> literals,
};
+// A set of characters, described either by an explicit list of ranges or by
+// the code of a standard set.  For a set created from a standard code the
+// range list is only built when ranges() is first called.
+class CharacterSet BASE_EMBEDDED {
+ public:
+  // Creates a set from a standard set code; no range list is stored yet.
+  explicit CharacterSet(uc16 standard_set_type)
+      : ranges_(NULL), standard_set_type_(standard_set_type) {}
+  // Creates a set from an explicit range list; the standard code starts as 0.
+  explicit CharacterSet(ZoneList<CharacterRange>* ranges)
+      : ranges_(ranges), standard_set_type_(0) {}
+
+  // Returns the range list of this set (defined out of line).
+  ZoneList<CharacterRange>* ranges();
+
+  // A non-zero standard code marks the set as a standard one.
+  bool is_standard() { return 0 != standard_set_type_; }
+  uc16 standard_set_type() { return standard_set_type_; }
+  void set_standard_set_type(uc16 type) {
+    standard_set_type_ = type;
+  }
+
+ private:
+  // Explicit ranges of the set; NULL until supplied or built by ranges().
+  ZoneList<CharacterRange>* ranges_;
+  // If non-zero, the value represents a standard set (e.g., all whitespace
+  // characters) without having to expand the ranges.
+  uc16 standard_set_type_;
+};
+
+
class RegExpCharacterClass: public RegExpTree {
public:
RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
- : ranges_(ranges),
+ : set_(ranges),
is_negated_(is_negated) { }
explicit RegExpCharacterClass(uc16 type)
- : ranges_(new ZoneList<CharacterRange>(2)),
- is_negated_(false) {
- CharacterRange::AddClassEscape(type, ranges_);
- }
+ : set_(type),
+ is_negated_(false) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
virtual int min_match() { return 1; }
virtual int max_match() { return 1; }
virtual void AppendToText(RegExpText* text);
- ZoneList<CharacterRange>* ranges() { return ranges_; }
+ // Returns a copy of the underlying character set.
+ CharacterSet character_set() { return set_; }
+ // Returns true if this class is not negated and is one of the standard
+ // sets.  A set that is recognized from its ranges has its standard type
+ // recorded on the underlying character set as a side effect.
+ // TODO(lrn): Remove need for complex version of is_standard that
+ // recognizes a mangled standard set and just do
+ // { return set_.is_standard(); }
+ bool is_standard();
+ // Returns a value representing the standard character set if is_standard()
+ // returns true.
+ // Currently used values are:
+ // s : unicode whitespace
+ // S : any character except unicode whitespace
+ // w : ASCII word character (digit, letter, underscore)
+ // W : any character except an ASCII word character
+ // d : ASCII digit
+ // D : any character except an ASCII digit
+ // . : any character except a line terminator (\n, \r, U+2028, U+2029)
+ // * : All characters
+ uc16 standard_type() { return set_.standard_set_type(); }
+ ZoneList<CharacterRange>* ranges() { return set_.ranges(); }
bool is_negated() { return is_negated_; }
private:
- ZoneList<CharacterRange>* ranges_;
+ CharacterSet set_;
bool is_negated_;
};
} else if (parse_result.tree->IsAtom() &&
!flags.is_ignore_case() &&
parse_result.capture_count == 0) {
- // TODO(lrn) Accept capture_count > 0 on atoms.
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
- Handle<String> atom_string =
- Factory::NewStringFromTwoByte(atom_pattern);
+ Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
result = AtomCompile(re, pattern, flags, atom_string);
} else if (FLAG_irregexp) {
result = IrregexpPrepare(re, pattern, flags);
// Throw an exception.
Handle<JSArray> array = Factory::NewJSArray(2);
SetElement(array, 0, pattern);
- SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(
- (error_message == NULL) ? "Unknown regexp error" : error_message)));
+ const char* message =
+ (error_message == NULL) ? "Unknown regexp error" : error_message;
+ SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
Handle<Object> regexp_err =
Factory::NewSyntaxError("malformed_regexp", array);
Top::Throw(*regexp_err);
bool check_offset,
bool ascii,
bool preloaded) {
+ if (cc->is_standard() &&
+ macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
+ cp_offset,
+ check_offset,
+ on_failure)) {
+ return;
+ }
+
ZoneList<CharacterRange>* ranges = cc->ranges();
int max_char;
if (ascii) {
// -------------------------------------------------------------------
// Tree to graph conversion
+// Each table below is a flat list of inclusive [from, to] pairs in ascending
+// order, so every *Count constant is twice the number of ranges it holds.
+
+// Whitespace characters.
+// NOTE(review): kSpaceRangeAsciiCount presumably is the length of the
+// leading ASCII-only part of kSpaceRanges (its first two pairs) - confirm.
+static const int kSpaceRangeCount = 20;
+static const int kSpaceRangeAsciiCount = 4;
+static const uc16 kSpaceRanges[kSpaceRangeCount] = {
+  0x0009, 0x000D,
+  0x0020, 0x0020,
+  0x00A0, 0x00A0,
+  0x1680, 0x1680,
+  0x180E, 0x180E,
+  0x2000, 0x200A,
+  0x2028, 0x2029,
+  0x202F, 0x202F,
+  0x205F, 0x205F,
+  0x3000, 0x3000
+};
+
+// Word characters: digits, upper case letters, underscore, lower case letters.
+static const int kWordRangeCount = 8;
+static const uc16 kWordRanges[kWordRangeCount] = {
+  '0', '9',
+  'A', 'Z',
+  '_', '_',
+  'a', 'z'
+};
+
+// Decimal digits.
+static const int kDigitRangeCount = 2;
+static const uc16 kDigitRanges[kDigitRangeCount] = {
+  '0', '9'
+};
+
+// Line terminators.
+static const int kLineTerminatorRangeCount = 6;
+static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {
+  0x000A, 0x000A,
+  0x000D, 0x000D,
+  0x2028, 0x2029
+};
RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
return new TextNode(elements(), on_success);
}
+// Returns true if |ranges| is exactly the complement of the set described by
+// |special_class| within 0..0xffff.  |special_class| holds |length| values
+// laid out as inclusive [from, to] pairs and must not start at 0; |ranges|
+// must not be empty.  The complement of n pairs consists of n + 1 ranges:
+// one before the first pair, one between each two pairs and one after the
+// last pair.
+static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
+                                 const uc16* special_class,
+                                 int length) {
+  ASSERT(ranges->length() != 0);
+  ASSERT(length != 0);
+  ASSERT(special_class[0] != 0);
+  const int expected_range_count = (length >> 1) + 1;
+  if (ranges->length() != expected_range_count) return false;
+
+  // The first complement range has to start at the very first character.
+  CharacterRange current = ranges->at(0);
+  if (current.from() != 0) return false;
+
+  int index = 0;
+  while (index < length) {
+    // The pair must begin right after the complement range before it ...
+    if (special_class[index] != (current.to() + 1)) return false;
+    // ... and end right before the complement range that follows it.
+    current = ranges->at((index >> 1) + 1);
+    if (special_class[index + 1] != current.from() - 1) return false;
+    index += 2;
+  }
+
+  // The last complement range has to reach the very last character.
+  return current.to() == 0xffff;
+}
+
+
+// Returns true if |ranges| contains exactly the ranges listed in
+// |special_class|, in the same order.  |special_class| holds |length| values
+// laid out as [from, to] pairs.
+static bool CompareRanges(ZoneList<CharacterRange>* ranges,
+                          const uc16* special_class,
+                          int length) {
+  const int range_count = ranges->length();
+  if (range_count * 2 != length) return false;
+  for (int n = 0; n < range_count; n++) {
+    CharacterRange candidate = ranges->at(n);
+    const uc16* expected = special_class + 2 * n;
+    bool same = candidate.from() == expected[0] &&
+                candidate.to() == expected[1];
+    if (!same) return false;
+  }
+  return true;
+}
+
+
+// Returns true if this class is not negated and denotes one of the standard
+// sets.  A set that is only given as ranges is compared against the known
+// range tables; on a match the standard type is stored on the set.
+bool RegExpCharacterClass::is_standard() {
+  // TODO(lrn): Remove need for this function, by not throwing away information
+  // along the way.
+  if (is_negated_) return false;
+  if (set_.is_standard()) return true;
+
+  ZoneList<CharacterRange>* class_ranges = set_.ranges();
+  uc16 detected_type;
+  if (CompareRanges(class_ranges, kSpaceRanges, kSpaceRangeCount)) {
+    detected_type = 's';
+  } else if (CompareInverseRanges(class_ranges,
+                                  kSpaceRanges,
+                                  kSpaceRangeCount)) {
+    detected_type = 'S';
+  } else if (CompareInverseRanges(class_ranges,
+                                  kLineTerminatorRanges,
+                                  kLineTerminatorRangeCount)) {
+    detected_type = '.';
+  } else {
+    return false;
+  }
+  set_.set_standard_set_type(detected_type);
+  return true;
+}
+
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
}
-static const int kSpaceRangeCount = 20;
-static const uc16 kSpaceRanges[kSpaceRangeCount] = {
- 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
- 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
- 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
-};
-
-
-static const int kWordRangeCount = 8;
-static const uc16 kWordRanges[kWordRangeCount] = {
- '0', '9', 'A', 'Z', '_', '_', 'a', 'z'
-};
-
-
-static const int kDigitRangeCount = 2;
-static const uc16 kDigitRanges[kDigitRangeCount] = {
- '0', '9'
-};
-
-
-static const int kLineTerminatorRangeCount = 6;
-static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {
- 0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029
-};
-
-
static void AddClass(const uc16* elmv,
int elmc,
ZoneList<CharacterRange>* ranges) {
}
+// Returns the ranges of this set.  If no range list is stored yet, one is
+// created from the standard set type and kept for later calls.
+ZoneList<CharacterRange>* CharacterSet::ranges() {
+  if (ranges_ != NULL) return ranges_;
+  ZoneList<CharacterRange>* expanded = new ZoneList<CharacterRange>(2);
+  CharacterRange::AddClassEscape(standard_set_type_, expanded);
+  ranges_ = expanded;
+  return ranges_;
+}
+
+
+
// -------------------------------------------------------------------
// Interest propagation
BranchOrBacktrack(not_equal, on_not_equal);
}
+// Emits code that tests the character at cp_offset against the standard
+// character class named by type and goes to on_no_match if the character is
+// not in the class.  If check_offset is true the emitted code also checks
+// that the position is inside the input.
+// Returns true if code for the class was emitted.  Returns false, without
+// emitting anything, if the class has no custom implementation in the
+// current mode.
+// The emitted code modifies the current-character register for most classes
+// and additionally uses eax as scratch for '.'.
+bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
+                                                          int cp_offset,
+                                                          bool check_offset,
+                                                          Label* on_no_match) {
+  // Range checks (c in min..max) are generally implemented by an unsigned
+  // (c - min) <= (max - min) check.  Hence c is outside the range exactly
+  // when (c - min) is "above" (max - min), and inside it exactly when
+  // (c - min) is "below_equal" (max - min).  The comparisons must be the
+  // unsigned ones, since characters below min wrap around to large values.
+  switch (type) {
+    case 's':
+      // Match space-characters
+      if (mode_ == ASCII) {
+        // ASCII space characters are '\t'..'\r' and ' '.
+        if (check_offset) {
+          LoadCurrentCharacter(cp_offset, on_no_match);
+        } else {
+          LoadCurrentCharacterUnchecked(cp_offset, 1);
+        }
+        Label success;
+        __ cmp(current_character(), ' ');
+        __ j(equal, &success);
+        // Check range 0x09..0x0d
+        __ sub(Operand(current_character()), Immediate('\t'));
+        __ cmp(current_character(), '\r' - '\t');
+        // Fail only when strictly above the range, so '\r' itself matches.
+        BranchOrBacktrack(above, on_no_match);
+        __ bind(&success);
+        return true;
+      }
+      return false;
+    case 'S':
+      // Match non-space characters.
+      if (mode_ != ASCII) {
+        // Not supported for UC16; bail out before emitting any code.
+        return false;
+      }
+      if (check_offset) {
+        LoadCurrentCharacter(cp_offset, on_no_match, 1);
+      } else {
+        LoadCurrentCharacterUnchecked(cp_offset, 1);
+      }
+      // ASCII space characters are '\t'..'\r' and ' '.
+      __ cmp(current_character(), ' ');
+      BranchOrBacktrack(equal, on_no_match);
+      __ sub(Operand(current_character()), Immediate('\t'));
+      __ cmp(current_character(), '\r' - '\t');
+      // Fail for the whole range 0x09..0x0d, including '\r'.
+      BranchOrBacktrack(below_equal, on_no_match);
+      return true;
+    case 'd':
+      // Match ASCII digits ('0'..'9')
+      if (check_offset) {
+        LoadCurrentCharacter(cp_offset, on_no_match, 1);
+      } else {
+        LoadCurrentCharacterUnchecked(cp_offset, 1);
+      }
+      __ sub(Operand(current_character()), Immediate('0'));
+      __ cmp(current_character(), '9' - '0');
+      // Unsigned comparison: rejects characters below '0' (which wrapped
+      // around) as well as characters above '9', but accepts '9' itself.
+      BranchOrBacktrack(above, on_no_match);
+      return true;
+    case 'D':
+      // Match non ASCII-digits
+      if (check_offset) {
+        LoadCurrentCharacter(cp_offset, on_no_match, 1);
+      } else {
+        LoadCurrentCharacterUnchecked(cp_offset, 1);
+      }
+      __ sub(Operand(current_character()), Immediate('0'));
+      __ cmp(current_character(), '9' - '0');
+      // Fail for the whole range '0'..'9', including '9'.
+      BranchOrBacktrack(below_equal, on_no_match);
+      return true;
+    case '.': {
+      // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+      if (check_offset) {
+        LoadCurrentCharacter(cp_offset, on_no_match, 1);
+      } else {
+        LoadCurrentCharacterUnchecked(cp_offset, 1);
+      }
+      // Compute hash value so exactly 0x0a and 0x0d become zero.
+      // With v = c - '\n' the hash is (v & 1) ^ (v >> 1), which is zero only
+      // for v == 0 ('\n') and v == 3 ('\r').
+      __ sub(Operand(current_character()), Immediate('\n'));
+      __ mov(eax, current_character());
+      __ and_(current_character(), 0x01);
+      __ shr(eax, 1);
+      __ xor_(current_character(), Operand(eax));
+      BranchOrBacktrack(equal, on_no_match);
+      if (mode_ == UC16) {
+        // Compare original value to 0x2028 and 0x2029, using the already
+        // computed ((current_char - '\n') >> 1) in eax.
+        __ cmp(eax, (0x2028 - '\n') >> 1);
+        BranchOrBacktrack(equal, on_no_match);
+      }
+      return true;
+    }
+    case '*':
+      // Match any character.
+      if (check_offset) {
+        CheckPosition(cp_offset, on_no_match);
+      }
+      return true;
+    // No custom implementation (yet): w, W, s(UC16), S(UC16).
+    default:
+      return false;
+  }
+}
void RegExpMacroAssemblerIA32::DispatchHalfNibbleMap(
uc16 start,
int characters) {
ASSERT(cp_offset >= 0);
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
- if (check_bounds) {
- __ cmp(edi, -(cp_offset + characters) * char_size());
- BranchOrBacktrack(greater, on_end_of_input);
- }
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
}
+// Emits a check that the position cp_offset characters after the current one
+// is before the end of the input; otherwise control goes to on_outside_input
+// through BranchOrBacktrack.
+void RegExpMacroAssemblerIA32::CheckPosition(int cp_offset,
+                                             Label* on_outside_input) {
+  // NOTE(review): edi presumably holds the current position as a negative
+  // byte offset from the end of the input - confirm.
+  const int negated_byte_offset = -cp_offset * char_size();
+  __ cmp(edi, negated_byte_offset);
+  BranchOrBacktrack(greater_equal, on_outside_input);
+}
+
+
void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition,
Label* to) {
if (condition < 0) { // No condition
uc16 minus,
uc16 mask,
Label* on_not_equal);
+ virtual bool CheckSpecialCharacterClass(uc16 type,
+ int cp_offset,
+ bool check_offset,
+ Label* on_no_match);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
+ // Adds code that checks whether preemption has been requested
+ // (and checks if we have hit the stack limit too).
+ void CheckStackLimit();
+
// Called from RegExp if the stack-guard is triggered.
// If the code object is relocated, the return address is fixed before
// returning.
static int CheckStackGuardState(Address return_address, Code* re_code);
+ // Checks whether the given offset from the current position is before
+ // the end of the string.
+ void CheckPosition(int cp_offset, Label* on_outside_input);
+
// The ebp-relative location of a regexp register.
Operand register_location(int register_index);
// and an offset. Uses no extra registers.
void LoadConstantBufferAddress(Register reg, ArraySlice* buffer);
- // Adds code that checks whether preemption has been requested
- // (and checks if we have hit the stack limit too).
- void CheckStackLimit();
-
// Call and return internally in the generated code in a way that
// is GC-safe (i.e., doesn't leave absolute code addresses on the stack)
void SafeCall(Label* to);
void RegExpMacroAssemblerTracer::CheckBitmap(uc16 start, Label* bitmap,
Label* on_zero) {
- PrintF(" CheckBitmap(start=u$04x, <bitmap>, label[%08x]);\n", start, on_zero);
+ PrintF(" CheckBitmap(start=u%04x, <bitmap>, label[%08x]);\n", start, on_zero);
assembler_->CheckBitmap(start, bitmap, on_zero);
}
+// Forwards the request to the wrapped assembler, then prints a trace line
+// with the arguments and whether the wrapped assembler handled the class.
+bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
+    uc16 type,
+    int cp_offset,
+    bool check_offset,
+    Label* on_no_match) {
+  const bool handled = assembler_->CheckSpecialCharacterClass(type,
+                                                              cp_offset,
+                                                              check_offset,
+                                                              on_no_match);
+  const char* check_offset_text = check_offset ? "true" : "false";
+  const char* handled_text = handled ? "true" : "false";
+  PrintF(" CheckSpecialCharacterClass(type='%c', offset=%d, "
+         "check_offset=%s, label[%08x]): %s;\n",
+         type,
+         cp_offset,
+         check_offset_text,
+         on_no_match,
+         handled_text);
+  return handled;
+}
+
+
void RegExpMacroAssemblerTracer::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations) {
- PrintF(" DispatchHalfNibbleMap(start=u$04x, <half_nibble_map>, [", start);
+ PrintF(" DispatchHalfNibbleMap(start=u%04x, <half_nibble_map>, [", start);
for (int i = 0; i < destinations.length(); i++) {
if (i > 0)
PrintF(", ");
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations) {
- PrintF(" DispatchByteMap(start=u$04x, <byte_map>, [", start);
+ PrintF(" DispatchByteMap(start=u%04x, <byte_map>, [", start);
for (int i = 0; i < destinations.length(); i++) {
if (i > 0)
PrintF(", ");
byte start,
Label* byte_map,
const Vector<Label*>& destinations) {
- PrintF(" DispatchHighByteMap(start=u$04x, <byte_map>, [", start);
+ PrintF(" DispatchHighByteMap(start=u%04x, <byte_map>, [", start);
for (int i = 0; i < destinations.length(); i++) {
if (i > 0)
PrintF(", ");
uc16 minus,
uc16 and_with,
Label* on_not_equal);
+ virtual bool CheckSpecialCharacterClass(uc16 type,
+ int cp_offset,
+ bool check_offset,
+ Label* on_no_match);
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
virtual void CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) = 0;
+ // Check whether a standard/default character class matches the current
+ // character. Returns false if the type of special character class does
+ // not have custom support.
+ // May clobber the current loaded character.
+ //   type:         character naming the class (e.g. 's', 'd' or '.').
+ //   cp_offset:    offset of the character to test, relative to the
+ //                 current position.
+ //   check_offset: whether the emitted code must also check that the
+ //                 position is inside the input.
+ //   on_no_match:  label to go to if the character is not in the class.
+ // This default implementation supports no class: it always returns false.
+ virtual bool CheckSpecialCharacterClass(uc16 type,
+ int cp_offset,
+ bool check_offset,
+ Label* on_no_match) {
+ return false;
+ }
// Dispatch after looking the current character up in a byte map. The
// destinations vector has up to 256 labels.
virtual void DispatchByteMap(