}
+static const uc16 kNoCharClass = 0;
+
+// Adds either a range or a pre-defined character class to the character ranges.
+// If char_class is not kNoCharClass, it's interpreted as a class escape
+// (e.g., 's' means whitespace, from '\s') and range is ignored.
+static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
+ uc16 char_class,
+ CharacterRange range) {
+ if (char_class != kNoCharClass) {
+ CharacterRange::AddClassEscape(char_class, ranges);
+ } else {
+ ranges->Add(range);
+ }
+}
+
+
RegExpTree* RegExpParser::ParseCharacterClass() {
static const char* kUnterminated = "Unterminated character class";
static const char* kRangeOutOfOrder = "Range out of order in character class";
}
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
while (has_more() && current() != ']') {
- uc16 char_class = 0;
+ uc16 char_class = kNoCharClass;
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
- if (char_class) {
- CharacterRange::AddClassEscape(char_class, ranges);
- continue;
- }
if (current() == '-') {
Advance();
if (current() == kEndMarker) {
// The code following the loop reports an error.
break;
} else if (current() == ']') {
- ranges->Add(first);
+ AddRangeOrEscape(ranges, char_class, first);
ranges->Add(CharacterRange::Singleton('-'));
break;
}
- CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED);
- if (char_class) {
- ranges->Add(first);
+ uc16 char_class_2 = kNoCharClass;
+ CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
+ if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
+ // Either end is an escaped character class. Treat the '-' verbatim.
+ AddRangeOrEscape(ranges, char_class, first);
ranges->Add(CharacterRange::Singleton('-'));
- CharacterRange::AddClassEscape(char_class, ranges);
+ AddRangeOrEscape(ranges, char_class_2, next);
continue;
}
if (first.from() > next.to()) {
}
ranges->Add(CharacterRange::Range(first.from(), next.to()));
} else {
- ranges->Add(first);
+ AddRangeOrEscape(ranges, char_class, first);
}
}
if (!has_more()) {
assertFalse(re.test('a'));
assertFalse(re.test('Z'));
+// First - is treated as range operator, second as literal minus.
+// This follows the specification in parsing, but doesn't throw on
+// the \s at the beginning of the range.
+re = /[\s-0-9]/;
+assertTrue(re.test(' '));
+assertTrue(re.test('\xA0'));
+assertTrue(re.test('-'));
+assertTrue(re.test('0'));
+assertTrue(re.test('9'));
+assertFalse(re.test('1'));
+
// Test beginning and end of line assertions with or without the
// multiline flag.
re = /^\d+/;
assertEquals(["bc"], re.exec("zimzomzumbc"));
assertFalse(re.test("c"));
assertFalse(re.test(""));
+