void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
stream()->Add("(# %i ", that->min());
- if (that->max() == RegExpQuantifier::kInfinity) {
+ if (that->max() == RegExpTree::kInfinity) {
stream()->Add("- ");
} else {
stream()->Add("%i ", that->max());
}
+RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
+ : alternatives_(alternatives) {
+ RegExpTree* first_alternative = alternatives->at(0);
+ min_match_ = first_alternative->min_match();
+ max_match_ = first_alternative->max_match();
+ for (int i = 1; i < alternatives->length(); i++) {
+ RegExpTree* alternative = alternatives->at(i);
+ min_match_ = Min(min_match_, alternative->min_match());
+ max_match_ = Max(max_match_, alternative->max_match());
+ }
+}
+
+
+RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
+ : nodes_(nodes) {
+ min_match_ = 0;
+ max_match_ = 0;
+ for (int i = 0; i < nodes->length(); i++) {
+ RegExpTree* node = nodes->at(i);
+ min_match_ += node->min_match();
+ int node_max_match = node->max_match();
+ if (kInfinity - max_match_ < node_max_match) {
+ max_match_ = kInfinity;
+ } else {
+ max_match_ += node->max_match();
+ }
+ }
+}
+
+
} } // namespace v8::internal
class RegExpTree: public ZoneObject {
public:
+ static const int kInfinity = (1<<31)-1;
virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) = 0;
virtual bool IsTextElement() { return false; }
+ virtual int min_match() = 0;
+ virtual int max_match() = 0;
virtual void AppendToText(RegExpText* text);
SmartPointer<const char> ToString();
#define MAKE_ASTYPE(Name) \
class RegExpDisjunction: public RegExpTree {
public:
- explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
- : alternatives_(alternatives) { }
+ explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
virtual RegExpDisjunction* AsDisjunction();
virtual bool IsDisjunction();
+ virtual int min_match() { return min_match_; }
+ virtual int max_match() { return max_match_; }
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
private:
ZoneList<RegExpTree*>* alternatives_;
+ int min_match_;
+ int max_match_;
};
class RegExpAlternative: public RegExpTree {
public:
- explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
+ explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
virtual RegExpAlternative* AsAlternative();
virtual bool IsAlternative();
+ virtual int min_match() { return min_match_; }
+ virtual int max_match() { return max_match_; }
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
ZoneList<RegExpTree*>* nodes_;
-};
-
-
-class RegExpText: public RegExpTree {
- public:
- RegExpText() : elements_(2) { }
- virtual void* Accept(RegExpVisitor* visitor, void* data);
- virtual RegExpNode* ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success);
- virtual RegExpText* AsText();
- virtual bool IsText();
- virtual bool IsTextElement() { return true; }
- virtual void AppendToText(RegExpText* text);
- void AddElement(TextElement elm) { elements_.Add(elm); }
- ZoneList<TextElement>* elements() { return &elements_; }
- private:
- ZoneList<TextElement> elements_;
+ int min_match_;
+ int max_match_;
};
RegExpNode* on_success);
virtual RegExpAssertion* AsAssertion();
virtual bool IsAssertion();
+ virtual int min_match() { return 0; }
+ virtual int max_match() { return 0; }
Type type() { return type_; }
private:
Type type_;
virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass();
virtual bool IsTextElement() { return true; }
+ virtual int min_match() { return 1; }
+ virtual int max_match() { return 1; }
virtual void AppendToText(RegExpText* text);
ZoneList<CharacterRange>* ranges() { return ranges_; }
bool is_negated() { return is_negated_; }
virtual RegExpAtom* AsAtom();
virtual bool IsAtom();
virtual bool IsTextElement() { return true; }
+ virtual int min_match() { return data_.length(); }
+ virtual int max_match() { return data_.length(); }
virtual void AppendToText(RegExpText* text);
Vector<const uc16> data() { return data_; }
+ int length() { return data_.length(); }
private:
Vector<const uc16> data_;
};
+class RegExpText: public RegExpTree {
+ public:
+ RegExpText() : elements_(2), length_(0) {}
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpText* AsText();
+ virtual bool IsText();
+ virtual bool IsTextElement() { return true; }
+ virtual int min_match() { return length_; }
+ virtual int max_match() { return length_; }
+ virtual void AppendToText(RegExpText* text);
+ void AddElement(TextElement elm) {
+ elements_.Add(elm);
+ length_ += elm.length();
+ };
+ ZoneList<TextElement>* elements() { return &elements_; }
+ private:
+ ZoneList<TextElement> elements_;
+ int length_;
+};
+
+
class RegExpQuantifier: public RegExpTree {
public:
RegExpQuantifier(int min, int max, bool is_greedy, RegExpTree* body)
: min_(min),
max_(max),
is_greedy_(is_greedy),
- body_(body) { }
+ body_(body),
+ min_match_(min * body->min_match()) {
+ if (max > 0 && body->max_match() > kInfinity / max) {
+ max_match_ = kInfinity;
+ } else {
+ max_match_ = max * body->max_match();
+ }
+ }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
RegExpNode* on_success);
virtual RegExpQuantifier* AsQuantifier();
virtual bool IsQuantifier();
+ virtual int min_match() { return min_match_; }
+ virtual int max_match() { return max_match_; }
int min() { return min_; }
int max() { return max_; }
bool is_greedy() { return is_greedy_; }
RegExpTree* body() { return body_; }
- // We just use a very large integer value as infinity because 2^30
- // is infinite in practice.
- static const int kInfinity = (1 << 30);
private:
int min_;
int max_;
bool is_greedy_;
RegExpTree* body_;
+ int min_match_;
+ int max_match_;
};
RegExpNode* on_success);
virtual RegExpCapture* AsCapture();
virtual bool IsCapture();
+ virtual int min_match() { return body_->min_match(); }
+ virtual int max_match() { return body_->max_match(); }
RegExpTree* body() { return body_; }
int index() { return index_; }
inline CaptureAvailability available() { return available_; }
RegExpNode* on_success);
virtual RegExpLookahead* AsLookahead();
virtual bool IsLookahead();
+ virtual int min_match() { return 0; }
+ virtual int max_match() { return 0; }
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
private:
RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
+ virtual int min_match() { return capture_->min_match(); }
+ virtual int max_match() { return capture_->max_match(); }
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
RegExpNode* on_success);
virtual RegExpEmpty* AsEmpty();
virtual bool IsEmpty();
+ virtual int min_match() { return 0; }
+ virtual int max_match() { return 0; }
static RegExpEmpty* GetInstance() { return &kInstance; }
private:
static RegExpEmpty kInstance;
}
+int TextElement::length() {
+ if (type == ATOM) {
+ return data.u_atom->length();
+ } else {
+ ASSERT(type == CHAR_CLASS);
+ return 1;
+ }
+}
+
+
DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
if (table_ == NULL) {
table_ = new DispatchTable();
// TODO(someone): clear captures on repetition and handle empty
// matches.
bool has_min = min > 0;
- bool has_max = max < RegExpQuantifier::kInfinity;
+ bool has_max = max < RegExpTree::kInfinity;
bool needs_counter = has_min || has_max;
int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
ChoiceNode* center = new LoopChoiceNode(2);
// since we don't even handle ^ yet I'm saving that optimization for
// later.
RegExpNode* node = RegExpQuantifier::ToNode(0,
- RegExpQuantifier::kInfinity,
+ RegExpTree::kInfinity,
false,
new RegExpCharacterClass('*'),
&compiler,
explicit TextElement(Type t) : type(t), cp_offset(-1) { }
static TextElement Atom(RegExpAtom* atom);
static TextElement CharClass(RegExpCharacterClass* char_class);
+ int length();
Type type;
union {
RegExpAtom* u_atom;
} else if (terms_.length() > 0) {
ASSERT(last_added_ == ADD_ATOM);
atom = terms_.RemoveLast();
- if (atom->IsLookahead() || atom->IsAssertion()) {
- // Guaranteed not to match a non-empty string.
- // Assertion as an atom can happen as, e.g., (?:\b)
+ if (atom->max_match() == 0) {
+ // Guaranteed to only match an empty string.
LAST(ADD_TERM);
if (min == 0) {
return;
// {
case '*':
min = 0;
- max = RegExpQuantifier::kInfinity;
+ max = RegExpTree::kInfinity;
Advance();
break;
case '+':
min = 1;
- max = RegExpQuantifier::kInfinity;
+ max = RegExpTree::kInfinity;
Advance();
break;
case '?':
} else if (current() == ',') {
Advance();
if (current() == '}') {
- max = RegExpQuantifier::kInfinity;
+ max = RegExpTree::kInfinity;
Advance();
} else {
while (IsDecimalDigit(current())) {
return result.simple;
}
+struct MinMaxPair {
+ int min_match;
+ int max_match;
+};
+
+static MinMaxPair CheckMinMaxMatch(const char* input) {
+ V8::Initialize(NULL);
+ v8::HandleScope scope;
+ unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+ FlatStringReader reader(CStrVector(input));
+ RegExpCompileData result;
+ CHECK(v8::internal::ParseRegExp(&reader, false, &result));
+ CHECK(result.tree != NULL);
+ CHECK(result.error.is_null());
+ int min_match = result.tree->min_match();
+ int max_match = result.tree->max_match();
+ MinMaxPair pair = { min_match, max_match };
+ return pair;
+}
+
+
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
+#define CHECK_MIN_MAX(input, min, max) \
+ { MinMaxPair min_max = CheckMinMaxMatch(input); \
+ CHECK_EQ(min, min_max.min_match); \
+ CHECK_EQ(max, min_max.max_match); \
+ }
TEST(Parser) {
V8::Initialize(NULL);
CHECK_PARSE_EQ("{12z}", "'{12z}'");
CHECK_PARSE_EQ("{12,", "'{12,'");
CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
+
+ CHECK_MIN_MAX("a", 1, 1);
+ CHECK_MIN_MAX("abc", 3, 3);
+ CHECK_MIN_MAX("a[bc]d", 3, 3);
+ CHECK_MIN_MAX("a|bc", 1, 2);
+ CHECK_MIN_MAX("ab|c", 1, 2);
+ CHECK_MIN_MAX("a||bc", 0, 2);
+ CHECK_MIN_MAX("|", 0, 0);
+ CHECK_MIN_MAX("(?:ab)", 2, 2);
+ CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
+ CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
+ CHECK_MIN_MAX("(ab)", 2, 2);
+ CHECK_MIN_MAX("(ab|cde)", 2, 3);
+ CHECK_MIN_MAX("(ab)\\1", 4, 4);
+ CHECK_MIN_MAX("(ab|cde)\\1", 4, 6);
+ CHECK_MIN_MAX("(?:ab)?", 0, 2);
+ CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a?", 0, 1);
+ CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a??", 0, 1);
+ CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a?)?", 0, 1);
+ CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a{0}", 0, 0);
+ CHECK_MIN_MAX("(?:a+){0}", 0, 0);
+ CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
+ CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
+ CHECK_MIN_MAX("a\\bc", 2, 2);
+ CHECK_MIN_MAX("a\\Bc", 2, 2);
+ CHECK_MIN_MAX("a\\sc", 3, 3);
+ CHECK_MIN_MAX("a\\Sc", 3, 3);
+ CHECK_MIN_MAX("a(?=b)c", 2, 2);
+ CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
+ CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
}
TEST(ParserRegression) {