From: christian.plesner.hansen@gmail.com Date: Mon, 1 Dec 2008 15:42:35 +0000 (+0000) Subject: - Added some expansion of assertions. X-Git-Tag: upstream/4.7.83~24931 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=917e91d1f274168dacb3cada11ed095f0141a173;p=platform%2Fupstream%2Fv8.git - Added some expansion of assertions. - Splitting of character classes into word and non-word parts. - A bunch of refactorings. - Made dispatch table construction lazy. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@880 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/char-predicates-inl.h b/src/char-predicates-inl.h index b575bdfe5..3ef2f9161 100644 --- a/src/char-predicates-inl.h +++ b/src/char-predicates-inl.h @@ -59,6 +59,25 @@ inline bool IsHexDigit(uc32 c) { } +inline bool IsRegExpWord(uc16 c) { + return ('a' <= c && c <= 'z') + || ('A' <= c && c <= 'Z') + || ('0' <= c && c <= '9') + || (c == '_'); +} + + +inline bool IsRegExpNewline(uc16 c) { + switch (c) { + // CR LF LS PS + case 0x000A: case 0x000D: case 0x2028: case 0x2029: + return false; + default: + return true; + } +} + + } } // namespace v8::internal #endif // V8_CHAR_PREDICATES_INL_H_ diff --git a/src/char-predicates.h b/src/char-predicates.h index 3fcaeb626..63e83b4d3 100644 --- a/src/char-predicates.h +++ b/src/char-predicates.h @@ -37,6 +37,8 @@ inline bool IsCarriageReturn(uc32 c); inline bool IsLineFeed(uc32 c); inline bool IsDecimalDigit(uc32 c); inline bool IsHexDigit(uc32 c); +inline bool IsRegExpWord(uc32 c); +inline bool IsRegExpNewline(uc32 c); struct IdentifierStart { static inline bool Is(uc32 c) { diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 439a8d109..8bafc28d6 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -897,6 +897,16 @@ TextElement TextElement::CharClass( } +DispatchTable* ChoiceNode::GetTable(bool ignore_case) { + if (table_ == NULL) { + table_ = new DispatchTable(); + DispatchTableConstructor cons(table_, ignore_case); + cons.BuildTable(this); + } + return table_; +} + + class RegExpCompiler { public: RegExpCompiler(int capture_count, bool ignore_case); @@ -1561,7 +1571,9 @@ bool BackReferenceNode::Emit(RegExpCompiler* compiler) { class DotPrinter: public NodeVisitor { public: - DotPrinter() : stream_(&alloc_) { } + explicit DotPrinter(bool ignore_case) + : ignore_case_(ignore_case), + stream_(&alloc_) { } void PrintNode(const char* label, RegExpNode* node); void Visit(RegExpNode* node); void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure); @@ -1572,6 +1584,7 @@ class DotPrinter: public NodeVisitor { FOR_EACH_NODE_TYPE(DECLARE_VISIT) #undef DECLARE_VISIT private: + bool ignore_case_; HeapStringAllocator alloc_; StringStream stream_; std::set seen_; @@ -1668,44 +1681,80 @@ class TableEntryHeaderPrinter { }; +class AttributePrinter { + public: + explicit AttributePrinter(DotPrinter* out) + : out_(out), first_(true) { } + void PrintSeparator() { + if (first_) { + first_ = false; + } else { + out_->stream()->Add("|"); + } + } + void PrintBit(const char* name, bool value) { + if (!value) return; + PrintSeparator(); + out_->stream()->Add("{%s}", name); + } + void PrintPositive(const char* name, int value) { + if (value < 0) return; + PrintSeparator(); + out_->stream()->Add("{%s|%x}", name, value); + } + private: + DotPrinter* out_; + bool first_; +}; + + void DotPrinter::PrintAttributes(RegExpNode* that) { - stream()->Add(" a%p [shape=Mrecord, style=dashed, color=lightgrey, " - "fontcolor=lightgrey, margin=0.1, fontsize=10, label=\"{", + stream()->Add(" a%p [shape=Mrecord, color=grey, fontcolor=grey, " + "margin=0.1, fontsize=10, label=\"{", that); + AttributePrinter printer(this); NodeInfo* info = that->info(); - stream()->Add("{NI|%i}|{WI|%i}|{SI|%i}", - info->follows_newline_interest, - info->follows_word_interest, - info->follows_start_interest); - stream()->Add("|{DN|%i}|{DW|%i}|{DS|%i}|{AE|%i}", - info->determine_newline, - info->determine_word, - info->determine_start, - info->at_end); - if (info->follows_newline != NodeInfo::UNKNOWN) - stream()->Add("|{FN|%i}", info->follows_newline); - if (info->follows_word != NodeInfo::UNKNOWN) - stream()->Add("|{FW|%i}", info->follows_word); - if (info->follows_start != NodeInfo::UNKNOWN) - stream()->Add("|{FS|%i}", info->follows_start); + printer.PrintBit("NI", info->follows_newline_interest); + printer.PrintBit("WI", info->follows_word_interest); + printer.PrintBit("SI", info->follows_start_interest); + printer.PrintBit("DN", info->determine_newline); + printer.PrintBit("DW", info->determine_word); + printer.PrintBit("DS", info->determine_start); + printer.PrintBit("DDN", info->does_determine_newline); + printer.PrintBit("DDW", info->does_determine_word); + printer.PrintBit("DDS", info->does_determine_start); + printer.PrintPositive("IW", info->is_word); + printer.PrintPositive("IN", info->is_newline); + printer.PrintPositive("FN", info->follows_newline); + printer.PrintPositive("FW", info->follows_word); + printer.PrintPositive("FS", info->follows_start); Label* label = that->label(); if (label->is_bound()) - stream()->Add("|{@|%x}", label->pos()); + printer.PrintPositive("@", label->pos()); stream()->Add("}\"];\n"); - stream()->Add(" a%p -> n%p [style=dashed, color=lightgrey, " + stream()->Add(" a%p -> n%p [style=dashed, color=grey, " "arrowhead=none];\n", that, that); } +static const bool kPrintDispatchTable = false; void DotPrinter::VisitChoice(ChoiceNode* that) { - stream()->Add(" n%p [shape=Mrecord, label=\"", that); - TableEntryHeaderPrinter header_printer(stream()); - that->table()->ForEach(&header_printer); - stream()->Add("\"]\n", that); - PrintAttributes(that); - TableEntryBodyPrinter body_printer(stream(), that); - that->table()->ForEach(&body_printer); - PrintOnFailure(that, that->on_failure()); + if (kPrintDispatchTable) { + stream()->Add(" n%p [shape=Mrecord, label=\"", that); + TableEntryHeaderPrinter header_printer(stream()); + that->GetTable(ignore_case_)->ForEach(&header_printer); + stream()->Add("\"]\n", that); + PrintAttributes(that); + TableEntryBodyPrinter body_printer(stream(), that); + that->GetTable(ignore_case_)->ForEach(&body_printer); + PrintOnFailure(that, that->on_failure()); + } else { + stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that); + for (int i = 0; i < that->alternatives()->length(); i++) { + GuardedAlternative alt = that->alternatives()->at(i); + stream()->Add(" n%p -> n%p;\n", that, alt.node()); + } + } for (int i = 0; i < that->alternatives()->length(); i++) { GuardedAlternative alt = that->alternatives()->at(i); alt.node()->Accept(this); @@ -1837,8 +1886,10 @@ void DispatchTable::Dump() { } -void RegExpEngine::DotPrint(const char* label, RegExpNode* node) { - DotPrinter printer; +void RegExpEngine::DotPrint(const char* label, + RegExpNode* node, + bool ignore_case) { + DotPrinter printer(ignore_case); printer.PrintNode(label, node); } @@ -2181,6 +2232,56 @@ void CharacterRange::AddClassEscape(uc16 type, } +Vector CharacterRange::GetWordBounds() { + return Vector(kWordRanges, kWordRangeCount); +} + + +class CharacterRangeSplitter { + public: + CharacterRangeSplitter(ZoneList** included, + ZoneList** excluded) + : included_(included), + excluded_(excluded) { } + void Call(uc16 from, DispatchTable::Entry entry); + + static const int kInBase = 0; + static const int kInOverlay = 1; + + private: + ZoneList** included_; + ZoneList** excluded_; +}; + + +void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) { + if (!entry.out_set()->Get(kInBase)) return; + ZoneList** target = entry.out_set()->Get(kInOverlay) + ? included_ + : excluded_; + if (*target == NULL) *target = new ZoneList(2); + (*target)->Add(CharacterRange(entry.from(), entry.to())); +} + + +void CharacterRange::Split(ZoneList* base, + Vector overlay, + ZoneList** included, + ZoneList** excluded) { + ASSERT_EQ(NULL, *included); + ASSERT_EQ(NULL, *excluded); + DispatchTable table; + for (int i = 0; i < base->length(); i++) + table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); + for (int i = 0; i < overlay.length(); i += 2) { + table.AddRange(CharacterRange(overlay[i], overlay[i+1]), + CharacterRangeSplitter::kInOverlay); + } + CharacterRangeSplitter callback(included, excluded); + table.ForEach(&callback); +} + + void CharacterRange::AddCaseEquivalents(ZoneList* ranges) { unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; if (IsSingleton()) { @@ -2266,40 +2367,47 @@ void CharacterRange::AddCaseEquivalents(ZoneList* ranges) { // Interest propagation -RegExpNode* RegExpNode::GetSibling(NodeInfo* info) { +RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) { for (int i = 0; i < siblings_.length(); i++) { RegExpNode* sibling = siblings_.Get(i); - if (sibling->info()->HasSameForwardInterests(info)) + if (sibling->info()->Matches(info)) return sibling; } return NULL; } +RegExpNode* RegExpNode::EnsureSibling(NodeInfo* info, bool* cloned) { + ASSERT_EQ(false, *cloned); + ASSERT(!info->HasAssertions()); + siblings_.Ensure(this); + RegExpNode* result = TryGetSibling(info); + if (result != NULL) return result; + result = this->Clone(); + NodeInfo* new_info = result->info(); + new_info->ResetCompilationState(); + new_info->AddFromPreceding(info); + AddSibling(result); + *cloned = true; + return result; +} + + template static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) { NodeInfo full_info(*node->info()); full_info.AddFromPreceding(info); - RegExpNode* sibling = node->GetSibling(&full_info); - if (sibling != NULL) return sibling; - node->EnsureSiblings(); - sibling = new C(*node); - sibling->info()->AddFromPreceding(&full_info); - node->AddSibling(sibling); - return sibling; + bool cloned = false; + return RegExpNode::EnsureSibling(node, &full_info, &cloned); } RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { NodeInfo full_info(*this->info()); full_info.AddFromPreceding(info); - RegExpNode* sibling = GetSibling(&full_info); - if (sibling != NULL) return sibling; - EnsureSiblings(); - ActionNode* action = new ActionNode(*this); - action->info()->AddFromPreceding(&full_info); - AddSibling(action); - if (type_ != ESCAPE_SUBMATCH) { + bool cloned = false; + ActionNode* action = EnsureSibling(this, &full_info, &cloned); + if (cloned && type_ != ESCAPE_SUBMATCH) { action->set_on_success(action->on_success()->PropagateForward(info)); } return action; @@ -2309,22 +2417,20 @@ RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { NodeInfo full_info(*this->info()); full_info.AddFromPreceding(info); - RegExpNode* sibling = GetSibling(&full_info); - if (sibling != NULL) return sibling; - EnsureSiblings(); - ChoiceNode* choice = new ChoiceNode(*this); - choice->info()->AddFromPreceding(&full_info); - AddSibling(choice); - ZoneList* old_alternatives = alternatives(); - int count = old_alternatives->length(); - choice->alternatives_ = new ZoneList(count); - for (int i = 0; i < count; i++) { - GuardedAlternative alternative = old_alternatives->at(i); - alternative.set_node(alternative.node()->PropagateForward(info)); - choice->alternatives()->Add(alternative); - } - if (!choice->on_failure_->IsBacktrack()) { - choice->on_failure_ = choice->on_failure_->PropagateForward(info); + bool cloned = false; + ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned); + if (cloned) { + ZoneList* old_alternatives = alternatives(); + int count = old_alternatives->length(); + choice->alternatives_ = new ZoneList(count); + for (int i = 0; i < count; i++) { + GuardedAlternative alternative = old_alternatives->at(i); + alternative.set_node(alternative.node()->PropagateForward(info)); + choice->alternatives()->Add(alternative); + } + if (!choice->on_failure_->IsBacktrack()) { + choice->on_failure_ = choice->on_failure_->PropagateForward(info); + } } return choice; } @@ -2338,18 +2444,16 @@ RegExpNode* EndNode::PropagateForward(NodeInfo* info) { RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) { NodeInfo full_info(*this->info()); full_info.AddFromPreceding(info); - RegExpNode* sibling = GetSibling(&full_info); - if (sibling != NULL) return sibling; - EnsureSiblings(); - BackReferenceNode* back_ref = new BackReferenceNode(*this); - back_ref->info()->AddFromPreceding(&full_info); - AddSibling(back_ref); - // TODO(erikcorry): A back reference has to have two successors (by default - // the same node). The first is used if the back reference matches a non- - // empty back reference, the second if it matches an empty one. This doesn't - // matter for at_end, which is the only one implemented right now, but it will - // matter for other pieces of info. - back_ref->set_on_success(back_ref->on_success()->PropagateForward(info)); + bool cloned = false; + BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned); + if (cloned) { + // TODO(erikcorry): A back reference has to have two successors (by default + // the same node). The first is used if the back reference matches a non- + // empty back reference, the second if it matches an empty one. This + // doesn't matter for at_end, which is the only one implemented right now, + // but it will matter for other pieces of info. + back_ref->set_on_success(back_ref->on_success()->PropagateForward(info)); + } return back_ref; } @@ -2560,10 +2664,6 @@ void Analysis::VisitChoice(ChoiceNode* that) { // this node also, so it can pass it on. info->AddFromFollowing(node->info()); } - if (!that->table_calculated()) { - DispatchTableConstructor cons(that->table()); - cons.BuildTable(that); - } EnsureAnalyzed(that->on_failure()); } @@ -2574,6 +2674,208 @@ void Analysis::VisitBackReference(BackReferenceNode* that) { } +// ------------------------------------------------------------------- +// Assumption expansion + + +RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { + siblings_.Ensure(this); + NodeInfo new_info = *this->info(); + if (new_info.follows_word_interest) + new_info.follows_word = info->follows_word; + if (new_info.follows_newline_interest) + new_info.follows_newline = info->follows_newline; + // If the following node should determine something we need to get + // a sibling that determines it. + new_info.does_determine_newline = new_info.determine_newline; + new_info.does_determine_word = new_info.determine_word; + new_info.does_determine_start = new_info.determine_start; + RegExpNode* sibling = TryGetSibling(&new_info); + if (sibling == NULL) { + sibling = ExpandLocal(&new_info); + siblings_.Add(sibling); + sibling->info()->being_expanded = true; + sibling->ExpandChildren(); + sibling->info()->being_expanded = false; + sibling->info()->been_expanded = true; + } else { + NodeInfo* sib_info = sibling->info(); + if (!sib_info->been_expanded && !sib_info->being_expanded) { + sibling->info()->being_expanded = true; + sibling->ExpandChildren(); + sibling->info()->being_expanded = false; + sibling->info()->been_expanded = true; + } + } + return sibling; +} + + +RegExpNode* ChoiceNode::ExpandLocal(NodeInfo* info) { + ChoiceNode* clone = this->Clone(); + clone->info()->ResetCompilationState(); + clone->info()->AddAssumptions(info); + return clone; +} + + +void ChoiceNode::ExpandChildren() { + ZoneList* alts = alternatives(); + ZoneList* new_alts + = new ZoneList(alts->length()); + for (int i = 0; i < alts->length(); i++) { + GuardedAlternative next = alts->at(i); + next.set_node(next.node()->EnsureExpanded(info())); + new_alts->Add(next); + } + alternatives_ = new_alts; +} + + +RegExpNode* TextNode::ExpandLocal(NodeInfo* info) { + TextElement last = elements()->last(); + if (last.type == TextElement::CHAR_CLASS) { + RegExpCharacterClass* char_class = last.data.u_char_class; + if (info->does_determine_word) { + ZoneList* word = NULL; + ZoneList* non_word = NULL; + CharacterRange::Split(char_class->ranges(), + CharacterRange::GetWordBounds(), + &word, + &non_word); + if (non_word == NULL) { + // This node contains no non-word characters so it must be + // all word. + this->info()->is_word = NodeInfo::TRUE; + } else if (word == NULL) { + // Vice versa. + this->info()->is_word = NodeInfo::FALSE; + } else { + // If this character class contains both word and non-word + // characters we need to split it into two. + ChoiceNode* result = new ChoiceNode(2, on_failure()); + // Welcome to the family, son! + result->set_siblings(this->siblings()); + *result->info() = *this->info(); + result->info()->ResetCompilationState(); + result->info()->AddAssumptions(info); + RegExpNode* word_node + = new TextNode(new RegExpCharacterClass(word, false), + on_success(), + on_failure()); + word_node->info()->determine_word = true; + word_node->info()->does_determine_word = true; + word_node->info()->is_word = NodeInfo::TRUE; + result->alternatives()->Add(GuardedAlternative(word_node)); + RegExpNode* non_word_node + = new TextNode(new RegExpCharacterClass(non_word, false), + on_success(), + on_failure()); + non_word_node->info()->determine_word = true; + non_word_node->info()->does_determine_word = true; + non_word_node->info()->is_word = NodeInfo::FALSE; + result->alternatives()->Add(GuardedAlternative(non_word_node)); + return result; + } + } + } + TextNode* clone = this->Clone(); + clone->info()->ResetCompilationState(); + clone->info()->AddAssumptions(info); + return clone; +} + + +void TextNode::ExpandAtomChildren(RegExpAtom* that) { + NodeInfo new_info = *info(); + uc16 last = that->data()[that->data().length() - 1]; + if (info()->determine_word) { + new_info.follows_word = IsRegExpWord(last) + ? NodeInfo::TRUE : NodeInfo::FALSE; + } else { + new_info.follows_word = NodeInfo::UNKNOWN; + } + if (info()->determine_newline) { + new_info.follows_newline = IsRegExpNewline(last) + ? NodeInfo::TRUE : NodeInfo::FALSE; + } else { + new_info.follows_newline = NodeInfo::UNKNOWN; + } + if (info()->determine_start) { + new_info.follows_start = NodeInfo::FALSE; + } else { + new_info.follows_start = NodeInfo::UNKNOWN; + } + set_on_success(on_success()->EnsureExpanded(&new_info)); +} + + +void TextNode::ExpandCharClassChildren(RegExpCharacterClass* that) { + if (info()->does_determine_word) { + // ASSERT(info()->is_word != NodeInfo::UNKNOWN); + NodeInfo next_info = *on_success()->info(); + next_info.follows_word = info()->is_word; + set_on_success(on_success()->EnsureExpanded(&next_info)); + } else { + set_on_success(on_success()->EnsureExpanded(info())); + } +} + + +void TextNode::ExpandChildren() { + TextElement last = elements()->last(); + switch (last.type) { + case TextElement::ATOM: + ExpandAtomChildren(last.data.u_atom); + break; + case TextElement::CHAR_CLASS: + ExpandCharClassChildren(last.data.u_char_class); + break; + default: + UNREACHABLE(); + } +} + + +RegExpNode* ActionNode::ExpandLocal(NodeInfo* info) { + ActionNode* clone = this->Clone(); + clone->info()->ResetCompilationState(); + clone->info()->AddAssumptions(info); + return clone; +} + + +void ActionNode::ExpandChildren() { + set_on_success(on_success()->EnsureExpanded(info())); +} + + +RegExpNode* BackReferenceNode::ExpandLocal(NodeInfo* info) { + BackReferenceNode* clone = this->Clone(); + clone->info()->ResetCompilationState(); + clone->info()->AddAssumptions(info); + return clone; +} + + +void BackReferenceNode::ExpandChildren() { + set_on_success(on_success()->EnsureExpanded(info())); +} + + +RegExpNode* EndNode::ExpandLocal(NodeInfo* info) { + EndNode* clone = this->Clone(); + clone->info()->ResetCompilationState(); + clone->info()->AddAssumptions(info); + return clone; +} + + +void EndNode::ExpandChildren() { + // nothing to do +} + + // ------------------------------------------------------------------- // Dispatch table construction @@ -2584,7 +2886,6 @@ void DispatchTableConstructor::VisitEnd(EndNode* that) { void DispatchTableConstructor::BuildTable(ChoiceNode* node) { - ASSERT(!node->table_calculated()); node->set_being_calculated(true); ZoneList* alternatives = node->alternatives(); for (int i = 0; i < alternatives->length(); i++) { @@ -2592,7 +2893,6 @@ void DispatchTableConstructor::BuildTable(ChoiceNode* node) { alternatives->at(i).node()->Accept(this); } node->set_being_calculated(false); - node->set_table_calculated(true); } @@ -2615,13 +2915,9 @@ void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) { void DispatchTableConstructor::VisitChoice(ChoiceNode* node) { if (node->being_calculated()) return; - if (!node->table_calculated()) { - DispatchTableConstructor constructor(node->table()); - constructor.BuildTable(node); - } - ASSERT(node->table_calculated()); + DispatchTable* table = node->GetTable(ignore_case_); AddDispatchRange adder(this); - node->table()->ForEach(&adder); + table->ForEach(&adder); } @@ -2716,6 +3012,9 @@ Handle RegExpEngine::Compile(RegExpParseResult* input, Analysis analysis(ignore_case); analysis.EnsureAnalyzed(node); + NodeInfo info = *node->info(); + node = node->EnsureExpanded(&info); + if (!FLAG_irregexp) { return Handle::null(); } diff --git a/src/jsregexp.h b/src/jsregexp.h index a691b127a..38357d670 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -185,6 +185,7 @@ class CharacterRange { CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } static void AddClassEscape(uc16 type, ZoneList* ranges); + static Vector GetWordBounds(); static inline CharacterRange Singleton(uc16 value) { return CharacterRange(value, value); } @@ -203,9 +204,15 @@ class CharacterRange { bool is_valid() { return from_ <= to_; } bool IsSingleton() { return (from_ == to_); } void AddCaseEquivalents(ZoneList* ranges); + static void Split(ZoneList* base, + Vector overlay, + ZoneList** included, + ZoneList** excluded); + static const int kRangeCanonicalizeMax = 0x346; static const int kStartMarker = (1 << 24); static const int kPayloadMask = (1 << 24) - 1; + private: uc16 from_; uc16 to_; @@ -344,7 +351,7 @@ class OutSet: public ZoneObject { // A mapping from integers, specified as ranges, to a set of integers. // Used for mapping character ranges to choices. -class DispatchTable { +class DispatchTable : public ZoneObject { public: class Entry { public: @@ -437,29 +444,50 @@ class TextElement { struct NodeInfo { - enum PrecedingInfo { + enum TriBool { UNKNOWN = -1, FALSE = 0, TRUE = 1 }; NodeInfo() : being_analyzed(false), been_analyzed(false), + being_expanded(false), + been_expanded(false), determine_word(false), determine_newline(false), determine_start(false), + does_determine_word(false), + does_determine_newline(false), + does_determine_start(false), follows_word_interest(false), follows_newline_interest(false), follows_start_interest(false), + is_word(UNKNOWN), + is_newline(UNKNOWN), at_end(false), follows_word(UNKNOWN), follows_newline(UNKNOWN), follows_start(UNKNOWN) { } - bool HasSameForwardInterests(NodeInfo* that) { - return (at_end == that->at_end) - && (follows_word_interest == that->follows_word_interest) - && (follows_newline_interest == that->follows_newline_interest) - && (follows_start_interest == that->follows_start_interest); + // Returns true if the interests and assumptions of this node + // matches the given one. + bool Matches(NodeInfo* that) { + return (at_end == that->at_end) && + (follows_word_interest == that->follows_word_interest) && + (follows_newline_interest == that->follows_newline_interest) && + (follows_start_interest == that->follows_start_interest) && + (follows_word == that->follows_word) && + (follows_newline == that->follows_newline) && + (follows_start == that->follows_start) && + (does_determine_word == that->does_determine_word) && + (does_determine_newline == that->does_determine_newline) && + (does_determine_start == that->does_determine_start); + } + + bool HasAssertions() { + return (follows_word != UNKNOWN) || + (follows_newline != UNKNOWN) || + (follows_start != UNKNOWN); } // Updates the interests of this node given the interests of the @@ -471,6 +499,26 @@ struct NodeInfo { follows_start_interest |= that->follows_start_interest; } + void AddAssumptions(NodeInfo* that) { + if (that->follows_word != UNKNOWN) { + ASSERT(follows_word == UNKNOWN || follows_word == that->follows_word); + follows_word = that->follows_word; + } + if (that->follows_newline != UNKNOWN) { + ASSERT(follows_newline == UNKNOWN || + follows_newline == that->follows_newline); + follows_newline = that->follows_newline; + } + if (that->follows_start != UNKNOWN) { + ASSERT(follows_start == UNKNOWN || + follows_start == that->follows_start); + follows_start = that->follows_start; + } + does_determine_word = that->does_determine_word; + does_determine_newline = that->does_determine_newline; + does_determine_start = that->does_determine_start; + } + // Sets the interests of this node to include the interests of the // following node. void AddFromFollowing(NodeInfo* that) { @@ -479,8 +527,17 @@ struct NodeInfo { follows_start_interest |= that->follows_start_interest; } + void ResetCompilationState() { + being_analyzed = false; + been_analyzed = false; + being_expanded = false; + been_expanded = false; + } + bool being_analyzed: 1; bool been_analyzed: 1; + bool being_expanded: 1; + bool been_expanded: 1; // These bits are set if this node must propagate forward information // about the last character it consumed (or, in the case of 'start', @@ -489,19 +546,40 @@ struct NodeInfo { bool determine_newline: 1; bool determine_start: 1; + bool does_determine_word: 1; + bool does_determine_newline: 1; + bool does_determine_start: 1; + // These bits are set of this node has to know what the preceding // character was. bool follows_word_interest: 1; bool follows_newline_interest: 1; bool follows_start_interest: 1; + TriBool is_word: 2; + TriBool is_newline: 2; + bool at_end: 1; // These bits are set if the node can make assumptions about what // the previous character was. - PrecedingInfo follows_word: 2; - PrecedingInfo follows_newline: 2; - PrecedingInfo follows_start: 2; + TriBool follows_word: 2; + TriBool follows_newline: 2; + TriBool follows_start: 2; +}; + + +class ExpansionGuard { + public: + explicit inline ExpansionGuard(NodeInfo* info) : info_(info) { + ASSERT(!info->being_expanded); + info->being_expanded = true; + } + inline ~ExpansionGuard() { + info_->being_expanded = false; + } + private: + NodeInfo* info_; }; @@ -538,6 +616,10 @@ class RegExpNode: public ZoneObject { // false for failure. virtual bool Emit(RegExpCompiler* compiler) = 0; + RegExpNode* EnsureExpanded(NodeInfo* info); + virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0; + virtual void ExpandChildren() = 0; + // Propagates the given interest information forward. When seeing // \bfoo for instance, the \b is implemented by propagating forward // to the 'foo' string that it should only succeed if its first @@ -546,11 +628,40 @@ class RegExpNode: public ZoneObject { NodeInfo* info() { return &info_; } virtual bool IsBacktrack() { return false; } - RegExpNode* GetSibling(NodeInfo* info); - void EnsureSiblings() { siblings_.Ensure(this); } + void AddSibling(RegExpNode* node) { siblings_.Add(node); } + + // Static version of EnsureSibling that expresses the fact that the + // result has the same type as the input. + template + static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) { + return static_cast(node->EnsureSibling(info, cloned)); + } + + SiblingList* siblings() { return &siblings_; } + void set_siblings(SiblingList* other) { siblings_ = *other; } + protected: + + // Returns a sibling of this node whose interests and assumptions + // match the ones in the given node info. If no sibling exists NULL + // is returned. + RegExpNode* TryGetSibling(NodeInfo* info); + + // Returns a sibling of this node whose interests match the ones in + // the given node info. The info must not contain any assertions. + // If no node exists a new one will be created by cloning the current + // node. The result will always be an instance of the same concrete + // class as this node. + RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned); + + // Returns a clone of this node initialized using the copy constructor + // of its concrete class. Note that the node may have to be pre- + // processed before it is on a useable state. + virtual RegExpNode* Clone() = 0; + inline void Bind(RegExpMacroAssembler* macro); + private: Label label_; NodeInfo info_; @@ -593,7 +704,11 @@ class ActionNode: public SeqRegExpNode { RegExpNode* on_success); virtual void Accept(NodeVisitor* visitor); virtual bool Emit(RegExpCompiler* compiler); + virtual RegExpNode* ExpandLocal(NodeInfo* info); + virtual void ExpandChildren(); virtual RegExpNode* PropagateForward(NodeInfo* info); + virtual ActionNode* Clone() { return new ActionNode(*this); } + private: union { struct { @@ -627,13 +742,28 @@ class TextNode: public SeqRegExpNode { : SeqRegExpNode(on_success), on_failure_(on_failure), elms_(elms) { } + TextNode(RegExpCharacterClass* that, + RegExpNode* on_success, + RegExpNode* on_failure) + : SeqRegExpNode(on_success), + on_failure_(on_failure), + elms_(new ZoneList(1)) { + elms_->Add(TextElement::CharClass(that)); + } virtual void Accept(NodeVisitor* visitor); virtual RegExpNode* PropagateForward(NodeInfo* info); + virtual RegExpNode* ExpandLocal(NodeInfo* info); + virtual void ExpandChildren(); RegExpNode* on_failure() { return on_failure_; } virtual bool Emit(RegExpCompiler* compiler); ZoneList* elements() { return elms_; } void MakeCaseIndependent(); + virtual TextNode* Clone() { return new TextNode(*this); } + private: + void ExpandAtomChildren(RegExpAtom* that); + void ExpandCharClassChildren(RegExpCharacterClass* that); + RegExpNode* on_failure_; ZoneList* elms_; }; @@ -655,6 +785,10 @@ class BackReferenceNode: public SeqRegExpNode { int end_register() { return end_reg_; } virtual bool Emit(RegExpCompiler* compiler); virtual RegExpNode* PropagateForward(NodeInfo* info); + virtual RegExpNode* ExpandLocal(NodeInfo* info); + virtual void ExpandChildren(); + virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } + private: RegExpNode* on_failure_; int start_reg_; @@ -669,8 +803,12 @@ class EndNode: public RegExpNode { virtual void Accept(NodeVisitor* visitor); virtual bool Emit(RegExpCompiler* compiler); virtual RegExpNode* PropagateForward(NodeInfo* info); + virtual RegExpNode* ExpandLocal(NodeInfo* info); + virtual void ExpandChildren(); virtual bool IsBacktrack() { return action_ == BACKTRACK; } virtual bool GoTo(RegExpCompiler* compiler); + virtual EndNode* Clone() { return new EndNode(*this); } + private: Action action_; }; @@ -686,6 +824,7 @@ class Guard: public ZoneObject { int reg() { return reg_; } Relation op() { return op_; } int value() { return value_; } + private: int reg_; Relation op_; @@ -700,6 +839,7 @@ class GuardedAlternative { RegExpNode* node() { return node_; } void set_node(RegExpNode* node) { node_ = node; } ZoneList* guards() { return guards_; } + private: RegExpNode* node_; ZoneList* guards_; @@ -711,27 +851,31 @@ class ChoiceNode: public RegExpNode { explicit ChoiceNode(int expected_size, RegExpNode* on_failure) : on_failure_(on_failure), alternatives_(new ZoneList(expected_size)), - table_calculated_(false), + table_(NULL), being_calculated_(false) { } virtual void Accept(NodeVisitor* visitor); void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } ZoneList* alternatives() { return alternatives_; } - DispatchTable* table() { return &table_; } + DispatchTable* GetTable(bool ignore_case); RegExpNode* on_failure() { return on_failure_; } virtual bool Emit(RegExpCompiler* compiler); virtual RegExpNode* PropagateForward(NodeInfo* info); - bool table_calculated() { return table_calculated_; } - void set_table_calculated(bool b) { table_calculated_ = b; } + virtual RegExpNode* ExpandLocal(NodeInfo* info); + virtual void ExpandChildren(); + virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } + bool being_calculated() { return being_calculated_; } void set_being_calculated(bool b) { being_calculated_ = b; } + private: + friend class DispatchTableConstructor; + friend class Analysis; void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard *guard, Label* on_failure); RegExpNode* on_failure_; ZoneList* alternatives_; - DispatchTable table_; - bool table_calculated_; + DispatchTable* table_; bool being_calculated_; }; @@ -750,9 +894,10 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT) // dispatch table of a choice node. class DispatchTableConstructor: public NodeVisitor { public: - explicit DispatchTableConstructor(DispatchTable* table) + DispatchTableConstructor(DispatchTable* table, bool ignore_case) : table_(table), - choice_index_(-1) { } + choice_index_(-1), + ignore_case_(ignore_case) { } void BuildTable(ChoiceNode* node); @@ -773,6 +918,7 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT) protected: DispatchTable *table_; int choice_index_; + bool ignore_case_; }; @@ -808,7 +954,7 @@ class RegExpEngine: public AllStatic { RegExpNode** node_return, bool ignore_case, bool multiline); - static void DotPrint(const char* label, RegExpNode* node); + static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); }; diff --git a/src/parser.cc b/src/parser.cc index 50412c4d3..8217315c8 100644 --- a/src/parser.cc +++ b/src/parser.cc @@ -1028,7 +1028,7 @@ class LexicalScope BASE_EMBEDDED { #define DUMMY ) // to make indentation work #undef DUMMY -#define CHECK_FAILED ); \ +#define CHECK_FAILED /**/); \ if (failed_) return NULL; \ ((void)0 #define DUMMY ) // to make indentation work diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc index 8be425e4f..2212949a3 100644 --- a/test/cctest/test-regexp.cc +++ b/test/cctest/test-regexp.cc @@ -324,27 +324,8 @@ static bool NotWhiteSpace(uc16 c) { } -static bool IsWord(uc16 c) { - return ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || (c == '_'); -} - - static bool NotWord(uc16 c) { - return !IsWord(c); -} - - -static bool Dot(uc16 c) { - switch (c) { - // CR LF LS PS - case 0x000A: case 0x000D: case 0x2028: case 0x2029: - return false; - default: - return true; - } + return !IsRegExpWord(c); } @@ -364,12 +345,12 @@ static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { TEST(CharacterClassEscapes) { - TestCharacterClassEscapes('.', Dot); + TestCharacterClassEscapes('.', IsRegExpNewline); TestCharacterClassEscapes('d', IsDigit); TestCharacterClassEscapes('D', NotDigit); TestCharacterClassEscapes('s', IsWhiteSpace); TestCharacterClassEscapes('S', NotWhiteSpace); - TestCharacterClassEscapes('w', IsWord); + TestCharacterClassEscapes('w', IsRegExpWord); TestCharacterClassEscapes('W', NotWord); } @@ -395,7 +376,7 @@ static void Execute(const char* input, USE(node); #ifdef DEBUG if (dot_output) { - RegExpEngine::DotPrint(input, node); + RegExpEngine::DotPrint(input, node, false); exit(0); } #endif // DEBUG @@ -964,7 +945,7 @@ TEST(AddInverseToTable) { ranges->Add(CharacterRange(from, to)); } DispatchTable table; - DispatchTableConstructor cons(&table); + DispatchTableConstructor cons(&table, false); cons.set_choice_index(0); cons.AddInverse(ranges); for (int i = 0; i < kLimit; i++) { @@ -980,7 +961,7 @@ TEST(AddInverseToTable) { new ZoneList(1); ranges->Add(CharacterRange(0xFFF0, 0xFFFE)); DispatchTable table; - DispatchTableConstructor cons(&table); + DispatchTableConstructor cons(&table, false); cons.set_choice_index(0); cons.AddInverse(ranges); CHECK(!table.Get(0xFFFE)->Get(0)); @@ -1162,7 +1143,45 @@ TEST(CharacterRangeCaseIndependence) { } +static bool InClass(uc16 c, ZoneList* ranges) { + if (ranges == NULL) + return false; + for (int i = 0; i < ranges->length(); i++) { + CharacterRange range = ranges->at(i); + if (range.from() <= c && c <= range.to()) + return true; + } + return false; +} + + +TEST(CharClassDifference) { + ZoneScope zone_scope(DELETE_ON_EXIT); + ZoneList* base = new ZoneList(1); + base->Add(CharacterRange::Everything()); + Vector overlay = CharacterRange::GetWordBounds(); + ZoneList* included = NULL; + ZoneList* excluded = NULL; + CharacterRange::Split(base, overlay, &included, &excluded); + for (int i = 0; i < (1 << 16); i++) { + bool in_base = InClass(i, base); + if (in_base) { + bool in_overlay = false; + for (int j = 0; !in_overlay && j < overlay.length(); j += 2) { + if (overlay[j] <= i && i <= overlay[j+1]) + in_overlay = true; + } + CHECK_EQ(in_overlay, InClass(i, included)); + CHECK_EQ(!in_overlay, InClass(i, excluded)); + } else { + CHECK(!InClass(i, included)); + CHECK(!InClass(i, excluded)); + } + } +} + + TEST(Graph) { V8::Initialize(NULL); - Execute("foo$(?!bar)", false, true); + Execute("\\b\\w", false, true); }