From 9b8a56d4335ac5e591c6da5fe4354a4c49734031 Mon Sep 17 00:00:00 2001 From: erikcorry Date: Tue, 9 Jun 2015 16:10:04 -0700 Subject: [PATCH] Revert of Reland of 'Optimize trivial regexp disjunctions' CL 1176453002 (patchset #2 id:20001 of https://codereview.chromium.org/1174713002/) Reason for revert: Tree looks like a Christmas tree and this isn't helping Original issue's description: > Reland of 'Optimize trivial regexp disjunctions' CL 1176453002 > > Original code review: https://codereview.chromium.org/1176453002/ > > TBR=yangguo@chromium.org > BUG=chromium:482998 > LOG=n > > Committed: https://crrev.com/85fab0fa092e8d979413f6a61baec3abe26e568d > Cr-Commit-Position: refs/heads/master@{#28884} TBR=yangguo@chromium.org,erikcorry@chromium.org NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=chromium:482998 Review URL: https://codereview.chromium.org/1172893002 Cr-Commit-Position: refs/heads/master@{#28885} --- src/ast.h | 3 - src/jsregexp.cc | 190 --------------------------------------- src/jsregexp.h | 2 +- src/list-inl.h | 35 +------- src/list.h | 5 -- src/vector.h | 14 --- test/cctest/test-heap.cc | 4 +- 7 files changed, 7 insertions(+), 246 deletions(-) diff --git a/src/ast.h b/src/ast.h index 0dc9e6066..01154d376 100644 --- a/src/ast.h +++ b/src/ast.h @@ -2872,9 +2872,6 @@ class RegExpDisjunction final : public RegExpTree { int max_match() override { return max_match_; } ZoneList* alternatives() { return alternatives_; } private: - bool SortConsecutiveAtoms(RegExpCompiler* compiler); - void RationalizeConsecutiveAtoms(RegExpCompiler* compiler); - void FixSingleCharacterDisjunctions(RegExpCompiler* compiler); ZoneList* alternatives_; int min_match_; int max_match_; diff --git a/src/jsregexp.cc b/src/jsregexp.cc index e284e8cb1..b410d47b3 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -4817,200 +4817,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, } -int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { - RegExpAtom* atom1 = (*a)->AsAtom(); - RegExpAtom* atom2 = (*b)->AsAtom(); - uc16 character1 = atom1->data().at(0); - uc16 character2 = atom2->data().at(0); - if (character1 < character2) return -1; - if (character1 > character2) return 1; - return 0; -} - - -// We can stable sort runs of atoms, since the order does not matter if they -// start with different characters. -// Returns true if any consecutive atoms were found. -bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) { - ZoneList* alternatives = this->alternatives(); - int length = alternatives->length(); - bool found_consecutive_atoms = false; - for (int i = 0; i < length; i++) { - while (i < length) { - RegExpTree* alternative = alternatives->at(i); - if (alternative->IsAtom()) break; - i++; - } - // i is length or it is the index of an atom. - if (i == length) break; - int first_atom = i; - i++; - while (i < length) { - RegExpTree* alternative = alternatives->at(i); - if (!alternative->IsAtom()) break; - i++; - } - // Sort atoms to get ones with common prefixes together. - // This step is not valid if we are in a case-independent regexp, - // because it would change /is|I/ to /I|is/, and order matters when - // the regexp parts don't match only disjoint starting points. To fix - // this would need a version of CompareFirstChar that uses case- - // independent character classes for comparison. - if (!compiler->ignore_case()) { - DCHECK_LT(first_atom, alternatives->length()); - DCHECK_LE(i, alternatives->length()); - DCHECK_LE(first_atom, i); - alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom); - } - if (i - first_atom > 1) found_consecutive_atoms = true; - } - return found_consecutive_atoms; -} - - -// Optimizes ab|ac|az to a(?:b|c|d). -void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { - Zone* zone = compiler->zone(); - ZoneList* alternatives = this->alternatives(); - int length = alternatives->length(); - - int write_posn = 0; - int i = 0; - while (i < length) { - RegExpTree* alternative = alternatives->at(i); - if (!alternative->IsAtom()) { - alternatives->at(write_posn++) = alternatives->at(i); - i++; - continue; - } - RegExpAtom* atom = alternative->AsAtom(); - uc16 common_prefix = atom->data().at(0); - int first_with_prefix = i; - int prefix_length = atom->length(); - i++; - while (i < length) { - alternative = alternatives->at(i); - if (!alternative->IsAtom()) break; - atom = alternative->AsAtom(); - if (atom->data().at(0) != common_prefix) break; - prefix_length = Min(prefix_length, atom->length()); - i++; - } - if (i > first_with_prefix + 2) { - // Found worthwhile run of alternatives with common prefix of at least one - // character. The sorting function above did not sort on more than one - // character for reasons of correctness, but there may still be a longer - // common prefix if the terms were similar or presorted in the input. - // Find out how long the common prefix is. - int run_length = i - first_with_prefix; - atom = alternatives->at(first_with_prefix)->AsAtom(); - for (int j = 1; j < run_length && prefix_length > 1; j++) { - RegExpAtom* old_atom = - alternatives->at(j + first_with_prefix)->AsAtom(); - for (int k = 1; k < prefix_length; k++) { - if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k; - } - } - RegExpAtom* prefix = - new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length)); - ZoneList* pair = new (zone) ZoneList(2, zone); - pair->Add(prefix, zone); - ZoneList* suffixes = - new (zone) ZoneList(run_length, zone); - for (int j = 0; j < run_length; j++) { - RegExpAtom* old_atom = - alternatives->at(j + first_with_prefix)->AsAtom(); - int len = old_atom->length(); - if (len == prefix_length) { - suffixes->Add(new (zone) RegExpEmpty(), zone); - } else { - RegExpTree* suffix = new (zone) RegExpAtom( - old_atom->data().SubVector(prefix_length, old_atom->length())); - suffixes->Add(suffix, zone); - } - } - pair->Add(new (zone) RegExpDisjunction(suffixes), zone); - alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair); - } else { - // Just copy any non-worthwhile alternatives. - for (int j = first_with_prefix; j < i; j++) { - alternatives->at(write_posn++) = alternatives->at(j); - } - } - } - alternatives->Rewind(write_posn); // Trim end of array. -} - - -// Optimizes b|c|z to [bcz]. -void RegExpDisjunction::FixSingleCharacterDisjunctions( - RegExpCompiler* compiler) { - Zone* zone = compiler->zone(); - ZoneList* alternatives = this->alternatives(); - int length = alternatives->length(); - - int write_posn = 0; - int i = 0; - while (i < length) { - RegExpTree* alternative = alternatives->at(i); - if (!alternative->IsAtom()) { - alternatives->at(write_posn++) = alternatives->at(i); - i++; - continue; - } - RegExpAtom* atom = alternative->AsAtom(); - if (atom->length() != 1) { - alternatives->at(write_posn++) = alternatives->at(i); - i++; - continue; - } - int first_in_run = i; - i++; - while (i < length) { - alternative = alternatives->at(i); - if (!alternative->IsAtom()) break; - atom = alternative->AsAtom(); - if (atom->length() != 1) break; - i++; - } - if (i > first_in_run + 1) { - // Found non-trivial run of single-character alternatives. - int run_length = i - first_in_run; - ZoneList* ranges = - new (zone) ZoneList(2, zone); - for (int j = 0; j < run_length; j++) { - RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); - DCHECK_EQ(old_atom->length(), 1); - ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); - } - alternatives->at(write_posn++) = - new (zone) RegExpCharacterClass(ranges, false); - } else { - // Just copy any trivial alternatives. - for (int j = first_in_run; j < i; j++) { - alternatives->at(write_posn++) = alternatives->at(j); - } - } - } - alternatives->Rewind(write_posn); // Trim end of array. -} - - RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { ZoneList* alternatives = this->alternatives(); - - if (alternatives->length() > 2) { - bool found_consecutive_atoms = SortConsecutiveAtoms(compiler); - if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler); - FixSingleCharacterDisjunctions(compiler); - if (alternatives->length() == 1) { - return alternatives->at(0)->ToNode(compiler, on_success); - } - } - int length = alternatives->length(); - ChoiceNode* result = new(compiler->zone()) ChoiceNode(length, compiler->zone()); for (int i = 0; i < length; i++) { diff --git a/src/jsregexp.h b/src/jsregexp.h index ff7759bfe..6e41c9fa5 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -212,7 +212,7 @@ class RegExpImpl { // and the total executable memory at any point. static const int kRegExpExecutableMemoryLimit = 16 * MB; static const int kRegExpCompiledLimit = 1 * MB; - static const int kRegExpTooLargeToOptimize = 20 * KB; + static const int kRegExpTooLargeToOptimize = 10 * KB; private: static bool CompileIrregexp(Handle re, diff --git a/src/list-inl.h b/src/list-inl.h index c09788e9a..9b122fdba 100644 --- a/src/list-inl.h +++ b/src/list-inl.h @@ -195,15 +195,10 @@ int List::CountOccurrences(const T& elm, int start, int end) const { template void List::Sort(int (*cmp)(const T* x, const T* y)) { - Sort(cmp, 0, length_); -} - - -template -void List::Sort(int (*cmp)(const T* x, const T* y), size_t s, size_t l) { - ToVector().Sort(cmp, s, l); + ToVector().Sort(cmp); #ifdef DEBUG - for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0); + for (int i = 1; i < length_; i++) + DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0); #endif } @@ -214,29 +209,7 @@ void List::Sort() { } -template -void List::StableSort(int (*cmp)(const T* x, const T* y)) { - StableSort(cmp, 0, length_); -} - - -template -void List::StableSort(int (*cmp)(const T* x, const T* y), size_t s, - size_t l) { - ToVector().StableSort(cmp, s, l); -#ifdef DEBUG - for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0); -#endif -} - - -template -void List::StableSort() { - ToVector().StableSort(); -} - - -template +template void List::Initialize(int capacity, P allocator) { DCHECK(capacity >= 0); data_ = (capacity > 0) ? NewData(capacity, allocator) : NULL; diff --git a/src/list.h b/src/list.h index 00cbd4031..021cafe14 100644 --- a/src/list.h +++ b/src/list.h @@ -149,13 +149,8 @@ class List { void Iterate(Visitor* visitor); // Sort all list entries (using QuickSort) - void Sort(int (*cmp)(const T* x, const T* y), size_t start, size_t length); void Sort(int (*cmp)(const T* x, const T* y)); void Sort(); - void StableSort(int (*cmp)(const T* x, const T* y), size_t start, - size_t length); - void StableSort(int (*cmp)(const T* x, const T* y)); - void StableSort(); INLINE(void Initialize(int capacity, AllocationPolicy allocator = AllocationPolicy())); diff --git a/src/vector.h b/src/vector.h index d022fde3a..895c61b4e 100644 --- a/src/vector.h +++ b/src/vector.h @@ -69,10 +69,6 @@ class Vector { return Vector(result, length_); } - void Sort(int (*cmp)(const T*, const T*), size_t s, size_t l) { - std::sort(start() + s, start() + s + l, RawComparer(cmp)); - } - void Sort(int (*cmp)(const T*, const T*)) { std::sort(start(), start() + length(), RawComparer(cmp)); } @@ -81,16 +77,6 @@ class Vector { std::sort(start(), start() + length()); } - void StableSort(int (*cmp)(const T*, const T*), size_t s, size_t l) { - std::stable_sort(start() + s, start() + s + l, RawComparer(cmp)); - } - - void StableSort(int (*cmp)(const T*, const T*)) { - std::stable_sort(start(), start() + length(), RawComparer(cmp)); - } - - void StableSort() { std::stable_sort(start(), start() + length()); } - void Truncate(int length) { DCHECK(length <= length_); length_ = length; diff --git a/test/cctest/test-heap.cc b/test/cctest/test-heap.cc index 3f7923e4c..02cd5608b 100644 --- a/test/cctest/test-heap.cc +++ b/test/cctest/test-heap.cc @@ -1747,13 +1747,13 @@ TEST(TestSizeOfRegExpCode) { // Adjust source below and this check to match // RegExpImple::kRegExpTooLargeToOptimize. - DCHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 20 * KB); + DCHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 10 * KB); // Compile a regexp that is much larger if we are using regexp optimizations. CompileRun( "var reg_exp_source = '(?:a|bc|def|ghij|klmno|pqrstu)';" "var half_size_reg_exp;" - "while (reg_exp_source.length < 20 * 1024) {" + "while (reg_exp_source.length < 10 * 1024) {" " half_size_reg_exp = reg_exp_source;" " reg_exp_source = reg_exp_source + reg_exp_source;" "}" -- 2.34.1