Revert of Extend big-disjunction optimization to case-independent regexps (patchset...

author machenbach <machenbach@chromium.org>

Wed, 24 Jun 2015 19:04:04 +0000 (12:04 -0700)

committer Commit bot <commit-bot@chromium.org>

Wed, 24 Jun 2015 19:04:15 +0000 (19:04 +0000)
author machenbach <machenbach@chromium.org>
Wed, 24 Jun 2015 19:04:04 +0000 (12:04 -0700)
committer Commit bot <commit-bot@chromium.org>
Wed, 24 Jun 2015 19:04:15 +0000 (19:04 +0000)
diff --git a/src/heap-snapshot-generator.cc b/src/heap-snapshot-generator.cc

index f1bdc71cca03dc3cf1d0277d65a5f2997595f495..a27f4194587f0f8705c4ee2f938f2b92d07feef4 100644 (file)
--- a/src/heap-snapshot-generator.cc
+++ b/src/heap-snapshot-generator.cc
@@ -323,8 +323,7 @@ List<HeapEntry*>* HeapSnapshot::GetSortedEntriesList() {
      for (int i = 0; i < entries_.length(); ++i) {
        sorted_entries_[i] = &entries_[i];
      }
-    sorted_entries_.Sort<int (*)(HeapEntry* const*, HeapEntry* const*)>(
-        SortByIds);
+    sorted_entries_.Sort(SortByIds);
    }
    return &sorted_entries_;
  }
diff --git a/src/jsregexp.cc b/src/jsregexp.cc

index a02141d77a057fde6411e39b6734a4cd695dee46..92fdc77aaa20e19e091e7a36b131651802f08493 100644 (file)
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -4837,34 +4837,6 @@ int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
  }
  
  
-static unibrow::uchar Canonical(
-    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
-    unibrow::uchar c) {
-  unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
-  int length = canonicalize->get(c, '\0', chars);
-  DCHECK_LE(length, 1);
-  unibrow::uchar canonical = c;
-  if (length == 1) canonical = chars[0];
-  return canonical;
-}
-
-
-int CompareFirstCharCaseIndependent(
-    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
-    RegExpTree* const* a, RegExpTree* const* b) {
-  RegExpAtom* atom1 = (*a)->AsAtom();
-  RegExpAtom* atom2 = (*b)->AsAtom();
-  unibrow::uchar character1 = atom1->data().at(0);
-  unibrow::uchar character2 = atom2->data().at(0);
-  if (character1 == character2) return 0;
-  if (character1 >= 'a' || character2 >= 'a') {
-    character1 = Canonical(canonicalize, character1);
-    character2 = Canonical(canonicalize, character2);
-  }
-  return static_cast<int>(character1) - static_cast<int>(character2);
-}
-
-
  // We can stable sort runs of atoms, since the order does not matter if they
  // start with different characters.
  // Returns true if any consecutive atoms were found.
@@ -4888,23 +4860,15 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
        i++;
      }
      // Sort atoms to get ones with common prefixes together.
-    // This step is more tricky if we are in a case-independent regexp,
+    // This step is not valid if we are in a case-independent regexp,
      // because it would change /is|I/ to /I|is/, and order matters when
      // the regexp parts don't match only disjoint starting points. To fix
-    // this we have a version of CompareFirstChar that uses case-
+    // this would need a version of CompareFirstChar that uses case-
      // independent character classes for comparison.
-    DCHECK_LT(first_atom, alternatives->length());
-    DCHECK_LE(i, alternatives->length());
-    DCHECK_LE(first_atom, i);
-    if (compiler->ignore_case()) {
-      unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
-          compiler->isolate()->regexp_macro_assembler_canonicalize();
-      auto compare_closure =
-          [canonicalize](RegExpTree* const* a, RegExpTree* const* b) {
-            return CompareFirstCharCaseIndependent(canonicalize, a, b);
-          };
-      alternatives->StableSort(compare_closure, first_atom, i - first_atom);
-    } else {
+    if (!compiler->ignore_case()) {
+      DCHECK_LT(first_atom, alternatives->length());
+      DCHECK_LE(i, alternatives->length());
+      DCHECK_LE(first_atom, i);
        alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
      }
      if (i - first_atom > 1) found_consecutive_atoms = true;
@@ -4929,7 +4893,7 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
        continue;
      }
      RegExpAtom* atom = alternative->AsAtom();
-    unibrow::uchar common_prefix = atom->data().at(0);
+    uc16 common_prefix = atom->data().at(0);
      int first_with_prefix = i;
      int prefix_length = atom->length();
      i++;
@@ -4937,15 +4901,7 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
        alternative = alternatives->at(i);
        if (!alternative->IsAtom()) break;
        atom = alternative->AsAtom();
-      unibrow::uchar new_prefix = atom->data().at(0);
-      if (new_prefix != common_prefix) {
-        if (!compiler->ignore_case()) break;
-        unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
-            compiler->isolate()->regexp_macro_assembler_canonicalize();
-        new_prefix = Canonical(canonicalize, new_prefix);
-        common_prefix = Canonical(canonicalize, common_prefix);
-        if (new_prefix != common_prefix) break;
-      }
+      if (atom->data().at(0) != common_prefix) break;
        prefix_length = Min(prefix_length, atom->length());
        i++;
      }
@@ -4961,10 +4917,7 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
          RegExpAtom* old_atom =
              alternatives->at(j + first_with_prefix)->AsAtom();
          for (int k = 1; k < prefix_length; k++) {
-          if (atom->data().at(k) != old_atom->data().at(k)) {
-            prefix_length = k;
-            break;
-          }
+          if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k;
          }
        }
        RegExpAtom* prefix =
diff --git a/src/list-inl.h b/src/list-inl.h

index 98f0343fa57f0b84aff9254bf6c1bed7b482e56a..c09788e9ae0e466e5639673c909215b4e52e8d8f 100644 (file)
--- a/src/list-inl.h
+++ b/src/list-inl.h
@@ -193,16 +193,14 @@ int List<T, P>::CountOccurrences(const T& elm, int start, int end) const {
  }
  
  
-template <typename T, class P>
-template <typename CompareFunction>
-void List<T, P>::Sort(CompareFunction cmp) {
+template<typename T, class P>
+void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
    Sort(cmp, 0, length_);
  }
  
  
  template <typename T, class P>
-template <typename CompareFunction>
-void List<T, P>::Sort(CompareFunction cmp, size_t s, size_t l) {
+void List<T, P>::Sort(int (*cmp)(const T* x, const T* y), size_t s, size_t l) {
    ToVector().Sort(cmp, s, l);
  #ifdef DEBUG
    for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
@@ -217,15 +215,14 @@ void List<T, P>::Sort() {
  
  
  template <typename T, class P>
-template <typename CompareFunction>
-void List<T, P>::StableSort(CompareFunction cmp) {
+void List<T, P>::StableSort(int (*cmp)(const T* x, const T* y)) {
    StableSort(cmp, 0, length_);
  }
  
  
  template <typename T, class P>
-template <typename CompareFunction>
-void List<T, P>::StableSort(CompareFunction cmp, size_t s, size_t l) {
+void List<T, P>::StableSort(int (*cmp)(const T* x, const T* y), size_t s,
+                            size_t l) {
    ToVector().StableSort(cmp, s, l);
  #ifdef DEBUG
    for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
diff --git a/src/list.h b/src/list.h

index b636449c423b9d9ea7a184de78a3580a49c5780e..00cbd40312025ad8ba2fcc40a1f17b2a8409cd22 100644 (file)
--- a/src/list.h
+++ b/src/list.h
@@ -149,15 +149,12 @@ class List {
    void Iterate(Visitor* visitor);
  
    // Sort all list entries (using QuickSort)
-  template <typename CompareFunction>
-  void Sort(CompareFunction cmp, size_t start, size_t length);
-  template <typename CompareFunction>
-  void Sort(CompareFunction cmp);
+  void Sort(int (*cmp)(const T* x, const T* y), size_t start, size_t length);
+  void Sort(int (*cmp)(const T* x, const T* y));
    void Sort();
-  template <typename CompareFunction>
-  void StableSort(CompareFunction cmp, size_t start, size_t length);
-  template <typename CompareFunction>
-  void StableSort(CompareFunction cmp);
+  void StableSort(int (*cmp)(const T* x, const T* y), size_t start,
+                  size_t length);
+  void StableSort(int (*cmp)(const T* x, const T* y));
    void StableSort();
  
    INLINE(void Initialize(int capacity,
diff --git a/src/vector.h b/src/vector.h

index 4f3128b9185cd42d21e439913a967c79753cb6f5..d022fde3a5bb25d88b7fba5e9ca4ae375e94d20b 100644 (file)
--- a/src/vector.h
+++ b/src/vector.h
@@ -69,30 +69,24 @@ class Vector {
      return Vector<T>(result, length_);
    }
  
-  template <typename CompareFunction>
-  void Sort(CompareFunction cmp, size_t s, size_t l) {
-    std::sort(start() + s, start() + s + l, RawComparer<CompareFunction>(cmp));
+  void Sort(int (*cmp)(const T*, const T*), size_t s, size_t l) {
+    std::sort(start() + s, start() + s + l, RawComparer(cmp));
    }
  
-  template <typename CompareFunction>
-  void Sort(CompareFunction cmp) {
-    std::sort(start(), start() + length(), RawComparer<CompareFunction>(cmp));
+  void Sort(int (*cmp)(const T*, const T*)) {
+    std::sort(start(), start() + length(), RawComparer(cmp));
    }
  
    void Sort() {
      std::sort(start(), start() + length());
    }
  
-  template <typename CompareFunction>
-  void StableSort(CompareFunction cmp, size_t s, size_t l) {
-    std::stable_sort(start() + s, start() + s + l,
-                     RawComparer<CompareFunction>(cmp));
+  void StableSort(int (*cmp)(const T*, const T*), size_t s, size_t l) {
+    std::stable_sort(start() + s, start() + s + l, RawComparer(cmp));
    }
  
-  template <typename CompareFunction>
-  void StableSort(CompareFunction cmp) {
-    std::stable_sort(start(), start() + length(),
-                     RawComparer<CompareFunction>(cmp));
+  void StableSort(int (*cmp)(const T*, const T*)) {
+    std::stable_sort(start(), start() + length(), RawComparer(cmp));
    }
  
    void StableSort() { std::stable_sort(start(), start() + length()); }
@@ -142,16 +136,15 @@ class Vector {
    T* start_;
    int length_;
  
-  template <typename CookedComparer>
    class RawComparer {
     public:
-    explicit RawComparer(CookedComparer cmp) : cmp_(cmp) {}
+    explicit RawComparer(int (*cmp)(const T*, const T*)) : cmp_(cmp) {}
      bool operator()(const T& a, const T& b) {
        return cmp_(&a, &b) < 0;
      }
  
     private:
-    CookedComparer cmp_;
+    int (*cmp_)(const T*, const T*);
    };
  };
  
diff --git a/test/mjsunit/regexp-sort.js b/test/mjsunit/regexp-sort.js

deleted file mode 100644 (file)

index 57d5070..0000000
--- a/test/mjsunit/regexp-sort.js
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2015 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-function Test(lower, upper) {
-  var lx = lower + "x";
-  var ux = upper + "x";
-  var lp = lower + "|";
-  var uxp = upper + "x|";
-  assertEquals(lx, new RegExp(uxp + lp + lower + "cat", "i").exec(lx) + "");
-  assertEquals(ux, new RegExp(uxp + lp + lower + "cat", "i").exec(ux) + "");
-  assertEquals(lower, new RegExp(lp + uxp + lower + "cat", "i").exec(lx) + "");
-  assertEquals(upper, new RegExp(lp + uxp + lower + "cat", "i").exec(ux) + "");
-}
-
-function TestFail(lower, upper) {
-  var lx = lower + "x";
-  var ux = upper + "x";
-  var lp = lower + "|";
-  var uxp = upper + "x|";
-  assertEquals(lower, new RegExp(uxp + lp + lower + "cat", "i").exec(lx) + "");
-  assertEquals(ux, new RegExp(uxp + lp + lower + "cat", "i").exec(ux) + "");
-  assertEquals(lower, new RegExp(lp + uxp + lower + "cat", "i").exec(lx) + "");
-  assertEquals(ux, new RegExp(lp + uxp + lower + "cat", "i").exec(ux) + "");
-}
-
-Test("a", "A");
-Test("0", "0");
-TestFail("a", "b");
-// Small and capital o-umlaut
-Test(String.fromCharCode(0xf6), String.fromCharCode(0xd6));
-// Small and capital kha.
-Test(String.fromCharCode(0x445), String.fromCharCode(0x425));
-// Small and capital y-umlaut.
-Test(String.fromCharCode(0xff), String.fromCharCode(0x178));
-// Small and large Greek mu.
-Test(String.fromCharCode(0x3bc), String.fromCharCode(0x39c));
-// Micron and large Greek mu.
-Test(String.fromCharCode(0xb5), String.fromCharCode(0x39c));
-// Micron and small Greek mu.
-Test(String.fromCharCode(0xb5), String.fromCharCode(0x3bc));
-// German double s and capital S. These are not equivalent since one is double.
-TestFail(String.fromCharCode(0xdf), "S");
-// Small i and Turkish capital dotted I. These are not equivalent due to
-// 21.2.2.8.2 section 3g.  One is below 128 and the other is above 127.
-TestFail("i", String.fromCharCode(0x130));
-// Small dotless i and I. These are not equivalent either.
-TestFail(String.fromCharCode(0x131), "I");
diff --git a/test/mjsunit/regress/regress-crbug-482998.js b/test/mjsunit/regress/regress-crbug-482998.js

index 80933a7a6d3c7b00df1a3ee8605cbe4f924831b7..94ff5008e85688ee8cf70dc9dd010c4c41257b26 100644 (file)
--- a/test/mjsunit/regress/regress-crbug-482998.js
+++ b/test/mjsunit/regress/regress-crbug-482998.js
@@ -3,13 +3,13 @@
  // found in the LICENSE file.
  
  // Should not time out.  Running time 0.5s vs. 120s before the change.
-function collapse(flags) {
+function collapse() {
    var src = "(?:";
    for (var i = 128; i < 0x1000; i++) {
-    src += String.fromCharCode(96 + i % 26) + String.fromCharCode(i) + "|";
+    src += "a" + String.fromCharCode(i) + "|";
    }
    src += "aa)";
-  var collapsible = new RegExp(src, flags);
+  var collapsible = new RegExp(src);
    var subject = "zzzzzzz" + String.fromCharCode(3000);
    for (var i = 0; i < 1000; i++) {
      subject += "xxxxxxx";
@@ -19,5 +19,4 @@ function collapse(flags) {
    }
  }
  
-collapse("i");
-collapse("");
+collapse();
author	machenbach <machenbach@chromium.org>
	Wed, 24 Jun 2015 19:04:04 +0000 (12:04 -0700)
committer	Commit bot <commit-bot@chromium.org>
	Wed, 24 Jun 2015 19:04:15 +0000 (19:04 +0000)
src/heap-snapshot-generator.cc		patch | blob | history
src/jsregexp.cc		patch | blob | history
src/list-inl.h		patch | blob | history
src/list.h		patch | blob | history
src/vector.h		patch | blob | history
test/mjsunit/regexp-sort.js	[deleted file]	patch | blob | history
test/mjsunit/regress/regress-crbug-482998.js		patch | blob | history