for (int i = 0; i < entries_.length(); ++i) {
sorted_entries_[i] = &entries_[i];
}
- sorted_entries_.Sort(SortByIds);
+ sorted_entries_.Sort<int (*)(HeapEntry* const*, HeapEntry* const*)>(
+ SortByIds);
}
return &sorted_entries_;
}
}
+static unibrow::uchar Canonical(
+ unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
+ unibrow::uchar c) {
+ unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
+ int length = canonicalize->get(c, '\0', chars);
+ DCHECK_LE(length, 1);
+ unibrow::uchar canonical = c;
+ if (length == 1) canonical = chars[0];
+ return canonical;
+}
+
+
+int CompareFirstCharCaseIndependent(
+ unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
+ RegExpTree* const* a, RegExpTree* const* b) {
+ RegExpAtom* atom1 = (*a)->AsAtom();
+ RegExpAtom* atom2 = (*b)->AsAtom();
+ unibrow::uchar character1 = atom1->data().at(0);
+ unibrow::uchar character2 = atom2->data().at(0);
+ if (character1 == character2) return 0;
+ if (character1 >= 'a' || character2 >= 'a') {
+ character1 = Canonical(canonicalize, character1);
+ character2 = Canonical(canonicalize, character2);
+ }
+ return static_cast<int>(character1) - static_cast<int>(character2);
+}
+
+
// We can stable sort runs of atoms, since the order does not matter if they
// start with different characters.
// Returns true if any consecutive atoms were found.
i++;
}
// Sort atoms to get ones with common prefixes together.
- // This step is not valid if we are in a case-independent regexp,
+ // This step is more tricky if we are in a case-independent regexp,
// because it would change /is|I/ to /I|is/, and order matters when
// the regexp parts don't match only disjoint starting points. To fix
- // this would need a version of CompareFirstChar that uses case-
+ // this we have a version of CompareFirstChar that uses case-
// independent character classes for comparison.
- if (!compiler->ignore_case()) {
- DCHECK_LT(first_atom, alternatives->length());
- DCHECK_LE(i, alternatives->length());
- DCHECK_LE(first_atom, i);
+ DCHECK_LT(first_atom, alternatives->length());
+ DCHECK_LE(i, alternatives->length());
+ DCHECK_LE(first_atom, i);
+ if (compiler->ignore_case()) {
+ unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
+ compiler->isolate()->regexp_macro_assembler_canonicalize();
+ auto compare_closure =
+ [canonicalize](RegExpTree* const* a, RegExpTree* const* b) {
+ return CompareFirstCharCaseIndependent(canonicalize, a, b);
+ };
+ alternatives->StableSort(compare_closure, first_atom, i - first_atom);
+ } else {
alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
}
if (i - first_atom > 1) found_consecutive_atoms = true;
continue;
}
RegExpAtom* atom = alternative->AsAtom();
- uc16 common_prefix = atom->data().at(0);
+ unibrow::uchar common_prefix = atom->data().at(0);
int first_with_prefix = i;
int prefix_length = atom->length();
i++;
alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
atom = alternative->AsAtom();
- if (atom->data().at(0) != common_prefix) break;
+ unibrow::uchar new_prefix = atom->data().at(0);
+ if (new_prefix != common_prefix) {
+ if (!compiler->ignore_case()) break;
+ unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
+ compiler->isolate()->regexp_macro_assembler_canonicalize();
+ new_prefix = Canonical(canonicalize, new_prefix);
+ common_prefix = Canonical(canonicalize, common_prefix);
+ if (new_prefix != common_prefix) break;
+ }
prefix_length = Min(prefix_length, atom->length());
i++;
}
RegExpAtom* old_atom =
alternatives->at(j + first_with_prefix)->AsAtom();
for (int k = 1; k < prefix_length; k++) {
- if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k;
+ if (atom->data().at(k) != old_atom->data().at(k)) {
+ prefix_length = k;
+ break;
+ }
}
}
RegExpAtom* prefix =
}
-template<typename T, class P>
-void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
+template <typename T, class P>
+template <typename CompareFunction>
+void List<T, P>::Sort(CompareFunction cmp) {
Sort(cmp, 0, length_);
}
template <typename T, class P>
-void List<T, P>::Sort(int (*cmp)(const T* x, const T* y), size_t s, size_t l) {
+template <typename CompareFunction>
+void List<T, P>::Sort(CompareFunction cmp, size_t s, size_t l) {
ToVector().Sort(cmp, s, l);
#ifdef DEBUG
for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
template <typename T, class P>
-void List<T, P>::StableSort(int (*cmp)(const T* x, const T* y)) {
+template <typename CompareFunction>
+void List<T, P>::StableSort(CompareFunction cmp) {
StableSort(cmp, 0, length_);
}
template <typename T, class P>
-void List<T, P>::StableSort(int (*cmp)(const T* x, const T* y), size_t s,
- size_t l) {
+template <typename CompareFunction>
+void List<T, P>::StableSort(CompareFunction cmp, size_t s, size_t l) {
ToVector().StableSort(cmp, s, l);
#ifdef DEBUG
for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
void Iterate(Visitor* visitor);
// Sort all list entries (using QuickSort)
- void Sort(int (*cmp)(const T* x, const T* y), size_t start, size_t length);
- void Sort(int (*cmp)(const T* x, const T* y));
+ template <typename CompareFunction>
+ void Sort(CompareFunction cmp, size_t start, size_t length);
+ template <typename CompareFunction>
+ void Sort(CompareFunction cmp);
void Sort();
- void StableSort(int (*cmp)(const T* x, const T* y), size_t start,
- size_t length);
- void StableSort(int (*cmp)(const T* x, const T* y));
+ template <typename CompareFunction>
+ void StableSort(CompareFunction cmp, size_t start, size_t length);
+ template <typename CompareFunction>
+ void StableSort(CompareFunction cmp);
void StableSort();
INLINE(void Initialize(int capacity,
return Vector<T>(result, length_);
}
- void Sort(int (*cmp)(const T*, const T*), size_t s, size_t l) {
- std::sort(start() + s, start() + s + l, RawComparer(cmp));
+ template <typename CompareFunction>
+ void Sort(CompareFunction cmp, size_t s, size_t l) {
+ std::sort(start() + s, start() + s + l, RawComparer<CompareFunction>(cmp));
}
- void Sort(int (*cmp)(const T*, const T*)) {
- std::sort(start(), start() + length(), RawComparer(cmp));
+ template <typename CompareFunction>
+ void Sort(CompareFunction cmp) {
+ std::sort(start(), start() + length(), RawComparer<CompareFunction>(cmp));
}
void Sort() {
std::sort(start(), start() + length());
}
- void StableSort(int (*cmp)(const T*, const T*), size_t s, size_t l) {
- std::stable_sort(start() + s, start() + s + l, RawComparer(cmp));
+ template <typename CompareFunction>
+ void StableSort(CompareFunction cmp, size_t s, size_t l) {
+ std::stable_sort(start() + s, start() + s + l,
+ RawComparer<CompareFunction>(cmp));
}
- void StableSort(int (*cmp)(const T*, const T*)) {
- std::stable_sort(start(), start() + length(), RawComparer(cmp));
+ template <typename CompareFunction>
+ void StableSort(CompareFunction cmp) {
+ std::stable_sort(start(), start() + length(),
+ RawComparer<CompareFunction>(cmp));
}
void StableSort() { std::stable_sort(start(), start() + length()); }
T* start_;
int length_;
+ template <typename CookedComparer>
class RawComparer {
public:
- explicit RawComparer(int (*cmp)(const T*, const T*)) : cmp_(cmp) {}
+ explicit RawComparer(CookedComparer cmp) : cmp_(cmp) {}
bool operator()(const T& a, const T& b) {
return cmp_(&a, &b) < 0;
}
private:
- int (*cmp_)(const T*, const T*);
+ CookedComparer cmp_;
};
};
'array-constructor': [PASS, TIMEOUT],
# Very slow on ARM and MIPS, contains no architecture dependent code.
- 'unicode-case-overoptimization': [PASS, NO_VARIANTS, ['arch == arm or arch == android_arm or arch == android_arm64 or arch == mipsel or arch == mips64el or arch == mips', TIMEOUT]],
- 'regress/regress-3976': [PASS, NO_VARIANTS, ['arch == arm or arch == android_arm or arch == android_arm64 or arch == mipsel or arch == mips64el or arch == mips', SKIP]],
+ 'unicode-case-overoptimization': [PASS, NO_VARIANTS, ['arch == arm or arch == arm64 or arch == android_arm or arch == android_arm64 or arch == mipsel or arch == mips64el or arch == mips', TIMEOUT]],
+ 'regress/regress-3976': [PASS, NO_VARIANTS, ['arch == arm or arch == arm64 or arch == android_arm or arch == android_arm64 or arch == mipsel or arch == mips64el or arch == mips', SKIP]],
+ 'regress/regress-crbug-482998': [PASS, NO_VARIANTS, ['arch == arm or arch == arm64 or arch == android_arm or arch == android_arm64 or arch == mipsel or arch == mips64el or arch == mips', SKIP]],
##############################################################################
# This test expects to reach a certain recursion depth, which may not work
--- /dev/null
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+function Test(lower, upper) {
+ var lx = lower + "x";
+ var ux = upper + "x";
+ var lp = lower + "|";
+ var uxp = upper + "x|";
+ assertEquals(lx, new RegExp(uxp + lp + lower + "cat", "i").exec(lx) + "");
+ assertEquals(ux, new RegExp(uxp + lp + lower + "cat", "i").exec(ux) + "");
+ assertEquals(lower, new RegExp(lp + uxp + lower + "cat", "i").exec(lx) + "");
+ assertEquals(upper, new RegExp(lp + uxp + lower + "cat", "i").exec(ux) + "");
+}
+
+function TestFail(lower, upper) {
+ var lx = lower + "x";
+ var ux = upper + "x";
+ var lp = lower + "|";
+ var uxp = upper + "x|";
+ assertEquals(lower, new RegExp(uxp + lp + lower + "cat", "i").exec(lx) + "");
+ assertEquals(ux, new RegExp(uxp + lp + lower + "cat", "i").exec(ux) + "");
+ assertEquals(lower, new RegExp(lp + uxp + lower + "cat", "i").exec(lx) + "");
+ assertEquals(ux, new RegExp(lp + uxp + lower + "cat", "i").exec(ux) + "");
+}
+
+Test("a", "A");
+Test("0", "0");
+TestFail("a", "b");
+// Small and capital o-umlaut
+Test(String.fromCharCode(0xf6), String.fromCharCode(0xd6));
+// Small and capital kha.
+Test(String.fromCharCode(0x445), String.fromCharCode(0x425));
+// Small and capital y-umlaut.
+Test(String.fromCharCode(0xff), String.fromCharCode(0x178));
+// Small and large Greek mu.
+Test(String.fromCharCode(0x3bc), String.fromCharCode(0x39c));
+// Micron and large Greek mu.
+Test(String.fromCharCode(0xb5), String.fromCharCode(0x39c));
+// Micron and small Greek mu.
+Test(String.fromCharCode(0xb5), String.fromCharCode(0x3bc));
+// German double s and capital S. These are not equivalent since one is double.
+TestFail(String.fromCharCode(0xdf), "S");
+// Small i and Turkish capital dotted I. These are not equivalent due to
+// 21.2.2.8.2 section 3g. One is below 128 and the other is above 127.
+TestFail("i", String.fromCharCode(0x130));
+// Small dotless i and I. These are not equivalent either.
+TestFail(String.fromCharCode(0x131), "I");
// found in the LICENSE file.
// Should not time out. Running time 0.5s vs. 120s before the change.
-function collapse() {
+function collapse(flags) {
var src = "(?:";
for (var i = 128; i < 0x1000; i++) {
- src += "a" + String.fromCharCode(i) + "|";
+ src += String.fromCharCode(96 + i % 26) + String.fromCharCode(i) + "|";
}
src += "aa)";
- var collapsible = new RegExp(src);
+ var collapsible = new RegExp(src, flags);
var subject = "zzzzzzz" + String.fromCharCode(3000);
for (var i = 0; i < 1000; i++) {
subject += "xxxxxxx";
}
}
-collapse();
+collapse("i");
+collapse("");