}
-int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
- RegExpAtom* atom1 = (*a)->AsAtom();
- RegExpAtom* atom2 = (*b)->AsAtom();
- uc16 character1 = atom1->data().at(0);
- uc16 character2 = atom2->data().at(0);
- if (character1 < character2) return -1;
- if (character1 > character2) return 1;
- return 0;
-}
-
-
-// We can stable sort runs of atoms, since the order does not matter if they
-// start with different characters.
-// Returns true if any consecutive atoms were found.
-bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
- int length = alternatives->length();
- bool found_consecutive_atoms = false;
- for (int i = 0; i < length; i++) {
- while (i < length) {
- RegExpTree* alternative = alternatives->at(i);
- if (alternative->IsAtom()) break;
- i++;
- }
- // i is length or it is the index of an atom.
- int first_atom = i;
- i++;
- while (i < length) {
- RegExpTree* alternative = alternatives->at(i);
- if (!alternative->IsAtom()) break;
- i++;
- }
- // Sort atoms to get ones with common prefixes together.
- // This step is not valid if we are in a case-independent regexp,
- // because it would change /is|I/ to /I|is/, and order matters when
- // the regexp parts don't match only disjoint starting points.
- if (!compiler->ignore_case()) {
- alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
- }
- if (i - first_atom > 1) found_consecutive_atoms = true;
- }
- return found_consecutive_atoms;
-}
-
-
-// Optimizes ab|ac|az to a(?:b|c|d).
-void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
- Zone* zone = compiler->zone();
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
- int length = alternatives->length();
-
- int write_posn = 0;
- int i = 0;
- while (i < length) {
- RegExpTree* alternative = alternatives->at(i);
- if (!alternative->IsAtom()) {
- alternatives->at(write_posn++) = alternatives->at(i);
- i++;
- continue;
- }
- RegExpAtom* atom = alternative->AsAtom();
- uc16 common_prefix = atom->data().at(0);
- int first_with_prefix = i;
- int prefix_length = atom->length();
- i++;
- while (i < length) {
- alternative = alternatives->at(i);
- if (!alternative->IsAtom()) break;
- atom = alternative->AsAtom();
- if (atom->data().at(0) != common_prefix) break;
- prefix_length = Min(prefix_length, atom->length());
- i++;
- }
- if (i > first_with_prefix + 2) {
- // Found worthwhile run of alternatives with common prefix of at least one
- // character. Find out how long the common prefix is.
- int run_length = i - first_with_prefix;
- atom = alternatives->at(first_with_prefix)->AsAtom();
- for (int j = 1; j < run_length && prefix_length > 1; j++) {
- RegExpAtom* old_atom =
- alternatives->at(j + first_with_prefix)->AsAtom();
- for (int k = 1; k < prefix_length; k++) {
- if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k;
- }
- }
- RegExpAtom* prefix =
- new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length));
- ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
- pair->Add(prefix, zone);
- ZoneList<RegExpTree*>* suffixes =
- new (zone) ZoneList<RegExpTree*>(run_length, zone);
- for (int j = 0; j < run_length; j++) {
- RegExpAtom* old_atom =
- alternatives->at(j + first_with_prefix)->AsAtom();
- int len = old_atom->length();
- if (len == prefix_length) {
- suffixes->Add(new (zone) RegExpEmpty(), zone);
- } else {
- RegExpTree* suffix = new (zone) RegExpAtom(
- old_atom->data().SubVector(prefix_length, old_atom->length()));
- suffixes->Add(suffix, zone);
- }
- }
- pair->Add(new (zone) RegExpDisjunction(suffixes), zone);
- alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);
- } else {
- // Just copy any non-worthwhile alternatives.
- for (int j = first_with_prefix; j < i; j++) {
- alternatives->at(write_posn++) = alternatives->at(j);
- }
- }
- }
- alternatives->Rewind(write_posn); // Trim end of array.
-}
-
-
-// Optimizes b|c|z to [bcz].
-void RegExpDisjunction::FixSingleCharacterDisjunctions(
- RegExpCompiler* compiler) {
- Zone* zone = compiler->zone();
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
- int length = alternatives->length();
-
- int write_posn = 0;
- int i = 0;
- while (i < length) {
- RegExpTree* alternative = alternatives->at(i);
- if (!alternative->IsAtom()) {
- alternatives->at(write_posn++) = alternatives->at(i);
- i++;
- continue;
- }
- RegExpAtom* atom = alternative->AsAtom();
- if (atom->length() != 1) {
- alternatives->at(write_posn++) = alternatives->at(i);
- i++;
- continue;
- }
- int first_in_run = i;
- i++;
- while (i < length) {
- alternative = alternatives->at(i);
- if (!alternative->IsAtom()) break;
- atom = alternative->AsAtom();
- if (atom->length() != 1) break;
- i++;
- }
- if (i > first_in_run + 1) {
- // Found non-trivial run of single-character alternatives.
- int run_length = i - first_in_run;
- ZoneList<CharacterRange>* ranges =
- new (zone) ZoneList<CharacterRange>(2, zone);
- for (int j = 0; j < run_length; j++) {
- RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();
- DCHECK_EQ(old_atom->length(), 1);
- ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
- }
- alternatives->at(write_posn++) =
- new (zone) RegExpCharacterClass(ranges, false);
- } else {
- // Just copy any trivial alternatives.
- for (int j = first_in_run; j < i; j++) {
- alternatives->at(write_posn++) = alternatives->at(j);
- }
- }
- }
- alternatives->Rewind(write_posn); // Trim end of array.
-}
-
-
RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
ZoneList<RegExpTree*>* alternatives = this->alternatives();
-
- if (alternatives->length() > 2) {
- bool found_consecutive_atoms = SortConsecutiveAtoms(compiler);
- if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler);
- FixSingleCharacterDisjunctions(compiler);
- if (alternatives->length() == 1) {
- return alternatives->at(0)->ToNode(compiler, on_success);
- }
- }
-
int length = alternatives->length();
-
ChoiceNode* result =
new(compiler->zone()) ChoiceNode(length, compiler->zone());
for (int i = 0; i < length; i++) {
template<typename T, class P>
void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
- Sort(cmp, 0, length_);
-}
-
-
-template <typename T, class P>
-void List<T, P>::Sort(int (*cmp)(const T* x, const T* y), size_t s, size_t l) {
- ToVector().Sort(cmp, s, l);
+ ToVector().Sort(cmp);
#ifdef DEBUG
- for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
+ for (int i = 1; i < length_; i++)
+ DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
#endif
}
}
-template <typename T, class P>
-void List<T, P>::StableSort(int (*cmp)(const T* x, const T* y)) {
- StableSort(cmp, 0, length_);
-}
-
-
-template <typename T, class P>
-void List<T, P>::StableSort(int (*cmp)(const T* x, const T* y), size_t s,
- size_t l) {
- ToVector().StableSort(cmp, s, l);
-#ifdef DEBUG
- for (size_t i = s + 1; i < l; i++) DCHECK(cmp(&data_[i - 1], &data_[i]) <= 0);
-#endif
-}
-
-
-template <typename T, class P>
-void List<T, P>::StableSort() {
- ToVector().StableSort();
-}
-
-
-template <typename T, class P>
+template<typename T, class P>
void List<T, P>::Initialize(int capacity, P allocator) {
DCHECK(capacity >= 0);
data_ = (capacity > 0) ? NewData(capacity, allocator) : NULL;