From: lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00> Date: Mon, 25 Jan 2010 12:56:49 +0000 (+0000) Subject: Fix bug in character-set merging. Add test case. X-Git-Tag: upstream/4.7.83~22621 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bfcbc10c7d5b2a93f2fc7f39818d2ecf6a966f27;p=platform%2Fupstream%2Fv8.git Fix bug in character-set merging. Add test case. See Chromium bug 32637. Review URL: http://codereview.chromium.org/553067 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3689 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 8af472d39..505cf03e5 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -4462,10 +4462,13 @@ void CharacterRange::Merge(ZoneList<CharacterRange>* first_set, while (i1 < n1 || i2 < n2) { CharacterRange next_range; int range_source; - if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) { + if (i2 == n2 || + (i1 < n1 && first_set->at(i1).from() < second_set->at(i2).from())) { + // Next smallest element is in first set. next_range = first_set->at(i1++); range_source = kInsideFirst; } else { + // Next smallest element is in second set. next_range = second_set->at(i2++); range_source = kInsideSecond; } diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc index c72c4d19f..ad68a4568 100644 --- a/test/cctest/test-regexp.cc +++ b/test/cctest/test-regexp.cc @@ -1650,6 +1650,163 @@ TEST(CanonicalizeCharacterSets) { ASSERT_EQ(30, list->at(0).to()); } +// Checks whether a character is in the set represented by a list of ranges. +static bool CharacterInSet(ZoneList<CharacterRange>* set, uc16 value) { + for (int i = 0; i < set->length(); i++) { + CharacterRange range = set->at(i); + if (range.from() <= value && value <= range.to()) { + return true; + } + } + return false; +} + +TEST(CharacterRangeMerge) { + ZoneScope zone_scope(DELETE_ON_EXIT); + ZoneList<CharacterRange> l1(4); + ZoneList<CharacterRange> l2(4); + // Create all combinations of intersections of ranges, both singletons and + // longer. + + int offset = 0; + + // The five kinds of singleton intersections: + // X + // Y - outside before + // Y - outside touching start + // Y - overlap + // Y - outside touching end + // Y - outside after + + for (int i = 0; i < 5; i++) { + l1.Add(CharacterRange::Singleton(offset + 2)); + l2.Add(CharacterRange::Singleton(offset + i)); + offset += 6; + } + + // The seven kinds of singleton/non-singleton intersections: + // XXX + // Y - outside before + // Y - outside touching start + // Y - inside touching start + // Y - entirely inside + // Y - inside touching end + // Y - outside touching end + // Y - disjoint after + + for (int i = 0; i < 7; i++) { + l1.Add(CharacterRange::Range(offset + 2, offset + 4)); + l2.Add(CharacterRange::Singleton(offset + i)); + offset += 8; + } + + // The eleven kinds of non-singleton intersections: + // + // XXXXXXXX + // YYYY - outside before. + // YYYY - outside touching start. + // YYYY - overlapping start + // YYYY - inside touching start + // YYYY - entirely inside + // YYYY - inside touching end + // YYYY - overlapping end + // YYYY - outside touching end + // YYYY - outside after + // YYYYYYYY - identical + // YYYYYYYYYYYY - containing entirely. + + for (int i = 0; i < 9; i++) { + l1.Add(CharacterRange::Range(offset + 6, offset + 15)); // Length 8. + l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3)); + offset += 22; + } + l1.Add(CharacterRange::Range(offset + 6, offset + 15)); + l2.Add(CharacterRange::Range(offset + 6, offset + 15)); + offset += 22; + l1.Add(CharacterRange::Range(offset + 6, offset + 15)); + l2.Add(CharacterRange::Range(offset + 4, offset + 17)); + offset += 22; + + // Different kinds of multi-range overlap: + // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX + // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y + + l1.Add(CharacterRange::Range(offset, offset + 21)); + l1.Add(CharacterRange::Range(offset + 31, offset + 52)); + for (int i = 0; i < 6; i++) { + l2.Add(CharacterRange::Range(offset + 2, offset + 5)); + l2.Add(CharacterRange::Singleton(offset + 8)); + offset += 9; + } + + ASSERT(CharacterRange::IsCanonical(&l1)); + ASSERT(CharacterRange::IsCanonical(&l2)); + + ZoneList<CharacterRange> first_only(4); + ZoneList<CharacterRange> second_only(4); + ZoneList<CharacterRange> both(4); + + // Merge one direction. + CharacterRange::Merge(&l1, &l2, &first_only, &second_only, &both); + + CHECK(CharacterRange::IsCanonical(&first_only)); + CHECK(CharacterRange::IsCanonical(&second_only)); + CHECK(CharacterRange::IsCanonical(&both)); + + for (uc16 i = 0; i < offset; i++) { + bool in_first = CharacterInSet(&l1, i); + bool in_second = CharacterInSet(&l2, i); + CHECK((in_first && !in_second) == CharacterInSet(&first_only, i)); + CHECK((!in_first && in_second) == CharacterInSet(&second_only, i)); + CHECK((in_first && in_second) == CharacterInSet(&both, i)); + } + + first_only.Clear(); + second_only.Clear(); + both.Clear(); + + // Merge other direction. + CharacterRange::Merge(&l2, &l1, &second_only, &first_only, &both); + + CHECK(CharacterRange::IsCanonical(&first_only)); + CHECK(CharacterRange::IsCanonical(&second_only)); + CHECK(CharacterRange::IsCanonical(&both)); + + for (uc16 i = 0; i < offset; i++) { + bool in_first = CharacterInSet(&l1, i); + bool in_second = CharacterInSet(&l2, i); + CHECK((in_first && !in_second) == CharacterInSet(&first_only, i)); + CHECK((!in_first && in_second) == CharacterInSet(&second_only, i)); + CHECK((in_first && in_second) == CharacterInSet(&both, i)); + } + + first_only.Clear(); + second_only.Clear(); + both.Clear(); + + // Merge but don't record all combinations. + CharacterRange::Merge(&l1, &l2, NULL, NULL, &both); + + CHECK(CharacterRange::IsCanonical(&both)); + + for (uc16 i = 0; i < offset; i++) { + bool in_first = CharacterInSet(&l1, i); + bool in_second = CharacterInSet(&l2, i); + CHECK((in_first && in_second) == CharacterInSet(&both, i)); + } + + // Merge into same set. + ZoneList<CharacterRange> all(4); + CharacterRange::Merge(&l1, &l2, &all, &all, &all); + + CHECK(CharacterRange::IsCanonical(&all)); + + for (uc16 i = 0; i < offset; i++) { + bool in_first = CharacterInSet(&l1, i); + bool in_second = CharacterInSet(&l2, i); + CHECK((in_first || in_second) == CharacterInSet(&all, i)); + } +} TEST(Graph) {