Fix bug in character-set merging. Add test case.
authorlrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 25 Jan 2010 12:56:49 +0000 (12:56 +0000)
committerlrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 25 Jan 2010 12:56:49 +0000 (12:56 +0000)
See Chromium bug 32637.

Review URL: http://codereview.chromium.org/553067

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3689 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/jsregexp.cc
test/cctest/test-regexp.cc

index 8af472d39e18f6a65933c7191e808074d9aa0529..505cf03e5ee47ce33fee04095dc7815658423ad1 100644 (file)
@@ -4462,10 +4462,13 @@ void CharacterRange::Merge(ZoneList<CharacterRange>* first_set,
   while (i1 < n1 || i2 < n2) {
     CharacterRange next_range;
     int range_source;
-    if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) {
+    if (i2 == n2 ||
+        (i1 < n1 && first_set->at(i1).from() < second_set->at(i2).from())) {
+      // Next smallest element is in first set.
       next_range = first_set->at(i1++);
       range_source = kInsideFirst;
     } else {
+      // Next smallest element is in second set.
       next_range = second_set->at(i2++);
       range_source = kInsideSecond;
     }
index c72c4d19fc40a52cce879a0c4e6f2b6611917877..ad68a4568d015d307e922ecc01a1dd99cd7ca889 100644 (file)
@@ -1650,6 +1650,163 @@ TEST(CanonicalizeCharacterSets) {
   ASSERT_EQ(30, list->at(0).to());
 }
 
+// Checks whether a character is in the set represented by a list of ranges.
+static bool CharacterInSet(ZoneList<CharacterRange>* set, uc16 value) {
+  for (int i = 0; i < set->length(); i++) {
+    CharacterRange range = set->at(i);
+    if (range.from() <= value && value <= range.to()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+TEST(CharacterRangeMerge) {
+  ZoneScope zone_scope(DELETE_ON_EXIT);
+  ZoneList<CharacterRange> l1(4);
+  ZoneList<CharacterRange> l2(4);
+  // Create all combinations of intersections of ranges, both singletons and
+  // longer.
+
+  int offset = 0;
+
+  // The five kinds of singleton intersections:
+  //     X
+  //   Y      - outside before
+  //    Y     - outside touching start
+  //     Y    - overlap
+  //      Y   - outside touching end
+  //       Y  - outside after
+
+  for (int i = 0; i < 5; i++) {
+    l1.Add(CharacterRange::Singleton(offset + 2));
+    l2.Add(CharacterRange::Singleton(offset + i));
+    offset += 6;
+  }
+
+  // The seven kinds of singleton/non-singleton intersections:
+  //    XXX
+  //  Y        - outside before
+  //   Y       - outside touching start
+  //    Y      - inside touching start
+  //     Y     - entirely inside
+  //      Y    - inside touching end
+  //       Y   - outside touching end
+  //        Y  - disjoint after
+
+  for (int i = 0; i < 7; i++) {
+    l1.Add(CharacterRange::Range(offset + 2, offset + 4));
+    l2.Add(CharacterRange::Singleton(offset + i));
+    offset += 8;
+  }
+
+  // The eleven kinds of non-singleton intersections:
+  //
+  //       XXXXXXXX
+  // YYYY                  - outside before.
+  //   YYYY                - outside touching start.
+  //     YYYY              - overlapping start
+  //       YYYY            - inside touching start
+  //         YYYY          - entirely inside
+  //           YYYY        - inside touching end
+  //             YYYY      - overlapping end
+  //               YYYY    - outside touching end
+  //                 YYYY  - outside after
+  //       YYYYYYYY        - identical
+  //     YYYYYYYYYYYY      - containing entirely.
+
+  for (int i = 0; i < 9; i++) {
+    l1.Add(CharacterRange::Range(offset + 6, offset + 15));  // Length 8.
+    l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3));
+    offset += 22;
+  }
+  l1.Add(CharacterRange::Range(offset + 6, offset + 15));
+  l2.Add(CharacterRange::Range(offset + 6, offset + 15));
+  offset += 22;
+  l1.Add(CharacterRange::Range(offset + 6, offset + 15));
+  l2.Add(CharacterRange::Range(offset + 4, offset + 17));
+  offset += 22;
+
+  // Different kinds of multi-range overlap:
+  // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
+  //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y
+
+  l1.Add(CharacterRange::Range(offset, offset + 21));
+  l1.Add(CharacterRange::Range(offset + 31, offset + 52));
+  for (int i = 0; i < 6; i++) {
+    l2.Add(CharacterRange::Range(offset + 2, offset + 5));
+    l2.Add(CharacterRange::Singleton(offset + 8));
+    offset += 9;
+  }
+
+  ASSERT(CharacterRange::IsCanonical(&l1));
+  ASSERT(CharacterRange::IsCanonical(&l2));
+
+  ZoneList<CharacterRange> first_only(4);
+  ZoneList<CharacterRange> second_only(4);
+  ZoneList<CharacterRange> both(4);
+
+  // Merge one direction.
+  CharacterRange::Merge(&l1, &l2, &first_only, &second_only, &both);
+
+  CHECK(CharacterRange::IsCanonical(&first_only));
+  CHECK(CharacterRange::IsCanonical(&second_only));
+  CHECK(CharacterRange::IsCanonical(&both));
+
+  for (uc16 i = 0; i < offset; i++) {
+    bool in_first = CharacterInSet(&l1, i);
+    bool in_second = CharacterInSet(&l2, i);
+    CHECK((in_first && !in_second) == CharacterInSet(&first_only, i));
+    CHECK((!in_first && in_second) == CharacterInSet(&second_only, i));
+    CHECK((in_first && in_second) == CharacterInSet(&both, i));
+  }
+
+  first_only.Clear();
+  second_only.Clear();
+  both.Clear();
+
+  // Merge other direction.
+  CharacterRange::Merge(&l2, &l1, &second_only, &first_only, &both);
+
+  CHECK(CharacterRange::IsCanonical(&first_only));
+  CHECK(CharacterRange::IsCanonical(&second_only));
+  CHECK(CharacterRange::IsCanonical(&both));
+
+  for (uc16 i = 0; i < offset; i++) {
+    bool in_first = CharacterInSet(&l1, i);
+    bool in_second = CharacterInSet(&l2, i);
+    CHECK((in_first && !in_second) == CharacterInSet(&first_only, i));
+    CHECK((!in_first && in_second) == CharacterInSet(&second_only, i));
+    CHECK((in_first && in_second) == CharacterInSet(&both, i));
+  }
+
+  first_only.Clear();
+  second_only.Clear();
+  both.Clear();
+
+  // Merge but don't record all combinations.
+  CharacterRange::Merge(&l1, &l2, NULL, NULL, &both);
+
+  CHECK(CharacterRange::IsCanonical(&both));
+
+  for (uc16 i = 0; i < offset; i++) {
+    bool in_first = CharacterInSet(&l1, i);
+    bool in_second = CharacterInSet(&l2, i);
+    CHECK((in_first && in_second) == CharacterInSet(&both, i));
+  }
+
+  // Merge into same set.
+  ZoneList<CharacterRange> all(4);
+  CharacterRange::Merge(&l1, &l2, &all, &all, &all);
+
+  CHECK(CharacterRange::IsCanonical(&all));
+
+  for (uc16 i = 0; i < offset; i++) {
+    bool in_first = CharacterInSet(&l1, i);
+    bool in_second = CharacterInSet(&l2, i);
+    CHECK((in_first || in_second) == CharacterInSet(&all, i));
+  }
+}
 
 
 TEST(Graph) {