[libc++] Fix handling of negated character classes in regex
author Louis Dionne <ldionne@apple.com>
Fri, 24 Aug 2018 14:10:28 +0000 (14:10 +0000)
committer Louis Dionne <ldionne@apple.com>
Fri, 24 Aug 2018 14:10:28 +0000 (14:10 +0000)
Summary:
This commit fixes a regression introduced in r316095, where we don't match
inverted character classes when there are no negated characters in the []'s.

rdar://problem/43060054

Reviewers: mclow.lists, timshen, EricWF

Subscribers: christof, dexonsmith, cfe-commits

Differential Revision: https://reviews.llvm.org/D50534

llvm-svn: 340609

libcxx/include/regex
libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp [new file with mode: 0644]
libcxx/test/std/re/re.alg/re.alg.search/invert_neg_word_search.pass.cpp

index 84aacc0..dcdb14a 100644 (file)
@@ -2414,20 +2414,17 @@ __bracket_expression<_CharT, _Traits>::__exec(__state& __s) const
                 goto __exit;
             }
         }
-        // set of "__found" chars =
+        // When there's at least one of __neg_chars_ and __neg_mask_, the set
+        // of "__found" chars is
         //   union(complement(union(__neg_chars_, __neg_mask_)),
         //         other cases...)
         //
-        // __neg_chars_ and __neg_mask_'d better be handled together, as there
-        // are no short circuit opportunities.
-        //
-        // In addition, when __neg_mask_/__neg_chars_ is empty, they should be
-        // treated as all ones/all chars.
+        // It doesn't make sense to check this when there are no __neg_chars_
+        // and no __neg_mask_.
+        if (!(__neg_mask_ == 0 && __neg_chars_.empty()))
         {
-          const bool __in_neg_mask = (__neg_mask_ == 0) ||
-              __traits_.isctype(__ch, __neg_mask_);
+            const bool __in_neg_mask = __traits_.isctype(__ch, __neg_mask_);
           const bool __in_neg_chars =
-              __neg_chars_.empty() ||
               std::find(__neg_chars_.begin(), __neg_chars_.end(), __ch) !=
               __neg_chars_.end();
           if (!(__in_neg_mask || __in_neg_chars))
diff --git a/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp
new file mode 100644 (file)
index 0000000..5a19edc
--- /dev/null
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// <regex>
+// UNSUPPORTED: c++98, c++03
+
+// Make sure that we correctly match inverted character classes.
+
+#include <cassert>
+#include <regex>
+
+
+int main() {
+    assert(std::regex_match("X", std::regex("[X]")));
+    assert(std::regex_match("X", std::regex("[XY]")));
+    assert(!std::regex_match("X", std::regex("[^X]")));
+    assert(!std::regex_match("X", std::regex("[^XY]")));
+
+    assert(std::regex_match("X", std::regex("[\\S]")));
+    assert(!std::regex_match("X", std::regex("[^\\S]")));
+
+    assert(!std::regex_match("X", std::regex("[\\s]")));
+    assert(std::regex_match("X", std::regex("[^\\s]")));
+
+    assert(std::regex_match("X", std::regex("[\\s\\S]")));
+    assert(std::regex_match("X", std::regex("[^Y\\s]")));
+    assert(!std::regex_match("X", std::regex("[^X\\s]")));
+
+    assert(std::regex_match("X", std::regex("[\\w]")));
+    assert(std::regex_match("_", std::regex("[\\w]")));
+    assert(!std::regex_match("X", std::regex("[^\\w]")));
+    assert(!std::regex_match("_", std::regex("[^\\w]")));
+
+    assert(!std::regex_match("X", std::regex("[\\W]")));
+    assert(!std::regex_match("_", std::regex("[\\W]")));
+    assert(std::regex_match("X", std::regex("[^\\W]")));
+    assert(std::regex_match("_", std::regex("[^\\W]")));
+}