From: peter klausler <pklausler@nvidia.com>
Date: Thu, 19 Apr 2018 00:05:07 +0000 (-0700)
Subject: [flang] Finish repackaging SetOfChars
X-Git-Tag: llvmorg-12-init~9537^2~2689
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0acda77554414dd0c64780daea46b79cb31bc7fa;p=platform%2Fupstream%2Fllvm.git

[flang] Finish repackaging SetOfChars

Original-commit: flang-compiler/f18@37085c0cddaf693ae21b79ac6bedd7e059c241d6
Reviewed-on: https://github.com/flang-compiler/f18/pull/61
Tree-same-pre-rewrite: false
---

diff --git a/flang/lib/parser/char-set.cc b/flang/lib/parser/char-set.cc
index 174667c..a389c7c 100644
--- a/flang/lib/parser/char-set.cc
+++ b/flang/lib/parser/char-set.cc
@@ -5,10 +5,10 @@ namespace parser {
 
 std::string SetOfChars::ToString() const {
   std::string result;
-  std::uint64_t set{bits_};
-  for (char ch{' '}; set != 0; ++ch) {
-    if (IsCharInSet(set, ch)) {
-      set -= SetOfChars{ch}.bits_;
+  SetOfChars set{*this};
+  for (char ch{' '}; !set.empty(); ++ch) {
+    if (set.Has(ch)) {
+      set = set.Difference(ch);
       result += ch;
     }
   }
diff --git a/flang/lib/parser/char-set.h b/flang/lib/parser/char-set.h
index 5ee739d..10dc57b 100644
--- a/flang/lib/parser/char-set.h
+++ b/flang/lib/parser/char-set.h
@@ -3,7 +3,8 @@
 
 // Sets of distinct characters that are valid in Fortran programs outside
 // character literals are encoded as 64-bit integers by mapping them to a 6-bit
-// character set encoding in which the case of letters is lost.  These sets
+// character set encoding in which the case of letters is lost (even if
+// mixed case input reached the parser, which it does not).  These sets
 // need to be suitable for constexprs, so std::bitset<> was not eligible.
 
 #include <cinttypes>
@@ -14,12 +15,19 @@ namespace parser {
 
 struct SetOfChars {
   constexpr SetOfChars() {}
+
   constexpr SetOfChars(char c) {
-    if (c <= 32 /*space*/) {
-      // map control characters, incl. LF (newline), to '?'
+    // This is basically the old DECSIX encoding, which maps the
+    // 7-bit ASCII codes [32..95] to [0..63].  Only '#', '&', '?', '\', and '^'
+    // in that range are unused in Fortran after preprocessing outside
+    // character literals.  We repurpose '?' and '^' for newline and unknown
+    // characters (resp.), leaving the others alone in case this code might
+    // be useful in preprocessing.
+    if (c == '\n') {
+      // map newline to '?'
       c = '?';
-    } else if (c >= 127) {
-      // map DEL and 8-bit characters to '^'
+    } else if (c < 32 || c >= 127) {
+      // map other control characters, DEL, and 8-bit characters to '^'
       c = '^';
     } else if (c >= 96) {
       // map lower-case letters to upper-case
@@ -28,23 +36,38 @@ struct SetOfChars {
     // range is now [32..95]; reduce to [0..63] and use as a shift count
     bits_ = static_cast<std::uint64_t>(1) << (c - 32);
   }
-  constexpr SetOfChars(const char str[], std::size_t n = 256) {
+
+  constexpr SetOfChars(const char str[], std::size_t n) {
     for (std::size_t j{0}; j < n; ++j) {
       bits_ |= SetOfChars{str[j]}.bits_;
     }
   }
-  constexpr SetOfChars(std::uint64_t b) : bits_{b} {}
+
   constexpr SetOfChars(const SetOfChars &) = default;
   constexpr SetOfChars(SetOfChars &&) = default;
   constexpr SetOfChars &operator=(const SetOfChars &) = default;
   constexpr SetOfChars &operator=(SetOfChars &&) = default;
+  constexpr bool empty() const { return bits_ == 0; }
+
+  constexpr bool Has(SetOfChars that) const {
+    return (that.bits_ & ~bits_) == 0;
+  }
+  constexpr SetOfChars Union(SetOfChars that) const {
+    return SetOfChars{bits_ | that.bits_};
+  }
+  constexpr SetOfChars Intersection(SetOfChars that) const {
+    return SetOfChars{bits_ & that.bits_};
+  }
+  constexpr SetOfChars Difference(SetOfChars that) const {
+    return SetOfChars{bits_ & ~that.bits_};
+  }
+
   std::string ToString() const;
+
+private:
+  constexpr SetOfChars(std::uint64_t b) : bits_{b} {}
   std::uint64_t bits_{0};
 };
-
-static inline constexpr bool IsCharInSet(SetOfChars set, char c) {
-  return (set.bits_ & SetOfChars{c}.bits_) != 0;
-}
 }  // namespace parser
 }  // namespace Fortran
 #endif  // FORTRAN_PARSER_CHAR_SET_H_
diff --git a/flang/lib/parser/message.cc b/flang/lib/parser/message.cc
index d4b015e..500fbda 100644
--- a/flang/lib/parser/message.cc
+++ b/flang/lib/parser/message.cc
@@ -39,8 +39,8 @@ MessageFormattedText::MessageFormattedText(MessageFixedText text, ...)
 void Message::Incorporate(Message &that) {
   if (provenance_ == that.provenance_ &&
       cookedSourceLocation_ == that.cookedSourceLocation_ &&
-      expected_.bits_ != 0) {
-    expected_.bits_ |= that.expected_.bits_;
+      !expected_.empty()) {
+    expected_ = expected_.Union(that.expected_);
   }
 }
 
@@ -57,9 +57,9 @@ std::string Message::ToString() const {
       }
     } else {
       SetOfChars expect{expected_};
-      if (IsCharInSet(expect, '\n')) {
-        expect.bits_ &= ~SetOfChars{'\n'}.bits_;
-        if (expect.bits_ == 0) {
+      if (expect.Has('\n')) {
+        expect = expect.Difference('\n');
+        if (expect.empty()) {
           return "expected end of line"_err_en_US.ToString();
         } else {
           s = expect.ToString();
diff --git a/flang/lib/parser/token-parsers.h b/flang/lib/parser/token-parsers.h
index b5899dd..c4c7cd7 100644
--- a/flang/lib/parser/token-parsers.h
+++ b/flang/lib/parser/token-parsers.h
@@ -30,7 +30,7 @@ public:
   constexpr AnyOfChars(SetOfChars set) : set_{set} {}
   std::optional<const char *> Parse(ParseState *state) const {
     if (std::optional<const char *> at{state->PeekAtNextChar()}) {
-      if (IsCharInSet(set_, **at)) {
+      if (set_.Has(**at)) {
         state->UncheckedAdvance();
         return at;
       }