From: peter klausler Date: Thu, 19 Apr 2018 00:05:07 +0000 (-0700) Subject: [flang] Finish repackaging SetOfChars X-Git-Tag: llvmorg-12-init~9537^2~2689 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0acda77554414dd0c64780daea46b79cb31bc7fa;p=platform%2Fupstream%2Fllvm.git [flang] Finish repackaging SetOfChars Original-commit: flang-compiler/f18@37085c0cddaf693ae21b79ac6bedd7e059c241d6 Reviewed-on: https://github.com/flang-compiler/f18/pull/61 Tree-same-pre-rewrite: false --- diff --git a/flang/lib/parser/char-set.cc b/flang/lib/parser/char-set.cc index 174667c..a389c7c 100644 --- a/flang/lib/parser/char-set.cc +++ b/flang/lib/parser/char-set.cc @@ -5,10 +5,10 @@ namespace parser { std::string SetOfChars::ToString() const { std::string result; - std::uint64_t set{bits_}; - for (char ch{' '}; set != 0; ++ch) { - if (IsCharInSet(set, ch)) { - set -= SetOfChars{ch}.bits_; + SetOfChars set{*this}; + for (char ch{' '}; !set.empty(); ++ch) { + if (set.Has(ch)) { + set = set.Difference(ch); result += ch; } } diff --git a/flang/lib/parser/char-set.h b/flang/lib/parser/char-set.h index 5ee739d..10dc57b 100644 --- a/flang/lib/parser/char-set.h +++ b/flang/lib/parser/char-set.h @@ -3,7 +3,8 @@ // Sets of distinct characters that are valid in Fortran programs outside // character literals are encoded as 64-bit integers by mapping them to a 6-bit -// character set encoding in which the case of letters is lost. These sets +// character set encoding in which the case of letters is lost (even if +// mixed case input reached the parser, which it does not). These sets // need to be suitable for constexprs, so std::bitset<> was not eligible. #include @@ -14,12 +15,19 @@ namespace parser { struct SetOfChars { constexpr SetOfChars() {} + constexpr SetOfChars(char c) { - if (c <= 32 /*space*/) { - // map control characters, incl. LF (newline), to '?' + // This is basically the old DECSIX encoding, which maps the + // 7-bit ASCII codes [32..95] to [0..63]. 
Only '#', '&', '?', '\', and '^' + // in that range are unused in Fortran after preprocessing outside + // character literals. We repurpose '?' and '^' for newline and unknown + // characters (resp.), leaving the others alone in case this code might + // be useful in preprocessing. + if (c == '\n') { + // map newline to '?' c = '?'; - } else if (c >= 127) { - // map DEL and 8-bit characters to '^' + } else if (c < 32 || c >= 127) { + // map other control characters, DEL, and 8-bit characters to '^' c = '^'; } else if (c >= 96) { // map lower-case letters to upper-case @@ -28,23 +36,38 @@ struct SetOfChars { // range is now [32..95]; reduce to [0..63] and use as a shift count bits_ = static_cast<std::uint64_t>(1) << (c - 32); } - constexpr SetOfChars(const char str[], std::size_t n = 256) { + + constexpr SetOfChars(const char str[], std::size_t n) { for (std::size_t j{0}; j < n; ++j) { bits_ |= SetOfChars{str[j]}.bits_; } } - constexpr SetOfChars(std::uint64_t b) : bits_{b} {} + constexpr SetOfChars(const SetOfChars &) = default; constexpr SetOfChars(SetOfChars &&) = default; constexpr SetOfChars &operator=(const SetOfChars &) = default; constexpr SetOfChars &operator=(SetOfChars &&) = default; + constexpr bool empty() const { return bits_ == 0; } + + constexpr bool Has(SetOfChars that) const { + return (that.bits_ & ~bits_) == 0; + } + constexpr SetOfChars Union(SetOfChars that) const { + return SetOfChars{bits_ | that.bits_}; + } + constexpr SetOfChars Intersection(SetOfChars that) const { + return SetOfChars{bits_ & that.bits_}; + } + constexpr SetOfChars Difference(SetOfChars that) const { + return SetOfChars{bits_ & ~that.bits_}; + } + std::string ToString() const; + +private: + constexpr SetOfChars(std::uint64_t b) : bits_{b} {} std::uint64_t bits_{0}; }; - -static inline constexpr bool IsCharInSet(SetOfChars set, char c) { - return (set.bits_ & SetOfChars{c}.bits_) != 0; -} } // namespace parser } // namespace Fortran #endif // FORTRAN_PARSER_CHAR_SET_H_ diff --git
a/flang/lib/parser/message.cc b/flang/lib/parser/message.cc index d4b015e..500fbda 100644 --- a/flang/lib/parser/message.cc +++ b/flang/lib/parser/message.cc @@ -39,8 +39,8 @@ MessageFormattedText::MessageFormattedText(MessageFixedText text, ...) void Message::Incorporate(Message &that) { if (provenance_ == that.provenance_ && cookedSourceLocation_ == that.cookedSourceLocation_ && - expected_.bits_ != 0) { - expected_.bits_ |= that.expected_.bits_; + !expected_.empty()) { + expected_ = expected_.Union(that.expected_); } } @@ -57,9 +57,9 @@ std::string Message::ToString() const { } } else { SetOfChars expect{expected_}; - if (IsCharInSet(expect, '\n')) { - expect.bits_ &= ~SetOfChars{'\n'}.bits_; - if (expect.bits_ == 0) { + if (expect.Has('\n')) { + expect = expect.Difference('\n'); + if (expect.empty()) { return "expected end of line"_err_en_US.ToString(); } else { s = expect.ToString(); diff --git a/flang/lib/parser/token-parsers.h b/flang/lib/parser/token-parsers.h index b5899dd..c4c7cd7 100644 --- a/flang/lib/parser/token-parsers.h +++ b/flang/lib/parser/token-parsers.h @@ -30,7 +30,7 @@ public: constexpr AnyOfChars(SetOfChars set) : set_{set} {} std::optional Parse(ParseState *state) const { if (std::optional at{state->PeekAtNextChar()}) { - if (IsCharInSet(set_, **at)) { + if (set_.Has(**at)) { state->UncheckedAdvance(); return at; }