// Sets of distinct characters that are valid in Fortran programs outside
// character literals are encoded as 64-bit integers by mapping them to a 6-bit
-// character set encoding in which the case of letters is lost. These sets
+// character set encoding in which the case of letters is lost (even if
+// mixed case input reached the parser, which it does not). These sets
// need to be suitable for constexprs, so std::bitset<> was not eligible.
#include <cinttypes>
struct SetOfChars {
constexpr SetOfChars() {}
+
constexpr SetOfChars(char c) {
- if (c <= 32 /*space*/) {
- // map control characters, incl. LF (newline), to '?'
+ // This is basically the old DECSIX encoding, which maps the
+ // 7-bit ASCII codes [32..95] to [0..63]. Only '#', '&', '?', '\', and '^'
+ // in that range are unused in Fortran after preprocessing outside
+ // character literals. We repurpose '?' and '^' for newline and unknown
+ // characters (resp.), leaving the others alone in case this code might
+ // be useful in preprocssing.
+ if (c == '\n') {
+ // map newline to '?'
c = '?';
- } else if (c >= 127) {
- // map DEL and 8-bit characters to '^'
+ } else if (c < 32 || c >= 127) {
+ // map other control characters, DEL, and 8-bit characters to '^'
c = '^';
} else if (c >= 96) {
// map lower-case letters to upper-case
// range is now [32..95]; reduce to [0..63] and use as a shift count
bits_ = static_cast<std::uint64_t>(1) << (c - 32);
}
- constexpr SetOfChars(const char str[], std::size_t n = 256) {
+
+ constexpr SetOfChars(const char str[], std::size_t n) {
for (std::size_t j{0}; j < n; ++j) {
bits_ |= SetOfChars{str[j]}.bits_;
}
}
- constexpr SetOfChars(std::uint64_t b) : bits_{b} {}
+
constexpr SetOfChars(const SetOfChars &) = default;
constexpr SetOfChars(SetOfChars &&) = default;
constexpr SetOfChars &operator=(const SetOfChars &) = default;
constexpr SetOfChars &operator=(SetOfChars &&) = default;
+ constexpr bool empty() const { return bits_ == 0; }
+
+ constexpr bool Has(SetOfChars that) const {
+ return (that.bits_ & ~bits_) == 0;
+ }
+ constexpr SetOfChars Union(SetOfChars that) const {
+ return SetOfChars{bits_ | that.bits_};
+ }
+ constexpr SetOfChars Intersection(SetOfChars that) const {
+ return SetOfChars{bits_ & that.bits_};
+ }
+ constexpr SetOfChars Difference(SetOfChars that) const {
+ return SetOfChars{bits_ & ~that.bits_};
+ }
+
std::string ToString() const;
+
+private:
+ constexpr SetOfChars(std::uint64_t b) : bits_{b} {}
std::uint64_t bits_{0};
};
-
-static inline constexpr bool IsCharInSet(SetOfChars set, char c) {
- return (set.bits_ & SetOfChars{c}.bits_) != 0;
-}
} // namespace parser
} // namespace Fortran
#endif // FORTRAN_PARSER_CHAR_SET_H_
void Message::Incorporate(Message &that) {
if (provenance_ == that.provenance_ &&
cookedSourceLocation_ == that.cookedSourceLocation_ &&
- expected_.bits_ != 0) {
- expected_.bits_ |= that.expected_.bits_;
+ !expected_.empty()) {
+ expected_ = expected_.Union(that.expected_);
}
}
}
} else {
SetOfChars expect{expected_};
- if (IsCharInSet(expect, '\n')) {
- expect.bits_ &= ~SetOfChars{'\n'}.bits_;
- if (expect.bits_ == 0) {
+ if (expect.Has('\n')) {
+ expect = expect.Difference('\n');
+ if (expect.empty()) {
return "expected end of line"_err_en_US.ToString();
} else {
s = expect.ToString();