add_library(FortranParser
char-buffer.cc
+ char-set.cc
characters.cc
idioms.cc
message.cc
--- /dev/null
+#include "char-set.h"
+
+namespace Fortran {
+namespace parser {
+
+// Renders a set as text: one decoded character per member, emitted in
+// ascending order of six-bit code.
+std::string SetOfCharsToString(SetOfChars set) {
+  std::string text;
+  SetOfChars mask{1};
+  // The working copy loses each member as it is emitted, so the scan ends as
+  // soon as nothing is left rather than always visiting all 64 positions.
+  for (int sixBit{0}; set != 0; ++sixBit, mask <<= 1) {
+    if ((set & mask) == 0) {
+      continue;
+    }
+    text += SixBitDecoding(sixBit);
+    set &= ~mask;
+  }
+  return text;
+}
+
+} // namespace parser
+} // namespace Fortran
--- /dev/null
+#ifndef FORTRAN_PARSER_CHAR_SET_H_
+#define FORTRAN_PARSER_CHAR_SET_H_
+
+// Sets of distinct characters that are valid in Fortran programs outside
+// character literals are encoded as 64-bit integers by mapping them to a 6-bit
+// character set encoding in which the case of letters is lost. These sets
+// need to be suitable for constexprs, so std::bitset<> was not eligible.
+
+#include <cinttypes>
+#include <string>
+
+namespace Fortran {
+namespace parser {
+
+using SetOfChars = std::uint64_t;
+
+// Maps a character onto a code in [0..63], the bit position it occupies in a
+// SetOfChars.  The mapping is lossy: letter case is discarded, and whole
+// ranges of characters collapse onto the codes for '?' and '^'.
+static constexpr char SixBitEncoding(char c) {
+  // Classify on the unsigned value.  Where plain char is signed, bytes with
+  // the high bit set are negative and would otherwise satisfy "<= 32", landing
+  // on '?' -- the code shared with newline -- instead of '^'.
+  const unsigned char uc{static_cast<unsigned char>(c)};
+  if (uc <= 32 /*space*/) {
+    // map space and control characters, incl. LF (newline), to '?'
+    c = '?';
+  } else if (uc >= 127) {
+    // map DEL and 8-bit characters to '^'
+    c = '^';
+  } else if (uc >= 96) {
+    // map lower-case letters to upper-case; '`' and '{'..'~' drop by 32 too
+    c -= 32;
+  }
+  // range is now [32..95]; reduce to [0..63]
+  return c - 32;
+}
+
+// Converts a six-bit code back to a representative character by adding 32;
+// the code that would yield '?' is rendered as a newline instead.
+static constexpr char SixBitDecoding(char c) {
+  const char decoded{static_cast<char>(c + 32)};
+  return decoded == '?' ? '\n' : decoded;
+}
+
+// Yields the set whose only member is the six-bit encoding of c.
+static constexpr SetOfChars SingletonChar(char c) {
+  constexpr SetOfChars lowBit{1};
+  return lowBit << SixBitEncoding(c);
+}
+
+// Builds the set of the characters in str, reading at most n of them and
+// stopping early at a NUL.
+static constexpr SetOfChars CharsToSet(const char str[], std::size_t n = 256) {
+  SetOfChars result{0};
+  for (std::size_t at{0}; at < n && str[at] != '\0'; ++at) {
+    result |= SingletonChar(str[at]);
+  }
+  return result;
+}
+
+static const SetOfChars emptySetOfChars{0};  // no members
+static const SetOfChars fullSetOfChars{~static_cast<SetOfChars>(0)};  // all 64 codes
+static const SetOfChars setOfLetters{
+ CharsToSet("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")};
+static const SetOfChars setOfDecimalDigits{CharsToSet("0123456789")};
+static const SetOfChars setOfIdentifierStarts{setOfLetters | CharsToSet("_@$")};  // letters plus '_', '@', '$'
+static const SetOfChars setOfIdentifierChars{
+ setOfIdentifierStarts | setOfDecimalDigits};
+
+// sanity check: the letters occupy bits 33..58 and the digits bits 16..25
+static_assert(setOfLetters == 0x7fffffe00000000);
+static_assert(setOfDecimalDigits == 0x3ff0000);
+
+// True when the six-bit encoding of c is a member of set.
+static inline constexpr bool IsCharInSet(SetOfChars set, char c) {
+  const SetOfChars probe{SingletonChar(c)};
+  return (set & probe) != 0;
+}
+
+// True when set has exactly one member.  Clearing the lowest set bit with
+// set & (set - 1) leaves zero for a one-member set, but it also leaves zero
+// for the empty set, which therefore has to be excluded explicitly.
+static inline constexpr bool IsSingleton(SetOfChars set) {
+  return set != emptySetOfChars && (set & (set - 1)) == emptySetOfChars;
+}
+
+std::string SetOfCharsToString(SetOfChars);
+} // namespace parser
+} // namespace Fortran
+#endif // FORTRAN_PARSER_CHAR_SET_H_
#include "message.h"
+#include "char-set.h"
#include <cstdarg>
#include <cstddef>
#include <cstdio>
string_ = buffer;
}
-MessageFixedText MessageExpectedText::AsMessageFixedText() const {
- if (str_ != nullptr) {
- return {str_, bytes_};
- }
- static char chars[256];
- if (chars[1] == '\0') {
- // one-time initialization of array used for permanant single-byte string
- // pointers
- for (std::size_t j{0}; j < sizeof chars; ++j) {
- chars[j] = j;
+// Renders the text of this message.  Formatted text held in string_ is used
+// when present; otherwise the fixed text is used, or, when there is none, the
+// set of expected characters.  Messages flagged as "expected" (and one-member
+// character sets) are wrapped as "expected '...'"; larger sets are reported
+// as "expected one of '...'", and a lone newline as "expected end of line".
+std::string Message::ToString() const {
+ std::string s{string_};
+ bool isExpected{isExpected_};
+ if (string_.empty()) {
+ if (fixedText_ != nullptr) {
+ if (fixedBytes_ > 0) {
+ s = std::string{fixedText_, fixedBytes_};
+ } else {
+ s = std::string{fixedText_}; // NUL-terminated
+ }
+ } else {
+ s = SetOfCharsToString(expected_);
+ if (!IsSingleton(expected_)) {
+ // MessageFormattedText takes its arguments through a C-style "...", so
+ // "%s" must be given a character pointer, never a std::string object.
+ return MessageFormattedText("expected one of '%s'"_err_en_US, s.data())
+ .MoveString();
+ }
+ if (expected_ == SingletonChar('\n')) {
+ return "expected end of line"_err_en_US.ToString();
+ }
+ isExpected = true;
}
}
- return {&chars[static_cast<unsigned char>(singleton_)], 1};
+ if (isExpected) {
+ // as above: pass the character data, not the std::string itself
+ return MessageFormattedText("expected '%s'"_err_en_US, s.data())
+ .MoveString();
+ }
+ return s;
}
Provenance Message::Emit(
if (isFatal_) {
o << "ERROR: ";
}
- if (string_.empty()) {
- if (isExpectedText_) {
- std::string goal{text_.ToString()};
- if (goal == "\n") {
- o << "expected end of line"_err_en_US;
- } else {
- o << MessageFormattedText("expected '%s'"_err_en_US, goal.data())
- .MoveString();
- }
- } else {
- o << text_;
- }
- } else {
- o << string_;
- }
- o << '\n';
+ o << ToString() << '\n';
return provenance;
}
// Defines a representation for sequences of compiler messages.
// Supports nested contextualization.
+#include "char-set.h"
#include "idioms.h"
#include "provenance.h"
#include "reference-counted.h"
#include <cstddef>
+#include <cstring>
#include <forward_list>
#include <optional>
#include <ostream>
}
} // namespace literals
-std::ostream &operator<<(std::ostream &, const MessageFixedText &);
-
class MessageFormattedText {
public:
MessageFormattedText(MessageFixedText, ...);
class MessageExpectedText {
public:
MessageExpectedText(const char *s, std::size_t n) : str_{s}, bytes_{n} {}
- explicit MessageExpectedText(char ch) : singleton_{ch} {}
- MessageFixedText AsMessageFixedText() const;
+ explicit MessageExpectedText(const char *s)  // NUL-terminated text; length taken with strlen
+ : str_{s}, bytes_{std::strlen(s)} {}
+ explicit MessageExpectedText(char ch) : set_{SingletonChar(ch)} {}  // one expected character, stored as a one-member set
+ explicit MessageExpectedText(SetOfChars set) : set_{set} {}  // a set of expected characters
+
+ const char *str() const { return str_; }  // null unless built from text
+ std::size_t size() const { return bytes_; }  // byte count of str(); 0 when built from a character or set
+ SetOfChars set() const { return set_; }  // empty unless built from a character or set
private:
const char *str_{nullptr};
- char singleton_;
- std::size_t bytes_{1};
+ std::size_t bytes_{0};
+ SetOfChars set_{emptySetOfChars};
};
class Message : public ReferenceCounted<Message> {
Message &operator=(Message &&that) = default;
// TODO: Change these to cover ranges of provenance
- Message(Provenance p, MessageFixedText t, Message *c = nullptr)
- : provenance_{p}, text_{t}, context_{c}, isFatal_{t.isFatal()} {}
- Message(Provenance p, MessageFormattedText &&s, Message *c = nullptr)
- : provenance_{p}, string_{s.MoveString()}, context_{c}, isFatal_{
- s.isFatal()} {}
- Message(Provenance p, MessageExpectedText t, Message *c = nullptr)
- : provenance_{p}, text_{t.AsMessageFixedText()},
- isExpectedText_{true}, context_{c}, isFatal_{true} {}
-
- Message(const char *csl, MessageFixedText t, Message *c = nullptr)
- : cookedSourceLocation_{csl}, text_{t}, context_{c}, isFatal_{t.isFatal()} {
- }
- Message(const char *csl, MessageFormattedText &&s, Message *c = nullptr)
- : cookedSourceLocation_{csl}, string_{s.MoveString()}, context_{c},
- isFatal_{s.isFatal()} {}
- Message(const char *csl, MessageExpectedText t, Message *c = nullptr)
- : cookedSourceLocation_{csl}, text_{t.AsMessageFixedText()},
- isExpectedText_{true}, context_{c}, isFatal_{true} {}
+ Message(Provenance p, MessageFixedText t)  // fixed text: records its pointer and size, not a copy
+ : provenance_{p}, fixedText_{t.str()},
+ fixedBytes_{t.size()}, isFatal_{t.isFatal()} {}
+ Message(Provenance p, MessageFormattedText &&s)  // formatted text: takes over the formatted string
+ : provenance_{p}, string_{s.MoveString()}, isFatal_{s.isFatal()} {}
+ Message(Provenance p, MessageExpectedText t)  // expected text or character set: always fatal
+ : provenance_{p}, fixedText_{t.str()}, fixedBytes_{t.size()},
+ isExpected_{true}, expected_{t.set()}, isFatal_{true} {}
+ // the same three forms, initializing cookedSourceLocation_ instead of provenance_
+ Message(const char *csl, MessageFixedText t)
+ : cookedSourceLocation_{csl}, fixedText_{t.str()},
+ fixedBytes_{t.size()}, isFatal_{t.isFatal()} {}
+ Message(const char *csl, MessageFormattedText &&s)
+ : cookedSourceLocation_{csl}, string_{s.MoveString()}, isFatal_{
+ s.isFatal()} {}
+ Message(const char *csl, MessageExpectedText t)
+ : cookedSourceLocation_{csl}, fixedText_{t.str()}, fixedBytes_{t.size()},
+ isExpected_{true}, expected_{t.set()}, isFatal_{true} {}
bool operator<(const Message &that) const {
if (cookedSourceLocation_ != nullptr) {
Provenance provenance() const { return provenance_; }
const char *cookedSourceLocation() const { return cookedSourceLocation_; }
Context context() const { return context_; }
+ Message &set_context(Message *c) {  // replaces the context of this message
+ context_ = c;
+ return *this;  // lets the call be chained onto the expression that produced the message
+ }
bool isFatal() const { return isFatal_; }
+ std::string ToString() const;
Provenance Emit(
std::ostream &, const CookedSource &, bool echoSourceLine = true) const;
private:
Provenance provenance_;
const char *cookedSourceLocation_{nullptr};
- MessageFixedText text_;
- bool isExpectedText_{false}; // implies "expected '%s'"_err_en_US
+ const char *fixedText_{nullptr};
+ std::size_t fixedBytes_{0};
+ bool isExpected_{false};
std::string string_;
+ SetOfChars expected_{emptySetOfChars};
Context context_;
bool isFatal_{false};
};
}
}
- void Put(Message &&m) {
+ Message &Put(Message &&m) {  // appends m after last_ and hands back the stored copy
CHECK(IsValidLocation(m));
last_ = messages_.emplace_after(last_, std::move(m));
+ return *last_;  // last_ now refers to the element just added
}
- template<typename... A> void Say(A &&... args) {
+ template<typename... A> Message &Say(A &&... args) {  // constructs a message in place from args
last_ = messages_.emplace_after(last_, std::forward<A>(args)...);
+ return *last_;  // the new message, so the caller can adjust it further
}
void Annex(Messages &that) {
const char *GetLocation() const { return p_; }
void PushContext(MessageFixedText text) {
- context_ = Message::Context{new Message{p_, text, context_.get()}};
+ auto m = new Message{p_, text};  // message located at the current position p_
+ m->set_context(context_.get());  // link it to the context that was current until now
+ context_ = Message::Context{m};  // the new message becomes the current context
}
void PopContext() {
if (deferMessages_) {
anyDeferredMessages_ = true;
} else {
- messages_.Say(at, t, context_.get());
+ messages_.Say(at, t).set_context(context_.get());
}
}
void Say(const char *at, MessageFormattedText &&t) {
if (deferMessages_) {
anyDeferredMessages_ = true;
} else {
- messages_.Say(at, std::move(t), context_.get());
+ messages_.Say(at, std::move(t)).set_context(context_.get());  // context is attached after construction
}
}
void Say(const char *at, MessageExpectedText &&t) {
if (deferMessages_) {
anyDeferredMessages_ = true;
} else {
- messages_.Say(at, std::move(t), context_.get());
+ messages_.Say(at, std::move(t)).set_context(context_.get());  // context is attached after construction
}
}
// the prescanned character stream and recognize context-sensitive tokens.
#include "basic-parsers.h"
+#include "char-set.h"
#include "characters.h"
#include "idioms.h"
#include "provenance.h"
namespace Fortran {
namespace parser {
-class CharPredicateGuard {
-public:
- using resultType = const char *;
- constexpr CharPredicateGuard(const CharPredicateGuard &) = default;
- constexpr CharPredicateGuard(bool (*f)(char), MessageFixedText m)
- : predicate_{f}, messageText_{m} {}
- std::optional<const char *> Parse(ParseState *state) const {
- if (std::optional<const char *> at{state->PeekAtNextChar()}) {
- if (predicate_(**at)) {
- state->UncheckedAdvance();
- return at;
- }
- }
- state->Say(messageText_);
- return {};
- }
-
-private:
- bool (*const predicate_)(char);
- const MessageFixedText messageText_;
-};
-
-constexpr auto letter =
- CharPredicateGuard{IsLetter, "expected letter"_err_en_US};
-constexpr auto digit =
- CharPredicateGuard{IsDecimalDigit, "expected digit"_err_en_US};
-
// "xyz"_ch matches one instance of the characters x, y, or z without skipping
// any spaces before or after. The parser returns the location of the character
// on success.
-class AnyOfChar {
+class AnyOfChars {
public:
using resultType = const char *;
- constexpr AnyOfChar(const AnyOfChar &) = default;
- constexpr AnyOfChar(const char *chars, std::size_t n)
- : chars_{chars}, bytes_{n} {}
+ constexpr AnyOfChars(const AnyOfChars &) = default;
+ constexpr AnyOfChars(SetOfChars set) : set_{set} {}  // the characters this parser accepts
std::optional<const char *> Parse(ParseState *state) const {
if (std::optional<const char *> at{state->PeekAtNextChar()}) {
- char ch{**at};
- const char *p{chars_};
- for (std::size_t j{0}; j < bytes_ && *p != '\0'; ++j, ++p) {
- if (ch == ToLowerCaseLetter(*p)) {
- state->UncheckedAdvance();
- return at;
- }
+ if (IsCharInSet(set_, **at)) {  // one bit test replaces the scan over the character list
+ state->UncheckedAdvance();
+ return at;
}
}
- state->Say(MessageExpectedText{chars_, bytes_});
+ state->Say(MessageExpectedText{set_});  // on failure, report the whole set as what was expected
return {};
}
private:
- const char *const chars_;
- const std::size_t bytes_{std::numeric_limits<std::size_t>::max()};
+ SetOfChars set_;
};
-constexpr AnyOfChar operator""_ch(const char str[], std::size_t n) {
- return AnyOfChar{str, n};
+constexpr AnyOfChars operator""_ch(const char str[], std::size_t n) {  // "xyz"_ch: parser for any one of the listed characters
+ return AnyOfChars{CharsToSet(str, n)};  // the literal is folded into a set here
}
+constexpr auto letter = "abcdefghijklmnopqrstuvwxyz"_ch;  // also accepts upper case: the set encoding drops letter case
+constexpr auto digit = "0123456789"_ch;  // any one decimal digit
+
// Skips over optional spaces. Always succeeds.
constexpr struct Space {
using resultType = Success;
if (IsOctalDigit(ch)) {
ch -= '0';
for (int j = (ch > 3 ? 1 : 2); j-- > 0;) {
- static constexpr auto octalDigit =
- CharPredicateGuard{IsOctalDigit, "expected octal digit"_en_US};
+ static constexpr auto octalDigit = "01234567"_ch;
och = octalDigit.Parse(state);
if (och.has_value()) {
ch = 8 * ch + **och - '0';
} else if (ch == 'x' || ch == 'X') {
ch = 0;
for (int j = 0; j++ < 2;) {
- static constexpr auto hexDigit = CharPredicateGuard{
- IsHexadecimalDigit, "expected hexadecimal digit"_en_US};
+ static constexpr auto hexDigit = "0123456789abcdefABCDEF"_ch;
och = hexDigit.Parse(state);
if (och.has_value()) {
ch = 16 * ch + HexadecimalDigitValue(**och);
static std::optional<std::string> Parse(ParseState *state) {
std::string str;
static constexpr auto nextch = attempt(CharLiteralChar{});
- static char q{quote};
while (std::optional<CharLiteralChar::Result> ch{nextch.Parse(state)}) {
if (ch->ch == quote && !ch->wasEscaped) {
- static constexpr auto doubled = attempt(AnyOfChar{&q, 1});
+ static constexpr auto doubled =
+ attempt(AnyOfChars{SingletonChar(quote)});
if (!doubled.Parse(state).has_value()) {
return {str};
}
if (!charCount || *charCount < 1) {
return {};
}
- std::optional<const char *> h{letter.Parse(state)};
- if (!h || **h != 'h') {
+ static const auto letterH = "h"_ch;
+ std::optional<const char *> h{letterH.Parse(state)};
+ if (!h.has_value()) {
return {};
}
std::string content;