From: peter klausler Date: Tue, 30 Jan 2018 19:50:36 +0000 (-0800) Subject: [flang] Add cooked-tokens.h. X-Git-Tag: llvmorg-12-init~9537^2~3008 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5e49363c38221a120f1b1a956eaa18acb466099d;p=platform%2Fupstream%2Fllvm.git [flang] Add cooked-tokens.h. Original-commit: flang-compiler/f18@7e452551f7647191fac183b10e042d72aa547997 --- diff --git a/flang/cooked-tokens.h b/flang/cooked-tokens.h new file mode 100644 index 0000000..0b5bfdd --- /dev/null +++ b/flang/cooked-tokens.h @@ -0,0 +1,447 @@ +#ifndef FORTRAN_COOKED_TOKENS_H_ +#define FORTRAN_COOKED_TOKENS_H_ + +// These parsers are driven by the Fortran grammar (grammar.h) to consume +// the cooked character stream from cookedNextChar (cooked-chars.h) and +// partition it into a context-sensitive token stream. + +#include "basic-parsers.h" +#include "cooked-chars.h" +#include "idioms.h" +#include "position.h" +#include +#include +#include +#include +#include +#include +#include + +namespace Fortran { + +class CharPredicateGuardParser { + public: + using resultType = char; + constexpr CharPredicateGuardParser(const CharPredicateGuardParser &) + = default; + constexpr CharPredicateGuardParser(bool (*f)(char), const char *msg) + : predicate_{f}, message_{msg} {} + std::optional Parse(ParseState *state) const { + Position at{state->position()}; + if (std::optional result{cookedNextChar.Parse(state)}) { + if (predicate_(*result)) { + return result; + } + } + state->messages()->Add(Message{at, message_, state->context()}); + return {}; + } + private: + bool (*const predicate_)(char); + const char *const message_; +}; + +static inline constexpr bool IsDecimalDigit(char ch) { + return isdigit(ch); +} + +static inline constexpr bool IsOctalDigit(char ch) { + return ch >= '0' && ch <= '7'; +} + +static inline constexpr bool IsHexadecimalDigit(char ch) { + return isxdigit(ch); +} + +static inline constexpr bool IsLetter(char ch) { + return isalpha(ch); +} + +static inline constexpr char ToLower(char &&ch) { + return tolower(ch); +} + +constexpr CharPredicateGuardParser digit{IsDecimalDigit, "expected digit"}; + +constexpr auto letter = + applyFunction(ToLower, + CharPredicateGuardParser{IsLetter, "expected letter"}); + +template +class CharMatch { + public: + using resultType = char; + constexpr CharMatch() {} + static std::optional Parse(ParseState *state) { + Position at{state->position()}; + std::optional result{cookedNextChar.Parse(state)}; + if (result && *result != good) { + result.reset(); + } + if (!result) { + state->messages()->Add(Message{at, good, state->context()}); + } + return {result}; + } +}; + +constexpr struct Space { + using resultType = Success; + constexpr Space() {} + static std::optional Parse(ParseState *state) { + std::optional ch{cookedNextChar.Parse(state)}; + if (ch) { + if (ch == ' ' || ch == '\t') { + return {Success{}}; + } + } + return {}; + } +} space; + +constexpr auto spaces = skipMany(space); + +class TokenStringMatch { + public: + using resultType = Success; + constexpr TokenStringMatch(const TokenStringMatch &) = default; + constexpr TokenStringMatch(const char *str, size_t n) + : str_{str}, length_{n} {} + constexpr TokenStringMatch(const char *str) : str_{str} {} + std::optional Parse(ParseState *state) const { + Position at{state->position()}; + if (!spaces.Parse(state)) { + return {}; + } + const char *p{str_}; + std::optional ch; // initially empty + for (size_t j{0}; j < length_ && *p != '\0'; ++j, ++p) { + const auto spaceSkipping{*p == ' '}; + if (spaceSkipping) { + if (j + 1 == length_ || p[1] == ' ' || p[1] == '\0') { + continue; // redundant; ignore + } + } + if (!ch && !(ch = cookedNextChar.Parse(state))) { + return {}; + } + if (spaceSkipping) { + // medial space: 0 or more spaces/tabs accepted, none required + while (*ch == ' ' || *ch == '\t') { + if (!(ch = cookedNextChar.Parse(state))) { + return {}; + } + } + // ch remains full for next iteration + } else if (*ch == tolower(*p)) { + ch.reset(); + } else { + state->messages()->Add(Message{at, "expected '"s + str_ + '\'', + state->context()}); + return {}; + } + } + return spaces.Parse(state); + } + private: + const char *const str_; + const size_t length_{std::numeric_limits::max()}; +}; + +constexpr TokenStringMatch operator""_tok(const char str[], size_t n) { + return TokenStringMatch{str, n}; +} + +template::value, int> = 0> +inline constexpr SequenceParser +operator>>(const char *str, const PA &p) { + return SequenceParser{TokenStringMatch{str}, p}; +} + +template::value, int> = 0> +inline constexpr InvertedSequenceParser +operator/(const PA &p, const char *str) { + return InvertedSequenceParser{p, TokenStringMatch{str}}; +} + +template +inline constexpr SequenceParser> +parenthesized(const PA &p) { + return "(" >> p / ")"; +} + +template +inline constexpr SequenceParser> +bracketed(const PA &p) { + return "[" >> p / "]"; +} + +static inline int HexadecimalDigitValue(char ch) { + if (IsDecimalDigit(ch)) { + return ch - '0'; + } + return toupper(ch) - 'A' + 10; +} + +// Quoted character literal constants. +struct CharLiteralChar { + struct Result { + Result(char c, bool esc) : ch{c}, wasEscaped{esc} {} + static Result Bare(char c) { return Result{c, false}; } + static Result Escaped(char c) { return Result{c, true}; } + char ch; + bool wasEscaped; + }; + using resultType = Result; + static std::optional Parse(ParseState *state) { + Position at{state->position()}; + std::optional och{cookedNextChar.Parse(state)}; + if (!och.has_value()) { + return {}; + } + char ch{*och}; + if (ch == '\n') { + state->messages()->Add(Message{at, "unclosed character constant", + state->context()}); + return {}; + } + if (ch != '\\' || !state->enableBackslashEscapesInCharLiterals()) { + return {Result::Bare(ch)}; + } + if (!(och = cookedNextChar.Parse(state)).has_value()) { + return {}; + } + switch ((ch = *och)) { + case 'a': return {Result::Escaped('\a')}; + case 'b': return {Result::Escaped('\b')}; + case 'f': return {Result::Escaped('\f')}; + case 'n': return {Result::Escaped('\n')}; + case 'r': return {Result::Escaped('\r')}; + case 't': return {Result::Escaped('\t')}; + case 'v': return {Result::Escaped('\v')}; + case '"': + case '\'': + case '\\': + return {Result::Escaped(ch)}; + case '\n': + state->messages()->Add(Message{at, "unclosed character constant", + state->context()}); + return {}; + default: + if (IsOctalDigit(ch)) { + ch -= '0'; + for (int j = (ch > 3 ? 1 : 2); j-- > 0;) { + static constexpr auto octalDigit = + attempt(CharPredicateGuardParser{IsOctalDigit, + "expected octal digit"}); + if ((och = octalDigit.Parse(state)).has_value()) { + ch = 8 * ch + *och - '0'; + } + } + } else if (ch == 'x' || ch == 'X') { + ch = 0; + for (int j = 0; j++ < 2;) { + static constexpr auto hexDigit = + attempt(CharPredicateGuardParser{IsHexadecimalDigit, + "expected hexadecimal digit"}); + if ((och = hexDigit.Parse(state)).has_value()) { + ch = 16 * ch + HexadecimalDigitValue(*och); + } + } + } else { + state->messages()->Add(Message{at, "bad escaped character", + state->context()}); + } + return {Result::Escaped(ch)}; + } + } +}; + +template +struct CharLiteral { + using resultType = std::string; + static std::optional Parse(ParseState *state) { + std::string str; + CHECK(!state->set_inCharLiteral(true)); + static constexpr auto nextch = attempt(CharLiteralChar{}); + while (std::optional ch{nextch.Parse(state)}) { + if (ch->ch == quote && !ch->wasEscaped) { + static constexpr auto doubled = attempt(CharMatch{}); + if (!doubled.Parse(state).has_value()) { + state->set_inCharLiteral(false); + return {str}; + } + } + str += ch->ch; + } + return {}; + } +}; + +// Parse "BOZ" binary literal quoted constants. +// As extensions, support X as an alternate hexadecimal marker, and allow +// BOZX markers to appear as synonyms. +struct BOZLiteral { + using resultType = std::uint64_t; + static std::optional Parse(ParseState *state) { + std::optional shift; + auto baseChar = [&shift](char ch) -> bool { + switch (toupper(ch)) { + case 'B': shift = 1; return true; + case 'O': shift = 3; return true; + case 'Z': shift = 4; return true; + case 'X': shift = 4; return true; + default: return false; + } + }; + + if (!spaces.Parse(state)) { + return {}; + } + + auto ch = cookedNextChar.Parse(state); + if (!ch) { + return {}; + } + if (toupper(*ch) == 'X' && state->strictConformance()) { + return {}; + } + if (baseChar(*ch) && !(ch = cookedNextChar.Parse(state))) { + return {}; + } + + char quote = *ch; + if (quote != '\'' && quote != '"') { + return {}; + } + + Position at{state->position()}; + std::string content; + while (true) { + if (!(ch = cookedNextChar.Parse(state))) { + return {}; + } + if (*ch == quote) { + break; + } + if (!isxdigit(*ch)) { + return {}; + } + content += *ch; + } + + if (!shift && !state->strictConformance()) { + // extension: base allowed to appear as suffix + if (!(ch = cookedNextChar.Parse(state)) || !baseChar(*ch)) { + return {}; + } + } + + if (content.empty()) { + state->messages()->Add(Message{at, "no digit in BOZ literal", + state->context()}); + return {}; + } + + std::uint64_t value{0}; + for (auto digit : content) { + digit = HexadecimalDigitValue(digit); + if ((digit >> *shift) > 0) { + state->messages()->Add(Message{at, "bad digit in BOZ literal", + state->context()}); + return {}; + } + std::uint64_t was{value}; + value <<= *shift; + if ((value >> *shift) != was) { + state->messages()->Add(Message{at, "excessive digits in BOZ literal", + state->context()}); + return {}; + } + value |= digit; + } + return {value}; + } +}; + +// Unsigned decimal digit string; no space skipping +struct DigitString { + using resultType = std::uint64_t; + static std::optional Parse(ParseState *state) { + static constexpr auto getDigit = attempt(digit); + Position at{state->position()}; + std::optional firstDigit{getDigit.Parse(state)}; + if (!firstDigit) { + return {}; + } + std::uint64_t value = *firstDigit - '0'; + bool overflow{false}; + while (auto nextDigit{getDigit.Parse(state)}) { + if (value > std::numeric_limits::max() / 10) { + overflow = true; + } + value *= 10; + int digitValue = *nextDigit - '0'; + if (value > std::numeric_limits::max() - digitValue) { + overflow = true; + } + value += digitValue; + } + if (overflow) { + state->messages()->Add(Message{at, "overflow in decimal literal", + state->context()}); + } + return {value}; + } +}; + +// Legacy feature: Hollerith literal constants +struct HollerithLiteral { + using resultType = std::string; + static std::optional Parse(ParseState *state) { + if (!spaces.Parse(state)) { + return {}; + } + Position at{state->position()}; + std::optional charCount{DigitString{}.Parse(state)}; + if (!charCount || *charCount < 1) { + return {}; + } + std::optional h {letter.Parse(state)}; + if (!h || (*h != 'h' && *h != 'H')) { + return {}; + } + std::string content; + CHECK(!state->set_inCharLiteral(true)); + for (auto j = *charCount; j-- > 0; ) { + std::optional ch{cookedNextChar.Parse(state)}; + if (!ch || !isprint(*ch)) { + state->messages()->Add(Message{at, + "insufficient or bad characters in Hollerith", + state->context()}); + state->set_inCharLiteral(false); + return {}; + } + content += *ch; + } + state->set_inCharLiteral(false); + return {content}; + } +}; + +// A common idiom in the Fortran grammar is an optional item (usually +// a nonempty comma-separated list) that, if present, must follow a comma +// and precede a doubled colon. When the item is absent, the comma must +// not appear, and the doubled colons are optional. +// [[, xyz] ::] is optionalBeforeColons(xyz) +// [[, xyz]... ::] is optionalBeforeColons(nonemptyList(xyz)) +template +inline constexpr auto optionalBeforeColons(const PA &p) { + return "," >> p / "::" || + "::" >> construct{} || + !","_tok >> construct{}; +} +} // namespace Fortran +#endif // FORTRAN_COOKED_TOKENS_H_