From f4a85288dbd4e4894ec3b5c373fde46842681423 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Tue, 27 Feb 2018 14:02:10 -0800 Subject: [PATCH] [flang] Fix some bugs that I noticed while running regression tests. Add characters.h to consolidate all per-character functions and reduce dependence on <cctype>. Add omitted DIRECT= and UNFORMATTED= specifiers to INQUIRE statement. Add Interval<>, rebase ProvenanceRange upon it, rebase CharPointerWithLength and rename it ContiguousChars. Reformat C++. Original-commit: flang-compiler/f18@78c73c62badb3f2c32a931bbcdf0dec6b26b0312 Reviewed-on: https://github.com/flang-compiler/f18/pull/19 Tree-same-pre-rewrite: false --- flang/lib/parser/characters.h | 114 +++++++++++++++++++++++++++++++++++++ flang/lib/parser/grammar.h | 25 +++++--- flang/lib/parser/parse-state.h | 2 +- flang/lib/parser/parse-tree.h | 7 ++- flang/lib/parser/preprocessor.cc | 72 ++++++++++------------- flang/lib/parser/preprocessor.h | 6 +- flang/lib/parser/prescan.cc | 63 ++++++++------------ flang/lib/parser/prescan.h | 5 -- flang/lib/parser/provenance.cc | 71 +++++++++++------------ flang/lib/parser/provenance.h | 96 +++++++++++++++---------------- flang/lib/parser/token-parsers.h | 90 ++++++++++------------------- flang/lib/parser/token-sequence.cc | 23 ++++---- flang/lib/parser/token-sequence.h | 47 +++++++-------- 13 files changed, 338 insertions(+), 283 deletions(-) create mode 100644 flang/lib/parser/characters.h diff --git a/flang/lib/parser/characters.h b/flang/lib/parser/characters.h new file mode 100644 index 0000000..7d1dec8 --- /dev/null +++ b/flang/lib/parser/characters.h @@ -0,0 +1,114 @@ +#ifndef FORTRAN_PARSER_CHARACTERS_H_ +#define FORTRAN_PARSER_CHARACTERS_H_ + +// Define some character classification predicates and +// conversions here to avoid dependences upon <cctype> and +// also to accommodate Fortran tokenization. 
+ +#include +#include + +namespace Fortran { +namespace parser { + +static constexpr bool IsUpperCaseLetter(char ch) { + if constexpr ('A' == static_cast(0xc1)) { + // EBCDIC + return (ch >= 'A' && ch <= 'I') || (ch >= 'J' && ch <= 'R') || + (ch >= 'S' && ch <= 'Z'); + } + return ch >= 'A' && ch <= 'Z'; +} + +static constexpr bool IsLowerCaseLetter(char ch) { + if constexpr ('a' == static_cast(0x81)) { + // EBCDIC + return (ch >= 'a' && ch <= 'i') || (ch >= 'j' && ch <= 'r') || + (ch >= 's' && ch <= 'z'); + } + return ch >= 'a' && ch <= 'z'; +} + +static constexpr bool IsLetter(char ch) { + return IsUpperCaseLetter(ch) || IsLowerCaseLetter(ch); +} + +static constexpr bool IsDecimalDigit(char ch) { return ch >= '0' && ch <= '9'; } + +static constexpr bool IsHexadecimalDigit(char ch) { + return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || + (ch >= 'a' && ch <= 'f'); +} + +static constexpr bool IsOctalDigit(char ch) { return ch >= '0' && ch <= '7'; } + +static constexpr bool IsLegalIdentifierStart(char ch) { + return IsLetter(ch) || ch == '_' || ch == '@' || ch == '$'; +} + +static constexpr bool IsLegalInIdentifier(char ch) { + return IsLegalIdentifierStart(ch) || IsDecimalDigit(ch); +} + +static constexpr char ToLowerCaseLetter(char ch) { + return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch; +} + +static constexpr char ToLowerCaseLetter(char &&ch) { + return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch; +} + +static constexpr bool IsSameApartFromCase(char x, char y) { + return ToLowerCaseLetter(x) == ToLowerCaseLetter(y); +} + +static inline std::string ToLowerCaseLetters(const std::string &str) { + std::string lowered{str}; + for (char &ch : lowered) { + ch = ToLowerCaseLetter(ch); + } + return lowered; +} + +static constexpr char DecimalDigitValue(char ch) { return ch - '0'; } + +static constexpr char HexadecimalDigitValue(char ch) { + return IsUpperCaseLetter(ch) + ? ch - 'A' + 10 + : IsLowerCaseLetter(ch) ? 
ch - 'a' + 10 : DecimalDigitValue(ch); +} + +static constexpr std::optional BackslashEscapeValue(char ch) { + switch (ch) { + case 'a': return {'\a'}; + case 'b': return {'\b'}; + case 'f': return {'\f'}; + case 'n': return {'\n'}; + case 'r': return {'\r'}; + case 't': return {'\t'}; + case 'v': return {'\v'}; + case '"': + case '\'': + case '\\': return {ch}; + default: return {}; + } +} + +static constexpr std::optional BackslashEscapeChar(char ch) { + switch (ch) { + case '\a': return {'a'}; + case '\b': return {'b'}; + case '\f': return {'f'}; + case '\n': return {'n'}; + case '\r': return {'r'}; + case '\t': return {'t'}; + case '\v': return {'v'}; + case '"': + case '\'': + case '\\': return {ch}; + default: return {}; + } +} +} // namespace parser +} // namespace Fortran +#endif // FORTRAN_PARSER_CHARACTERS_H_ diff --git a/flang/lib/parser/grammar.h b/flang/lib/parser/grammar.h index 87b85a0..a058c80 100644 --- a/flang/lib/parser/grammar.h +++ b/flang/lib/parser/grammar.h @@ -8,11 +8,11 @@ // library used here to implement an LL recursive descent recognizer. #include "basic-parsers.h" +#include "characters.h" #include "format-specification.h" #include "parse-tree.h" #include "token-parsers.h" #include "user-state.h" -#include #include #include #include @@ -569,8 +569,8 @@ constexpr CharMatch<'_'> underscore; constexpr auto otherIdCharacter = underscore / !(CharMatch<'\''>{} || CharMatch<'"'>{}) || extension( - CharMatch<'$'>{}); // PGI/ifort (and Cray/gfortran, but not first) -// Cray also allows '@'. + CharMatch<'$'>{} || // PGI/ifort (and Cray/gfortran, but not first) + CharMatch<'@'>{}); // Cray constexpr auto nonDigitIdCharacter = letter || otherIdCharacter; @@ -753,11 +753,11 @@ TYPE_CONTEXT_PARSER("REAL literal constant"_en_US, // Extension: Q // Not a complete token. 
inline constexpr bool isEorD(char ch) { - ch = tolower(ch); + ch = ToLowerCaseLetter(ch); return ch == 'e' || ch == 'd'; } -inline constexpr bool isQ(char ch) { return tolower(ch) == 'q'; } +inline constexpr bool isQ(char ch) { return ToLowerCaseLetter(ch) == 'q'; } constexpr CharPredicateGuardParser exponentEorD{ isEorD, "expected exponent letter"_en_US}; @@ -821,7 +821,7 @@ TYPE_PARSER(construct{}(parenthesized(typeParamValue)) || // combined. Backslash escapes can be enabled. // PGI extension: nc'...' is Kanji. // N.B. charLiteralConstantWithoutKind does not skip preceding spaces. -// N.B. the parsing of "name" in takes care to not consume the '_'. +// N.B. the parsing of "name" takes care to not consume the '_'. constexpr auto charLiteralConstantWithoutKind = CharMatch<'\''>{} >> CharLiteral<'\''>{} || CharMatch<'"'>{} >> CharLiteral<'"'>{}; @@ -2952,6 +2952,9 @@ TYPE_PARSER(maybe("UNIT ="_tok) >> construct{}(fileUnitNumber) || "DELIM =" >> construct{}(construct{}( pure(InquireSpec::CharVar::Kind::Delim), scalarDefaultCharVariable)) || + "DIRECT =" >> construct{}(construct{}( + pure(InquireSpec::CharVar::Kind::Direct), + scalarDefaultCharVariable)) || "ENCODING =" >> construct{}(construct{}( pure(InquireSpec::CharVar::Kind::Encoding), scalarDefaultCharVariable)) || @@ -3027,6 +3030,10 @@ TYPE_PARSER(maybe("UNIT ="_tok) >> construct{}(fileUnitNumber) || "STATUS =" >> construct{}(construct{}( pure(InquireSpec::CharVar::Kind::Status), scalarDefaultCharVariable)) || + "UNFORMATTED =" >> + construct{}(construct{}( + pure(InquireSpec::CharVar::Kind::Unformatted), + scalarDefaultCharVariable)) || "WRITE =" >> construct{}(construct{}( pure(InquireSpec::CharVar::Kind::Write), scalarDefaultCharVariable))) @@ -3043,7 +3050,8 @@ TYPE_CONTEXT_PARSER("INQUIRE statement"_en_US, nonemptyList(outputItem))))) // R1301 format-stmt -> FORMAT format-specification -TYPE_PARSER("FORMAT" >> construct{}(Parser{})) +TYPE_CONTEXT_PARSER("FORMAT statement"_en_US, + "FORMAT" >> 
construct{}(Parser{})) // R1321 char-string-edit-desc // N.B. C1313 disallows any kind parameter on the character literal. @@ -3123,7 +3131,8 @@ TYPE_PARSER(construct{}( // PGI/Intel extension: omitting width (and all else that follows) extension(construct{}( "I" >> pure(IntrinsicTypeDataEditDesc::Kind::I) || - "B" >> pure(IntrinsicTypeDataEditDesc::Kind::B) || + ("B"_tok / !letter /* don't occlude BN & BZ */) >> + pure(IntrinsicTypeDataEditDesc::Kind::B) || "O" >> pure(IntrinsicTypeDataEditDesc::Kind::O) || "Z" >> pure(IntrinsicTypeDataEditDesc::Kind::Z) || "F" >> pure(IntrinsicTypeDataEditDesc::Kind::F) || diff --git a/flang/lib/parser/parse-state.h b/flang/lib/parser/parse-state.h index 944462b..38ceb35 100644 --- a/flang/lib/parser/parse-state.h +++ b/flang/lib/parser/parse-state.h @@ -99,7 +99,7 @@ public: const char *GetLocation() const { return p_; } Provenance GetProvenance(const char *at) const { - return cooked_.GetProvenance(at).LocalOffsetToProvenance(0); + return cooked_.GetProvenance(at).start(); } Provenance GetProvenance() const { return GetProvenance(p_); } diff --git a/flang/lib/parser/parse-tree.h b/flang/lib/parser/parse-tree.h index a6c300b..f0a8aec 100644 --- a/flang/lib/parser/parse-tree.h +++ b/flang/lib/parser/parse-tree.h @@ -2633,6 +2633,7 @@ struct FlushStmt { // BLANK = scalar-default-char-variable | // DECIMAL = scalar-default-char-variable | // DELIM = scalar-default-char-variable | +// DIRECT = scalar-default-char-variable | // ENCODING = scalar-default-char-variable | // ERR = label | EXIST = scalar-logical-variable | // FORM = scalar-default-char-variable | @@ -2654,13 +2655,15 @@ struct FlushStmt { // SIZE = scalar-int-variable | // STREAM = scalar-default-char-variable | // STATUS = scalar-default-char-variable | +// UNFORMATTED = scalar-default-char-variable | // WRITE = scalar-default-char-variable struct InquireSpec { UNION_CLASS_BOILERPLATE(InquireSpec); struct CharVar { DEFINE_NESTED_ENUM_CLASS(Kind, Access, Action, 
Asynchronous, Blank, Decimal, - Delim, Encoding, Form, Formatted, Iomsg, Name, Pad, Position, Read, - Readwrite, Round, Sequential, Sign, Stream, Status, Write); + Delim, Direct, Encoding, Form, Formatted, Iomsg, Name, Pad, Position, + Read, Readwrite, Round, Sequential, Sign, Stream, Status, Unformatted, + Write); TUPLE_CLASS_BOILERPLATE(CharVar); std::tuple t; }; diff --git a/flang/lib/parser/preprocessor.cc b/flang/lib/parser/preprocessor.cc index 91db2b6..4a99568 100644 --- a/flang/lib/parser/preprocessor.cc +++ b/flang/lib/parser/preprocessor.cc @@ -1,9 +1,9 @@ #include "preprocessor.h" +#include "characters.h" #include "idioms.h" #include "message.h" #include "prescan.h" #include -#include #include #include #include @@ -28,8 +28,8 @@ Definition::Definition(const std::vector &argNames, Definition::Definition(const std::string &predefined, AllSources *sources) : isPredefined_{true}, - replacement_{predefined, - sources->AddCompilerInsertion(predefined).LocalOffsetToProvenance(0)} {} + replacement_{ + predefined, sources->AddCompilerInsertion(predefined).start()} {} bool Definition::set_isDisabled(bool disable) { bool was{isDisabled_}; @@ -37,12 +37,8 @@ bool Definition::set_isDisabled(bool disable) { return was; } -static bool IsIdentifierFirstCharacter(char ch) { - return ch == '_' || isalpha(ch); -} - -static bool IsIdentifierFirstCharacter(const CharPointerWithLength &cpl) { - return cpl.size() > 0 && IsIdentifierFirstCharacter(cpl[0]); +static bool IsLegalIdentifierStart(const ContiguousChars &cpl) { + return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]); } TokenSequence Definition::Tokenize(const std::vector &argNames, @@ -55,8 +51,8 @@ TokenSequence Definition::Tokenize(const std::vector &argNames, } TokenSequence result; for (size_t j{0}; j < tokens; ++j) { - CharPointerWithLength tok{token[firstToken + j]}; - if (IsIdentifierFirstCharacter(tok)) { + ContiguousChars tok{token[firstToken + j]}; + if (IsLegalIdentifierStart(tok)) { auto it = 
args.find(tok.ToString()); if (it != args.end()) { result.Put(it->second, token.GetTokenProvenance(j)); @@ -83,7 +79,7 @@ static TokenSequence Stringify( Provenance quoteProvenance{allSources->CompilerInsertionProvenance('"')}; result.PutNextTokenChar('"', quoteProvenance); for (size_t j{0}; j < tokens.size(); ++j) { - const CharPointerWithLength &token{tokens[j]}; + const ContiguousChars &token{tokens[j]}; size_t bytes{token.size()}; for (size_t k{0}; k < bytes; ++k) { char ch{token[k]}; @@ -107,7 +103,7 @@ TokenSequence Definition::Apply( int parenthesesNesting{0}; size_t tokens{replacement_.size()}; for (size_t j{0}; j < tokens; ++j) { - const CharPointerWithLength &token{replacement_[j]}; + const ContiguousChars &token{replacement_[j]}; size_t bytes{token.size()}; if (skipping) { if (bytes == 1) { @@ -212,7 +208,7 @@ bool Preprocessor::MacroReplacement(const TokenSequence &input, size_t j; for (j = 0; j < tokens; ++j) { size_t bytes{input[j].size()}; - if (bytes > 0 && IsIdentifierFirstCharacter(input[j][0]) && + if (bytes > 0 && IsLegalIdentifierStart(input[j][0]) && IsNameDefined(input[j])) { break; } @@ -222,8 +218,8 @@ bool Preprocessor::MacroReplacement(const TokenSequence &input, } result->Put(input, 0, j); for (; j < tokens; ++j) { - const CharPointerWithLength &token{input[j]}; - if (token.IsBlank() || !IsIdentifierFirstCharacter(token[0])) { + const ContiguousChars &token{input[j]}; + if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) { result->Put(input, j); continue; } @@ -253,7 +249,7 @@ bool Preprocessor::MacroReplacement(const TokenSequence &input, ProvenanceRange insert{allSources_->AddCompilerInsertion(repl)}; ProvenanceRange call{allSources_->AddMacroCall( insert, input.GetTokenProvenanceRange(j), repl)}; - result->Put(repl, call.LocalOffsetToProvenance(0)); + result->Put(repl, call.start()); continue; } } @@ -274,7 +270,7 @@ bool Preprocessor::MacroReplacement(const TokenSequence &input, size_t k{j}; bool leftParen{false}; while (++k < 
tokens) { - const CharPointerWithLength &lookAhead{input[k]}; + const ContiguousChars &lookAhead{input[k]}; if (!lookAhead.IsBlank() && lookAhead[0] != '\n') { leftParen = lookAhead[0] == '(' && lookAhead.size() == 1; break; @@ -353,14 +349,6 @@ static TokenSequence StripBlanks( return noBlanks; } -static std::string ConvertToLowerCase(const std::string &str) { - std::string lowered{str}; - for (char &ch : lowered) { - ch = tolower(ch); - } - return lowered; -} - static std::string GetDirectiveName(const TokenSequence &line, size_t *rest) { size_t tokens{line.size()}; size_t j{SkipBlanks(line, 0, tokens)}; @@ -374,7 +362,7 @@ static std::string GetDirectiveName(const TokenSequence &line, size_t *rest) { return {}; } *rest = SkipBlanks(line, j + 1, tokens); - return ConvertToLowerCase(line[j].ToString()); + return ToLowerCaseLetters(line[j].ToString()); } bool Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { @@ -391,13 +379,13 @@ bool Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { if (j == tokens) { return true; } - if (isdigit(dir[j][0]) || dir[j][0] == '"') { + if (IsDecimalDigit(dir[j][0]) || dir[j][0] == '"') { return true; // TODO: treat as #line } - std::string dirName{ConvertToLowerCase(dir[j].ToString())}; + std::string dirName{ToLowerCaseLetters(dir[j].ToString())}; j = SkipBlanks(dir, j + 1, tokens); - CharPointerWithLength nameToken; - if (j < tokens && IsIdentifierFirstCharacter(dir[j][0])) { + ContiguousChars nameToken; + if (j < tokens && IsLegalIdentifierStart(dir[j][0])) { nameToken = dir[j]; } if (dirName == "line") { @@ -421,7 +409,7 @@ bool Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { if (an == "...") { isVariadic = true; } else { - if (an.empty() || !IsIdentifierFirstCharacter(an[0])) { + if (an.empty() || !IsLegalIdentifierStart(an[0])) { prescanner->Complain( "#define: missing or invalid argument name"_en_US); return false; @@ -598,13 +586,12 @@ bool 
Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { return false; } -CharPointerWithLength Preprocessor::SaveTokenAsName( - const CharPointerWithLength &t) { +ContiguousChars Preprocessor::SaveTokenAsName(const ContiguousChars &t) { names_.push_back(t.ToString()); return {names_.back().data(), names_.back().size()}; } -bool Preprocessor::IsNameDefined(const CharPointerWithLength &token) { +bool Preprocessor::IsNameDefined(const ContiguousChars &token) { return definitions_.find(token) != definitions_.end(); } @@ -743,14 +730,14 @@ static std::int64_t ExpressionValue(const TokenSequence &token, std::int64_t left{0}; if (t == "(") { op = PARENS; - } else if (isdigit(t[0])) { + } else if (IsDecimalDigit(t[0])) { op = CONST; size_t consumed{0}; left = std::stoll(t, &consumed); if (consumed < t.size()) { *error = "uninterpretable numeric constant '"_en_US; } - } else if (IsIdentifierFirstCharacter(t[0])) { + } else if (IsLegalIdentifierStart(t[0])) { // undefined macro name -> zero // TODO: BOZ constants? op = CONST; @@ -759,7 +746,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token, } else if (t == "-") { op = UMINUS; } else if (t == "." && *atToken + 2 < tokens && - ConvertToLowerCase(token[*atToken + 1].ToString()) == "not" && + ToLowerCaseLetters(token[*atToken + 1].ToString()) == "not" && token[*atToken + 2].ToString() == ".") { op = NOT; *atToken += 2; @@ -803,7 +790,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token, t = token[*atToken].ToString(); if (t == "." 
&& *atToken + 2 < tokens && token[*atToken + 2].ToString() == ".") { - t += ConvertToLowerCase(token[*atToken + 1].ToString()) + '.'; + t += ToLowerCaseLetters(token[*atToken + 1].ToString()) + '.'; advance = 3; } auto it = opNameMap.find(t); @@ -913,14 +900,13 @@ bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, size_t first, TokenSequence expr1{StripBlanks(expr, first, first + exprTokens)}; TokenSequence expr2; for (size_t j{0}; j < expr1.size(); ++j) { - if (ConvertToLowerCase(expr1[j].ToString()) == "defined") { - CharPointerWithLength name; + if (ToLowerCaseLetters(expr1[j].ToString()) == "defined") { + ContiguousChars name; if (j + 3 < expr1.size() && expr1[j + 1].ToString() == "(" && expr1[j + 3].ToString() == ")") { name = expr1[j + 2]; j += 3; - } else if (j + 1 < expr1.size() && - IsIdentifierFirstCharacter(expr1[j + 1])) { + } else if (j + 1 < expr1.size() && IsLegalIdentifierStart(expr1[j + 1])) { name = expr1[j++]; } if (!name.empty()) { diff --git a/flang/lib/parser/preprocessor.h b/flang/lib/parser/preprocessor.h index e84ac8e..c210689 100644 --- a/flang/lib/parser/preprocessor.h +++ b/flang/lib/parser/preprocessor.h @@ -70,8 +70,8 @@ private: enum class IsElseActive { No, Yes }; enum class CanDeadElseAppear { No, Yes }; - CharPointerWithLength SaveTokenAsName(const CharPointerWithLength &); - bool IsNameDefined(const CharPointerWithLength &); + ContiguousChars SaveTokenAsName(const ContiguousChars &); + bool IsNameDefined(const ContiguousChars &); TokenSequence ReplaceMacros(const TokenSequence &, const Prescanner &); bool SkipDisabledConditionalCode( const std::string &, IsElseActive, Prescanner *); @@ -80,7 +80,7 @@ private: AllSources *allSources_; std::list names_; - std::unordered_map definitions_; + std::unordered_map definitions_; std::stack ifStack_; }; } // namespace parser diff --git a/flang/lib/parser/prescan.cc b/flang/lib/parser/prescan.cc index fcd52a6..4e968e2 100644 --- a/flang/lib/parser/prescan.cc +++ 
b/flang/lib/parser/prescan.cc @@ -1,10 +1,10 @@ #include "prescan.h" +#include "characters.h" #include "idioms.h" #include "message.h" #include "preprocessor.h" #include "source.h" #include "token-sequence.h" -#include #include #include #include @@ -28,7 +28,7 @@ Prescanner::Prescanner(const Prescanner &that) bool Prescanner::Prescan(ProvenanceRange range) { AllSources *allSources{cooked_->allSources()}; ProvenanceRange around{allSources->GetContiguousRangeAround(range)}; - startProvenance_ = range.LocalOffsetToProvenance(0); + startProvenance_ = range.start(); size_t offset{0}; const SourceFile *source{ allSources->GetSourceFile(startProvenance_, &offset)}; @@ -181,10 +181,6 @@ void Prescanner::SkipSpaces() { } } -static inline bool IsNameChar(char ch) { - return isalnum(ch) || ch == '_' || ch == '$' || ch == '@'; -} - bool Prescanner::NextToken(TokenSequence *tokens) { CHECK(at_ >= start_ && at_ < limit_); if (inFixedForm_) { @@ -206,18 +202,18 @@ bool Prescanner::NextToken(TokenSequence *tokens) { if (*at_ == '\'' || *at_ == '"') { QuotedCharacterLiteral(tokens); preventHollerith_ = false; - } else if (isdigit(*at_)) { + } else if (IsDecimalDigit(*at_)) { int n{0}; static constexpr int maxHollerith = 256 * (132 - 6); do { if (n < maxHollerith) { - n = 10 * n + *at_ - '0'; + n = 10 * n + DecimalDigitValue(*at_); } EmitCharAndAdvance(tokens, *at_); if (inFixedForm_) { SkipSpaces(); } - } while (isdigit(*at_)); + } while (IsDecimalDigit(*at_)); if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && !preventHollerith_) { EmitCharAndAdvance(tokens, 'h'); @@ -232,11 +228,11 @@ bool Prescanner::NextToken(TokenSequence *tokens) { } inCharLiteral_ = false; } else if (*at_ == '.') { - while (isdigit(EmitCharAndAdvance(tokens, *at_))) { + while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { } ExponentAndKind(tokens); } else if (ExponentAndKind(tokens)) { - } else if (isalpha(*at_)) { + } else if (IsLetter(*at_)) { // Handles FORMAT(3I9HHOLLERITH) by 
skipping over the first I so that // we don't misrecognize I9HOLLERITH as an identifier in the next case. EmitCharAndAdvance(tokens, *at_); @@ -244,16 +240,16 @@ bool Prescanner::NextToken(TokenSequence *tokens) { preventHollerith_ = false; } else if (*at_ == '.') { char nch{EmitCharAndAdvance(tokens, '.')}; - if (isdigit(nch)) { - while (isdigit(EmitCharAndAdvance(tokens, *at_))) { + if (IsDecimalDigit(nch)) { + while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { } ExponentAndKind(tokens); } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis } preventHollerith_ = false; - } else if (IsNameChar(*at_)) { - while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) { + } else if (IsLegalInIdentifier(*at_)) { + while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { } if (*at_ == '\'' || *at_ == '"') { QuotedCharacterLiteral(tokens); @@ -297,40 +293,31 @@ bool Prescanner::ExponentAndKind(TokenSequence *tokens) { if (*at_ == '+' || *at_ == '-') { EmitCharAndAdvance(tokens, *at_); } - while (isdigit(*at_)) { + while (IsDecimalDigit(*at_)) { EmitCharAndAdvance(tokens, *at_); } if (*at_ == '_') { - while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) { + while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { } } return true; } void Prescanner::EmitQuotedCharacter(TokenSequence *tokens, char ch) { - switch (ch) { - case '\a': EmitEscapedChar(tokens, 'a'); break; - case '\b': EmitEscapedChar(tokens, 'b'); break; - case '\f': EmitEscapedChar(tokens, 'f'); break; - case '\r': EmitEscapedChar(tokens, 'r'); break; - case '\t': EmitEscapedChar(tokens, 't'); break; - case '\v': EmitEscapedChar(tokens, 'v'); break; - case '\\': - if (!enableBackslashEscapesInCharLiterals_) { - EmitInsertedChar(tokens, '\\'); - } - EmitChar(tokens, '\\'); - break; - default: - if (ch < ' ') { - // emit an octal escape sequence + if (std::optional escape{BackslashEscapeChar(ch)}) { + if (ch != '\'' 
&& ch != '"' && + (ch != '\\' || !enableBackslashEscapesInCharLiterals_)) { EmitInsertedChar(tokens, '\\'); - EmitInsertedChar(tokens, '0' + ((ch >> 6) & 3)); - EmitInsertedChar(tokens, '0' + ((ch >> 3) & 7)); - EmitInsertedChar(tokens, '0' + (ch & 7)); - } else { - EmitChar(tokens, ch); } + EmitChar(tokens, *escape); + } else if (ch < ' ') { + // emit an octal escape sequence + EmitInsertedChar(tokens, '\\'); + EmitInsertedChar(tokens, '0' + ((ch >> 6) & 3)); + EmitInsertedChar(tokens, '0' + ((ch >> 3) & 7)); + EmitInsertedChar(tokens, '0' + (ch & 7)); + } else { + EmitChar(tokens, ch); } } diff --git a/flang/lib/parser/prescan.h b/flang/lib/parser/prescan.h index 8782f9d..8951d07 100644 --- a/flang/lib/parser/prescan.h +++ b/flang/lib/parser/prescan.h @@ -80,11 +80,6 @@ private: tokens->PutNextTokenChar(ch, provenance); } - void EmitEscapedChar(TokenSequence *tokens, char ch) { - EmitInsertedChar(tokens, '\\'); - EmitChar(tokens, ch); - } - char EmitCharAndAdvance(TokenSequence *tokens, char ch) { EmitChar(tokens, ch); NextChar(); diff --git a/flang/lib/parser/provenance.cc b/flang/lib/parser/provenance.cc index bec938c..b3c847c 100644 --- a/flang/lib/parser/provenance.cc +++ b/flang/lib/parser/provenance.cc @@ -72,7 +72,7 @@ AllSources::~AllSources() {} const char &AllSources::operator[](Provenance at) const { const Origin &origin{MapToOrigin(at)}; - return origin[origin.covers.ProvenanceToLocalOffset(at)]; + return origin[origin.covers.MemberOffset(at)]; } void AllSources::PushSearchPathDirectory(std::string directory) { @@ -98,7 +98,7 @@ ProvenanceRange AllSources::AddIncludedFile( const SourceFile &source, ProvenanceRange from, bool isModule) { ProvenanceRange covers{range_.NextAfter(), source.bytes()}; CHECK(range_.AnnexIfPredecessor(covers)); - CHECK(origin_.back().covers.IsPredecessor(covers)); + CHECK(origin_.back().covers.ImmediatelyPrecedes(covers)); origin_.emplace_back(covers, source, from, isModule); return covers; } @@ -107,7 +107,7 @@ 
ProvenanceRange AllSources::AddMacroCall( ProvenanceRange def, ProvenanceRange use, const std::string &expansion) { ProvenanceRange covers{range_.NextAfter(), expansion.size()}; CHECK(range_.AnnexIfPredecessor(covers)); - CHECK(origin_.back().covers.IsPredecessor(covers)); + CHECK(origin_.back().covers.ImmediatelyPrecedes(covers)); origin_.emplace_back(covers, def, use, expansion); return covers; } @@ -115,7 +115,7 @@ ProvenanceRange AllSources::AddMacroCall( ProvenanceRange AllSources::AddCompilerInsertion(std::string text) { ProvenanceRange covers{range_.NextAfter(), text.size()}; CHECK(range_.AnnexIfPredecessor(covers)); - CHECK(origin_.back().covers.IsPredecessor(covers)); + CHECK(origin_.back().covers.ImmediatelyPrecedes(covers)); origin_.emplace_back(covers, text); return covers; } @@ -128,7 +128,7 @@ void AllSources::Identify(std::ostream &o, Provenance at, std::visit( visitors{ [&](const Inclusion &inc) { - size_t offset{origin.covers.ProvenanceToLocalOffset(at)}; + size_t offset{origin.covers.MemberOffset(at)}; std::pair pos{inc.source.FindOffsetLineAndColumn(offset)}; o << prefix << "at line " << pos.first << ", column " << pos.second; if (echoSourceLine) { @@ -151,24 +151,21 @@ void AllSources::Identify(std::ostream &o, Provenance at, << inc.source.path(); if (IsValid(origin.replaces)) { o << (inc.isModule ? 
" used\n" : " included\n"); - Identify(o, origin.replaces.LocalOffsetToProvenance(0), indented); + Identify(o, origin.replaces.start(), indented); } else { o << '\n'; } }, [&](const Macro &mac) { o << prefix << "in the expansion of a macro that was defined\n"; - Identify(o, mac.definition.LocalOffsetToProvenance(0), indented, - echoSourceLine); + Identify(o, mac.definition.start(), indented, echoSourceLine); o << prefix << "and called\n"; - Identify(o, origin.replaces.LocalOffsetToProvenance(0), indented, - echoSourceLine); + Identify(o, origin.replaces.start(), indented, echoSourceLine); if (echoSourceLine) { o << prefix << "and expanded to\n" << indented << " " << mac.expansion << '\n' << indented << " "; - for (size_t j{0}; origin.covers.LocalOffsetToProvenance(j) < at; - ++j) { + for (size_t j{0}; origin.covers.OffsetMember(j) < at; ++j) { o << (mac.expansion[j] == '\t' ? '\t' : ' '); } o << "^\n"; @@ -187,30 +184,28 @@ void AllSources::Identify(std::ostream &o, Provenance at, const SourceFile *AllSources::GetSourceFile( Provenance at, size_t *offset) const { const Origin &origin{MapToOrigin(at)}; - return std::visit( - visitors{[&](const Inclusion &inc) { - if (offset != nullptr) { - *offset = origin.covers.ProvenanceToLocalOffset(at); - } - return &inc.source; - }, - [&](const Macro &mac) { - return GetSourceFile( - origin.replaces.LocalOffsetToProvenance(0), offset); - }, - [offset](const CompilerInsertion &) { - if (offset != nullptr) { - *offset = 0; - } - return static_cast(nullptr); - }}, + return std::visit(visitors{[&](const Inclusion &inc) { + if (offset != nullptr) { + *offset = origin.covers.MemberOffset(at); + } + return &inc.source; + }, + [&](const Macro &mac) { + return GetSourceFile(origin.replaces.start(), offset); + }, + [offset](const CompilerInsertion &) { + if (offset != nullptr) { + *offset = 0; + } + return static_cast(nullptr); + }}, origin.u); } ProvenanceRange AllSources::GetContiguousRangeAround( ProvenanceRange range) const { 
CHECK(IsValid(range)); - const Origin &origin{MapToOrigin(range.LocalOffsetToProvenance(0))}; + const Origin &origin{MapToOrigin(range.start())}; CHECK(origin.covers.Contains(range)); return origin.covers; } @@ -232,7 +227,7 @@ Provenance AllSources::CompilerInsertionProvenance(char ch) { return iter->second; } ProvenanceRange newCharRange{AddCompilerInsertion(std::string{ch})}; - Provenance newCharProvenance{newCharRange.LocalOffsetToProvenance(0)}; + Provenance newCharProvenance{newCharRange.start()}; compilerInsertionProvenance_.insert(std::make_pair(ch, newCharProvenance)); return newCharProvenance; } @@ -265,7 +260,7 @@ const AllSources::Origin &AllSources::MapToOrigin(Provenance at) const { size_t low{0}, count{origin_.size()}; while (count > 1) { size_t mid{low + (count >> 1)}; - if (at < origin_[mid].covers.LocalOffsetToProvenance(0)) { + if (at < origin_[mid].covers.start()) { count = mid - low; } else { count -= mid - low; @@ -292,9 +287,9 @@ void CookedSource::Marshal() { buffer_.clear(); } -void ProvenanceRange::Dump(std::ostream &o) const { - o << "[" << start_.offset() << ".." << (start_.offset() + bytes_ - 1) << "] (" - << bytes_ << " bytes)"; +static void DumpRange(std::ostream &o, const ProvenanceRange &r) { + o << "[" << r.start().offset() << ".." << r.Last().offset() << "] (" + << r.size() << " bytes)"; } void OffsetToProvenanceMappings::Dump(std::ostream &o) const { @@ -302,18 +297,18 @@ void OffsetToProvenanceMappings::Dump(std::ostream &o) const { size_t n{m.range.size()}; o << "offsets [" << m.start << ".." 
<< (m.start + n - 1) << "] -> provenances "; - m.range.Dump(o); + DumpRange(o, m.range); o << '\n'; } } void AllSources::Dump(std::ostream &o) const { o << "AllSources range_ "; - range_.Dump(o); + DumpRange(o, range_); o << '\n'; for (const Origin &m : origin_) { o << " "; - m.covers.Dump(o); + DumpRange(o, m.covers); o << " -> "; std::visit(visitors{[&](const Inclusion &inc) { if (inc.isModule) { diff --git a/flang/lib/parser/provenance.h b/flang/lib/parser/provenance.h index df6b05e..9589845 100644 --- a/flang/lib/parser/provenance.h +++ b/flang/lib/parser/provenance.h @@ -47,6 +47,10 @@ public: return {offset_ + static_cast(n)}; } Provenance operator+(size_t n) const { return {offset_ + n}; } + size_t operator-(Provenance that) const { + CHECK(that <= *this); + return offset_ - that.offset_; + } bool operator<(Provenance that) const { return offset_ < that.offset_; } bool operator<=(Provenance that) const { return !(that < *this); } bool operator==(Provenance that) const { return offset_ == that.offset_; } @@ -57,72 +61,64 @@ private: size_t offset_{0}; }; -class ProvenanceRange { +template class Interval { public: - ProvenanceRange() {} - ProvenanceRange(Provenance s, size_t n) : start_{s}, bytes_{n} { - CHECK(n > 0); - } - ProvenanceRange(const ProvenanceRange &) = default; - ProvenanceRange(ProvenanceRange &&) = default; - ProvenanceRange &operator=(const ProvenanceRange &) = default; - ProvenanceRange &operator=(ProvenanceRange &&) = default; - - bool operator==(ProvenanceRange that) const { - return start_ == that.start_ && bytes_ == that.bytes_; - } - - size_t size() const { return bytes_; } - - bool Contains(Provenance at) const { - return start_ <= at && at < start_ + bytes_; + using type = A; + Interval() {} + Interval(const A &s, size_t n) : start_{s}, size_{n} {} + Interval(A &&s, size_t n) : start_{std::move(s)}, size_{n} {} + Interval(const Interval &) = default; + Interval(Interval &&) = default; + Interval &operator=(const Interval &) = default; 
+ Interval &operator=(Interval &&) = default; + + bool operator==(const Interval &that) const { + return start_ == that.start_ && size_ == that.size_; } - bool Contains(ProvenanceRange that) const { - return Contains(that.start_) && Contains(that.start_ + (that.bytes_ - 1)); - } + const A &start() const { return start_; } + size_t size() const { return size_; } + bool empty() const { return size_ == 0; } - size_t ProvenanceToLocalOffset(Provenance at) const { - CHECK(Contains(at)); - return at.offset() - start_.offset(); + bool Contains(const A &x) const { return start_ <= x && x < start_ + size_; } + bool Contains(const Interval &that) const { + return Contains(that.start_) && Contains(that.start_ + (that.size_ - 1)); } - - Provenance LocalOffsetToProvenance(size_t at) const { - CHECK(at < bytes_); - return start_ + at; + bool ImmediatelyPrecedes(const Interval &that) const { + return NextAfter() == that.start_; } - - Provenance NextAfter() const { return start_ + bytes_; } - - ProvenanceRange Suffix(size_t at) const { - CHECK(at < bytes_); - return {start_ + at, bytes_ - at}; + bool AnnexIfPredecessor(const Interval &that) { + if (ImmediatelyPrecedes(that)) { + size_ += that.size_; + return true; + } + return false; } - ProvenanceRange Prefix(size_t bytes) const { - CHECK(bytes > 0); - return {start_, std::min(bytes_, bytes)}; + size_t MemberOffset(const A &x) const { + CHECK(Contains(x)); + return x - start_; } - - bool IsPredecessor(ProvenanceRange next) { - return start_ + bytes_ == next.start_; + A OffsetMember(size_t n) const { + CHECK(n < size_); + return start_ + n; } - bool AnnexIfPredecessor(ProvenanceRange next) { - if (IsPredecessor(next)) { - bytes_ += next.bytes_; - return true; - } - return false; + A Last() const { return start_ + (size_ - 1); } + A NextAfter() const { return start_ + size_; } + Interval Prefix(size_t n) const { return {start_, std::min(size_, n)}; } + Interval Suffix(size_t n) const { + CHECK(n <= size_); + return {start_ + n, 
size_ - n}; } - void Dump(std::ostream &) const; - private: - Provenance start_; - size_t bytes_{0}; + A start_; + size_t size_{0}; }; +using ProvenanceRange = Interval; + // Maps 0-based local offsets in some contiguous range (e.g., a token // sequence) to their provenances. Lookup time is on the order of // O(log(#of intervals with contiguous provenances)). As mentioned diff --git a/flang/lib/parser/token-parsers.h b/flang/lib/parser/token-parsers.h index d652302..d12e7a5 100644 --- a/flang/lib/parser/token-parsers.h +++ b/flang/lib/parser/token-parsers.h @@ -5,9 +5,9 @@ // the prescanned character stream and recognize context-sensitive tokens. #include "basic-parsers.h" +#include "characters.h" #include "idioms.h" #include "provenance.h" -#include #include #include #include @@ -41,25 +41,11 @@ private: const MessageFixedText text_; }; -static inline constexpr bool IsDecimalDigit(char ch) { return isdigit(ch); } - -static inline constexpr bool IsOctalDigit(char ch) { - return ch >= '0' && ch <= '7'; -} - -static inline constexpr bool IsHexadecimalDigit(char ch) { - return isxdigit(ch); -} - -static inline constexpr bool IsLetter(char ch) { return isalpha(ch); } - -static inline constexpr char ToLower(char &&ch) { return tolower(ch); } - constexpr CharPredicateGuardParser digit{ IsDecimalDigit, "expected digit"_en_US}; -constexpr auto letter = applyFunction( - ToLower, CharPredicateGuardParser{IsLetter, "expected letter"_en_US}); +constexpr auto letter = applyFunction(ToLowerCaseLetter, + CharPredicateGuardParser{IsLetter, "expected letter"_en_US}); template class CharMatch { public: @@ -115,18 +101,19 @@ public: continue; // redundant; ignore } } - if (!ch && !(ch = nextChar.Parse(state))) { + if (!ch.has_value() && !(ch = nextChar.Parse(state))) { return {}; } if (spaceSkipping) { // medial space: 0 or more spaces/tabs accepted, none required + // TODO: designate and enforce free-form mandatory white space while (*ch == ' ' || *ch == '\t') { if (!(ch = 
nextChar.Parse(state))) { return {}; } } // ch remains full for next iteration - } else if (*ch == tolower(*p)) { + } else if (IsSameApartFromCase(*ch, *p)) { ch.reset(); } else { state->PutMessage(at, MessageExpectedText{str_, bytes_}); @@ -171,13 +158,6 @@ bracketed(const PA &p) { return "[" >> p / "]"; } -static inline int HexadecimalDigitValue(char ch) { - if (IsDecimalDigit(ch)) { - return ch - '0'; - } - return toupper(ch) - 'A' + 10; -} - // Quoted character literal constants. struct CharLiteralChar { struct Result { @@ -205,44 +185,36 @@ struct CharLiteralChar { if (!(och = nextChar.Parse(state)).has_value()) { return {}; } - switch ((ch = *och)) { - case 'a': return {Result::Escaped('\a')}; - case 'b': return {Result::Escaped('\b')}; - case 'f': return {Result::Escaped('\f')}; - case 'n': return {Result::Escaped('\n')}; - case 'r': return {Result::Escaped('\r')}; - case 't': return {Result::Escaped('\t')}; - case 'v': return {Result::Escaped('\v')}; - case '"': - case '\'': - case '\\': return {Result::Escaped(ch)}; - case '\n': + ch = *och; + if (ch == '\n') { state->PutMessage(at, "unclosed character constant"_en_US); return {}; - default: - if (IsOctalDigit(ch)) { - ch -= '0'; - for (int j = (ch > 3 ? 1 : 2); j-- > 0;) { - static constexpr auto octalDigit = attempt(CharPredicateGuardParser{ - IsOctalDigit, "expected octal digit"_en_US}); - if ((och = octalDigit.Parse(state)).has_value()) { - ch = 8 * ch + *och - '0'; - } + } + if (std::optional escChar{BackslashEscapeValue(ch)}) { + return {Result::Escaped(*escChar)}; + } + if (IsOctalDigit(ch)) { + ch -= '0'; + for (int j = (ch > 3 ? 
1 : 2); j-- > 0;) { + static constexpr auto octalDigit = attempt(CharPredicateGuardParser{ + IsOctalDigit, "expected octal digit"_en_US}); + if ((och = octalDigit.Parse(state)).has_value()) { + ch = 8 * ch + *och - '0'; } - } else if (ch == 'x' || ch == 'X') { - ch = 0; - for (int j = 0; j++ < 2;) { - static constexpr auto hexDigit = attempt(CharPredicateGuardParser{ - IsHexadecimalDigit, "expected hexadecimal digit"_en_US}); - if ((och = hexDigit.Parse(state)).has_value()) { - ch = 16 * ch + HexadecimalDigitValue(*och); - } + } + } else if (ch == 'x' || ch == 'X') { + ch = 0; + for (int j = 0; j++ < 2;) { + static constexpr auto hexDigit = attempt(CharPredicateGuardParser{ + IsHexadecimalDigit, "expected hexadecimal digit"_en_US}); + if ((och = hexDigit.Parse(state)).has_value()) { + ch = 16 * ch + HexadecimalDigitValue(*och); } - } else { - state->PutMessage(at, "bad escaped character"_en_US); } - return {Result::Escaped(ch)}; + } else { + state->PutMessage(at, "bad escaped character"_en_US); } + return {Result::Escaped(ch)}; } }; @@ -310,7 +282,7 @@ struct BOZLiteral { if (*ch == quote) { break; } - if (!isxdigit(*ch)) { + if (!IsHexadecimalDigit(*ch)) { return {}; } content += *ch; diff --git a/flang/lib/parser/token-sequence.cc b/flang/lib/parser/token-sequence.cc index 5c2a2f9..ad89e0f 100644 --- a/flang/lib/parser/token-sequence.cc +++ b/flang/lib/parser/token-sequence.cc @@ -1,11 +1,14 @@ #include "token-sequence.h" +#include "characters.h" namespace Fortran { namespace parser { -bool CharPointerWithLength::IsBlank() const { - for (size_t j{0}; j < bytes_; ++j) { - char ch{data_[j]}; +bool ContiguousChars::IsBlank() const { + const char *data{interval_.start()}; + size_t n{interval_.size()}; + for (size_t j{0}; j < n; ++j) { + char ch{data[j]}; if (ch != ' ' && ch != '\t') { return false; } @@ -50,8 +53,8 @@ void TokenSequence::Put(const TokenSequence &that) { void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) { size_t offset{0}; for 
(size_t j{0}; j < that.size(); ++j) { - CharPointerWithLength tok{that[j]}; - Put(tok, range.LocalOffsetToProvenance(offset)); + ContiguousChars tok{that[j]}; + Put(tok, range.OffsetMember(offset)); offset += tok.size(); } CHECK(offset == range.size()); @@ -61,14 +64,14 @@ void TokenSequence::Put(const TokenSequence &that, size_t at, size_t tokens) { ProvenanceRange provenance; size_t offset{0}; for (; tokens-- > 0; ++at) { - CharPointerWithLength tok{that[at]}; + ContiguousChars tok{that[at]}; size_t tokBytes{tok.size()}; for (size_t j{0}; j < tokBytes; ++j) { if (offset == provenance.size()) { offset = 0; provenance = that.provenances_.Map(that.start_[at] + j); } - PutNextTokenChar(tok[j], provenance.LocalOffsetToProvenance(offset++)); + PutNextTokenChar(tok[j], provenance.OffsetMember(offset++)); } CloseToken(); } @@ -81,7 +84,7 @@ void TokenSequence::Put(const char *s, size_t bytes, Provenance provenance) { CloseToken(); } -void TokenSequence::Put(const CharPointerWithLength &t, Provenance provenance) { +void TokenSequence::Put(const ContiguousChars &t, Provenance provenance) { Put(&t[0], t.size(), provenance); } @@ -99,7 +102,7 @@ void TokenSequence::EmitWithCaseConversion(CookedSource *cooked) const { size_t atToken{0}; for (size_t j{0}; j < chars;) { size_t nextStart{atToken + 1 < tokens ? 
start_[++atToken] : chars}; - if (isalpha(char_[j])) { + if (IsLegalInIdentifier(char_[j])) { for (; j < nextStart; ++j) { cooked->Put(tolower(char_[j])); } @@ -118,7 +121,7 @@ std::string TokenSequence::ToString() const { Provenance TokenSequence::GetTokenProvenance( size_t token, size_t offset) const { ProvenanceRange range{provenances_.Map(start_[token] + offset)}; - return range.LocalOffsetToProvenance(0); + return range.start(); } ProvenanceRange TokenSequence::GetTokenProvenanceRange( diff --git a/flang/lib/parser/token-sequence.h b/flang/lib/parser/token-sequence.h index d093f0e..12333ba 100644 --- a/flang/lib/parser/token-sequence.h +++ b/flang/lib/parser/token-sequence.h @@ -16,37 +16,32 @@ namespace parser { // Just a const char pointer with an associated length; does not presume // to own the referenced data. Used to describe buffered tokens and hash // table keys. -class CharPointerWithLength { +class ContiguousChars { public: - CharPointerWithLength() {} - CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {} - CharPointerWithLength(const std::string &s) - : data_{s.data()}, bytes_{s.size()} {} - CharPointerWithLength(const CharPointerWithLength &that) - : data_{that.data_}, bytes_{that.bytes_} {} - CharPointerWithLength &operator=(const CharPointerWithLength &that) { - data_ = that.data_; - bytes_ = that.bytes_; - return *this; - } + ContiguousChars() {} + ContiguousChars(const char *x, size_t n) : interval_{x, n} {} + ContiguousChars(const std::string &s) : interval_{s.data(), s.size()} {} + ContiguousChars(const ContiguousChars &that) = default; + ContiguousChars &operator=(const ContiguousChars &that) = default; - bool empty() const { return bytes_ == 0; } - size_t size() const { return bytes_; } - const char &operator[](size_t j) const { return data_[j]; } + bool empty() const { return interval_.empty(); } + size_t size() const { return interval_.size(); } + const char &operator[](size_t j) const { return interval_.start()[j]; } 
bool IsBlank() const; - std::string ToString() const { return std::string{data_, bytes_}; } + std::string ToString() const { + return std::string{interval_.start(), interval_.size()}; + } private: - const char *data_{nullptr}; - size_t bytes_{0}; + Interval interval_{nullptr, 0}; }; } // namespace parser } // namespace Fortran -// Specializations to enable std::unordered_map -template<> struct std::hash { - size_t operator()(const Fortran::parser::CharPointerWithLength &x) const { +// Specializations to enable std::unordered_map +template<> struct std::hash { + size_t operator()(const Fortran::parser::ContiguousChars &x) const { size_t hash{0}, bytes{x.size()}; for (size_t j{0}; j < bytes; ++j) { hash = (hash * 31) ^ x[j]; @@ -55,9 +50,9 @@ template<> struct std::hash { } }; -template<> struct std::equal_to { - bool operator()(const Fortran::parser::CharPointerWithLength &x, - const Fortran::parser::CharPointerWithLength &y) const { +template<> struct std::equal_to { + bool operator()(const Fortran::parser::ContiguousChars &x, + const Fortran::parser::ContiguousChars &y) const { return x.size() == y.size() && std::memcmp(static_cast(&x[0]), static_cast(&y[0]), x.size()) == 0; @@ -94,7 +89,7 @@ public: return *this; } - CharPointerWithLength operator[](size_t token) const { + ContiguousChars operator[](size_t token) const { return {&char_[start_[token]], TokenBytes(token)}; } @@ -124,7 +119,7 @@ public: void Put(const TokenSequence &, ProvenanceRange); void Put(const TokenSequence &, size_t at, size_t tokens = 1); void Put(const char *, size_t, Provenance); - void Put(const CharPointerWithLength &, Provenance); + void Put(const ContiguousChars &, Provenance); void Put(const std::string &, Provenance); void Put(const std::stringstream &, Provenance); void EmitWithCaseConversion(CookedSource *) const; -- 2.7.4