--- /dev/null
+#ifndef FORTRAN_PARSER_CHARACTERS_H_
+#define FORTRAN_PARSER_CHARACTERS_H_
+
+// Define some character classification predicates and
+// conversions here to avoid dependences upon <cctype> and
+// also to accommodate Fortran tokenization.
+
+#include <optional>
+#include <string>
+
+namespace Fortran {
+namespace parser {
+
+// Locale-independent constexpr character helpers.  Only the letter
+// predicates special-case EBCDIC, where the alphabet is not one contiguous
+// run of code points.
+
+// True when ch is an upper-case letter A through Z.
+static constexpr bool IsUpperCaseLetter(char ch) {
+  constexpr bool isEBCDIC{'A' == static_cast<char>(0xc1)};
+  if constexpr (isEBCDIC) {
+    // EBCDIC: test each of the three runs of letters separately.
+    const bool inAToI{'A' <= ch && ch <= 'I'};
+    const bool inJToR{'J' <= ch && ch <= 'R'};
+    const bool inSToZ{'S' <= ch && ch <= 'Z'};
+    return inAToI || inJToR || inSToZ;
+  }
+  return 'A' <= ch && ch <= 'Z';
+}
+
+// True when ch is a lower-case letter a through z.
+static constexpr bool IsLowerCaseLetter(char ch) {
+  constexpr bool isEBCDIC{'a' == static_cast<char>(0x81)};
+  if constexpr (isEBCDIC) {
+    // EBCDIC: test each of the three runs of letters separately.
+    const bool inAToI{'a' <= ch && ch <= 'i'};
+    const bool inJToR{'j' <= ch && ch <= 'r'};
+    const bool inSToZ{'s' <= ch && ch <= 'z'};
+    return inAToI || inJToR || inSToZ;
+  }
+  return 'a' <= ch && ch <= 'z';
+}
+
+// True when ch is a letter of either case.
+static constexpr bool IsLetter(char ch) {
+  return IsLowerCaseLetter(ch) || IsUpperCaseLetter(ch);
+}
+
+// True when ch is one of the digits 0 through 9.
+static constexpr bool IsDecimalDigit(char ch) {
+  return '0' <= ch && ch <= '9';
+}
+
+// True when ch is a decimal digit or a letter A-F / a-f.
+static constexpr bool IsHexadecimalDigit(char ch) {
+  return IsDecimalDigit(ch) || ('A' <= ch && ch <= 'F') ||
+      ('a' <= ch && ch <= 'f');
+}
+
+// True when ch is one of the digits 0 through 7.
+static constexpr bool IsOctalDigit(char ch) {
+  return '0' <= ch && ch <= '7';
+}
+
+// True when ch may begin an identifier: a letter, underscore, at-sign,
+// or dollar sign.
+static constexpr bool IsLegalIdentifierStart(char ch) {
+  switch (ch) {
+  case '_':
+  case '@':
+  case '$':
+    return true;
+  default:
+    return IsLetter(ch);
+  }
+}
+
+// True when ch may appear anywhere in an identifier: everything accepted by
+// IsLegalIdentifierStart plus the decimal digits.
+static constexpr bool IsLegalInIdentifier(char ch) {
+  return IsDecimalDigit(ch) || IsLegalIdentifierStart(ch);
+}
+
+// Maps an upper-case letter to its lower-case counterpart and returns any
+// other character unchanged.
+static constexpr char ToLowerCaseLetter(char ch) {
+  if (IsUpperCaseLetter(ch)) {
+    return ch - 'A' + 'a';
+  }
+  return ch;
+}
+
+// Rvalue-reference overload with the same result.  Within the body ch is an
+// lvalue, so the call below can only select the by-value overload above.
+// Presumably kept so that the name can be passed where a function taking
+// char && is expected; confirm against callers.
+static constexpr char ToLowerCaseLetter(char &&ch) {
+  return ToLowerCaseLetter(ch);
+}
+
+// True when x and y are equal once upper-case letters have been folded to
+// lower case.
+static constexpr bool IsSameApartFromCase(char x, char y) {
+  const char foldedX{ToLowerCaseLetter(x)};
+  const char foldedY{ToLowerCaseLetter(y)};
+  return foldedX == foldedY;
+}
+
+// Returns a copy of str in which every upper-case letter has been replaced
+// by its lower-case counterpart.
+static inline std::string ToLowerCaseLetters(const std::string &str) {
+  std::string result;
+  result.reserve(str.size());
+  for (const char original : str) {
+    result.push_back(ToLowerCaseLetter(original));
+  }
+  return result;
+}
+
+// Numeric value of a decimal digit; ch is not validated.
+static constexpr char DecimalDigitValue(char ch) {
+  return ch - '0';
+}
+
+// Numeric value of a hexadecimal digit.  Any letter is mapped relative to
+// A (or a) plus ten without a range check, so callers should establish
+// IsHexadecimalDigit(ch) first.
+static constexpr char HexadecimalDigitValue(char ch) {
+  if (IsUpperCaseLetter(ch)) {
+    return ch - 'A' + 10;
+  }
+  if (IsLowerCaseLetter(ch)) {
+    return ch - 'a' + 10;
+  }
+  return DecimalDigitValue(ch);
+}
+
+// Given the character that follows a backslash, returns the character that
+// the escape denotes, or an empty optional when ch is not one of the
+// single-character escapes handled here.
+static constexpr std::optional<char> BackslashEscapeValue(char ch) {
+  switch (ch) {
+  case 'a':
+    return std::optional<char>{'\a'};
+  case 'b':
+    return std::optional<char>{'\b'};
+  case 'f':
+    return std::optional<char>{'\f'};
+  case 'n':
+    return std::optional<char>{'\n'};
+  case 'r':
+    return std::optional<char>{'\r'};
+  case 't':
+    return std::optional<char>{'\t'};
+  case 'v':
+    return std::optional<char>{'\v'};
+  case '"':
+  case '\'':
+  case '\\':
+    // Quotes and the backslash stand for themselves.
+    return std::optional<char>{ch};
+  default:
+    return std::nullopt;
+  }
+}
+
+// Inverse of BackslashEscapeValue: given a character value, returns the
+// character to write after a backslash to denote it, or an empty optional
+// when the value has no single-character escape.
+static constexpr std::optional<char> BackslashEscapeChar(char ch) {
+  switch (ch) {
+  case '\a':
+    return std::optional<char>{'a'};
+  case '\b':
+    return std::optional<char>{'b'};
+  case '\f':
+    return std::optional<char>{'f'};
+  case '\n':
+    return std::optional<char>{'n'};
+  case '\r':
+    return std::optional<char>{'r'};
+  case '\t':
+    return std::optional<char>{'t'};
+  case '\v':
+    return std::optional<char>{'v'};
+  case '"':
+  case '\'':
+  case '\\':
+    // Quotes and the backslash stand for themselves.
+    return std::optional<char>{ch};
+  default:
+    return std::nullopt;
+  }
+}
+} // namespace parser
+} // namespace Fortran
+#endif // FORTRAN_PARSER_CHARACTERS_H_
// library used here to implement an LL recursive descent recognizer.
#include "basic-parsers.h"
+#include "characters.h"
#include "format-specification.h"
#include "parse-tree.h"
#include "token-parsers.h"
#include "user-state.h"
-#include <cctype>
#include <cinttypes>
#include <cstdio>
#include <functional>
constexpr auto otherIdCharacter =
underscore / !(CharMatch<'\''>{} || CharMatch<'"'>{}) ||
extension(
- CharMatch<'$'>{}); // PGI/ifort (and Cray/gfortran, but not first)
-// Cray also allows '@'.
+ CharMatch<'$'>{} || // PGI/ifort (and Cray/gfortran, but not first)
+ CharMatch<'@'>{}); // Cray
constexpr auto nonDigitIdCharacter = letter || otherIdCharacter;
// Extension: Q
// Not a complete token.
inline constexpr bool isEorD(char ch) {
- ch = tolower(ch);
+ ch = ToLowerCaseLetter(ch);  // fold case so that E/e and D/d are both accepted
return ch == 'e' || ch == 'd';
}
-inline constexpr bool isQ(char ch) { return tolower(ch) == 'q'; }
+inline constexpr bool isQ(char ch) { return ToLowerCaseLetter(ch) == 'q'; }  // true for the letter q in either case
constexpr CharPredicateGuardParser exponentEorD{
isEorD, "expected exponent letter"_en_US};
// combined. Backslash escapes can be enabled.
// PGI extension: nc'...' is Kanji.
// N.B. charLiteralConstantWithoutKind does not skip preceding spaces.
-// N.B. the parsing of "name" in takes care to not consume the '_'.
+// N.B. the parsing of "name" takes care to not consume the '_'.
constexpr auto charLiteralConstantWithoutKind =
CharMatch<'\''>{} >> CharLiteral<'\''>{} ||
CharMatch<'"'>{} >> CharLiteral<'"'>{};
"DELIM =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
pure(InquireSpec::CharVar::Kind::Delim),
scalarDefaultCharVariable)) ||
+ "DIRECT =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
+ pure(InquireSpec::CharVar::Kind::Direct),
+ scalarDefaultCharVariable)) ||
"ENCODING =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
pure(InquireSpec::CharVar::Kind::Encoding),
scalarDefaultCharVariable)) ||
"STATUS =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
pure(InquireSpec::CharVar::Kind::Status),
scalarDefaultCharVariable)) ||
+ "UNFORMATTED =" >>
+ construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
+ pure(InquireSpec::CharVar::Kind::Unformatted),
+ scalarDefaultCharVariable)) ||
"WRITE =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
pure(InquireSpec::CharVar::Kind::Write),
scalarDefaultCharVariable)))
nonemptyList(outputItem)))))
// R1301 format-stmt -> FORMAT format-specification
-TYPE_PARSER("FORMAT" >> construct<FormatStmt>{}(Parser<FormatSpecification>{}))
+TYPE_CONTEXT_PARSER("FORMAT statement"_en_US,
+ "FORMAT" >> construct<FormatStmt>{}(Parser<FormatSpecification>{}))
// R1321 char-string-edit-desc
// N.B. C1313 disallows any kind parameter on the character literal.
// PGI/Intel extension: omitting width (and all else that follows)
extension(construct<IntrinsicTypeDataEditDesc>{}(
"I" >> pure(IntrinsicTypeDataEditDesc::Kind::I) ||
- "B" >> pure(IntrinsicTypeDataEditDesc::Kind::B) ||
+ ("B"_tok / !letter /* don't occlude BN & BZ */) >>
+ pure(IntrinsicTypeDataEditDesc::Kind::B) ||
"O" >> pure(IntrinsicTypeDataEditDesc::Kind::O) ||
"Z" >> pure(IntrinsicTypeDataEditDesc::Kind::Z) ||
"F" >> pure(IntrinsicTypeDataEditDesc::Kind::F) ||
const char *GetLocation() const { return p_; }
Provenance GetProvenance(const char *at) const {
- return cooked_.GetProvenance(at).LocalOffsetToProvenance(0);
+ return cooked_.GetProvenance(at).start();  // first member of the range that cooked_ reports for at
}
Provenance GetProvenance() const { return GetProvenance(p_); }
// BLANK = scalar-default-char-variable |
// DECIMAL = scalar-default-char-variable |
// DELIM = scalar-default-char-variable |
+// DIRECT = scalar-default-char-variable |
// ENCODING = scalar-default-char-variable |
// ERR = label | EXIST = scalar-logical-variable |
// FORM = scalar-default-char-variable |
// SIZE = scalar-int-variable |
// STREAM = scalar-default-char-variable |
// STATUS = scalar-default-char-variable |
+// UNFORMATTED = scalar-default-char-variable |
// WRITE = scalar-default-char-variable
struct InquireSpec {
UNION_CLASS_BOILERPLATE(InquireSpec);
struct CharVar {
DEFINE_NESTED_ENUM_CLASS(Kind, Access, Action, Asynchronous, Blank, Decimal,
- Delim, Encoding, Form, Formatted, Iomsg, Name, Pad, Position, Read,
- Readwrite, Round, Sequential, Sign, Stream, Status, Write);
+ Delim, Direct, Encoding, Form, Formatted, Iomsg, Name, Pad, Position,
+ Read, Readwrite, Round, Sequential, Sign, Stream, Status, Unformatted,
+ Write);
TUPLE_CLASS_BOILERPLATE(CharVar);
std::tuple<Kind, ScalarDefaultCharVariable> t;
};
#include "preprocessor.h"
+#include "characters.h"
#include "idioms.h"
#include "message.h"
#include "prescan.h"
#include <algorithm>
-#include <cctype>
#include <cinttypes>
#include <ctime>
#include <map>
Definition::Definition(const std::string &predefined, AllSources *sources)
: isPredefined_{true},
- replacement_{predefined,
- sources->AddCompilerInsertion(predefined).LocalOffsetToProvenance(0)} {}
+ replacement_{
+ predefined, sources->AddCompilerInsertion(predefined).start()} {}  // registers the text as a compiler insertion and passes the start of the returned range
bool Definition::set_isDisabled(bool disable) {
bool was{isDisabled_};
return was;
}
-static bool IsIdentifierFirstCharacter(char ch) {
- return ch == '_' || isalpha(ch);
-}
-
-static bool IsIdentifierFirstCharacter(const CharPointerWithLength &cpl) {
- return cpl.size() > 0 && IsIdentifierFirstCharacter(cpl[0]);
+static bool IsLegalIdentifierStart(const ContiguousChars &cpl) {  // token-level form: an empty token never qualifies
+ return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);  // otherwise decided by the first character alone
}
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
}
TokenSequence result;
for (size_t j{0}; j < tokens; ++j) {
- CharPointerWithLength tok{token[firstToken + j]};
- if (IsIdentifierFirstCharacter(tok)) {
+ ContiguousChars tok{token[firstToken + j]};
+ if (IsLegalIdentifierStart(tok)) {
auto it = args.find(tok.ToString());
if (it != args.end()) {
result.Put(it->second, token.GetTokenProvenance(j));
Provenance quoteProvenance{allSources->CompilerInsertionProvenance('"')};
result.PutNextTokenChar('"', quoteProvenance);
for (size_t j{0}; j < tokens.size(); ++j) {
- const CharPointerWithLength &token{tokens[j]};
+ const ContiguousChars &token{tokens[j]};
size_t bytes{token.size()};
for (size_t k{0}; k < bytes; ++k) {
char ch{token[k]};
int parenthesesNesting{0};
size_t tokens{replacement_.size()};
for (size_t j{0}; j < tokens; ++j) {
- const CharPointerWithLength &token{replacement_[j]};
+ const ContiguousChars &token{replacement_[j]};
size_t bytes{token.size()};
if (skipping) {
if (bytes == 1) {
size_t j;
for (j = 0; j < tokens; ++j) {
size_t bytes{input[j].size()};
- if (bytes > 0 && IsIdentifierFirstCharacter(input[j][0]) &&
+ if (bytes > 0 && IsLegalIdentifierStart(input[j][0]) &&
IsNameDefined(input[j])) {
break;
}
}
result->Put(input, 0, j);
for (; j < tokens; ++j) {
- const CharPointerWithLength &token{input[j]};
- if (token.IsBlank() || !IsIdentifierFirstCharacter(token[0])) {
+ const ContiguousChars &token{input[j]};
+ if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
result->Put(input, j);
continue;
}
ProvenanceRange insert{allSources_->AddCompilerInsertion(repl)};
ProvenanceRange call{allSources_->AddMacroCall(
insert, input.GetTokenProvenanceRange(j), repl)};
- result->Put(repl, call.LocalOffsetToProvenance(0));
+ result->Put(repl, call.start());
continue;
}
}
size_t k{j};
bool leftParen{false};
while (++k < tokens) {
- const CharPointerWithLength &lookAhead{input[k]};
+ const ContiguousChars &lookAhead{input[k]};
if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
break;
return noBlanks;
}
-static std::string ConvertToLowerCase(const std::string &str) {
- std::string lowered{str};
- for (char &ch : lowered) {
- ch = tolower(ch);
- }
- return lowered;
-}
-
static std::string GetDirectiveName(const TokenSequence &line, size_t *rest) {
size_t tokens{line.size()};
size_t j{SkipBlanks(line, 0, tokens)};
return {};
}
*rest = SkipBlanks(line, j + 1, tokens);
- return ConvertToLowerCase(line[j].ToString());
+ return ToLowerCaseLetters(line[j].ToString());
}
bool Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
if (j == tokens) {
return true;
}
- if (isdigit(dir[j][0]) || dir[j][0] == '"') {
+ if (IsDecimalDigit(dir[j][0]) || dir[j][0] == '"') {
return true; // TODO: treat as #line
}
- std::string dirName{ConvertToLowerCase(dir[j].ToString())};
+ std::string dirName{ToLowerCaseLetters(dir[j].ToString())};
j = SkipBlanks(dir, j + 1, tokens);
- CharPointerWithLength nameToken;
- if (j < tokens && IsIdentifierFirstCharacter(dir[j][0])) {
+ ContiguousChars nameToken;
+ if (j < tokens && IsLegalIdentifierStart(dir[j][0])) {
nameToken = dir[j];
}
if (dirName == "line") {
if (an == "...") {
isVariadic = true;
} else {
- if (an.empty() || !IsIdentifierFirstCharacter(an[0])) {
+ if (an.empty() || !IsLegalIdentifierStart(an[0])) {
prescanner->Complain(
"#define: missing or invalid argument name"_en_US);
return false;
return false;
}
-CharPointerWithLength Preprocessor::SaveTokenAsName(
- const CharPointerWithLength &t) {
+ContiguousChars Preprocessor::SaveTokenAsName(const ContiguousChars &t) {  // copies t into names_ and returns a view of that stored copy
names_.push_back(t.ToString());
return {names_.back().data(), names_.back().size()};
}
-bool Preprocessor::IsNameDefined(const CharPointerWithLength &token) {
+bool Preprocessor::IsNameDefined(const ContiguousChars &token) {  // true when token is a key in definitions_
return definitions_.find(token) != definitions_.end();
}
std::int64_t left{0};
if (t == "(") {
op = PARENS;
- } else if (isdigit(t[0])) {
+ } else if (IsDecimalDigit(t[0])) {
op = CONST;
size_t consumed{0};
left = std::stoll(t, &consumed);
if (consumed < t.size()) {
*error = "uninterpretable numeric constant '"_en_US;
}
- } else if (IsIdentifierFirstCharacter(t[0])) {
+ } else if (IsLegalIdentifierStart(t[0])) {
// undefined macro name -> zero
// TODO: BOZ constants?
op = CONST;
} else if (t == "-") {
op = UMINUS;
} else if (t == "." && *atToken + 2 < tokens &&
- ConvertToLowerCase(token[*atToken + 1].ToString()) == "not" &&
+ ToLowerCaseLetters(token[*atToken + 1].ToString()) == "not" &&
token[*atToken + 2].ToString() == ".") {
op = NOT;
*atToken += 2;
t = token[*atToken].ToString();
if (t == "." && *atToken + 2 < tokens &&
token[*atToken + 2].ToString() == ".") {
- t += ConvertToLowerCase(token[*atToken + 1].ToString()) + '.';
+ t += ToLowerCaseLetters(token[*atToken + 1].ToString()) + '.';
advance = 3;
}
auto it = opNameMap.find(t);
TokenSequence expr1{StripBlanks(expr, first, first + exprTokens)};
TokenSequence expr2;
for (size_t j{0}; j < expr1.size(); ++j) {
- if (ConvertToLowerCase(expr1[j].ToString()) == "defined") {
- CharPointerWithLength name;
+ if (ToLowerCaseLetters(expr1[j].ToString()) == "defined") {
+ ContiguousChars name;
if (j + 3 < expr1.size() && expr1[j + 1].ToString() == "(" &&
expr1[j + 3].ToString() == ")") {
name = expr1[j + 2];
j += 3;
- } else if (j + 1 < expr1.size() &&
- IsIdentifierFirstCharacter(expr1[j + 1])) {
+ } else if (j + 1 < expr1.size() && IsLegalIdentifierStart(expr1[j + 1])) {
name = expr1[j++];
}
if (!name.empty()) {
enum class IsElseActive { No, Yes };
enum class CanDeadElseAppear { No, Yes };
- CharPointerWithLength SaveTokenAsName(const CharPointerWithLength &);
- bool IsNameDefined(const CharPointerWithLength &);
+ ContiguousChars SaveTokenAsName(const ContiguousChars &);
+ bool IsNameDefined(const ContiguousChars &);
TokenSequence ReplaceMacros(const TokenSequence &, const Prescanner &);
bool SkipDisabledConditionalCode(
const std::string &, IsElseActive, Prescanner *);
AllSources *allSources_;
std::list<std::string> names_;
- std::unordered_map<CharPointerWithLength, Definition> definitions_;
+ std::unordered_map<ContiguousChars, Definition> definitions_;
std::stack<CanDeadElseAppear> ifStack_;
};
} // namespace parser
#include "prescan.h"
+#include "characters.h"
#include "idioms.h"
#include "message.h"
#include "preprocessor.h"
#include "source.h"
#include "token-sequence.h"
-#include <cctype>
#include <cstring>
#include <sstream>
#include <utility>
bool Prescanner::Prescan(ProvenanceRange range) {
AllSources *allSources{cooked_->allSources()};
ProvenanceRange around{allSources->GetContiguousRangeAround(range)};
- startProvenance_ = range.LocalOffsetToProvenance(0);
+ startProvenance_ = range.start();
size_t offset{0};
const SourceFile *source{
allSources->GetSourceFile(startProvenance_, &offset)};
}
}
-static inline bool IsNameChar(char ch) {
- return isalnum(ch) || ch == '_' || ch == '$' || ch == '@';
-}
-
bool Prescanner::NextToken(TokenSequence *tokens) {
CHECK(at_ >= start_ && at_ < limit_);
if (inFixedForm_) {
if (*at_ == '\'' || *at_ == '"') {
QuotedCharacterLiteral(tokens);
preventHollerith_ = false;
- } else if (isdigit(*at_)) {
+ } else if (IsDecimalDigit(*at_)) {
int n{0};
static constexpr int maxHollerith = 256 * (132 - 6);
do {
if (n < maxHollerith) {
- n = 10 * n + *at_ - '0';
+ n = 10 * n + DecimalDigitValue(*at_);
}
EmitCharAndAdvance(tokens, *at_);
if (inFixedForm_) {
SkipSpaces();
}
- } while (isdigit(*at_));
+ } while (IsDecimalDigit(*at_));
if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
!preventHollerith_) {
EmitCharAndAdvance(tokens, 'h');
}
inCharLiteral_ = false;
} else if (*at_ == '.') {
- while (isdigit(EmitCharAndAdvance(tokens, *at_))) {
+ while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
}
ExponentAndKind(tokens);
} else if (ExponentAndKind(tokens)) {
- } else if (isalpha(*at_)) {
+ } else if (IsLetter(*at_)) {
// Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
// we don't misrecognize I9HOLLERITH as an identifier in the next case.
EmitCharAndAdvance(tokens, *at_);
preventHollerith_ = false;
} else if (*at_ == '.') {
char nch{EmitCharAndAdvance(tokens, '.')};
- if (isdigit(nch)) {
- while (isdigit(EmitCharAndAdvance(tokens, *at_))) {
+ if (IsDecimalDigit(nch)) {
+ while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
}
ExponentAndKind(tokens);
} else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
}
preventHollerith_ = false;
- } else if (IsNameChar(*at_)) {
- while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
+ } else if (IsLegalInIdentifier(*at_)) {
+ while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
}
if (*at_ == '\'' || *at_ == '"') {
QuotedCharacterLiteral(tokens);
if (*at_ == '+' || *at_ == '-') {
EmitCharAndAdvance(tokens, *at_);
}
- while (isdigit(*at_)) {
+ while (IsDecimalDigit(*at_)) {
EmitCharAndAdvance(tokens, *at_);
}
if (*at_ == '_') {
- while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
+ while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
}
}
return true;
}
void Prescanner::EmitQuotedCharacter(TokenSequence *tokens, char ch) {
- switch (ch) {
- case '\a': EmitEscapedChar(tokens, 'a'); break;
- case '\b': EmitEscapedChar(tokens, 'b'); break;
- case '\f': EmitEscapedChar(tokens, 'f'); break;
- case '\r': EmitEscapedChar(tokens, 'r'); break;
- case '\t': EmitEscapedChar(tokens, 't'); break;
- case '\v': EmitEscapedChar(tokens, 'v'); break;
- case '\\':
- if (!enableBackslashEscapesInCharLiterals_) {
- EmitInsertedChar(tokens, '\\');
- }
- EmitChar(tokens, '\\');
- break;
- default:
- if (ch < ' ') {
- // emit an octal escape sequence
+ if (std::optional escape{BackslashEscapeChar(ch)}) {  // named escapes, quotes and backslash; NOTE(review): this table also maps newline, which the removed switch did not
+ if (ch != '\'' && ch != '"' &&
+ (ch != '\\' || !enableBackslashEscapesInCharLiterals_)) {  // quotes get no prefix; a backslash gets one only when escapes are disabled
EmitInsertedChar(tokens, '\\');
- EmitInsertedChar(tokens, '0' + ((ch >> 6) & 3));
- EmitInsertedChar(tokens, '0' + ((ch >> 3) & 7));
- EmitInsertedChar(tokens, '0' + (ch & 7));
- } else {
- EmitChar(tokens, ch);
}
+ EmitChar(tokens, *escape);  // the escape letter, or the quote or backslash itself
+ } else if (ch < ' ') {  // NOTE(review): where char is signed, bytes above 0x7f also compare below space; confirm intended
+ // emit an octal escape sequence
+ EmitInsertedChar(tokens, '\\');
+ EmitInsertedChar(tokens, '0' + ((ch >> 6) & 3));
+ EmitInsertedChar(tokens, '0' + ((ch >> 3) & 7));
+ EmitInsertedChar(tokens, '0' + (ch & 7));
+ } else {
+ EmitChar(tokens, ch);
}
}
tokens->PutNextTokenChar(ch, provenance);
}
- void EmitEscapedChar(TokenSequence *tokens, char ch) {
- EmitInsertedChar(tokens, '\\');
- EmitChar(tokens, ch);
- }
-
char EmitCharAndAdvance(TokenSequence *tokens, char ch) {
EmitChar(tokens, ch);
NextChar();
const char &AllSources::operator[](Provenance at) const {
const Origin &origin{MapToOrigin(at)};
- return origin[origin.covers.ProvenanceToLocalOffset(at)];
+ return origin[origin.covers.MemberOffset(at)];  // index the origin by the position of at within the range it covers
}
void AllSources::PushSearchPathDirectory(std::string directory) {
const SourceFile &source, ProvenanceRange from, bool isModule) {
ProvenanceRange covers{range_.NextAfter(), source.bytes()};
CHECK(range_.AnnexIfPredecessor(covers));
- CHECK(origin_.back().covers.IsPredecessor(covers));
+ CHECK(origin_.back().covers.ImmediatelyPrecedes(covers));
origin_.emplace_back(covers, source, from, isModule);
return covers;
}
ProvenanceRange def, ProvenanceRange use, const std::string &expansion) {
ProvenanceRange covers{range_.NextAfter(), expansion.size()};
CHECK(range_.AnnexIfPredecessor(covers));
- CHECK(origin_.back().covers.IsPredecessor(covers));
+ CHECK(origin_.back().covers.ImmediatelyPrecedes(covers));
origin_.emplace_back(covers, def, use, expansion);
return covers;
}
ProvenanceRange AllSources::AddCompilerInsertion(std::string text) {
ProvenanceRange covers{range_.NextAfter(), text.size()};
CHECK(range_.AnnexIfPredecessor(covers));
- CHECK(origin_.back().covers.IsPredecessor(covers));
+ CHECK(origin_.back().covers.ImmediatelyPrecedes(covers));  // the last recorded origin must end exactly where covers begins
origin_.emplace_back(covers, text);
return covers;
}
std::visit(
visitors{
[&](const Inclusion &inc) {
- size_t offset{origin.covers.ProvenanceToLocalOffset(at)};
+ size_t offset{origin.covers.MemberOffset(at)};
std::pair<int, int> pos{inc.source.FindOffsetLineAndColumn(offset)};
o << prefix << "at line " << pos.first << ", column " << pos.second;
if (echoSourceLine) {
<< inc.source.path();
if (IsValid(origin.replaces)) {
o << (inc.isModule ? " used\n" : " included\n");
- Identify(o, origin.replaces.LocalOffsetToProvenance(0), indented);
+ Identify(o, origin.replaces.start(), indented);
} else {
o << '\n';
}
},
[&](const Macro &mac) {
o << prefix << "in the expansion of a macro that was defined\n";
- Identify(o, mac.definition.LocalOffsetToProvenance(0), indented,
- echoSourceLine);
+ Identify(o, mac.definition.start(), indented, echoSourceLine);
o << prefix << "and called\n";
- Identify(o, origin.replaces.LocalOffsetToProvenance(0), indented,
- echoSourceLine);
+ Identify(o, origin.replaces.start(), indented, echoSourceLine);
if (echoSourceLine) {
o << prefix << "and expanded to\n"
<< indented << " " << mac.expansion << '\n'
<< indented << " ";
- for (size_t j{0}; origin.covers.LocalOffsetToProvenance(j) < at;
- ++j) {
+ for (size_t j{0}; origin.covers.OffsetMember(j) < at; ++j) {
o << (mac.expansion[j] == '\t' ? '\t' : ' ');
}
o << "^\n";
const SourceFile *AllSources::GetSourceFile(
Provenance at, size_t *offset) const {
const Origin &origin{MapToOrigin(at)};
- return std::visit(
- visitors{[&](const Inclusion &inc) {
- if (offset != nullptr) {
- *offset = origin.covers.ProvenanceToLocalOffset(at);
- }
- return &inc.source;
- },
- [&](const Macro &mac) {
- return GetSourceFile(
- origin.replaces.LocalOffsetToProvenance(0), offset);
- },
- [offset](const CompilerInsertion &) {
- if (offset != nullptr) {
- *offset = 0;
- }
- return static_cast<const SourceFile *>(nullptr);
- }},
+ return std::visit(visitors{[&](const Inclusion &inc) {  // dispatch on the alternative held by origin.u
+ if (offset != nullptr) {  // offset is an optional out-parameter
+ *offset = origin.covers.MemberOffset(at);  // position of at within this origin
+ }
+ return &inc.source;
+ },
+ [&](const Macro &mac) {
+ return GetSourceFile(origin.replaces.start(), offset);  // retry with the start of the range this origin replaces
+ },
+ [offset](const CompilerInsertion &) {
+ if (offset != nullptr) {
+ *offset = 0;
+ }
+ return static_cast<const SourceFile *>(nullptr);  // compiler insertions yield no source file
+ }},
origin.u);
}
ProvenanceRange AllSources::GetContiguousRangeAround(
ProvenanceRange range) const {
CHECK(IsValid(range));
- const Origin &origin{MapToOrigin(range.LocalOffsetToProvenance(0))};
+ const Origin &origin{MapToOrigin(range.start())};  // the origin that holds the first provenance of range
CHECK(origin.covers.Contains(range));
return origin.covers;
}
return iter->second;
}
ProvenanceRange newCharRange{AddCompilerInsertion(std::string{ch})};
- Provenance newCharProvenance{newCharRange.LocalOffsetToProvenance(0)};
+ Provenance newCharProvenance{newCharRange.start()};
compilerInsertionProvenance_.insert(std::make_pair(ch, newCharProvenance));
return newCharProvenance;
}
size_t low{0}, count{origin_.size()};
while (count > 1) {
size_t mid{low + (count >> 1)};
- if (at < origin_[mid].covers.LocalOffsetToProvenance(0)) {
+ if (at < origin_[mid].covers.start()) {
count = mid - low;
} else {
count -= mid - low;
buffer_.clear();
}
-void ProvenanceRange::Dump(std::ostream &o) const {
- o << "[" << start_.offset() << ".." << (start_.offset() + bytes_ - 1) << "] ("
- << bytes_ << " bytes)";
+static void DumpRange(std::ostream &o, const ProvenanceRange &r) {  // writes the first and last offsets of r followed by its byte count
+ o << "[" << r.start().offset() << ".." << r.Last().offset() << "] ("
+ << r.size() << " bytes)";
}
void OffsetToProvenanceMappings::Dump(std::ostream &o) const {
size_t n{m.range.size()};
o << "offsets [" << m.start << ".." << (m.start + n - 1)
<< "] -> provenances ";
- m.range.Dump(o);
+ DumpRange(o, m.range);
o << '\n';
}
}
void AllSources::Dump(std::ostream &o) const {
o << "AllSources range_ ";
- range_.Dump(o);
+ DumpRange(o, range_);
o << '\n';
for (const Origin &m : origin_) {
o << " ";
- m.covers.Dump(o);
+ DumpRange(o, m.covers);
o << " -> ";
std::visit(visitors{[&](const Inclusion &inc) {
if (inc.isModule) {
return {offset_ + static_cast<size_t>(n)};
}
Provenance operator+(size_t n) const { return {offset_ + n}; }
+  // Distance from that up to this provenance, in offset units.  The argument
+  // must not lie beyond *this; the CHECK enforces that before subtracting.
+  size_t operator-(Provenance that) const {
+    CHECK(that <= *this);
+    const size_t distance{offset_ - that.offset_};
+    return distance;
+  }
bool operator<(Provenance that) const { return offset_ < that.offset_; }
bool operator<=(Provenance that) const { return !(that < *this); }
bool operator==(Provenance that) const { return offset_ == that.offset_; }
size_t offset_{0};
};
-class ProvenanceRange {
+template<typename A> class Interval {  // a start value of type A plus a count of consecutive members; A must support the comparisons and size_t arithmetic used below
public:
- ProvenanceRange() {}
- ProvenanceRange(Provenance s, size_t n) : start_{s}, bytes_{n} {
- CHECK(n > 0);
- }
- ProvenanceRange(const ProvenanceRange &) = default;
- ProvenanceRange(ProvenanceRange &&) = default;
- ProvenanceRange &operator=(const ProvenanceRange &) = default;
- ProvenanceRange &operator=(ProvenanceRange &&) = default;
-
- bool operator==(ProvenanceRange that) const {
- return start_ == that.start_ && bytes_ == that.bytes_;
- }
-
- size_t size() const { return bytes_; }
-
- bool Contains(Provenance at) const {
- return start_ <= at && at < start_ + bytes_;
+ using type = A;
+ Interval() {}
+ Interval(const A &s, size_t n) : start_{s}, size_{n} {}  // n is not checked here, so an empty interval can be constructed
+ Interval(A &&s, size_t n) : start_{std::move(s)}, size_{n} {}
+ Interval(const Interval &) = default;
+ Interval(Interval &&) = default;
+ Interval &operator=(const Interval &) = default;
+ Interval &operator=(Interval &&) = default;
+
+ bool operator==(const Interval &that) const {
+ return start_ == that.start_ && size_ == that.size_;
}
- bool Contains(ProvenanceRange that) const {
- return Contains(that.start_) && Contains(that.start_ + (that.bytes_ - 1));
- }
+ const A &start() const { return start_; }
+ size_t size() const { return size_; }
+ bool empty() const { return size_ == 0; }
- size_t ProvenanceToLocalOffset(Provenance at) const {
- CHECK(Contains(at));
- return at.offset() - start_.offset();
+ bool Contains(const A &x) const { return start_ <= x && x < start_ + size_; }  // half-open membership test: start_ is included, start_ + size_ is not
+ bool Contains(const Interval &that) const {  // checks the first and the last member of that
+ return Contains(that.start_) && Contains(that.start_ + (that.size_ - 1));  // NOTE(review): that.size_ - 1 wraps around when that is empty
}
-
- Provenance LocalOffsetToProvenance(size_t at) const {
- CHECK(at < bytes_);
- return start_ + at;
+ bool ImmediatelyPrecedes(const Interval &that) const {  // true when that begins exactly where this interval ends
+ return NextAfter() == that.start_;
}
-
- Provenance NextAfter() const { return start_ + bytes_; }
-
- ProvenanceRange Suffix(size_t at) const {
- CHECK(at < bytes_);
- return {start_ + at, bytes_ - at};
+ bool AnnexIfPredecessor(const Interval &that) {  // grows this interval over an adjacent successor; returns whether it did
+ if (ImmediatelyPrecedes(that)) {
+ size_ += that.size_;
+ return true;
+ }
+ return false;
}
- ProvenanceRange Prefix(size_t bytes) const {
- CHECK(bytes > 0);
- return {start_, std::min(bytes_, bytes)};
+ size_t MemberOffset(const A &x) const {  // position of x relative to start_; x must be a member
+ CHECK(Contains(x));
+ return x - start_;
}
-
- bool IsPredecessor(ProvenanceRange next) {
- return start_ + bytes_ == next.start_;
+ A OffsetMember(size_t n) const {  // the member at position n; n must be below size_
+ CHECK(n < size_);
+ return start_ + n;
}
- bool AnnexIfPredecessor(ProvenanceRange next) {
- if (IsPredecessor(next)) {
- bytes_ += next.bytes_;
- return true;
- }
- return false;
+ A Last() const { return start_ + (size_ - 1); }  // NOTE(review): size_ - 1 wraps around when the interval is empty
+ A NextAfter() const { return start_ + size_; }  // one past the last member
+ Interval Prefix(size_t n) const { return {start_, std::min(size_, n)}; }  // at most the first n members
+ Interval Suffix(size_t n) const {  // everything after the first n members; n equal to size_ gives an empty interval
+ CHECK(n <= size_);
+ return {start_ + n, size_ - n};
}
- void Dump(std::ostream &) const;
-
private:
- Provenance start_;
- size_t bytes_{0};
+ A start_;
+ size_t size_{0};
};
+using ProvenanceRange = Interval<Provenance>;
+
// Maps 0-based local offsets in some contiguous range (e.g., a token
// sequence) to their provenances. Lookup time is on the order of
// O(log(#of intervals with contiguous provenances)). As mentioned
// the prescanned character stream and recognize context-sensitive tokens.
#include "basic-parsers.h"
+#include "characters.h"
#include "idioms.h"
#include "provenance.h"
-#include <cctype>
#include <cstring>
#include <functional>
#include <limits>
const MessageFixedText text_;
};
-static inline constexpr bool IsDecimalDigit(char ch) { return isdigit(ch); }
-
-static inline constexpr bool IsOctalDigit(char ch) {
- return ch >= '0' && ch <= '7';
-}
-
-static inline constexpr bool IsHexadecimalDigit(char ch) {
- return isxdigit(ch);
-}
-
-static inline constexpr bool IsLetter(char ch) { return isalpha(ch); }
-
-static inline constexpr char ToLower(char &&ch) { return tolower(ch); }
-
constexpr CharPredicateGuardParser digit{
IsDecimalDigit, "expected digit"_en_US};
-constexpr auto letter = applyFunction(
- ToLower, CharPredicateGuardParser{IsLetter, "expected letter"_en_US});
+constexpr auto letter = applyFunction(ToLowerCaseLetter,
+ CharPredicateGuardParser{IsLetter, "expected letter"_en_US});
template<char good> class CharMatch {
public:
continue; // redundant; ignore
}
}
- if (!ch && !(ch = nextChar.Parse(state))) {
+ if (!ch.has_value() && !(ch = nextChar.Parse(state))) {
return {};
}
if (spaceSkipping) {
// medial space: 0 or more spaces/tabs accepted, none required
+ // TODO: designate and enforce free-form mandatory white space
while (*ch == ' ' || *ch == '\t') {
if (!(ch = nextChar.Parse(state))) {
return {};
}
}
// ch remains full for next iteration
- } else if (*ch == tolower(*p)) {
+ } else if (IsSameApartFromCase(*ch, *p)) {
ch.reset();
} else {
state->PutMessage(at, MessageExpectedText{str_, bytes_});
return "[" >> p / "]";
}
-static inline int HexadecimalDigitValue(char ch) {
- if (IsDecimalDigit(ch)) {
- return ch - '0';
- }
- return toupper(ch) - 'A' + 10;
-}
-
// Quoted character literal constants.
struct CharLiteralChar {
struct Result {
if (!(och = nextChar.Parse(state)).has_value()) {
return {};
}
- switch ((ch = *och)) {
- case 'a': return {Result::Escaped('\a')};
- case 'b': return {Result::Escaped('\b')};
- case 'f': return {Result::Escaped('\f')};
- case 'n': return {Result::Escaped('\n')};
- case 'r': return {Result::Escaped('\r')};
- case 't': return {Result::Escaped('\t')};
- case 'v': return {Result::Escaped('\v')};
- case '"':
- case '\'':
- case '\\': return {Result::Escaped(ch)};
- case '\n':
+ ch = *och;
+ if (ch == '\n') {
state->PutMessage(at, "unclosed character constant"_en_US);
return {};
- default:
- if (IsOctalDigit(ch)) {
- ch -= '0';
- for (int j = (ch > 3 ? 1 : 2); j-- > 0;) {
- static constexpr auto octalDigit = attempt(CharPredicateGuardParser{
- IsOctalDigit, "expected octal digit"_en_US});
- if ((och = octalDigit.Parse(state)).has_value()) {
- ch = 8 * ch + *och - '0';
- }
+ }
+ if (std::optional<char> escChar{BackslashEscapeValue(ch)}) {
+ return {Result::Escaped(*escChar)};
+ }
+ if (IsOctalDigit(ch)) {
+ ch -= '0';
+ for (int j = (ch > 3 ? 1 : 2); j-- > 0;) {
+ static constexpr auto octalDigit = attempt(CharPredicateGuardParser{
+ IsOctalDigit, "expected octal digit"_en_US});
+ if ((och = octalDigit.Parse(state)).has_value()) {
+ ch = 8 * ch + *och - '0';
}
- } else if (ch == 'x' || ch == 'X') {
- ch = 0;
- for (int j = 0; j++ < 2;) {
- static constexpr auto hexDigit = attempt(CharPredicateGuardParser{
- IsHexadecimalDigit, "expected hexadecimal digit"_en_US});
- if ((och = hexDigit.Parse(state)).has_value()) {
- ch = 16 * ch + HexadecimalDigitValue(*och);
- }
+ }
+ } else if (ch == 'x' || ch == 'X') {
+ ch = 0;
+ for (int j = 0; j++ < 2;) {
+ static constexpr auto hexDigit = attempt(CharPredicateGuardParser{
+ IsHexadecimalDigit, "expected hexadecimal digit"_en_US});
+ if ((och = hexDigit.Parse(state)).has_value()) {
+ ch = 16 * ch + HexadecimalDigitValue(*och);
}
- } else {
- state->PutMessage(at, "bad escaped character"_en_US);
}
- return {Result::Escaped(ch)};
+ } else {
+ state->PutMessage(at, "bad escaped character"_en_US);
}
+ return {Result::Escaped(ch)};
}
};
if (*ch == quote) {
break;
}
- if (!isxdigit(*ch)) {
+ if (!IsHexadecimalDigit(*ch)) {
return {};
}
content += *ch;
#include "token-sequence.h"
+#include "characters.h"
namespace Fortran {
namespace parser {
-bool CharPointerWithLength::IsBlank() const {
- for (size_t j{0}; j < bytes_; ++j) {
- char ch{data_[j]};
+bool ContiguousChars::IsBlank() const {
+ const char *data{interval_.start()};
+ size_t n{interval_.size()};
+ for (size_t j{0}; j < n; ++j) {
+ char ch{data[j]};
if (ch != ' ' && ch != '\t') {
return false;
}
void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
size_t offset{0};
for (size_t j{0}; j < that.size(); ++j) {
- CharPointerWithLength tok{that[j]};
- Put(tok, range.LocalOffsetToProvenance(offset));
+ ContiguousChars tok{that[j]};
+ Put(tok, range.OffsetMember(offset));
offset += tok.size();
}
CHECK(offset == range.size());
ProvenanceRange provenance;
size_t offset{0};
for (; tokens-- > 0; ++at) {
- CharPointerWithLength tok{that[at]};
+ ContiguousChars tok{that[at]};
size_t tokBytes{tok.size()};
for (size_t j{0}; j < tokBytes; ++j) {
if (offset == provenance.size()) {
offset = 0;
provenance = that.provenances_.Map(that.start_[at] + j);
}
- PutNextTokenChar(tok[j], provenance.LocalOffsetToProvenance(offset++));
+ PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
}
CloseToken();
}
CloseToken();
}
-void TokenSequence::Put(const CharPointerWithLength &t, Provenance provenance) {
+void TokenSequence::Put(const ContiguousChars &t, Provenance provenance) {
Put(&t[0], t.size(), provenance);
}
size_t atToken{0};
for (size_t j{0}; j < chars;) {
size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
- if (isalpha(char_[j])) {
+ if (IsLegalInIdentifier(char_[j])) {
for (; j < nextStart; ++j) {
cooked->Put(tolower(char_[j]));
}
Provenance TokenSequence::GetTokenProvenance(
size_t token, size_t offset) const {
ProvenanceRange range{provenances_.Map(start_[token] + offset)};
- return range.LocalOffsetToProvenance(0);
+ return range.start();
}
ProvenanceRange TokenSequence::GetTokenProvenanceRange(
// Just a const char pointer with an associated length; does not presume
// to own the referenced data. Used to describe buffered tokens and hash
// table keys.
-class CharPointerWithLength {
+class ContiguousChars {
public:
- CharPointerWithLength() {}
- CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}
- CharPointerWithLength(const std::string &s)
- : data_{s.data()}, bytes_{s.size()} {}
- CharPointerWithLength(const CharPointerWithLength &that)
- : data_{that.data_}, bytes_{that.bytes_} {}
- CharPointerWithLength &operator=(const CharPointerWithLength &that) {
- data_ = that.data_;
- bytes_ = that.bytes_;
- return *this;
- }
+ ContiguousChars() {}
+ ContiguousChars(const char *x, size_t n) : interval_{x, n} {}
+ ContiguousChars(const std::string &s) : interval_{s.data(), s.size()} {}
+ ContiguousChars(const ContiguousChars &that) = default;
+ ContiguousChars &operator=(const ContiguousChars &that) = default;
- bool empty() const { return bytes_ == 0; }
- size_t size() const { return bytes_; }
- const char &operator[](size_t j) const { return data_[j]; }
+ bool empty() const { return interval_.empty(); }
+ size_t size() const { return interval_.size(); }
+ const char &operator[](size_t j) const { return interval_.start()[j]; }
bool IsBlank() const;
- std::string ToString() const { return std::string{data_, bytes_}; }
+ std::string ToString() const {
+ return std::string{interval_.start(), interval_.size()};
+ }
private:
- const char *data_{nullptr};
- size_t bytes_{0};
+ Interval<const char *> interval_{nullptr, 0};
};
} // namespace parser
} // namespace Fortran
-// Specializations to enable std::unordered_map<CharPointerWithLength, ...>
-template<> struct std::hash<Fortran::parser::CharPointerWithLength> {
- size_t operator()(const Fortran::parser::CharPointerWithLength &x) const {
+// Specializations to enable std::unordered_map<ContiguousChars, ...>
+template<> struct std::hash<Fortran::parser::ContiguousChars> {
+ size_t operator()(const Fortran::parser::ContiguousChars &x) const {
size_t hash{0}, bytes{x.size()};
for (size_t j{0}; j < bytes; ++j) {
hash = (hash * 31) ^ x[j];
}
};
-template<> struct std::equal_to<Fortran::parser::CharPointerWithLength> {
- bool operator()(const Fortran::parser::CharPointerWithLength &x,
- const Fortran::parser::CharPointerWithLength &y) const {
+template<> struct std::equal_to<Fortran::parser::ContiguousChars> {
+ bool operator()(const Fortran::parser::ContiguousChars &x,
+ const Fortran::parser::ContiguousChars &y) const {
return x.size() == y.size() &&
std::memcmp(static_cast<const void *>(&x[0]),
static_cast<const void *>(&y[0]), x.size()) == 0;
return *this;
}
- CharPointerWithLength operator[](size_t token) const {
+ ContiguousChars operator[](size_t token) const {
return {&char_[start_[token]], TokenBytes(token)};
}
void Put(const TokenSequence &, ProvenanceRange);
void Put(const TokenSequence &, size_t at, size_t tokens = 1);
void Put(const char *, size_t, Provenance);
- void Put(const CharPointerWithLength &, Provenance);
+ void Put(const ContiguousChars &, Provenance);
void Put(const std::string &, Provenance);
void Put(const std::stringstream &, Provenance);
void EmitWithCaseConversion(CookedSource *) const;