[flang] Fix some bugs that I noticed while running regression tests.

author peter klausler <pklausler@nvidia.com>

Tue, 27 Feb 2018 22:02:10 +0000 (14:02 -0800)

committer peter klausler <pklausler@nvidia.com>

Tue, 27 Feb 2018 23:55:10 +0000 (15:55 -0800)
author peter klausler <pklausler@nvidia.com>
Tue, 27 Feb 2018 22:02:10 +0000 (14:02 -0800)
committer peter klausler <pklausler@nvidia.com>
Tue, 27 Feb 2018 23:55:10 +0000 (15:55 -0800)
diff --git a/flang/lib/parser/characters.h b/flang/lib/parser/characters.h

new file mode 100644 (file)

index 0000000..7d1dec8
--- /dev/null
+++ b/flang/lib/parser/characters.h
@@ -0,0 +1,114 @@
+#ifndef FORTRAN_PARSER_CHARACTERS_H_
+#define FORTRAN_PARSER_CHARACTERS_H_
+
+// Define some character classification predicates and
+// conversions here to avoid dependences upon <cctype> and
+// also to accommodate Fortran tokenization.
+
+#include <optional>
+#include <string>
+
+namespace Fortran {
+namespace parser {
+
+static constexpr bool IsUpperCaseLetter(char ch) {
+  if constexpr ('A' == static_cast<char>(0xc1)) {
+    // EBCDIC execution character set: test the three runs A-I, J-R, S-Z
+    return (ch >= 'A' && ch <= 'I') || (ch >= 'J' && ch <= 'R') ||
+        (ch >= 'S' && ch <= 'Z');
+  }
+  return ch >= 'A' && ch <= 'Z';
+}
+
+static constexpr bool IsLowerCaseLetter(char ch) {
+  if constexpr ('a' == static_cast<char>(0x81)) {
+    // EBCDIC execution character set: test the three runs a-i, j-r, s-z
+    return (ch >= 'a' && ch <= 'i') || (ch >= 'j' && ch <= 'r') ||
+        (ch >= 's' && ch <= 'z');
+  }
+  return ch >= 'a' && ch <= 'z';
+}
+
+static constexpr bool IsLetter(char ch) {
+  return IsUpperCaseLetter(ch) || IsLowerCaseLetter(ch);
+}
+
+static constexpr bool IsDecimalDigit(char ch) { return ch >= '0' && ch <= '9'; }
+
+static constexpr bool IsHexadecimalDigit(char ch) {
+  return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') ||
+      (ch >= 'a' && ch <= 'f');
+}
+
+static constexpr bool IsOctalDigit(char ch) { return ch >= '0' && ch <= '7'; }
+
+static constexpr bool IsLegalIdentifierStart(char ch) {
+  return IsLetter(ch) || ch == '_' || ch == '@' || ch == '$';
+}
+
+static constexpr bool IsLegalInIdentifier(char ch) {
+  return IsLegalIdentifierStart(ch) || IsDecimalDigit(ch);
+}
+
+static constexpr char ToLowerCaseLetter(char ch) {
+  return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
+}
+
+static constexpr char ToLowerCaseLetter(char &&ch) {
+  return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
+}
+
+static constexpr bool IsSameApartFromCase(char x, char y) {
+  return ToLowerCaseLetter(x) == ToLowerCaseLetter(y);
+}
+
+static inline std::string ToLowerCaseLetters(const std::string &str) {
+  std::string lowered{str};
+  for (char &ch : lowered) {
+    ch = ToLowerCaseLetter(ch);
+  }
+  return lowered;
+}
+
+static constexpr char DecimalDigitValue(char ch) { return ch - '0'; }
+
+static constexpr char HexadecimalDigitValue(char ch) {
+  return IsUpperCaseLetter(ch)
+      ? ch - 'A' + 10
+      : IsLowerCaseLetter(ch) ? ch - 'a' + 10 : DecimalDigitValue(ch);
+}
+
+static constexpr std::optional<char> BackslashEscapeValue(char ch) {
+  switch (ch) {
+  case 'a': return {'\a'};
+  case 'b': return {'\b'};
+  case 'f': return {'\f'};
+  case 'n': return {'\n'};
+  case 'r': return {'\r'};
+  case 't': return {'\t'};
+  case 'v': return {'\v'};
+  case '"':
+  case '\'':
+  case '\\': return {ch};
+  default: return {};
+  }
+}
+
+static constexpr std::optional<char> BackslashEscapeChar(char ch) {
+  switch (ch) {
+  case '\a': return {'a'};
+  case '\b': return {'b'};
+  case '\f': return {'f'};
+  case '\n': return {'n'};
+  case '\r': return {'r'};
+  case '\t': return {'t'};
+  case '\v': return {'v'};
+  case '"':
+  case '\'':
+  case '\\': return {ch};
+  default: return {};
+  }
+}
+}  // namespace parser
+}  // namespace Fortran
+#endif  // FORTRAN_PARSER_CHARACTERS_H_
diff --git a/flang/lib/parser/grammar.h b/flang/lib/parser/grammar.h

index 87b85a0..a058c80 100644 (file)
--- a/flang/lib/parser/grammar.h
+++ b/flang/lib/parser/grammar.h
@@ -8,11 +8,11 @@
  // library used here to implement an LL recursive descent recognizer.
  
  #include "basic-parsers.h"
+#include "characters.h"
  #include "format-specification.h"
  #include "parse-tree.h"
  #include "token-parsers.h"
  #include "user-state.h"
-#include <cctype>
  #include <cinttypes>
  #include <cstdio>
  #include <functional>
@@ -569,8 +569,8 @@ constexpr CharMatch<'_'> underscore;
  constexpr auto otherIdCharacter =
      underscore / !(CharMatch<'\''>{} || CharMatch<'"'>{}) ||
      extension(
-        CharMatch<'$'>{});  // PGI/ifort (and Cray/gfortran, but not first)
-// Cray also allows '@'.
+        CharMatch<'$'>{} ||  // PGI/ifort (and Cray/gfortran, but not first)
+        CharMatch<'@'>{});  // Cray
  
  constexpr auto nonDigitIdCharacter = letter || otherIdCharacter;
  
@@ -753,11 +753,11 @@ TYPE_CONTEXT_PARSER("REAL literal constant"_en_US,
  // Extension: Q
  // Not a complete token.
  inline constexpr bool isEorD(char ch) {
-  ch = tolower(ch);
+  ch = ToLowerCaseLetter(ch);
    return ch == 'e' || ch == 'd';
  }
  
-inline constexpr bool isQ(char ch) { return tolower(ch) == 'q'; }
+inline constexpr bool isQ(char ch) { return ToLowerCaseLetter(ch) == 'q'; }
  
  constexpr CharPredicateGuardParser exponentEorD{
      isEorD, "expected exponent letter"_en_US};
@@ -821,7 +821,7 @@ TYPE_PARSER(construct<CharLength>{}(parenthesized(typeParamValue)) ||
  // combined.  Backslash escapes can be enabled.
  // PGI extension: nc'...' is Kanji.
  // N.B. charLiteralConstantWithoutKind does not skip preceding spaces.
-// N.B. the parsing of "name" in takes care to not consume the '_'.
+// N.B. the parsing of "name" takes care to not consume the '_'.
  constexpr auto charLiteralConstantWithoutKind =
      CharMatch<'\''>{} >> CharLiteral<'\''>{} ||
      CharMatch<'"'>{} >> CharLiteral<'"'>{};
@@ -2952,6 +2952,9 @@ TYPE_PARSER(maybe("UNIT ="_tok) >> construct<InquireSpec>{}(fileUnitNumber) ||
      "DELIM =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
                       pure(InquireSpec::CharVar::Kind::Delim),
                       scalarDefaultCharVariable)) ||
+    "DIRECT =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
+                      pure(InquireSpec::CharVar::Kind::Direct),
+                      scalarDefaultCharVariable)) ||
      "ENCODING =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
                          pure(InquireSpec::CharVar::Kind::Encoding),
                          scalarDefaultCharVariable)) ||
@@ -3027,6 +3030,10 @@ TYPE_PARSER(maybe("UNIT ="_tok) >> construct<InquireSpec>{}(fileUnitNumber) ||
      "STATUS =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
                        pure(InquireSpec::CharVar::Kind::Status),
                        scalarDefaultCharVariable)) ||
+    "UNFORMATTED =" >>
+        construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
+            pure(InquireSpec::CharVar::Kind::Unformatted),
+            scalarDefaultCharVariable)) ||
      "WRITE =" >> construct<InquireSpec>{}(construct<InquireSpec::CharVar>{}(
                       pure(InquireSpec::CharVar::Kind::Write),
                       scalarDefaultCharVariable)))
@@ -3043,7 +3050,8 @@ TYPE_CONTEXT_PARSER("INQUIRE statement"_en_US,
                  nonemptyList(outputItem)))))
  
  // R1301 format-stmt -> FORMAT format-specification
-TYPE_PARSER("FORMAT" >> construct<FormatStmt>{}(Parser<FormatSpecification>{}))
+TYPE_CONTEXT_PARSER("FORMAT statement"_en_US,
+    "FORMAT" >> construct<FormatStmt>{}(Parser<FormatSpecification>{}))
  
  // R1321 char-string-edit-desc
  // N.B. C1313 disallows any kind parameter on the character literal.
@@ -3123,7 +3131,8 @@ TYPE_PARSER(construct<IntrinsicTypeDataEditDesc>{}(
      // PGI/Intel extension: omitting width (and all else that follows)
      extension(construct<IntrinsicTypeDataEditDesc>{}(
          "I" >> pure(IntrinsicTypeDataEditDesc::Kind::I) ||
-            "B" >> pure(IntrinsicTypeDataEditDesc::Kind::B) ||
+            ("B"_tok / !letter /* don't occlude BN & BZ */) >>
+                pure(IntrinsicTypeDataEditDesc::Kind::B) ||
              "O" >> pure(IntrinsicTypeDataEditDesc::Kind::O) ||
              "Z" >> pure(IntrinsicTypeDataEditDesc::Kind::Z) ||
              "F" >> pure(IntrinsicTypeDataEditDesc::Kind::F) ||
diff --git a/flang/lib/parser/parse-state.h b/flang/lib/parser/parse-state.h

index 944462b..38ceb35 100644 (file)
--- a/flang/lib/parser/parse-state.h
+++ b/flang/lib/parser/parse-state.h
@@ -99,7 +99,7 @@ public:
  
    const char *GetLocation() const { return p_; }
    Provenance GetProvenance(const char *at) const {
-    return cooked_.GetProvenance(at).LocalOffsetToProvenance(0);
+    return cooked_.GetProvenance(at).start();
    }
    Provenance GetProvenance() const { return GetProvenance(p_); }
  
diff --git a/flang/lib/parser/parse-tree.h b/flang/lib/parser/parse-tree.h

index a6c300b..f0a8aec 100644 (file)
--- a/flang/lib/parser/parse-tree.h
+++ b/flang/lib/parser/parse-tree.h
@@ -2633,6 +2633,7 @@ struct FlushStmt {
  //         BLANK = scalar-default-char-variable |
  //         DECIMAL = scalar-default-char-variable |
  //         DELIM = scalar-default-char-variable |
+//         DIRECT = scalar-default-char-variable |
  //         ENCODING = scalar-default-char-variable |
  //         ERR = label | EXIST = scalar-logical-variable |
  //         FORM = scalar-default-char-variable |
@@ -2654,13 +2655,15 @@ struct FlushStmt {
  //         SIZE = scalar-int-variable |
  //         STREAM = scalar-default-char-variable |
  //         STATUS = scalar-default-char-variable |
+//         UNFORMATTED = scalar-default-char-variable |
  //         WRITE = scalar-default-char-variable
  struct InquireSpec {
    UNION_CLASS_BOILERPLATE(InquireSpec);
    struct CharVar {
      DEFINE_NESTED_ENUM_CLASS(Kind, Access, Action, Asynchronous, Blank, Decimal,
-        Delim, Encoding, Form, Formatted, Iomsg, Name, Pad, Position, Read,
-        Readwrite, Round, Sequential, Sign, Stream, Status, Write);
+        Delim, Direct, Encoding, Form, Formatted, Iomsg, Name, Pad, Position,
+        Read, Readwrite, Round, Sequential, Sign, Stream, Status, Unformatted,
+        Write);
      TUPLE_CLASS_BOILERPLATE(CharVar);
      std::tuple<Kind, ScalarDefaultCharVariable> t;
    };
diff --git a/flang/lib/parser/preprocessor.cc b/flang/lib/parser/preprocessor.cc

index 91db2b6..4a99568 100644 (file)
--- a/flang/lib/parser/preprocessor.cc
+++ b/flang/lib/parser/preprocessor.cc
@@ -1,9 +1,9 @@
  #include "preprocessor.h"
+#include "characters.h"
  #include "idioms.h"
  #include "message.h"
  #include "prescan.h"
  #include <algorithm>
-#include <cctype>
  #include <cinttypes>
  #include <ctime>
  #include <map>
@@ -28,8 +28,8 @@ Definition::Definition(const std::vector<std::string> &argNames,
  
  Definition::Definition(const std::string &predefined, AllSources *sources)
    : isPredefined_{true},
-    replacement_{predefined,
-        sources->AddCompilerInsertion(predefined).LocalOffsetToProvenance(0)} {}
+    replacement_{
+        predefined, sources->AddCompilerInsertion(predefined).start()} {}
  
  bool Definition::set_isDisabled(bool disable) {
    bool was{isDisabled_};
@@ -37,12 +37,8 @@ bool Definition::set_isDisabled(bool disable) {
    return was;
  }
  
-static bool IsIdentifierFirstCharacter(char ch) {
-  return ch == '_' || isalpha(ch);
-}
-
-static bool IsIdentifierFirstCharacter(const CharPointerWithLength &cpl) {
-  return cpl.size() > 0 && IsIdentifierFirstCharacter(cpl[0]);
+static bool IsLegalIdentifierStart(const ContiguousChars &cpl) {
+  return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
  }
  
  TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
@@ -55,8 +51,8 @@ TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    }
    TokenSequence result;
    for (size_t j{0}; j < tokens; ++j) {
-    CharPointerWithLength tok{token[firstToken + j]};
-    if (IsIdentifierFirstCharacter(tok)) {
+    ContiguousChars tok{token[firstToken + j]};
+    if (IsLegalIdentifierStart(tok)) {
        auto it = args.find(tok.ToString());
        if (it != args.end()) {
          result.Put(it->second, token.GetTokenProvenance(j));
@@ -83,7 +79,7 @@ static TokenSequence Stringify(
    Provenance quoteProvenance{allSources->CompilerInsertionProvenance('"')};
    result.PutNextTokenChar('"', quoteProvenance);
    for (size_t j{0}; j < tokens.size(); ++j) {
-    const CharPointerWithLength &token{tokens[j]};
+    const ContiguousChars &token{tokens[j]};
      size_t bytes{token.size()};
      for (size_t k{0}; k < bytes; ++k) {
        char ch{token[k]};
@@ -107,7 +103,7 @@ TokenSequence Definition::Apply(
    int parenthesesNesting{0};
    size_t tokens{replacement_.size()};
    for (size_t j{0}; j < tokens; ++j) {
-    const CharPointerWithLength &token{replacement_[j]};
+    const ContiguousChars &token{replacement_[j]};
      size_t bytes{token.size()};
      if (skipping) {
        if (bytes == 1) {
@@ -212,7 +208,7 @@ bool Preprocessor::MacroReplacement(const TokenSequence &input,
    size_t j;
    for (j = 0; j < tokens; ++j) {
      size_t bytes{input[j].size()};
-    if (bytes > 0 && IsIdentifierFirstCharacter(input[j][0]) &&
+    if (bytes > 0 && IsLegalIdentifierStart(input[j][0]) &&
          IsNameDefined(input[j])) {
        break;
      }
@@ -222,8 +218,8 @@ bool Preprocessor::MacroReplacement(const TokenSequence &input,
    }
    result->Put(input, 0, j);
    for (; j < tokens; ++j) {
-    const CharPointerWithLength &token{input[j]};
-    if (token.IsBlank() || !IsIdentifierFirstCharacter(token[0])) {
+    const ContiguousChars &token{input[j]};
+    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
        result->Put(input, j);
        continue;
      }
@@ -253,7 +249,7 @@ bool Preprocessor::MacroReplacement(const TokenSequence &input,
            ProvenanceRange insert{allSources_->AddCompilerInsertion(repl)};
            ProvenanceRange call{allSources_->AddMacroCall(
                insert, input.GetTokenProvenanceRange(j), repl)};
-          result->Put(repl, call.LocalOffsetToProvenance(0));
+          result->Put(repl, call.start());
            continue;
          }
        }
@@ -274,7 +270,7 @@ bool Preprocessor::MacroReplacement(const TokenSequence &input,
      size_t k{j};
      bool leftParen{false};
      while (++k < tokens) {
-      const CharPointerWithLength &lookAhead{input[k]};
+      const ContiguousChars &lookAhead{input[k]};
        if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
          leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
          break;
@@ -353,14 +349,6 @@ static TokenSequence StripBlanks(
    return noBlanks;
  }
  
-static std::string ConvertToLowerCase(const std::string &str) {
-  std::string lowered{str};
-  for (char &ch : lowered) {
-    ch = tolower(ch);
-  }
-  return lowered;
-}
-
  static std::string GetDirectiveName(const TokenSequence &line, size_t *rest) {
    size_t tokens{line.size()};
    size_t j{SkipBlanks(line, 0, tokens)};
@@ -374,7 +362,7 @@ static std::string GetDirectiveName(const TokenSequence &line, size_t *rest) {
      return {};
    }
    *rest = SkipBlanks(line, j + 1, tokens);
-  return ConvertToLowerCase(line[j].ToString());
+  return ToLowerCaseLetters(line[j].ToString());
  }
  
  bool Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
@@ -391,13 +379,13 @@ bool Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
    if (j == tokens) {
      return true;
    }
-  if (isdigit(dir[j][0]) || dir[j][0] == '"') {
+  if (IsDecimalDigit(dir[j][0]) || dir[j][0] == '"') {
      return true;  // TODO: treat as #line
    }
-  std::string dirName{ConvertToLowerCase(dir[j].ToString())};
+  std::string dirName{ToLowerCaseLetters(dir[j].ToString())};
    j = SkipBlanks(dir, j + 1, tokens);
-  CharPointerWithLength nameToken;
-  if (j < tokens && IsIdentifierFirstCharacter(dir[j][0])) {
+  ContiguousChars nameToken;
+  if (j < tokens && IsLegalIdentifierStart(dir[j][0])) {
      nameToken = dir[j];
    }
    if (dirName == "line") {
@@ -421,7 +409,7 @@ bool Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
            if (an == "...") {
              isVariadic = true;
            } else {
-            if (an.empty() || !IsIdentifierFirstCharacter(an[0])) {
+            if (an.empty() || !IsLegalIdentifierStart(an[0])) {
                prescanner->Complain(
                    "#define: missing or invalid argument name"_en_US);
                return false;
@@ -598,13 +586,12 @@ bool Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
    return false;
  }
  
-CharPointerWithLength Preprocessor::SaveTokenAsName(
-    const CharPointerWithLength &t) {
+ContiguousChars Preprocessor::SaveTokenAsName(const ContiguousChars &t) {
    names_.push_back(t.ToString());
    return {names_.back().data(), names_.back().size()};
  }
  
-bool Preprocessor::IsNameDefined(const CharPointerWithLength &token) {
+bool Preprocessor::IsNameDefined(const ContiguousChars &token) {
    return definitions_.find(token) != definitions_.end();
  }
  
@@ -743,14 +730,14 @@ static std::int64_t ExpressionValue(const TokenSequence &token,
    std::int64_t left{0};
    if (t == "(") {
      op = PARENS;
-  } else if (isdigit(t[0])) {
+  } else if (IsDecimalDigit(t[0])) {
      op = CONST;
      size_t consumed{0};
      left = std::stoll(t, &consumed);
      if (consumed < t.size()) {
        *error = "uninterpretable numeric constant '"_en_US;
      }
-  } else if (IsIdentifierFirstCharacter(t[0])) {
+  } else if (IsLegalIdentifierStart(t[0])) {
      // undefined macro name -> zero
      // TODO: BOZ constants?
      op = CONST;
@@ -759,7 +746,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token,
    } else if (t == "-") {
      op = UMINUS;
    } else if (t == "." && *atToken + 2 < tokens &&
-      ConvertToLowerCase(token[*atToken + 1].ToString()) == "not" &&
+      ToLowerCaseLetters(token[*atToken + 1].ToString()) == "not" &&
        token[*atToken + 2].ToString() == ".") {
      op = NOT;
      *atToken += 2;
@@ -803,7 +790,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token,
    t = token[*atToken].ToString();
    if (t == "." && *atToken + 2 < tokens &&
        token[*atToken + 2].ToString() == ".") {
-    t += ConvertToLowerCase(token[*atToken + 1].ToString()) + '.';
+    t += ToLowerCaseLetters(token[*atToken + 1].ToString()) + '.';
      advance = 3;
    }
    auto it = opNameMap.find(t);
@@ -913,14 +900,13 @@ bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, size_t first,
    TokenSequence expr1{StripBlanks(expr, first, first + exprTokens)};
    TokenSequence expr2;
    for (size_t j{0}; j < expr1.size(); ++j) {
-    if (ConvertToLowerCase(expr1[j].ToString()) == "defined") {
-      CharPointerWithLength name;
+    if (ToLowerCaseLetters(expr1[j].ToString()) == "defined") {
+      ContiguousChars name;
        if (j + 3 < expr1.size() && expr1[j + 1].ToString() == "(" &&
            expr1[j + 3].ToString() == ")") {
          name = expr1[j + 2];
          j += 3;
-      } else if (j + 1 < expr1.size() &&
-          IsIdentifierFirstCharacter(expr1[j + 1])) {
+      } else if (j + 1 < expr1.size() && IsLegalIdentifierStart(expr1[j + 1])) {
          name = expr1[j++];
        }
        if (!name.empty()) {
diff --git a/flang/lib/parser/preprocessor.h b/flang/lib/parser/preprocessor.h

index e84ac8e..c210689 100644 (file)
--- a/flang/lib/parser/preprocessor.h
+++ b/flang/lib/parser/preprocessor.h
@@ -70,8 +70,8 @@ private:
    enum class IsElseActive { No, Yes };
    enum class CanDeadElseAppear { No, Yes };
  
-  CharPointerWithLength SaveTokenAsName(const CharPointerWithLength &);
-  bool IsNameDefined(const CharPointerWithLength &);
+  ContiguousChars SaveTokenAsName(const ContiguousChars &);
+  bool IsNameDefined(const ContiguousChars &);
    TokenSequence ReplaceMacros(const TokenSequence &, const Prescanner &);
    bool SkipDisabledConditionalCode(
        const std::string &, IsElseActive, Prescanner *);
@@ -80,7 +80,7 @@ private:
  
    AllSources *allSources_;
    std::list<std::string> names_;
-  std::unordered_map<CharPointerWithLength, Definition> definitions_;
+  std::unordered_map<ContiguousChars, Definition> definitions_;
    std::stack<CanDeadElseAppear> ifStack_;
  };
  }  // namespace parser
diff --git a/flang/lib/parser/prescan.cc b/flang/lib/parser/prescan.cc

index fcd52a6..4e968e2 100644 (file)
--- a/flang/lib/parser/prescan.cc
+++ b/flang/lib/parser/prescan.cc
@@ -1,10 +1,10 @@
  #include "prescan.h"
+#include "characters.h"
  #include "idioms.h"
  #include "message.h"
  #include "preprocessor.h"
  #include "source.h"
  #include "token-sequence.h"
-#include <cctype>
  #include <cstring>
  #include <sstream>
  #include <utility>
@@ -28,7 +28,7 @@ Prescanner::Prescanner(const Prescanner &that)
  bool Prescanner::Prescan(ProvenanceRange range) {
    AllSources *allSources{cooked_->allSources()};
    ProvenanceRange around{allSources->GetContiguousRangeAround(range)};
-  startProvenance_ = range.LocalOffsetToProvenance(0);
+  startProvenance_ = range.start();
    size_t offset{0};
    const SourceFile *source{
        allSources->GetSourceFile(startProvenance_, &offset)};
@@ -181,10 +181,6 @@ void Prescanner::SkipSpaces() {
    }
  }
  
-static inline bool IsNameChar(char ch) {
-  return isalnum(ch) || ch == '_' || ch == '$' || ch == '@';
-}
-
  bool Prescanner::NextToken(TokenSequence *tokens) {
    CHECK(at_ >= start_ && at_ < limit_);
    if (inFixedForm_) {
@@ -206,18 +202,18 @@ bool Prescanner::NextToken(TokenSequence *tokens) {
    if (*at_ == '\'' || *at_ == '"') {
      QuotedCharacterLiteral(tokens);
      preventHollerith_ = false;
-  } else if (isdigit(*at_)) {
+  } else if (IsDecimalDigit(*at_)) {
      int n{0};
      static constexpr int maxHollerith = 256 * (132 - 6);
      do {
        if (n < maxHollerith) {
-        n = 10 * n + *at_ - '0';
+        n = 10 * n + DecimalDigitValue(*at_);
        }
        EmitCharAndAdvance(tokens, *at_);
        if (inFixedForm_) {
          SkipSpaces();
        }
-    } while (isdigit(*at_));
+    } while (IsDecimalDigit(*at_));
      if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
          !preventHollerith_) {
        EmitCharAndAdvance(tokens, 'h');
@@ -232,11 +228,11 @@ bool Prescanner::NextToken(TokenSequence *tokens) {
        }
        inCharLiteral_ = false;
      } else if (*at_ == '.') {
-      while (isdigit(EmitCharAndAdvance(tokens, *at_))) {
+      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
        }
        ExponentAndKind(tokens);
      } else if (ExponentAndKind(tokens)) {
-    } else if (isalpha(*at_)) {
+    } else if (IsLetter(*at_)) {
        // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
        // we don't misrecognize I9HOLLERITH as an identifier in the next case.
        EmitCharAndAdvance(tokens, *at_);
@@ -244,16 +240,16 @@ bool Prescanner::NextToken(TokenSequence *tokens) {
      preventHollerith_ = false;
    } else if (*at_ == '.') {
      char nch{EmitCharAndAdvance(tokens, '.')};
-    if (isdigit(nch)) {
-      while (isdigit(EmitCharAndAdvance(tokens, *at_))) {
+    if (IsDecimalDigit(nch)) {
+      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
        }
        ExponentAndKind(tokens);
      } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
        EmitCharAndAdvance(tokens, '.');  // variadic macro definition ellipsis
      }
      preventHollerith_ = false;
-  } else if (IsNameChar(*at_)) {
-    while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
+  } else if (IsLegalInIdentifier(*at_)) {
+    while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
      }
      if (*at_ == '\'' || *at_ == '"') {
        QuotedCharacterLiteral(tokens);
@@ -297,40 +293,31 @@ bool Prescanner::ExponentAndKind(TokenSequence *tokens) {
    if (*at_ == '+' || *at_ == '-') {
      EmitCharAndAdvance(tokens, *at_);
    }
-  while (isdigit(*at_)) {
+  while (IsDecimalDigit(*at_)) {
      EmitCharAndAdvance(tokens, *at_);
    }
    if (*at_ == '_') {
-    while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
+    while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
      }
    }
    return true;
  }
  
  void Prescanner::EmitQuotedCharacter(TokenSequence *tokens, char ch) {
-  switch (ch) {
-  case '\a': EmitEscapedChar(tokens, 'a'); break;
-  case '\b': EmitEscapedChar(tokens, 'b'); break;
-  case '\f': EmitEscapedChar(tokens, 'f'); break;
-  case '\r': EmitEscapedChar(tokens, 'r'); break;
-  case '\t': EmitEscapedChar(tokens, 't'); break;
-  case '\v': EmitEscapedChar(tokens, 'v'); break;
-  case '\\':
-    if (!enableBackslashEscapesInCharLiterals_) {
-      EmitInsertedChar(tokens, '\\');
-    }
-    EmitChar(tokens, '\\');
-    break;
-  default:
-    if (ch < ' ') {
-      // emit an octal escape sequence
+  if (std::optional escape{BackslashEscapeChar(ch)}) {
+    if (ch != '\'' && ch != '"' &&
+        (ch != '\\' || !enableBackslashEscapesInCharLiterals_)) {
        EmitInsertedChar(tokens, '\\');
-      EmitInsertedChar(tokens, '0' + ((ch >> 6) & 3));
-      EmitInsertedChar(tokens, '0' + ((ch >> 3) & 7));
-      EmitInsertedChar(tokens, '0' + (ch & 7));
-    } else {
-      EmitChar(tokens, ch);
      }
+    EmitChar(tokens, *escape);
+  } else if (ch < ' ') {
+    // emit an octal escape sequence
+    EmitInsertedChar(tokens, '\\');
+    EmitInsertedChar(tokens, '0' + ((ch >> 6) & 3));
+    EmitInsertedChar(tokens, '0' + ((ch >> 3) & 7));
+    EmitInsertedChar(tokens, '0' + (ch & 7));
+  } else {
+    EmitChar(tokens, ch);
    }
  }
  
diff --git a/flang/lib/parser/prescan.h b/flang/lib/parser/prescan.h

index 8782f9d..8951d07 100644 (file)
--- a/flang/lib/parser/prescan.h
+++ b/flang/lib/parser/prescan.h
@@ -80,11 +80,6 @@ private:
      tokens->PutNextTokenChar(ch, provenance);
    }
  
-  void EmitEscapedChar(TokenSequence *tokens, char ch) {
-    EmitInsertedChar(tokens, '\\');
-    EmitChar(tokens, ch);
-  }
-
    char EmitCharAndAdvance(TokenSequence *tokens, char ch) {
      EmitChar(tokens, ch);
      NextChar();
diff --git a/flang/lib/parser/provenance.cc b/flang/lib/parser/provenance.cc

index bec938c..b3c847c 100644 (file)
--- a/flang/lib/parser/provenance.cc
+++ b/flang/lib/parser/provenance.cc
@@ -72,7 +72,7 @@ AllSources::~AllSources() {}
  
  const char &AllSources::operator[](Provenance at) const {
    const Origin &origin{MapToOrigin(at)};
-  return origin[origin.covers.ProvenanceToLocalOffset(at)];
+  return origin[origin.covers.MemberOffset(at)];
  }
  
  void AllSources::PushSearchPathDirectory(std::string directory) {
@@ -98,7 +98,7 @@ ProvenanceRange AllSources::AddIncludedFile(
      const SourceFile &source, ProvenanceRange from, bool isModule) {
    ProvenanceRange covers{range_.NextAfter(), source.bytes()};
    CHECK(range_.AnnexIfPredecessor(covers));
-  CHECK(origin_.back().covers.IsPredecessor(covers));
+  CHECK(origin_.back().covers.ImmediatelyPrecedes(covers));
    origin_.emplace_back(covers, source, from, isModule);
    return covers;
  }
@@ -107,7 +107,7 @@ ProvenanceRange AllSources::AddMacroCall(
      ProvenanceRange def, ProvenanceRange use, const std::string &expansion) {
    ProvenanceRange covers{range_.NextAfter(), expansion.size()};
    CHECK(range_.AnnexIfPredecessor(covers));
-  CHECK(origin_.back().covers.IsPredecessor(covers));
+  CHECK(origin_.back().covers.ImmediatelyPrecedes(covers));
    origin_.emplace_back(covers, def, use, expansion);
    return covers;
  }
@@ -115,7 +115,7 @@ ProvenanceRange AllSources::AddMacroCall(
  ProvenanceRange AllSources::AddCompilerInsertion(std::string text) {
    ProvenanceRange covers{range_.NextAfter(), text.size()};
    CHECK(range_.AnnexIfPredecessor(covers));
-  CHECK(origin_.back().covers.IsPredecessor(covers));
+  CHECK(origin_.back().covers.ImmediatelyPrecedes(covers));
    origin_.emplace_back(covers, text);
    return covers;
  }
@@ -128,7 +128,7 @@ void AllSources::Identify(std::ostream &o, Provenance at,
    std::visit(
        visitors{
            [&](const Inclusion &inc) {
-            size_t offset{origin.covers.ProvenanceToLocalOffset(at)};
+            size_t offset{origin.covers.MemberOffset(at)};
              std::pair<int, int> pos{inc.source.FindOffsetLineAndColumn(offset)};
              o << prefix << "at line " << pos.first << ", column " << pos.second;
              if (echoSourceLine) {
@@ -151,24 +151,21 @@ void AllSources::Identify(std::ostream &o, Provenance at,
                << inc.source.path();
              if (IsValid(origin.replaces)) {
                o << (inc.isModule ? " used\n" : " included\n");
-              Identify(o, origin.replaces.LocalOffsetToProvenance(0), indented);
+              Identify(o, origin.replaces.start(), indented);
              } else {
                o << '\n';
              }
            },
            [&](const Macro &mac) {
              o << prefix << "in the expansion of a macro that was defined\n";
-            Identify(o, mac.definition.LocalOffsetToProvenance(0), indented,
-                echoSourceLine);
+            Identify(o, mac.definition.start(), indented, echoSourceLine);
              o << prefix << "and called\n";
-            Identify(o, origin.replaces.LocalOffsetToProvenance(0), indented,
-                echoSourceLine);
+            Identify(o, origin.replaces.start(), indented, echoSourceLine);
              if (echoSourceLine) {
                o << prefix << "and expanded to\n"
                  << indented << "  " << mac.expansion << '\n'
                  << indented << "  ";
-              for (size_t j{0}; origin.covers.LocalOffsetToProvenance(j) < at;
-                   ++j) {
+              for (size_t j{0}; origin.covers.OffsetMember(j) < at; ++j) {
                  o << (mac.expansion[j] == '\t' ? '\t' : ' ');
                }
                o << "^\n";
@@ -187,30 +184,28 @@ void AllSources::Identify(std::ostream &o, Provenance at,
  const SourceFile *AllSources::GetSourceFile(
      Provenance at, size_t *offset) const {
    const Origin &origin{MapToOrigin(at)};
-  return std::visit(
-      visitors{[&](const Inclusion &inc) {
-                 if (offset != nullptr) {
-                   *offset = origin.covers.ProvenanceToLocalOffset(at);
-                 }
-                 return &inc.source;
-               },
-          [&](const Macro &mac) {
-            return GetSourceFile(
-                origin.replaces.LocalOffsetToProvenance(0), offset);
-          },
-          [offset](const CompilerInsertion &) {
-            if (offset != nullptr) {
-              *offset = 0;
-            }
-            return static_cast<const SourceFile *>(nullptr);
-          }},
+  return std::visit(visitors{[&](const Inclusion &inc) {
+                               if (offset != nullptr) {
+                                 *offset = origin.covers.MemberOffset(at);
+                               }
+                               return &inc.source;
+                             },
+                        [&](const Macro &mac) {
+                          return GetSourceFile(origin.replaces.start(), offset);
+                        },
+                        [offset](const CompilerInsertion &) {
+                          if (offset != nullptr) {
+                            *offset = 0;
+                          }
+                          return static_cast<const SourceFile *>(nullptr);
+                        }},
        origin.u);
  }
  
  ProvenanceRange AllSources::GetContiguousRangeAround(
      ProvenanceRange range) const {
    CHECK(IsValid(range));
-  const Origin &origin{MapToOrigin(range.LocalOffsetToProvenance(0))};
+  const Origin &origin{MapToOrigin(range.start())};
    CHECK(origin.covers.Contains(range));
    return origin.covers;
  }
@@ -232,7 +227,7 @@ Provenance AllSources::CompilerInsertionProvenance(char ch) {
      return iter->second;
    }
    ProvenanceRange newCharRange{AddCompilerInsertion(std::string{ch})};
-  Provenance newCharProvenance{newCharRange.LocalOffsetToProvenance(0)};
+  Provenance newCharProvenance{newCharRange.start()};
    compilerInsertionProvenance_.insert(std::make_pair(ch, newCharProvenance));
    return newCharProvenance;
  }
@@ -265,7 +260,7 @@ const AllSources::Origin &AllSources::MapToOrigin(Provenance at) const {
    size_t low{0}, count{origin_.size()};
    while (count > 1) {
      size_t mid{low + (count >> 1)};
-    if (at < origin_[mid].covers.LocalOffsetToProvenance(0)) {
+    if (at < origin_[mid].covers.start()) {
        count = mid - low;
      } else {
        count -= mid - low;
@@ -292,9 +287,9 @@ void CookedSource::Marshal() {
    buffer_.clear();
  }
  
-void ProvenanceRange::Dump(std::ostream &o) const {
-  o << "[" << start_.offset() << ".." << (start_.offset() + bytes_ - 1) << "] ("
-    << bytes_ << " bytes)";
+// Writes a range to `o` as "[first..last] (N bytes)", where first and last
+// are the offsets of r.start() and r.Last() and N is r.size().
+// NOTE(review): Last() is start + (size - 1), so an empty range would print
+// a wrapped "last" offset -- confirm that empty ranges are never dumped.
+static void DumpRange(std::ostream &o, const ProvenanceRange &r) {
+  o << "[" << r.start().offset() << ".." << r.Last().offset() << "] ("
+    << r.size() << " bytes)";
  }
  
  void OffsetToProvenanceMappings::Dump(std::ostream &o) const {
@@ -302,18 +297,18 @@ void OffsetToProvenanceMappings::Dump(std::ostream &o) const {
      size_t n{m.range.size()};
      o << "offsets [" << m.start << ".." << (m.start + n - 1)
        << "] -> provenances ";
-    m.range.Dump(o);
+    DumpRange(o, m.range);
      o << '\n';
    }
  }
  
  void AllSources::Dump(std::ostream &o) const {
    o << "AllSources range_ ";
-  range_.Dump(o);
+  DumpRange(o, range_);
    o << '\n';
    for (const Origin &m : origin_) {
      o << "   ";
-    m.covers.Dump(o);
+    DumpRange(o, m.covers);
      o << " -> ";
      std::visit(visitors{[&](const Inclusion &inc) {
                            if (inc.isModule) {
diff --git a/flang/lib/parser/provenance.h b/flang/lib/parser/provenance.h

index df6b05e..9589845 100644 (file)
--- a/flang/lib/parser/provenance.h
+++ b/flang/lib/parser/provenance.h
@@ -47,6 +47,10 @@ public:
      return {offset_ + static_cast<size_t>(n)};
    }
    Provenance operator+(size_t n) const { return {offset_ + n}; }
+  // Distance from `that` up to *this, computed as the difference of the two
+  // offsets.  The CHECK requires that `that` not follow *this, guarding the
+  // unsigned subtraction below against wrapping (assuming CHECK halts on
+  // failure -- TODO confirm).
+  size_t operator-(Provenance that) const {
+    CHECK(that <= *this);
+    return offset_ - that.offset_;
+  }
    bool operator<(Provenance that) const { return offset_ < that.offset_; }
    bool operator<=(Provenance that) const { return !(that < *this); }
    bool operator==(Provenance that) const { return offset_ == that.offset_; }
@@ -57,72 +61,64 @@ private:
    size_t offset_{0};
  };
  
-class ProvenanceRange {
+template<typename A> class Interval {
  public:
-  ProvenanceRange() {}
-  ProvenanceRange(Provenance s, size_t n) : start_{s}, bytes_{n} {
-    CHECK(n > 0);
-  }
-  ProvenanceRange(const ProvenanceRange &) = default;
-  ProvenanceRange(ProvenanceRange &&) = default;
-  ProvenanceRange &operator=(const ProvenanceRange &) = default;
-  ProvenanceRange &operator=(ProvenanceRange &&) = default;
-
-  bool operator==(ProvenanceRange that) const {
-    return start_ == that.start_ && bytes_ == that.bytes_;
-  }
-
-  size_t size() const { return bytes_; }
-
-  bool Contains(Provenance at) const {
-    return start_ <= at && at < start_ + bytes_;
+  using type = A;
+  Interval() {}
+  Interval(const A &s, size_t n) : start_{s}, size_{n} {}
+  Interval(A &&s, size_t n) : start_{std::move(s)}, size_{n} {}
+  Interval(const Interval &) = default;
+  Interval(Interval &&) = default;
+  Interval &operator=(const Interval &) = default;
+  Interval &operator=(Interval &&) = default;
+
+  bool operator==(const Interval &that) const {
+    return start_ == that.start_ && size_ == that.size_;
    }
  
-  bool Contains(ProvenanceRange that) const {
-    return Contains(that.start_) && Contains(that.start_ + (that.bytes_ - 1));
-  }
+  const A &start() const { return start_; }
+  size_t size() const { return size_; }
+  bool empty() const { return size_ == 0; }
  
-  size_t ProvenanceToLocalOffset(Provenance at) const {
-    CHECK(Contains(at));
-    return at.offset() - start_.offset();
+  bool Contains(const A &x) const { return start_ <= x && x < start_ + size_; }
+  // True when every member of `that` is also a member of *this.
+  // An empty `that` has no last member, so only its start is tested; this
+  // avoids evaluating start_ + (size_ - 1) with size_ == 0, where the
+  // unsigned subtraction would wrap.  Results for non-empty `that` are
+  // unchanged: both its first and last members must lie in *this.
+  bool Contains(const Interval &that) const {
+    return Contains(that.start_) && (that.empty() || Contains(that.Last()));
    }
-
-  Provenance LocalOffsetToProvenance(size_t at) const {
-    CHECK(at < bytes_);
-    return start_ + at;
+  bool ImmediatelyPrecedes(const Interval &that) const {
+    return NextAfter() == that.start_;
    }
-
-  Provenance NextAfter() const { return start_ + bytes_; }
-
-  ProvenanceRange Suffix(size_t at) const {
-    CHECK(at < bytes_);
-    return {start_ + at, bytes_ - at};
+  bool AnnexIfPredecessor(const Interval &that) {
+    if (ImmediatelyPrecedes(that)) {
+      size_ += that.size_;
+      return true;
+    }
+    return false;
    }
  
-  ProvenanceRange Prefix(size_t bytes) const {
-    CHECK(bytes > 0);
-    return {start_, std::min(bytes_, bytes)};
+  size_t MemberOffset(const A &x) const {
+    CHECK(Contains(x));
+    return x - start_;
    }
-
-  bool IsPredecessor(ProvenanceRange next) {
-    return start_ + bytes_ == next.start_;
+  A OffsetMember(size_t n) const {
+    CHECK(n < size_);
+    return start_ + n;
    }
  
-  bool AnnexIfPredecessor(ProvenanceRange next) {
-    if (IsPredecessor(next)) {
-      bytes_ += next.bytes_;
-      return true;
-    }
-    return false;
+  A Last() const { return start_ + (size_ - 1); }
+  A NextAfter() const { return start_ + size_; }
+  Interval Prefix(size_t n) const { return {start_, std::min(size_, n)}; }
+  Interval Suffix(size_t n) const {
+    CHECK(n <= size_);
+    return {start_ + n, size_ - n};
    }
  
-  void Dump(std::ostream &) const;
-
  private:
-  Provenance start_;
-  size_t bytes_{0};
+  A start_;
+  size_t size_{0};
  };
  
+using ProvenanceRange = Interval<Provenance>;
+
  // Maps 0-based local offsets in some contiguous range (e.g., a token
  // sequence) to their provenances.  Lookup time is on the order of
  // O(log(#of intervals with contiguous provenances)).  As mentioned
diff --git a/flang/lib/parser/token-parsers.h b/flang/lib/parser/token-parsers.h

index d652302..d12e7a5 100644 (file)
--- a/flang/lib/parser/token-parsers.h
+++ b/flang/lib/parser/token-parsers.h
@@ -5,9 +5,9 @@
  // the prescanned character stream and recognize context-sensitive tokens.
  
  #include "basic-parsers.h"
+#include "characters.h"
  #include "idioms.h"
  #include "provenance.h"
-#include <cctype>
  #include <cstring>
  #include <functional>
  #include <limits>
@@ -41,25 +41,11 @@ private:
    const MessageFixedText text_;
  };
  
-static inline constexpr bool IsDecimalDigit(char ch) { return isdigit(ch); }
-
-static inline constexpr bool IsOctalDigit(char ch) {
-  return ch >= '0' && ch <= '7';
-}
-
-static inline constexpr bool IsHexadecimalDigit(char ch) {
-  return isxdigit(ch);
-}
-
-static inline constexpr bool IsLetter(char ch) { return isalpha(ch); }
-
-static inline constexpr char ToLower(char &&ch) { return tolower(ch); }
-
  constexpr CharPredicateGuardParser digit{
      IsDecimalDigit, "expected digit"_en_US};
  
-constexpr auto letter = applyFunction(
-    ToLower, CharPredicateGuardParser{IsLetter, "expected letter"_en_US});
+constexpr auto letter = applyFunction(ToLowerCaseLetter,
+    CharPredicateGuardParser{IsLetter, "expected letter"_en_US});
  
  template<char good> class CharMatch {
  public:
@@ -115,18 +101,19 @@ public:
            continue;  // redundant; ignore
          }
        }
-      if (!ch && !(ch = nextChar.Parse(state))) {
+      if (!ch.has_value() && !(ch = nextChar.Parse(state))) {
          return {};
        }
        if (spaceSkipping) {
          // medial space: 0 or more spaces/tabs accepted, none required
+        // TODO: designate and enforce free-form mandatory white space
          while (*ch == ' ' || *ch == '\t') {
            if (!(ch = nextChar.Parse(state))) {
              return {};
            }
          }
          // ch remains full for next iteration
-      } else if (*ch == tolower(*p)) {
+      } else if (IsSameApartFromCase(*ch, *p)) {
          ch.reset();
        } else {
          state->PutMessage(at, MessageExpectedText{str_, bytes_});
@@ -171,13 +158,6 @@ bracketed(const PA &p) {
    return "[" >> p / "]";
  }
  
-static inline int HexadecimalDigitValue(char ch) {
-  if (IsDecimalDigit(ch)) {
-    return ch - '0';
-  }
-  return toupper(ch) - 'A' + 10;
-}
-
  // Quoted character literal constants.
  struct CharLiteralChar {
    struct Result {
@@ -205,44 +185,36 @@ struct CharLiteralChar {
      if (!(och = nextChar.Parse(state)).has_value()) {
        return {};
      }
-    switch ((ch = *och)) {
-    case 'a': return {Result::Escaped('\a')};
-    case 'b': return {Result::Escaped('\b')};
-    case 'f': return {Result::Escaped('\f')};
-    case 'n': return {Result::Escaped('\n')};
-    case 'r': return {Result::Escaped('\r')};
-    case 't': return {Result::Escaped('\t')};
-    case 'v': return {Result::Escaped('\v')};
-    case '"':
-    case '\'':
-    case '\\': return {Result::Escaped(ch)};
-    case '\n':
+    ch = *och;
+    if (ch == '\n') {
        state->PutMessage(at, "unclosed character constant"_en_US);
        return {};
-    default:
-      if (IsOctalDigit(ch)) {
-        ch -= '0';
-        for (int j = (ch > 3 ? 1 : 2); j-- > 0;) {
-          static constexpr auto octalDigit = attempt(CharPredicateGuardParser{
-              IsOctalDigit, "expected octal digit"_en_US});
-          if ((och = octalDigit.Parse(state)).has_value()) {
-            ch = 8 * ch + *och - '0';
-          }
+    }
+    if (std::optional<char> escChar{BackslashEscapeValue(ch)}) {
+      return {Result::Escaped(*escChar)};
+    }
+    if (IsOctalDigit(ch)) {
+      ch -= '0';
+      for (int j = (ch > 3 ? 1 : 2); j-- > 0;) {
+        static constexpr auto octalDigit = attempt(CharPredicateGuardParser{
+            IsOctalDigit, "expected octal digit"_en_US});
+        if ((och = octalDigit.Parse(state)).has_value()) {
+          ch = 8 * ch + *och - '0';
          }
-      } else if (ch == 'x' || ch == 'X') {
-        ch = 0;
-        for (int j = 0; j++ < 2;) {
-          static constexpr auto hexDigit = attempt(CharPredicateGuardParser{
-              IsHexadecimalDigit, "expected hexadecimal digit"_en_US});
-          if ((och = hexDigit.Parse(state)).has_value()) {
-            ch = 16 * ch + HexadecimalDigitValue(*och);
-          }
+      }
+    } else if (ch == 'x' || ch == 'X') {
+      ch = 0;
+      for (int j = 0; j++ < 2;) {
+        static constexpr auto hexDigit = attempt(CharPredicateGuardParser{
+            IsHexadecimalDigit, "expected hexadecimal digit"_en_US});
+        if ((och = hexDigit.Parse(state)).has_value()) {
+          ch = 16 * ch + HexadecimalDigitValue(*och);
          }
-      } else {
-        state->PutMessage(at, "bad escaped character"_en_US);
        }
-      return {Result::Escaped(ch)};
+    } else {
+      state->PutMessage(at, "bad escaped character"_en_US);
      }
+    return {Result::Escaped(ch)};
    }
  };
  
@@ -310,7 +282,7 @@ struct BOZLiteral {
        if (*ch == quote) {
          break;
        }
-      if (!isxdigit(*ch)) {
+      if (!IsHexadecimalDigit(*ch)) {
          return {};
        }
        content += *ch;
diff --git a/flang/lib/parser/token-sequence.cc b/flang/lib/parser/token-sequence.cc

index 5c2a2f9..ad89e0f 100644 (file)
--- a/flang/lib/parser/token-sequence.cc
+++ b/flang/lib/parser/token-sequence.cc
@@ -1,11 +1,14 @@
  #include "token-sequence.h"
+#include "characters.h"
  
  namespace Fortran {
  namespace parser {
  
-bool CharPointerWithLength::IsBlank() const {
-  for (size_t j{0}; j < bytes_; ++j) {
-    char ch{data_[j]};
+bool ContiguousChars::IsBlank() const {
+  const char *data{interval_.start()};
+  size_t n{interval_.size()};
+  for (size_t j{0}; j < n; ++j) {
+    char ch{data[j]};
      if (ch != ' ' && ch != '\t') {
        return false;
      }
@@ -50,8 +53,8 @@ void TokenSequence::Put(const TokenSequence &that) {
  void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
    size_t offset{0};
    for (size_t j{0}; j < that.size(); ++j) {
-    CharPointerWithLength tok{that[j]};
-    Put(tok, range.LocalOffsetToProvenance(offset));
+    ContiguousChars tok{that[j]};
+    Put(tok, range.OffsetMember(offset));
      offset += tok.size();
    }
    CHECK(offset == range.size());
@@ -61,14 +64,14 @@ void TokenSequence::Put(const TokenSequence &that, size_t at, size_t tokens) {
    ProvenanceRange provenance;
    size_t offset{0};
    for (; tokens-- > 0; ++at) {
-    CharPointerWithLength tok{that[at]};
+    ContiguousChars tok{that[at]};
      size_t tokBytes{tok.size()};
      for (size_t j{0}; j < tokBytes; ++j) {
        if (offset == provenance.size()) {
          offset = 0;
          provenance = that.provenances_.Map(that.start_[at] + j);
        }
-      PutNextTokenChar(tok[j], provenance.LocalOffsetToProvenance(offset++));
+      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
      }
      CloseToken();
    }
@@ -81,7 +84,7 @@ void TokenSequence::Put(const char *s, size_t bytes, Provenance provenance) {
    CloseToken();
  }
  
-void TokenSequence::Put(const CharPointerWithLength &t, Provenance provenance) {
+void TokenSequence::Put(const ContiguousChars &t, Provenance provenance) {
    Put(&t[0], t.size(), provenance);
  }
  
@@ -99,7 +102,7 @@ void TokenSequence::EmitWithCaseConversion(CookedSource *cooked) const {
    size_t atToken{0};
    for (size_t j{0}; j < chars;) {
      size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
-    if (isalpha(char_[j])) {
+    if (IsLegalInIdentifier(char_[j])) {
        for (; j < nextStart; ++j) {
          cooked->Put(tolower(char_[j]));
        }
@@ -118,7 +121,7 @@ std::string TokenSequence::ToString() const {
  Provenance TokenSequence::GetTokenProvenance(
      size_t token, size_t offset) const {
    ProvenanceRange range{provenances_.Map(start_[token] + offset)};
-  return range.LocalOffsetToProvenance(0);
+  return range.start();
  }
  
  ProvenanceRange TokenSequence::GetTokenProvenanceRange(
diff --git a/flang/lib/parser/token-sequence.h b/flang/lib/parser/token-sequence.h

index d093f0e..12333ba 100644 (file)
--- a/flang/lib/parser/token-sequence.h
+++ b/flang/lib/parser/token-sequence.h
@@ -16,37 +16,32 @@ namespace parser {
  // Just a const char pointer with an associated length; does not presume
  // to own the referenced data.  Used to describe buffered tokens and hash
  // table keys.
-class CharPointerWithLength {
+class ContiguousChars {
  public:
-  CharPointerWithLength() {}
-  CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}
-  CharPointerWithLength(const std::string &s)
-    : data_{s.data()}, bytes_{s.size()} {}
-  CharPointerWithLength(const CharPointerWithLength &that)
-    : data_{that.data_}, bytes_{that.bytes_} {}
-  CharPointerWithLength &operator=(const CharPointerWithLength &that) {
-    data_ = that.data_;
-    bytes_ = that.bytes_;
-    return *this;
-  }
+  ContiguousChars() {}
+  ContiguousChars(const char *x, size_t n) : interval_{x, n} {}
+  ContiguousChars(const std::string &s) : interval_{s.data(), s.size()} {}
+  ContiguousChars(const ContiguousChars &that) = default;
+  ContiguousChars &operator=(const ContiguousChars &that) = default;
  
-  bool empty() const { return bytes_ == 0; }
-  size_t size() const { return bytes_; }
-  const char &operator[](size_t j) const { return data_[j]; }
+  bool empty() const { return interval_.empty(); }
+  size_t size() const { return interval_.size(); }
+  const char &operator[](size_t j) const { return interval_.start()[j]; }
  
    bool IsBlank() const;
-  std::string ToString() const { return std::string{data_, bytes_}; }
+  std::string ToString() const {
+    return std::string{interval_.start(), interval_.size()};
+  }
  
  private:
-  const char *data_{nullptr};
-  size_t bytes_{0};
+  Interval<const char *> interval_{nullptr, 0};
  };
  }  // namespace parser
  }  // namespace Fortran
  
-// Specializations to enable std::unordered_map<CharPointerWithLength, ...>
-template<> struct std::hash<Fortran::parser::CharPointerWithLength> {
-  size_t operator()(const Fortran::parser::CharPointerWithLength &x) const {
+// Specializations to enable std::unordered_map<ContiguousChars, ...>
+template<> struct std::hash<Fortran::parser::ContiguousChars> {
+  size_t operator()(const Fortran::parser::ContiguousChars &x) const {
      size_t hash{0}, bytes{x.size()};
      for (size_t j{0}; j < bytes; ++j) {
        hash = (hash * 31) ^ x[j];
@@ -55,9 +50,9 @@ template<> struct std::hash<Fortran::parser::CharPointerWithLength> {
    }
  };
  
-template<> struct std::equal_to<Fortran::parser::CharPointerWithLength> {
-  bool operator()(const Fortran::parser::CharPointerWithLength &x,
-      const Fortran::parser::CharPointerWithLength &y) const {
+template<> struct std::equal_to<Fortran::parser::ContiguousChars> {
+  bool operator()(const Fortran::parser::ContiguousChars &x,
+      const Fortran::parser::ContiguousChars &y) const {
      return x.size() == y.size() &&
          std::memcmp(static_cast<const void *>(&x[0]),
              static_cast<const void *>(&y[0]), x.size()) == 0;
@@ -94,7 +89,7 @@ public:
      return *this;
    }
  
-  CharPointerWithLength operator[](size_t token) const {
+  ContiguousChars operator[](size_t token) const {
      return {&char_[start_[token]], TokenBytes(token)};
    }
  
@@ -124,7 +119,7 @@ public:
    void Put(const TokenSequence &, ProvenanceRange);
    void Put(const TokenSequence &, size_t at, size_t tokens = 1);
    void Put(const char *, size_t, Provenance);
-  void Put(const CharPointerWithLength &, Provenance);
+  void Put(const ContiguousChars &, Provenance);
    void Put(const std::string &, Provenance);
    void Put(const std::stringstream &, Provenance);
    void EmitWithCaseConversion(CookedSource *) const;
author	peter klausler <pklausler@nvidia.com>
	Tue, 27 Feb 2018 22:02:10 +0000 (14:02 -0800)
committer	peter klausler <pklausler@nvidia.com>
	Tue, 27 Feb 2018 23:55:10 +0000 (15:55 -0800)
flang/lib/parser/characters.h	[new file with mode: 0644]	patch \| blob
flang/lib/parser/grammar.h		patch \| blob \| history
flang/lib/parser/parse-state.h		patch \| blob \| history
flang/lib/parser/parse-tree.h		patch \| blob \| history
flang/lib/parser/preprocessor.cc		patch \| blob \| history
flang/lib/parser/preprocessor.h		patch \| blob \| history
flang/lib/parser/prescan.cc		patch \| blob \| history
flang/lib/parser/prescan.h		patch \| blob \| history
flang/lib/parser/provenance.cc		patch \| blob \| history
flang/lib/parser/provenance.h		patch \| blob \| history
flang/lib/parser/token-parsers.h		patch \| blob \| history
flang/lib/parser/token-sequence.cc		patch \| blob \| history
flang/lib/parser/token-sequence.h		patch \| blob \| history