[flang] nag debugged

author peter klausler <pklausler@nvidia.com>

Mon, 17 Jun 2019 23:12:28 +0000 (16:12 -0700)

committer peter klausler <pklausler@nvidia.com>

Mon, 17 Jun 2019 23:13:10 +0000 (16:13 -0700)
author peter klausler <pklausler@nvidia.com>
Mon, 17 Jun 2019 23:12:28 +0000 (16:12 -0700)
committer peter klausler <pklausler@nvidia.com>
Mon, 17 Jun 2019 23:13:10 +0000 (16:13 -0700)
diff --git a/flang/documentation/Parsing.md b/flang/documentation/Parsing.md

index 943c32b..b6cf135 100644 (file)
--- a/flang/documentation/Parsing.md
+++ b/flang/documentation/Parsing.md
@@ -58,8 +58,8 @@ by a CookedSource class instance, in which:
  * except for the payload in character literals, Hollerith constants,
    and character and Hollerith edit descriptors, all letters have been
    normalized to lower case
-* all original non-ASCII characters in character literals have been
-  decoded, converted to UTF-8, and then formatted with escape sequences
+* all original non-ASCII characters in Hollerith constants have been
+  decoded and re-encoded into UTF-8
  
  Lines in the cooked character stream can be of arbitrary length.
  
diff --git a/flang/lib/evaluate/formatting.cc b/flang/lib/evaluate/formatting.cc

index 485277f..9f006e2 100644 (file)
--- a/flang/lib/evaluate/formatting.cc
+++ b/flang/lib/evaluate/formatting.cc
@@ -92,9 +92,7 @@ std::ostream &Constant<Type<TypeCategory::Character, KIND>>::AsFortran(
      } else if (Rank() == 0) {
        o << Result::kind << '_';
      }
-    o << parser::QuoteCharacterLiteral(value,
-        false /* avoid backslash escapes */,
-        parser::Encoding::UTF_8 /* module files are UTF-8 */);
+    o << parser::QuoteCharacterLiteral(value);
    }
    if (Rank() > 0) {
      o << ']';
diff --git a/flang/lib/parser/characters.h b/flang/lib/parser/characters.h

index 43bbbbb..e4ebfb4 100644 (file)
--- a/flang/lib/parser/characters.h
+++ b/flang/lib/parser/characters.h
@@ -161,13 +161,9 @@ void EmitQuotedChar(char32_t ch, const NORMAL &emit, const INSERTED &insert,
        if (std::optional<char> escape{BackslashEscapeChar(ch)}) {
          emit(*escape);
        } else {
-        // octal escape sequence
-        if (ch > 077) {
-          insert('0' + (ch >> 6));
-        }
-        if (ch > 07) {
-          insert('0' + ((ch >> 3) & 7));
-        }
+        // octal escape sequence; always emit 3 digits to avoid ambiguity
+        insert('0' + (ch >> 6));
+        insert('0' + ((ch >> 3) & 7));
          insert('0' + (ch & 7));
        }
      } else {
diff --git a/flang/lib/parser/prescan.cc b/flang/lib/parser/prescan.cc

index c968ac2..b96c185 100644 (file)
--- a/flang/lib/parser/prescan.cc
+++ b/flang/lib/parser/prescan.cc
@@ -558,32 +558,11 @@ void Prescanner::QuotedCharacterLiteral(
    inCharLiteral_ = true;
    const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
    const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
+  bool isEscaped{false};
    bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
-  Encoding encoding{encoding_};
-  if (isKanji) {
-    // NC'...' - the contents are always decoded as EUC_JP
-    encoding = Encoding::EUC_JP;
-  } else if (encoding == Encoding::EUC_JP) {
-    encoding = Encoding::LATIN_1;  // for compatibility with tests
-  }
    while (true) {
-    DecodedCharacter decoded{DecodeCharacter(
-        encoding, at_, static_cast<std::size_t>(limit_ - at_), escapesEnabled)};
-    if (decoded.bytes <= 0) {
-      Say(GetProvenanceRange(start, at_),
-          "Bad character in character literal"_en_US);
-      // Just eat a byte and press on.
-      decoded.codepoint = static_cast<unsigned char>(*at_);
-      decoded.bytes = 1;
-    }
-    char32_t ch{decoded.codepoint};
-    if (ch == quote) {
-      if (decoded.bytes > 1) {
-        EmitChar(tokens, '\\');
-      }
-    }
-    EmitQuotedChar(
-        ch, emit, insert, true /* use backslash escapes */, Encoding::UTF_8);
+    EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
+        Encoding::LATIN_1);
      while (PadOutCharacterLiteral(tokens)) {
      }
      if (*at_ == '\n') {
@@ -593,13 +572,20 @@ void Prescanner::QuotedCharacterLiteral(
        }
        break;
      }
-    at_ += decoded.bytes - 1;
+    isEscaped = !isEscaped && *at_ == '\\';
      end = at_ + 1;
      NextChar();
      if (*at_ == quote) {
-      // A doubled quote mark becomes a single instance of the quote character
-      // in the literal (later).  There can be spaces between the quotes in
-      // fixed form source.
+      if (isEscaped) {
+        if (escapesEnabled) {
+          continue;
+        } else {
+          insert('\\');
+        }
+      }
+      // A doubled unescaped quote mark becomes a single instance of that
+      // quote character in the literal (later).  There can be spaces between
+      // the quotes in fixed form source.
        EmitChar(tokens, quote);
        inCharLiteral_ = false;  // for cases like print *, '...'!comment
        NextChar();
@@ -629,10 +615,10 @@ void Prescanner::Hollerith(
      } else {
        NextChar();
        // Multi-byte character encodings each count as single characters.
+      // The cooked character stream always uses UTF-8 for Hollerith.
        DecodedCharacter decoded{DecodeCharacter(
            encoding_, at_, static_cast<std::size_t>(limit_ - at_))};
        if (decoded.bytes > 0) {
-        // The cooked character stream we emit is always in UTF-8.
          EncodedCharacter utf8{EncodeUTF_8(decoded.codepoint)};
          for (int j{0}; j < utf8.bytes; ++j) {
            EmitChar(tokens, utf8.buffer[j]);
@@ -812,9 +798,11 @@ bool Prescanner::SkipCommentLine(bool afterAmpersand) {
    } else if (inPreprocessorDirective_) {
      return false;
    } else if (lineClass.kind ==
-      LineClassification::Kind::ConditionalCompilationDirective) {
+          LineClassification::Kind::ConditionalCompilationDirective ||
+      lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
      // Allow conditional compilation directives (e.g., #ifdef) to affect
      // continuation lines.
+    // Allow other preprocessor directives, too, except #include.
      preprocessor_.Directive(TokenizePreprocessorDirective(), this);
      return true;
    } else if (afterAmpersand &&
@@ -952,7 +940,7 @@ bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
        NextLine();
        return true;
      }
-  } while (SkipCommentLine(false /* not after & */));
+  } while (SkipCommentLine(false /* not after ampersand */));
    return false;
  }
  
diff --git a/flang/lib/parser/token-parsers.h b/flang/lib/parser/token-parsers.h

index 41742fe..016c0da 100644 (file)
--- a/flang/lib/parser/token-parsers.h
+++ b/flang/lib/parser/token-parsers.h
@@ -230,18 +230,31 @@ struct CharLiteralChar {
          return std::make_pair(*escChar, true);
        } else if (IsOctalDigit(**cp)) {
          char result{static_cast<char>(**cp - '0')};
-        for (int j = (result > 3 ? 1 : 2); j-- > 0;) {
+        for (int j{0}; j < 2 && static_cast<unsigned char>(result) <= 037;
+             ++j) {
            static constexpr auto octalDigit{attempt("01234567"_ch)};
            if (std::optional<const char *> oct{octalDigit.Parse(state)}) {
-            result = 8 * result + **oct - '0';
+            result = 8 * result + DecimalDigitValue(**oct);
            } else {
              break;
            }
          }
          return std::make_pair(result, true);
-      } else {
-        // unknown escape - ignore the '\' (PGI compatibility)
-        return std::make_pair(static_cast<char>(1 [*cp]), true);
+      } else if (**cp == 'x' || **cp == 'X') {
+        char result{0};  // accumulates the value of the two hex digits
+        for (int j{0}; j < 2; ++j) {
+          static constexpr auto hexadecimalDigit{
+              attempt("0123456789abcdefABCDEF"_ch)};
+          if (std::optional<const char *> hex{hexadecimalDigit.Parse(state)}) {
+            result = 16 * result + HexadecimalDigitValue(**hex);
+          } else {
+            return std::nullopt;  // \x must be followed by two hex digits
+          }
+        }
+        return std::make_pair(result, true);
+      } else if (IsLetter(**cp)) {
+        // Unknown escape - ignore the '\' (PGI compatibility)
+        return std::make_pair(**cp, true);
        }
      }
      return std::nullopt;
diff --git a/flang/lib/parser/unparse.cc b/flang/lib/parser/unparse.cc

index a3b8552..1e96b99 100644 (file)
--- a/flang/lib/parser/unparse.cc
+++ b/flang/lib/parser/unparse.cc
@@ -186,17 +186,15 @@ public:
          x.u);
    }
    void Unparse(const CharLiteralConstant &x) {  // R724
-    Encoding encoding{Encoding::LATIN_1};
      if (const auto &k{std::get<std::optional<KindParam>>(x.t)}) {
        if (std::holds_alternative<KindParam::Kanji>(k->u)) {
          Word("NC");
-        encoding = Encoding::EUC_JP;
        } else {
          Walk(*k), Put('_');
        }
      }
      Put(QuoteCharacterLiteral(
-        DecodeUTF_8(std::get<std::string>(x.t)), backslashEscapes_, encoding));
+        std::get<std::string>(x.t), backslashEscapes_, Encoding::LATIN_1));
    }
    void Unparse(const HollerithLiteralConstant &x) {
      std::u32string ucs{DecodeUTF_8(x.v)};
diff --git a/flang/lib/semantics/expression.cc b/flang/lib/semantics/expression.cc

index da94755..f0dae55 100644 (file)
--- a/flang/lib/semantics/expression.cc
+++ b/flang/lib/semantics/expression.cc
@@ -519,37 +519,16 @@ MaybeExpr ExpressionAnalyzer::AnalyzeString(std::string &&string, int kind) {
    if (!CheckIntrinsicKind(TypeCategory::Character, kind)) {
      return std::nullopt;
    }
-  std::u32string codepoint{parser::DecodeUTF_8(string)};
    if (kind == 1) {
-    std::string result;
-    for (char32_t ch : codepoint) {
-      if (ch <= 0xff) {
-        result += static_cast<char>(ch);
-      } else {
-        // Original literal in UTF-8 source contained a byte sequence
-        // that looked like UTF-8 and got decoded as such.  Reconstruct.
-        parser::EncodedCharacter encoded{parser::EncodeUTF_8(ch)};
-        result += std::string{
-            encoded.buffer, static_cast<std::size_t>(encoded.bytes)};
-      }
-    }
      return AsGenericExpr(
-        Constant<Type<TypeCategory::Character, 1>>{std::move(result)});
+        Constant<Type<TypeCategory::Character, 1>>{std::move(string)});
    } else if (kind == 2) {
-    std::u16string result;
-    for (char32_t ch : codepoint) {
-      if (ch > 0xffff) {
-        Say("Bad character in CHARACTER(KIND=2) literal"_err_en_US);
-        return std::nullopt;
-      }
-      result += static_cast<char16_t>(ch);
-    }
-    return AsGenericExpr(
-        Constant<Type<TypeCategory::Character, 2>>{std::move(result)});
+    return AsGenericExpr(Constant<Type<TypeCategory::Character, 2>>{
+        parser::DecodeEUC_JP(string)});
    } else {
      CHECK(kind == 4);
-    return AsGenericExpr(
-        Constant<Type<TypeCategory::Character, 4>>{std::move(codepoint)});
+    return AsGenericExpr(Constant<Type<TypeCategory::Character, 4>>{
+        parser::DecodeUTF_8(string)});
    }
  }
  
diff --git a/flang/test/semantics/modfile28.f90 b/flang/test/semantics/modfile28.f90

index 9a6cd3d..d817857 100644 (file)
--- a/flang/test/semantics/modfile28.f90
+++ b/flang/test/semantics/modfile28.f90
@@ -29,9 +29,9 @@ end module m
  
  !Expect: m.mod
  !module m
-!character(:,4),parameter::c4=4_"Hi! 你好!"
-!character(:,1),parameter::c1=1_"Hi! ä½ å¥½!"
-!character(:,4),parameter::c4a(1_8:)=[CHARACTER(KIND=4,LEN=1)::"一","二","三","四","五"]
+!character(:,4),parameter::c4=4_"Hi! \344\275\240\345\245\275!"
+!character(:,1),parameter::c1=1_"Hi! \344\275\240\345\245\275!"
+!character(:,4),parameter::c4a(1_8:)=[CHARACTER(KIND=4,LEN=1)::"\344\270\200","\344\272\214","\344\270\211","\345\233\233","\344\272\224"]
  !integer(4),parameter::lc4=7_4
  !integer(4),parameter::lc1=11_4
  !end
author	peter klausler <pklausler@nvidia.com>
	Mon, 17 Jun 2019 23:12:28 +0000 (16:12 -0700)
committer	peter klausler <pklausler@nvidia.com>
	Mon, 17 Jun 2019 23:13:10 +0000 (16:13 -0700)
flang/documentation/Parsing.md		patch \| blob \| history
flang/lib/evaluate/formatting.cc		patch \| blob \| history
flang/lib/parser/characters.h		patch \| blob \| history
flang/lib/parser/prescan.cc		patch \| blob \| history
flang/lib/parser/token-parsers.h		patch \| blob \| history
flang/lib/parser/unparse.cc		patch \| blob \| history
flang/lib/semantics/expression.cc		patch \| blob \| history
flang/test/semantics/modfile28.f90		patch \| blob \| history