From dbb202c5bee8d24dba0eee285441679299eb699d Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 28 Jun 2019 11:16:37 -0700 Subject: [PATCH] [flang] Extirpate NCHARACTER type, NC"" literals, and EUC-JP Hollerith Original-commit: flang-compiler/f18@10a592a591c8e4f66f32e586597c19dd343914b9 Reviewed-on: https://github.com/flang-compiler/f18/pull/535 Tree-same-pre-rewrite: false --- flang/documentation/Extensions.md | 2 +- flang/lib/parser/char-set.h | 3 +- flang/lib/parser/characters.cc | 71 ++++-------------------------------- flang/lib/parser/characters.h | 38 ++++--------------- flang/lib/parser/dump-parse-tree.h | 2 - flang/lib/parser/features.h | 2 +- flang/lib/parser/grammar.h | 12 +----- flang/lib/parser/parse-tree.h | 8 ++-- flang/lib/parser/token-sequence.cc | 3 +- flang/lib/parser/unparse.cc | 18 ++------- flang/lib/semantics/expression.cc | 19 ++++------ flang/lib/semantics/expression.h | 4 +- flang/lib/semantics/resolve-names.cc | 13 ------- flang/tools/f18/f18.cc | 4 -- 14 files changed, 38 insertions(+), 161 deletions(-) diff --git a/flang/documentation/Extensions.md b/flang/documentation/Extensions.md index 35548cf..a3b694b 100644 --- a/flang/documentation/Extensions.md +++ b/flang/documentation/Extensions.md @@ -23,7 +23,6 @@ Extensions, deletions, and legacy features supported by default * Signed complex literal constants * DEC `STRUCTURE`, `RECORD`, `UNION`, and `MAP` * Structure field access with `.field` -* `NCHARACTER` type and `NC` Kanji character literals * `BYTE` as synonym for `INTEGER(KIND=1)` * Quad precision REAL literals with `Q` * `X` prefix/suffix as synonym for `Z` on hexadecimal literals @@ -111,3 +110,4 @@ Extensions and legacy features deliberately not supported * Comparsion of LOGICAL with ==/.EQ. rather than .EQV. (also .NEQV.) (PGI/Intel) * Procedure pointers in COMMON blocks (PGI/Intel) * Underindexing multi-dimensional arrays (e.g., A(1) rather than A(1,1)) (PGI only) +* Legacy PGI `NCHARACTER` type and `NC` Kanji character literals diff --git a/flang/lib/parser/char-set.h b/flang/lib/parser/char-set.h index 414c9aa..a83c185 100644 --- a/flang/lib/parser/char-set.h +++ b/flang/lib/parser/char-set.h @@ -1,4 +1,4 @@ -// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -36,7 +36,6 @@ struct SetOfChars { // character literals. We repurpose '^' and '?' for newline and unknown // characters (resp.), leaving the others alone in case this code might // be useful in preprocssing. - // TODO: EBCDIC? if (c == '\n') { // map newline to '^' c = '^'; diff --git a/flang/lib/parser/characters.cc b/flang/lib/parser/characters.cc index 49c46e4..b2e97b2 100644 --- a/flang/lib/parser/characters.cc +++ b/flang/lib/parser/characters.cc @@ -119,40 +119,9 @@ template<> EncodedCharacter EncodeCharacter(char32_t ucs) { return result; } -// These are placeholders; the actual mapping is complicated. -static char32_t JIS_0208ToUCS(char32_t jis) { return jis | 0x80000; } -static char32_t JIS_0212ToUCS(char32_t jis) { return jis | 0x90000; } -static bool IsUCSJIS_0212(char32_t ucs) { return (ucs & 0x90000) == 0x90000; } -static char32_t UCSToJIS(char32_t ucs) { return ucs & 0xffff; } - -template<> EncodedCharacter EncodeCharacter(char32_t ucs) { - EncodedCharacter result; - if (ucs <= 0x7f) { - result.buffer[0] = ucs; - result.bytes = 1; - } else if (ucs <= 0xff) { - result.buffer[0] = '\x8e'; // JIS X 0201 - result.buffer[1] = ucs; - result.bytes = 2; - } else if (IsUCSJIS_0212(ucs)) { // JIS X 0212 - char32_t jis{UCSToJIS(ucs)}; - result.buffer[0] = '\x8f'; - result.buffer[1] = 0x80 ^ (jis >> 8); - result.buffer[2] = 0x80 ^ jis; - result.bytes = 3; - } else { // JIS X 0208 - char32_t jis{UCSToJIS(ucs)}; - result.buffer[0] = 0x80 ^ (jis >> 8); - result.buffer[1] = 0x80 ^ jis; - result.bytes = 2; - } - return result; -} - EncodedCharacter EncodeCharacter(Encoding encoding, char32_t ucs) { switch (encoding) { case Encoding::LATIN_1: return EncodeCharacter(ucs); - case Encoding::EUC_JP: return EncodeCharacter(ucs); case Encoding::UTF_8: return EncodeCharacter(ucs); default: CRASH_NO_CASE; } @@ -171,7 +140,7 @@ std::string EncodeString(const STRING &str) { template std::string EncodeString( const std::string &); -template std::string EncodeString( +template std::string EncodeString( const std::u16string &); template std::string EncodeString( const std::u32string &); @@ -213,27 +182,6 @@ DecodedCharacter DecodeRawCharacter( } } -template<> -DecodedCharacter DecodeRawCharacter( - const char *cp, std::size_t bytes) { - auto p{reinterpret_cast(cp)}; - char32_t ch{*p}; - if (ch <= 0x7f) { - return {ch, 1}; - } else if (ch == 0x8e) { - if (bytes >= 2) { - return {p[1], 2}; // JIS X 0201 - } - } else if (ch == 0x8f) { // JIS X 0212 - if (bytes >= 3) { - return {JIS_0212ToUCS(((p[1] << 8) | p[2]) ^ 0x8080), 3}; - } - } else if (bytes >= 2) { // assume JIS X 0208 - return {JIS_0208ToUCS(((ch << 8) | p[1]) ^ 0x8080), 2}; - } - return {}; -} - static DecodedCharacter DecodeEscapedCharacter( const char *cp, std::size_t bytes) { if (cp[0] == '\\' && bytes >= 2) { @@ -297,8 +245,6 @@ DecodedCharacter DecodeCharacter( template DecodedCharacter DecodeCharacter( const char *, std::size_t, bool); -template DecodedCharacter DecodeCharacter( - const char *, std::size_t, bool); template DecodedCharacter DecodeCharacter( const char *, std::size_t, bool); @@ -307,17 +253,15 @@ DecodedCharacter DecodeCharacter(Encoding encoding, const char *cp, switch (encoding) { case Encoding::LATIN_1: return DecodeCharacter(cp, bytes, backslashEscapes); - case Encoding::EUC_JP: - return DecodeCharacter(cp, bytes, backslashEscapes); case Encoding::UTF_8: return DecodeCharacter(cp, bytes, backslashEscapes); default: CRASH_NO_CASE; } } -template -StringFor DecodeString(const std::string &s, bool backslashEscapes) { - StringFor result; +template +RESULT DecodeString(const std::string &s, bool backslashEscapes) { + RESULT result; const char *p{s.c_str()}; for (auto bytes{s.size()}; bytes != 0;) { DecodedCharacter decoded{ @@ -337,9 +281,10 @@ StringFor DecodeString(const std::string &s, bool backslashEscapes) { return result; } -template std::string DecodeString(const std::string &, bool); -template std::u16string DecodeString( +template std::string DecodeString( + const std::string &, bool); +template std::u16string DecodeString( const std::string &, bool); -template std::u32string DecodeString( +template std::u32string DecodeString( const std::string &, bool); } diff --git a/flang/lib/parser/characters.h b/flang/lib/parser/characters.h index 5a72d63..ede6033 100644 --- a/flang/lib/parser/characters.h +++ b/flang/lib/parser/characters.h @@ -18,7 +18,6 @@ // Define some character classification predicates and // conversions here to avoid dependences upon and // also to accomodate Fortran tokenization. -// TODO: EBCDIC? #include #include @@ -31,8 +30,7 @@ namespace Fortran::parser { // The specific encodings that we can handle include: // LATIN_1: ISO 8859-1 Latin-1 // UTF_8: Multi-byte encoding of Unicode (ISO/IEC 10646) -// EUC_JP: 1-3 byte encoding of JIS X 0208 / 0212 -enum class Encoding { LATIN_1, UTF_8, EUC_JP }; +enum class Encoding { LATIN_1, UTF_8 }; inline constexpr bool IsUpperCaseLetter(char ch) { return ch >= 'A' && ch <= 'Z'; @@ -147,7 +145,6 @@ struct EncodedCharacter { template EncodedCharacter EncodeCharacter(char32_t ucs); template<> EncodedCharacter EncodeCharacter(char32_t); -template<> EncodedCharacter EncodeCharacter(char32_t); template<> EncodedCharacter EncodeCharacter(char32_t); EncodedCharacter EncodeCharacter(Encoding, char32_t ucs); @@ -156,8 +153,6 @@ template std::string EncodeString(const STRING &); extern template std::string EncodeString( const std::string &); -extern template std::string EncodeString( - const std::u16string &); extern template std::string EncodeString( const std::u32string &); @@ -194,7 +189,7 @@ void EmitQuotedChar(char32_t ch, const NORMAL &emit, const INSERTED &insert, std::string QuoteCharacterLiteral(const std::string &, bool backslashEscapes = true, Encoding = Encoding::LATIN_1); std::string QuoteCharacterLiteral(const std::u16string &, - bool backslashEscapes = true, Encoding = Encoding::EUC_JP); + bool backslashEscapes = true, Encoding = Encoding::UTF_8); std::string QuoteCharacterLiteral(const std::u32string &, bool backslashEscapes = true, Encoding = Encoding::UTF_8); @@ -210,9 +205,7 @@ DecodedCharacter DecodeRawCharacter(const char *, std::size_t); template<> DecodedCharacter DecodeRawCharacter( const char *, std::size_t); -template<> -DecodedCharacter DecodeRawCharacter( - const char *, std::size_t); + template<> DecodedCharacter DecodeRawCharacter(const char *, std::size_t); @@ -222,34 +215,19 @@ DecodedCharacter DecodeCharacter( const char *, std::size_t, bool backslashEscapes); extern template DecodedCharacter DecodeCharacter( const char *, std::size_t, bool); -extern template DecodedCharacter DecodeCharacter( - const char *, std::size_t, bool); extern template DecodedCharacter DecodeCharacter( const char *, std::size_t, bool); DecodedCharacter DecodeCharacter( Encoding, const char *, std::size_t, bool backslashEscapes); -template struct StringForEncoding; -template<> struct StringForEncoding { - using type = std::string; -}; -template<> struct StringForEncoding { - using type = std::u16string; -}; -template<> struct StringForEncoding { - using type = std::u32string; -}; -template -using StringFor = typename StringForEncoding::type; - -template -StringFor DecodeString(const std::string &, bool backslashEscapes); -extern template std::string DecodeString( +template +RESULT DecodeString(const std::string &, bool backslashEscapes); +extern template std::string DecodeString( const std::string &, bool); -extern template std::u16string DecodeString( +extern template std::u16string DecodeString( const std::string &, bool); -extern template std::u32string DecodeString( +extern template std::u32string DecodeString( const std::string &, bool); } #endif // FORTRAN_PARSER_CHARACTERS_H_ diff --git a/flang/lib/parser/dump-parse-tree.h b/flang/lib/parser/dump-parse-tree.h index 10fe66b..9cb92f5 100644 --- a/flang/lib/parser/dump-parse-tree.h +++ b/flang/lib/parser/dump-parse-tree.h @@ -352,7 +352,6 @@ public: NODE(parser::IntrinsicTypeSpec, DoubleComplex) NODE(parser::IntrinsicTypeSpec, DoublePrecision) NODE(parser::IntrinsicTypeSpec, Logical) - NODE(parser::IntrinsicTypeSpec, NCharacter) NODE(parser::IntrinsicTypeSpec, Real) NODE(parser, IoControlSpec) NODE(parser::IoControlSpec, Asynchronous) @@ -364,7 +363,6 @@ public: NODE(parser, IoUnit) NODE(parser, Keyword) NODE(parser, KindParam) - NODE(parser::KindParam, Kanji) NODE(parser, KindSelector) NODE(parser::KindSelector, StarSize) NODE(parser, LabelDoStmt) diff --git a/flang/lib/parser/features.h b/flang/lib/parser/features.h index b9c41f5..f595c63 100644 --- a/flang/lib/parser/features.h +++ b/flang/lib/parser/features.h @@ -24,7 +24,7 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines, FixedFormContinuationWithColumn1Ampersand, LogicalAbbreviations, XOROperator, PunctuationInNames, OptionalFreeFormSpace, BOZExtensions, EmptyStatement, AlternativeNE, ExecutionPartNamelist, DECStructures, - DoubleComplex, Kanji, Byte, StarKind, QuadPrecision, SlashInitialization, + DoubleComplex, Byte, StarKind, QuadPrecision, SlashInitialization, TripletInArrayConstructor, MissingColons, SignedComplexLiteral, OldStyleParameter, ComplexConstructor, PercentLOC, SignedPrimary, FileName, Convert, Dispose, IOListLeadingComma, AbbreviatedEditDescriptor, diff --git a/flang/lib/parser/grammar.h b/flang/lib/parser/grammar.h index 1ff48e5..3a6bedf 100644 --- a/flang/lib/parser/grammar.h +++ b/flang/lib/parser/grammar.h @@ -530,7 +530,7 @@ TYPE_CONTEXT_PARSER("declaration type spec"_en_US, // integer-type-spec | REAL [kind-selector] | DOUBLE PRECISION | // COMPLEX [kind-selector] | CHARACTER [char-selector] | // LOGICAL [kind-selector] -// Extensions: DOUBLE COMPLEX, NCHARACTER, BYTE +// Extensions: DOUBLE COMPLEX, BYTE TYPE_CONTEXT_PARSER("intrinsic type spec"_en_US, first(construct(integerTypeSpec), construct( @@ -546,9 +546,6 @@ TYPE_CONTEXT_PARSER("intrinsic type spec"_en_US, construct("DOUBLE COMPLEX" >> extension( construct())), - construct(extension( - construct( - "NCHARACTER" >> maybe(Parser{})))), extension( construct(construct( "BYTE" >> construct>(pure(1))))))) @@ -661,7 +658,6 @@ TYPE_PARSER(construct(parenthesized(typeParamValue)) || // [kind-param _] " [rep-char]... " // "rep-char" is any non-control character. Doubled interior quotes are // combined. Backslash escapes can be enabled. -// PGI extension: nc'...' is Kanji. // N.B. charLiteralConstantWithoutKind does not skip preceding space. // N.B. the parsing of "name" takes care to not consume the '_'. constexpr auto charLiteralConstantWithoutKind{ @@ -671,11 +667,7 @@ TYPE_CONTEXT_PARSER("CHARACTER literal constant"_en_US, construct( kindParam / underscore, charLiteralConstantWithoutKind) || construct(construct>(), - space >> charLiteralConstantWithoutKind) || - construct( - construct>( - construct(construct("NC"_tok))), - charLiteralConstantWithoutKind)) + space >> charLiteralConstantWithoutKind)) // deprecated: Hollerith literals constexpr auto rawHollerithLiteral{ diff --git a/flang/lib/parser/parse-tree.h b/flang/lib/parser/parse-tree.h index e8eda79..3d8acc6 100644 --- a/flang/lib/parser/parse-tree.h +++ b/flang/lib/parser/parse-tree.h @@ -660,7 +660,7 @@ struct CharSelector { // integer-type-spec | REAL [kind-selector] | DOUBLE PRECISION | // COMPLEX [kind-selector] | CHARACTER [char-selector] | // LOGICAL [kind-selector] -// Extensions: DOUBLE COMPLEX, NCHARACTER (Kanji) +// Extensions: DOUBLE COMPLEX struct IntrinsicTypeSpec { UNION_CLASS_BOILERPLATE(IntrinsicTypeSpec); struct Real { @@ -685,9 +685,8 @@ struct IntrinsicTypeSpec { std::optional kind; }; EMPTY_CLASS(DoubleComplex); - WRAPPER_CLASS(NCharacter, std::optional); std::variant + Logical, DoubleComplex> u; }; @@ -737,8 +736,7 @@ struct DeclarationTypeSpec { // R709 kind-param -> digit-string | scalar-int-constant-name struct KindParam { UNION_CLASS_BOILERPLATE(KindParam); - EMPTY_CLASS(Kanji); - std::variant>>, Kanji> u; + std::variant>>> u; }; // R707 signed-int-literal-constant -> [sign] int-literal-constant diff --git a/flang/lib/parser/token-sequence.cc b/flang/lib/parser/token-sequence.cc index 2c4424a..df0ea02 100644 --- a/flang/lib/parser/token-sequence.cc +++ b/flang/lib/parser/token-sequence.cc @@ -166,8 +166,7 @@ TokenSequence &TokenSequence::ToLowerCase() { *p = ToLowerCaseLetter(*p); } } else { - // Kanji NC'...' character literal or literal with kind-param prefix - // name (e.g., K_"ABC"). + // Literal with kind-param prefix name (e.g., K_"ABC"). for (; *p != limit[-1]; ++p) { *p = ToLowerCaseLetter(*p); } diff --git a/flang/lib/parser/unparse.cc b/flang/lib/parser/unparse.cc index df37414..20f3fbf 100644 --- a/flang/lib/parser/unparse.cc +++ b/flang/lib/parser/unparse.cc @@ -132,7 +132,6 @@ public: void Post(const IntrinsicTypeSpec::DoubleComplex &) { Word("DOUBLE COMPLEX"); } - void Before(const IntrinsicTypeSpec::NCharacter &x) { Word("NCHARACTER"); } void Before(const IntegerTypeSpec &x) { // R705 Word("INTEGER"); } @@ -188,21 +187,12 @@ public: void Unparse(const CharLiteralConstant &x) { // R724 const auto &str{std::get(x.t)}; if (const auto &k{std::get>(x.t)}) { - if (std::holds_alternative(k->u)) { - Word("NC"); - std::u16string decoded{DecodeString(str, true)}; - std::string encoded{EncodeString(decoded)}; - Put(QuoteCharacterLiteral(encoded, backslashEscapes_)); - } else { - Walk(*k), Put('_'); - PutNormalized(str); - } - } else { - PutNormalized(str); + Walk(*k), Put('_'); } + PutNormalized(str); } void Unparse(const HollerithLiteralConstant &x) { - std::u32string ucs{DecodeString(x.v, false)}; + auto ucs{DecodeString(x.v, false)}; Unparse(ucs.size()); Put('H'); for (char32_t ch : ucs) { @@ -2634,7 +2624,7 @@ void UnparseVisitor::Put(const std::string &str) { } void UnparseVisitor::PutNormalized(const std::string &str) { - std::string decoded{DecodeString(str, true)}; + auto decoded{DecodeString(str, true)}; std::string encoded{EncodeString(decoded)}; Put(QuoteCharacterLiteral(encoded, backslashEscapes_)); } diff --git a/flang/lib/semantics/expression.cc b/flang/lib/semantics/expression.cc index 98e8150..d729ba5 100644 --- a/flang/lib/semantics/expression.cc +++ b/flang/lib/semantics/expression.cc @@ -321,8 +321,7 @@ template MaybeExpr AsMaybeExpr(std::optional &&x) { // Type kind parameter values for literal constants. int ExpressionAnalyzer::AnalyzeKindParam( - const std::optional &kindParam, int defaultKind, - int kanjiKind /* = -1 */) { + const std::optional &kindParam, int defaultKind) { if (!kindParam.has_value()) { return defaultKind; } @@ -341,13 +340,6 @@ int ExpressionAnalyzer::AnalyzeKindParam( } return defaultKind; }, - [&](parser::KindParam::Kanji) { - if (kanjiKind >= 0) { - return kanjiKind; - } - Say("Kanji not allowed here"_err_en_US); - return defaultKind; - }, }, kindParam->u); } @@ -522,13 +514,16 @@ MaybeExpr ExpressionAnalyzer::AnalyzeString(std::string &&string, int kind) { switch (kind) { case 1: return AsGenericExpr(Constant>{ - parser::DecodeString(string, true)}); + parser::DecodeString( + string, true)}); case 2: return AsGenericExpr(Constant>{ - parser::DecodeString(string, true)}); + parser::DecodeString( + string, true)}); case 4: return AsGenericExpr(Constant>{ - parser::DecodeString(string, true)}); + parser::DecodeString( + string, true)}); default: CRASH_NO_CASE; } } diff --git a/flang/lib/semantics/expression.h b/flang/lib/semantics/expression.h index ae7a775..5cc6fb5 100644 --- a/flang/lib/semantics/expression.h +++ b/flang/lib/semantics/expression.h @@ -290,8 +290,8 @@ private: } // Analysis subroutines - int AnalyzeKindParam(const std::optional &, - int defaultKind, int kanjiKind = -1); + int AnalyzeKindParam( + const std::optional &, int defaultKind); template MaybeExpr ExprOrVariable(const PARSED &); template MaybeExpr IntLiteralConstant(const PARSED &); MaybeExpr AnalyzeString(std::string &&, int kind); diff --git a/flang/lib/semantics/resolve-names.cc b/flang/lib/semantics/resolve-names.cc index 8197dc3..2c03f9b 100644 --- a/flang/lib/semantics/resolve-names.cc +++ b/flang/lib/semantics/resolve-names.cc @@ -693,7 +693,6 @@ public: void Post(const parser::IntrinsicTypeSpec::Complex &); void Post(const parser::IntrinsicTypeSpec::Logical &); void Post(const parser::IntrinsicTypeSpec::Character &); - void Post(const parser::IntrinsicTypeSpec::NCharacter &); void Post(const parser::CharSelector::LengthAndKind &); void Post(const parser::CharLength &); void Post(const parser::LengthSelector &); @@ -2764,15 +2763,6 @@ void DeclarationVisitor::Post(const parser::IntrinsicTypeSpec::Character &x) { std::move(*charInfo_.length), std::move(*charInfo_.kind))); charInfo_ = {}; } -void DeclarationVisitor::Post(const parser::IntrinsicTypeSpec::NCharacter &x) { - if (!charInfo_.length) { - charInfo_.length = ParamValue{1}; - } - CHECK(!charInfo_.kind.has_value()); - SetDeclTypeSpec(currScope().MakeCharacterType( - std::move(*charInfo_.length), KindExpr{2 /* EUC_JP */})); - charInfo_ = {}; -} void DeclarationVisitor::Post(const parser::CharSelector::LengthAndKind &x) { charInfo_.kind = EvaluateSubscriptIntExpr(x.kind); if (x.length) { @@ -3149,9 +3139,6 @@ bool DeclarationVisitor::Pre(const parser::ProcInterface &x) { } else if (name->source == "doublecomplex") { proc.u = parser::IntrinsicTypeSpec{ parser::IntrinsicTypeSpec::DoubleComplex{}}; - } else if (name->source == "ncharacter") { - proc.u = parser::IntrinsicTypeSpec{ - parser::IntrinsicTypeSpec::NCharacter{std::nullopt}}; } } } diff --git a/flang/tools/f18/f18.cc b/flang/tools/f18/f18.cc index 996bf58..0d9f1e3 100644 --- a/flang/tools/f18/f18.cc +++ b/flang/tools/f18/f18.cc @@ -465,8 +465,6 @@ int main(int argc, char *const argv[]) { driver.encoding = Fortran::parser::Encoding::UTF_8; } else if (arg == "-flatin") { driver.encoding = Fortran::parser::Encoding::LATIN_1; - } else if (arg == "-fkanji" || arg == "-Mx,125,4") { - driver.encoding = Fortran::parser::Encoding::EUC_JP; } else if (arg == "-help" || arg == "--help" || arg == "-?") { std::cerr << "f18 options:\n" @@ -481,8 +479,6 @@ int main(int argc, char *const argv[]) { << " -ed enable fixed form D lines\n" << " -E prescan & preprocess only\n" << " -module dir module output directory (default .)\n" - << " -fkanji interpret source as EUC_JP rather than " - "UTF-8\n" << " -flatin interpret source as Latin-1 (ISO 8859-1) " "rather than UTF-8\n" << " -fparse-only parse only, no output except messages\n" -- 2.7.4