* Signed complex literal constants
* DEC `STRUCTURE`, `RECORD`, `UNION`, and `MAP`
* Structure field access with `.field`
-* `NCHARACTER` type and `NC` Kanji character literals
* `BYTE` as synonym for `INTEGER(KIND=1)`
* Quad precision REAL literals with `Q`
* `X` prefix/suffix as synonym for `Z` on hexadecimal literals
* Comparsion of LOGICAL with ==/.EQ. rather than .EQV. (also .NEQV.) (PGI/Intel)
* Procedure pointers in COMMON blocks (PGI/Intel)
* Underindexing multi-dimensional arrays (e.g., A(1) rather than A(1,1)) (PGI only)
+* Legacy PGI `NCHARACTER` type and `NC` Kanji character literals
-// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// character literals. We repurpose '^' and '?' for newline and unknown
// characters (resp.), leaving the others alone in case this code might
// be useful in preprocssing.
- // TODO: EBCDIC?
if (c == '\n') {
// map newline to '^'
c = '^';
return result;
}
-// These are placeholders; the actual mapping is complicated.
-static char32_t JIS_0208ToUCS(char32_t jis) { return jis | 0x80000; }
-static char32_t JIS_0212ToUCS(char32_t jis) { return jis | 0x90000; }
-static bool IsUCSJIS_0212(char32_t ucs) { return (ucs & 0x90000) == 0x90000; }
-static char32_t UCSToJIS(char32_t ucs) { return ucs & 0xffff; }
-
-template<> EncodedCharacter EncodeCharacter<Encoding::EUC_JP>(char32_t ucs) {
- EncodedCharacter result;
- if (ucs <= 0x7f) {
- result.buffer[0] = ucs;
- result.bytes = 1;
- } else if (ucs <= 0xff) {
- result.buffer[0] = '\x8e'; // JIS X 0201
- result.buffer[1] = ucs;
- result.bytes = 2;
- } else if (IsUCSJIS_0212(ucs)) { // JIS X 0212
- char32_t jis{UCSToJIS(ucs)};
- result.buffer[0] = '\x8f';
- result.buffer[1] = 0x80 ^ (jis >> 8);
- result.buffer[2] = 0x80 ^ jis;
- result.bytes = 3;
- } else { // JIS X 0208
- char32_t jis{UCSToJIS(ucs)};
- result.buffer[0] = 0x80 ^ (jis >> 8);
- result.buffer[1] = 0x80 ^ jis;
- result.bytes = 2;
- }
- return result;
-}
-
EncodedCharacter EncodeCharacter(Encoding encoding, char32_t ucs) {
switch (encoding) {
case Encoding::LATIN_1: return EncodeCharacter<Encoding::LATIN_1>(ucs);
- case Encoding::EUC_JP: return EncodeCharacter<Encoding::EUC_JP>(ucs);
case Encoding::UTF_8: return EncodeCharacter<Encoding::UTF_8>(ucs);
default: CRASH_NO_CASE;
}
template std::string EncodeString<Encoding::LATIN_1, std::string>(
const std::string &);
-template std::string EncodeString<Encoding::EUC_JP, std::u16string>(
+template std::string EncodeString<Encoding::UTF_8, std::u16string>(
const std::u16string &);
template std::string EncodeString<Encoding::UTF_8, std::u32string>(
const std::u32string &);
}
}
-template<>
-DecodedCharacter DecodeRawCharacter<Encoding::EUC_JP>(
- const char *cp, std::size_t bytes) {
- auto p{reinterpret_cast<const std::uint8_t *>(cp)};
- char32_t ch{*p};
- if (ch <= 0x7f) {
- return {ch, 1};
- } else if (ch == 0x8e) {
- if (bytes >= 2) {
- return {p[1], 2}; // JIS X 0201
- }
- } else if (ch == 0x8f) { // JIS X 0212
- if (bytes >= 3) {
- return {JIS_0212ToUCS(((p[1] << 8) | p[2]) ^ 0x8080), 3};
- }
- } else if (bytes >= 2) { // assume JIS X 0208
- return {JIS_0208ToUCS(((ch << 8) | p[1]) ^ 0x8080), 2};
- }
- return {};
-}
-
static DecodedCharacter DecodeEscapedCharacter(
const char *cp, std::size_t bytes) {
if (cp[0] == '\\' && bytes >= 2) {
template DecodedCharacter DecodeCharacter<Encoding::LATIN_1>(
const char *, std::size_t, bool);
-template DecodedCharacter DecodeCharacter<Encoding::EUC_JP>(
- const char *, std::size_t, bool);
template DecodedCharacter DecodeCharacter<Encoding::UTF_8>(
const char *, std::size_t, bool);
switch (encoding) {
case Encoding::LATIN_1:
return DecodeCharacter<Encoding::LATIN_1>(cp, bytes, backslashEscapes);
- case Encoding::EUC_JP:
- return DecodeCharacter<Encoding::EUC_JP>(cp, bytes, backslashEscapes);
case Encoding::UTF_8:
return DecodeCharacter<Encoding::UTF_8>(cp, bytes, backslashEscapes);
default: CRASH_NO_CASE;
}
}
-template<Encoding ENCODING>
-StringFor<ENCODING> DecodeString(const std::string &s, bool backslashEscapes) {
- StringFor<ENCODING> result;
+template<typename RESULT, Encoding ENCODING>
+RESULT DecodeString(const std::string &s, bool backslashEscapes) {
+ RESULT result;
const char *p{s.c_str()};
for (auto bytes{s.size()}; bytes != 0;) {
DecodedCharacter decoded{
return result;
}
-template std::string DecodeString<Encoding::LATIN_1>(const std::string &, bool);
-template std::u16string DecodeString<Encoding::EUC_JP>(
+template std::string DecodeString<std::string, Encoding::LATIN_1>(
+ const std::string &, bool);
+template std::u16string DecodeString<std::u16string, Encoding::UTF_8>(
const std::string &, bool);
-template std::u32string DecodeString<Encoding::UTF_8>(
+template std::u32string DecodeString<std::u32string, Encoding::UTF_8>(
const std::string &, bool);
}
// Define some character classification predicates and
// conversions here to avoid dependences upon <cctype> and
// also to accomodate Fortran tokenization.
-// TODO: EBCDIC?
#include <cstddef>
#include <optional>
// The specific encodings that we can handle include:
// LATIN_1: ISO 8859-1 Latin-1
// UTF_8: Multi-byte encoding of Unicode (ISO/IEC 10646)
-// EUC_JP: 1-3 byte encoding of JIS X 0208 / 0212
-enum class Encoding { LATIN_1, UTF_8, EUC_JP };
+enum class Encoding { LATIN_1, UTF_8 };
inline constexpr bool IsUpperCaseLetter(char ch) {
return ch >= 'A' && ch <= 'Z';
template<Encoding ENCODING> EncodedCharacter EncodeCharacter(char32_t ucs);
template<> EncodedCharacter EncodeCharacter<Encoding::LATIN_1>(char32_t);
-template<> EncodedCharacter EncodeCharacter<Encoding::EUC_JP>(char32_t);
template<> EncodedCharacter EncodeCharacter<Encoding::UTF_8>(char32_t);
EncodedCharacter EncodeCharacter(Encoding, char32_t ucs);
std::string EncodeString(const STRING &);
extern template std::string EncodeString<Encoding::LATIN_1, std::string>(
const std::string &);
-extern template std::string EncodeString<Encoding::EUC_JP, std::u16string>(
- const std::u16string &);
extern template std::string EncodeString<Encoding::UTF_8, std::u32string>(
const std::u32string &);
std::string QuoteCharacterLiteral(const std::string &,
bool backslashEscapes = true, Encoding = Encoding::LATIN_1);
std::string QuoteCharacterLiteral(const std::u16string &,
- bool backslashEscapes = true, Encoding = Encoding::EUC_JP);
+ bool backslashEscapes = true, Encoding = Encoding::UTF_8);
std::string QuoteCharacterLiteral(const std::u32string &,
bool backslashEscapes = true, Encoding = Encoding::UTF_8);
template<>
DecodedCharacter DecodeRawCharacter<Encoding::LATIN_1>(
const char *, std::size_t);
-template<>
-DecodedCharacter DecodeRawCharacter<Encoding::EUC_JP>(
- const char *, std::size_t);
+
template<>
DecodedCharacter DecodeRawCharacter<Encoding::UTF_8>(const char *, std::size_t);
const char *, std::size_t, bool backslashEscapes);
extern template DecodedCharacter DecodeCharacter<Encoding::LATIN_1>(
const char *, std::size_t, bool);
-extern template DecodedCharacter DecodeCharacter<Encoding::EUC_JP>(
- const char *, std::size_t, bool);
extern template DecodedCharacter DecodeCharacter<Encoding::UTF_8>(
const char *, std::size_t, bool);
DecodedCharacter DecodeCharacter(
Encoding, const char *, std::size_t, bool backslashEscapes);
-template<Encoding ENCODING> struct StringForEncoding;
-template<> struct StringForEncoding<Encoding::LATIN_1> {
- using type = std::string;
-};
-template<> struct StringForEncoding<Encoding::EUC_JP> {
- using type = std::u16string;
-};
-template<> struct StringForEncoding<Encoding::UTF_8> {
- using type = std::u32string;
-};
-template<Encoding ENCODING>
-using StringFor = typename StringForEncoding<ENCODING>::type;
-
-template<Encoding ENCODING>
-StringFor<ENCODING> DecodeString(const std::string &, bool backslashEscapes);
-extern template std::string DecodeString<Encoding::LATIN_1>(
+template<typename RESULT, Encoding ENCODING>
+RESULT DecodeString(const std::string &, bool backslashEscapes);
+extern template std::string DecodeString<std::string, Encoding::LATIN_1>(
const std::string &, bool);
-extern template std::u16string DecodeString<Encoding::EUC_JP>(
+extern template std::u16string DecodeString<std::u16string, Encoding::UTF_8>(
const std::string &, bool);
-extern template std::u32string DecodeString<Encoding::UTF_8>(
+extern template std::u32string DecodeString<std::u32string, Encoding::UTF_8>(
const std::string &, bool);
}
#endif // FORTRAN_PARSER_CHARACTERS_H_
NODE(parser::IntrinsicTypeSpec, DoubleComplex)
NODE(parser::IntrinsicTypeSpec, DoublePrecision)
NODE(parser::IntrinsicTypeSpec, Logical)
- NODE(parser::IntrinsicTypeSpec, NCharacter)
NODE(parser::IntrinsicTypeSpec, Real)
NODE(parser, IoControlSpec)
NODE(parser::IoControlSpec, Asynchronous)
NODE(parser, IoUnit)
NODE(parser, Keyword)
NODE(parser, KindParam)
- NODE(parser::KindParam, Kanji)
NODE(parser, KindSelector)
NODE(parser::KindSelector, StarSize)
NODE(parser, LabelDoStmt)
FixedFormContinuationWithColumn1Ampersand, LogicalAbbreviations,
XOROperator, PunctuationInNames, OptionalFreeFormSpace, BOZExtensions,
EmptyStatement, AlternativeNE, ExecutionPartNamelist, DECStructures,
- DoubleComplex, Kanji, Byte, StarKind, QuadPrecision, SlashInitialization,
+ DoubleComplex, Byte, StarKind, QuadPrecision, SlashInitialization,
TripletInArrayConstructor, MissingColons, SignedComplexLiteral,
OldStyleParameter, ComplexConstructor, PercentLOC, SignedPrimary, FileName,
Convert, Dispose, IOListLeadingComma, AbbreviatedEditDescriptor,
// integer-type-spec | REAL [kind-selector] | DOUBLE PRECISION |
// COMPLEX [kind-selector] | CHARACTER [char-selector] |
// LOGICAL [kind-selector]
-// Extensions: DOUBLE COMPLEX, NCHARACTER, BYTE
+// Extensions: DOUBLE COMPLEX, BYTE
TYPE_CONTEXT_PARSER("intrinsic type spec"_en_US,
first(construct<IntrinsicTypeSpec>(integerTypeSpec),
construct<IntrinsicTypeSpec>(
construct<IntrinsicTypeSpec>("DOUBLE COMPLEX" >>
extension<LanguageFeature::DoubleComplex>(
construct<IntrinsicTypeSpec::DoubleComplex>())),
- construct<IntrinsicTypeSpec>(extension<LanguageFeature::Kanji>(
- construct<IntrinsicTypeSpec::NCharacter>(
- "NCHARACTER" >> maybe(Parser<LengthSelector>{})))),
extension<LanguageFeature::Byte>(
construct<IntrinsicTypeSpec>(construct<IntegerTypeSpec>(
"BYTE" >> construct<std::optional<KindSelector>>(pure(1)))))))
// [kind-param _] " [rep-char]... "
// "rep-char" is any non-control character. Doubled interior quotes are
// combined. Backslash escapes can be enabled.
-// PGI extension: nc'...' is Kanji.
// N.B. charLiteralConstantWithoutKind does not skip preceding space.
// N.B. the parsing of "name" takes care to not consume the '_'.
constexpr auto charLiteralConstantWithoutKind{
construct<CharLiteralConstant>(
kindParam / underscore, charLiteralConstantWithoutKind) ||
construct<CharLiteralConstant>(construct<std::optional<KindParam>>(),
- space >> charLiteralConstantWithoutKind) ||
- construct<CharLiteralConstant>(
- construct<std::optional<KindParam>>(
- construct<KindParam>(construct<KindParam::Kanji>("NC"_tok))),
- charLiteralConstantWithoutKind))
+ space >> charLiteralConstantWithoutKind))
// deprecated: Hollerith literals
constexpr auto rawHollerithLiteral{
// integer-type-spec | REAL [kind-selector] | DOUBLE PRECISION |
// COMPLEX [kind-selector] | CHARACTER [char-selector] |
// LOGICAL [kind-selector]
-// Extensions: DOUBLE COMPLEX, NCHARACTER (Kanji)
+// Extensions: DOUBLE COMPLEX
struct IntrinsicTypeSpec {
UNION_CLASS_BOILERPLATE(IntrinsicTypeSpec);
struct Real {
std::optional<KindSelector> kind;
};
EMPTY_CLASS(DoubleComplex);
- WRAPPER_CLASS(NCharacter, std::optional<LengthSelector>);
std::variant<IntegerTypeSpec, Real, DoublePrecision, Complex, Character,
- Logical, DoubleComplex, NCharacter>
+ Logical, DoubleComplex>
u;
};
// R709 kind-param -> digit-string | scalar-int-constant-name
struct KindParam {
UNION_CLASS_BOILERPLATE(KindParam);
- EMPTY_CLASS(Kanji);
- std::variant<std::uint64_t, Scalar<Integer<Constant<Name>>>, Kanji> u;
+ std::variant<std::uint64_t, Scalar<Integer<Constant<Name>>>> u;
};
// R707 signed-int-literal-constant -> [sign] int-literal-constant
*p = ToLowerCaseLetter(*p);
}
} else {
- // Kanji NC'...' character literal or literal with kind-param prefix
- // name (e.g., K_"ABC").
+ // Literal with kind-param prefix name (e.g., K_"ABC").
for (; *p != limit[-1]; ++p) {
*p = ToLowerCaseLetter(*p);
}
void Post(const IntrinsicTypeSpec::DoubleComplex &) {
Word("DOUBLE COMPLEX");
}
- void Before(const IntrinsicTypeSpec::NCharacter &x) { Word("NCHARACTER"); }
void Before(const IntegerTypeSpec &x) { // R705
Word("INTEGER");
}
void Unparse(const CharLiteralConstant &x) { // R724
const auto &str{std::get<std::string>(x.t)};
if (const auto &k{std::get<std::optional<KindParam>>(x.t)}) {
- if (std::holds_alternative<KindParam::Kanji>(k->u)) {
- Word("NC");
- std::u16string decoded{DecodeString<Encoding::EUC_JP>(str, true)};
- std::string encoded{EncodeString<Encoding::EUC_JP>(decoded)};
- Put(QuoteCharacterLiteral(encoded, backslashEscapes_));
- } else {
- Walk(*k), Put('_');
- PutNormalized(str);
- }
- } else {
- PutNormalized(str);
+ Walk(*k), Put('_');
}
+ PutNormalized(str);
}
void Unparse(const HollerithLiteralConstant &x) {
- std::u32string ucs{DecodeString<Encoding::UTF_8>(x.v, false)};
+ auto ucs{DecodeString<std::u32string, Encoding::UTF_8>(x.v, false)};
Unparse(ucs.size());
Put('H');
for (char32_t ch : ucs) {
}
void UnparseVisitor::PutNormalized(const std::string &str) {
- std::string decoded{DecodeString<Encoding::LATIN_1>(str, true)};
+ auto decoded{DecodeString<std::string, Encoding::LATIN_1>(str, true)};
std::string encoded{EncodeString<Encoding::LATIN_1>(decoded)};
Put(QuoteCharacterLiteral(encoded, backslashEscapes_));
}
// Type kind parameter values for literal constants.
int ExpressionAnalyzer::AnalyzeKindParam(
- const std::optional<parser::KindParam> &kindParam, int defaultKind,
- int kanjiKind /* = -1 */) {
+ const std::optional<parser::KindParam> &kindParam, int defaultKind) {
if (!kindParam.has_value()) {
return defaultKind;
}
}
return defaultKind;
},
- [&](parser::KindParam::Kanji) {
- if (kanjiKind >= 0) {
- return kanjiKind;
- }
- Say("Kanji not allowed here"_err_en_US);
- return defaultKind;
- },
},
kindParam->u);
}
switch (kind) {
case 1:
return AsGenericExpr(Constant<Type<TypeCategory::Character, 1>>{
- parser::DecodeString<parser::Encoding::LATIN_1>(string, true)});
+ parser::DecodeString<std::string, parser::Encoding::LATIN_1>(
+ string, true)});
case 2:
return AsGenericExpr(Constant<Type<TypeCategory::Character, 2>>{
- parser::DecodeString<parser::Encoding::EUC_JP>(string, true)});
+ parser::DecodeString<std::u16string, parser::Encoding::UTF_8>(
+ string, true)});
case 4:
return AsGenericExpr(Constant<Type<TypeCategory::Character, 4>>{
- parser::DecodeString<parser::Encoding::UTF_8>(string, true)});
+ parser::DecodeString<std::u32string, parser::Encoding::UTF_8>(
+ string, true)});
default: CRASH_NO_CASE;
}
}
}
// Analysis subroutines
- int AnalyzeKindParam(const std::optional<parser::KindParam> &,
- int defaultKind, int kanjiKind = -1);
+ int AnalyzeKindParam(
+ const std::optional<parser::KindParam> &, int defaultKind);
template<typename PARSED> MaybeExpr ExprOrVariable(const PARSED &);
template<typename PARSED> MaybeExpr IntLiteralConstant(const PARSED &);
MaybeExpr AnalyzeString(std::string &&, int kind);
void Post(const parser::IntrinsicTypeSpec::Complex &);
void Post(const parser::IntrinsicTypeSpec::Logical &);
void Post(const parser::IntrinsicTypeSpec::Character &);
- void Post(const parser::IntrinsicTypeSpec::NCharacter &);
void Post(const parser::CharSelector::LengthAndKind &);
void Post(const parser::CharLength &);
void Post(const parser::LengthSelector &);
std::move(*charInfo_.length), std::move(*charInfo_.kind)));
charInfo_ = {};
}
-void DeclarationVisitor::Post(const parser::IntrinsicTypeSpec::NCharacter &x) {
- if (!charInfo_.length) {
- charInfo_.length = ParamValue{1};
- }
- CHECK(!charInfo_.kind.has_value());
- SetDeclTypeSpec(currScope().MakeCharacterType(
- std::move(*charInfo_.length), KindExpr{2 /* EUC_JP */}));
- charInfo_ = {};
-}
void DeclarationVisitor::Post(const parser::CharSelector::LengthAndKind &x) {
charInfo_.kind = EvaluateSubscriptIntExpr(x.kind);
if (x.length) {
} else if (name->source == "doublecomplex") {
proc.u = parser::IntrinsicTypeSpec{
parser::IntrinsicTypeSpec::DoubleComplex{}};
- } else if (name->source == "ncharacter") {
- proc.u = parser::IntrinsicTypeSpec{
- parser::IntrinsicTypeSpec::NCharacter{std::nullopt}};
}
}
}
driver.encoding = Fortran::parser::Encoding::UTF_8;
} else if (arg == "-flatin") {
driver.encoding = Fortran::parser::Encoding::LATIN_1;
- } else if (arg == "-fkanji" || arg == "-Mx,125,4") {
- driver.encoding = Fortran::parser::Encoding::EUC_JP;
} else if (arg == "-help" || arg == "--help" || arg == "-?") {
std::cerr
<< "f18 options:\n"
<< " -ed enable fixed form D lines\n"
<< " -E prescan & preprocess only\n"
<< " -module dir module output directory (default .)\n"
- << " -fkanji interpret source as EUC_JP rather than "
- "UTF-8\n"
<< " -flatin interpret source as Latin-1 (ISO 8859-1) "
"rather than UTF-8\n"
<< " -fparse-only parse only, no output except messages\n"