From 3a8244df6fb88a6670470e603445c72f224db9e3 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 1 May 2018 05:02:45 +0000 Subject: [PATCH] Implement P0482R2, support for char8_t type. This is not yet part of any C++ working draft, and so is controlled by the flag -fchar8_t rather than a -std= flag. (The GCC implementation is controlled by a flag with the same name.) This implementation is experimental, and will be removed or revised substantially to match the proposal as it makes its way through the C++ committee. llvm-svn: 331244 --- clang/include/clang/AST/ASTContext.h | 1 + clang/include/clang/AST/BuiltinTypes.def | 3 ++ clang/include/clang/AST/Type.h | 1 + clang/include/clang/Basic/DiagnosticSemaKinds.td | 10 +++++ clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Basic/Specifiers.h | 1 + clang/include/clang/Basic/TokenKinds.def | 4 ++ clang/include/clang/Driver/Options.td | 4 ++ clang/include/clang/Sema/DeclSpec.h | 1 + clang/include/clang/Sema/Initialization.h | 6 +++ clang/include/clang/Serialization/ASTBitCodes.h | 3 ++ clang/lib/AST/ASTContext.cpp | 6 +++ clang/lib/AST/ExprConstant.cpp | 1 + clang/lib/AST/ItaniumMangle.cpp | 3 ++ clang/lib/AST/MicrosoftMangle.cpp | 1 + clang/lib/AST/NSAPI.cpp | 1 + clang/lib/AST/Type.cpp | 10 +++++ clang/lib/AST/TypeLoc.cpp | 2 + clang/lib/Analysis/PrintfFormatString.cpp | 1 + clang/lib/Basic/IdentifierTable.cpp | 16 ++++---- clang/lib/CodeGen/CGDebugInfo.cpp | 1 + clang/lib/CodeGen/CodeGenTypes.cpp | 1 + clang/lib/CodeGen/ItaniumCXXABI.cpp | 4 +- clang/lib/Driver/ToolChains/Clang.cpp | 3 ++ clang/lib/Format/FormatToken.cpp | 1 + clang/lib/Frontend/CompilerInvocation.cpp | 1 + clang/lib/Frontend/InitPreprocessor.cpp | 6 +++ clang/lib/Index/USRGeneration.cpp | 2 + clang/lib/Lex/PPExpressions.cpp | 2 +- clang/lib/Parse/ParseDecl.cpp | 7 ++++ clang/lib/Parse/ParseExpr.cpp | 1 + clang/lib/Parse/ParseExprCXX.cpp | 3 ++ clang/lib/Parse/ParseTentative.cpp | 3 ++ clang/lib/Sema/DeclSpec.cpp | 6 ++- 
clang/lib/Sema/SemaDecl.cpp | 3 ++ clang/lib/Sema/SemaDeclCXX.cpp | 7 +++- clang/lib/Sema/SemaExpr.cpp | 4 ++ clang/lib/Sema/SemaInit.cpp | 48 +++++++++++++++++++++--- clang/lib/Sema/SemaOverload.cpp | 2 + clang/lib/Sema/SemaTemplate.cpp | 4 +- clang/lib/Sema/SemaTemplateVariadic.cpp | 1 + clang/lib/Sema/SemaType.cpp | 5 +++ clang/lib/Serialization/ASTCommon.cpp | 3 ++ clang/lib/Serialization/ASTReader.cpp | 3 ++ clang/test/CodeGenCXX/char8_t.cpp | 8 ++++ clang/test/Lexer/char8_t.cpp | 17 +++++++++ clang/test/Lexer/cxx-features.cpp | 7 ++++ clang/test/SemaCXX/char8_t.cpp | 44 ++++++++++++++++++++++ 48 files changed, 254 insertions(+), 19 deletions(-) create mode 100644 clang/test/CodeGenCXX/char8_t.cpp create mode 100644 clang/test/Lexer/char8_t.cpp create mode 100644 clang/test/SemaCXX/char8_t.cpp diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 40561b3..af39a1f 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -999,6 +999,7 @@ public: CanQualType WCharTy; // [C++ 3.9.1p5]. CanQualType WideCharTy; // Same as WCharTy in C++, integer type in C99. CanQualType WIntTy; // [C99 7.24.1], integer type unchanged by default promotions. + CanQualType Char8Ty; // [C++20 proposal] CanQualType Char16Ty; // [C++0x 3.9.1p5], integer type in C99. CanQualType Char32Ty; // [C++0x 3.9.1p5], integer type in C99. 
CanQualType SignedCharTy, ShortTy, IntTy, LongTy, LongLongTy, Int128Ty; diff --git a/clang/include/clang/AST/BuiltinTypes.def b/clang/include/clang/AST/BuiltinTypes.def index e4f5f7d..4d4ed79 100644 --- a/clang/include/clang/AST/BuiltinTypes.def +++ b/clang/include/clang/AST/BuiltinTypes.def @@ -72,6 +72,9 @@ UNSIGNED_TYPE(UChar, UnsignedCharTy) // 'wchar_t' for targets where it's unsigned SHARED_SINGLETON_TYPE(UNSIGNED_TYPE(WChar_U, WCharTy)) +// 'char8_t' in C++20 (proposed) +UNSIGNED_TYPE(Char8, Char8Ty) + // 'char16_t' in C++ UNSIGNED_TYPE(Char16, Char16Ty) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index ab6e113..95e4cad 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -1777,6 +1777,7 @@ public: bool isBooleanType() const; bool isCharType() const; bool isWideCharType() const; + bool isChar8Type() const; bool isChar16Type() const; bool isChar32Type() const; bool isAnyCharacterType() const; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index cb3f606..4410c32 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2421,6 +2421,9 @@ def err_template_different_associated_constraints : Error< def warn_cxx98_compat_unicode_type : Warning< "'%0' type specifier is incompatible with C++98">, InGroup, DefaultIgnore; +def warn_cxx17_compat_unicode_type : Warning< + "'char8_t' type specifier is incompatible with C++ standards before C++20">, + InGroup, DefaultIgnore; // __make_integer_seq def err_integer_sequence_negative_length : Error< @@ -5822,6 +5825,13 @@ def err_array_init_wide_string_into_char : Error< "initializing char array with wide string literal">; def err_array_init_incompat_wide_string_into_wchar : Error< "initializing wide char array with incompatible wide string literal">; +def err_array_init_plain_string_into_char8_t : Error< + "initializing 'char8_t' array 
with plain string literal">; +def note_array_init_plain_string_into_char8_t : Note< + "add 'u8' prefix to form a 'char8_t' string literal">; +def err_array_init_utf8_string_into_char : Error< + "initialization of char array with UTF-8 string literal is not permitted " + "by '-fchar8_t'">; def err_array_init_different_type : Error< "cannot initialize array %diff{of type $ with array of type $|" "with different type of array}0,1">; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 87173c8..f868110 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -106,6 +106,7 @@ LANGOPT(LineComment , 1, 0, "'//' comments") LANGOPT(Bool , 1, 0, "bool, true, and false keywords") LANGOPT(Half , 1, 0, "half keyword") LANGOPT(WChar , 1, CPlusPlus, "wchar_t keyword") +LANGOPT(Char8 , 1, 0, "char8_t keyword") LANGOPT(DeclSpecKeyword , 1, 0, "__declspec keyword") BENIGN_LANGOPT(DollarIdents , 1, 1, "'$' in identifiers") BENIGN_LANGOPT(AsmPreprocessor, 1, 0, "preprocessor in asm mode") diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 377534b..d17e5a8 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -47,6 +47,7 @@ namespace clang { TST_void, TST_char, TST_wchar, // C++ wchar_t + TST_char8, // C++20 char8_t (proposed) TST_char16, // C++11 char16_t TST_char32, // C++11 char32_t TST_int, diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index f18e06f..6bf945b 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -260,6 +260,7 @@ PUNCTUATOR(caretcaret, "^^") // BOOLSUPPORT - This is a keyword if 'bool' is a built-in type // HALFSUPPORT - This is a keyword if 'half' is a built-in type // WCHARSUPPORT - This is a keyword if 'wchar_t' is a built-in type +// CHAR8SUPPORT - This is a keyword if 
'char8_t' is a built-in type // KEYWORD(auto , KEYALL) KEYWORD(break , KEYALL) @@ -380,6 +381,9 @@ KEYWORD(co_yield , KEYCOROUTINES) MODULES_KEYWORD(module) MODULES_KEYWORD(import) +// C++ char8_t proposal +KEYWORD(char8_t , CHAR8SUPPORT) + // C11 Extension KEYWORD(_Float16 , KEYALL) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 80facb7..944a51c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1525,6 +1525,10 @@ def frtti : Flag<["-"], "frtti">, Group; def : Flag<["-"], "fsched-interblock">, Group; def fshort_enums : Flag<["-"], "fshort-enums">, Group, Flags<[CC1Option]>, HelpText<"Allocate to an enum type only as many bytes as it needs for the declared range of possible values">; +def fchar8__t : Flag<["-"], "fchar8_t">, Group, Flags<[CC1Option]>, + HelpText<"Enable C++ builtin type char8_t">; +def fno_char8__t : Flag<["-"], "fno-char8_t">, Group, + HelpText<"Disable C++ builtin type char8_t">; def fshort_wchar : Flag<["-"], "fshort-wchar">, Group, HelpText<"Force wchar_t to be a short unsigned int">; def fno_short_wchar : Flag<["-"], "fno-short-wchar">, Group, diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index e9b116f..03fd90c 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -273,6 +273,7 @@ public: static const TST TST_void = clang::TST_void; static const TST TST_char = clang::TST_char; static const TST TST_wchar = clang::TST_wchar; + static const TST TST_char8 = clang::TST_char8; static const TST TST_char16 = clang::TST_char16; static const TST TST_char32 = clang::TST_char32; static const TST TST_int = clang::TST_int; diff --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h index d11c1ee..c0f3842 100644 --- a/clang/include/clang/Sema/Initialization.h +++ b/clang/include/clang/Sema/Initialization.h @@ -952,6 +952,12 @@ public: /// 
literal. FK_IncompatWideStringIntoWideChar, + /// \brief Initializing char8_t array with plain string literal. + FK_PlainStringIntoUTF8Char, + + /// \brief Initializing char array with UTF-8 string literal. + FK_UTF8StringIntoPlainChar, + /// \brief Array type mismatch. FK_ArrayTypeMismatch, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 1f4e034..4dbcd57 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -936,6 +936,9 @@ namespace serialization { /// \brief The '_Float16' type PREDEF_TYPE_FLOAT16_ID = 44, + /// \brief The C++ 'char8_t' type. + PREDEF_TYPE_CHAR8_ID = 45, + /// \brief OpenCL image types with auto numeration #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ PREDEF_TYPE_##Id##_ID, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index d61ca58..d510714 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1151,6 +1151,9 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, WIntTy = getFromTargetType(Target.getWIntType()); + // C++20 (proposed) + InitBuiltinType(Char8Ty, BuiltinType::Char8); + if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++ InitBuiltinType(Char16Ty, BuiltinType::Char16); else // C99 @@ -1739,6 +1742,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { case BuiltinType::Char_U: case BuiltinType::UChar: case BuiltinType::SChar: + case BuiltinType::Char8: Width = Target->getCharWidth(); Align = Target->getCharAlign(); break; @@ -5456,6 +5460,7 @@ QualType ASTContext::getPromotedIntegerType(QualType Promotable) const { // FIXME: Is there some better way to compute this? 
if (BT->getKind() == BuiltinType::WChar_S || BT->getKind() == BuiltinType::WChar_U || + BT->getKind() == BuiltinType::Char8 || BT->getKind() == BuiltinType::Char16 || BT->getKind() == BuiltinType::Char32) { bool FromIsSigned = BT->getKind() == BuiltinType::WChar_S; @@ -6202,6 +6207,7 @@ static char getObjCEncodingForPrimitiveKind(const ASTContext *C, switch (kind) { case BuiltinType::Void: return 'v'; case BuiltinType::Bool: return 'B'; + case BuiltinType::Char8: case BuiltinType::Char_U: case BuiltinType::UChar: return 'C'; case BuiltinType::Char16: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8157724..c540dfbb 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -7326,6 +7326,7 @@ static int EvaluateBuiltinClassifyType(const CallExpr *E, return pointer_type_class; case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::ObjCId: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 609d0eb..610400d 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2525,6 +2525,9 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { case BuiltinType::WChar_U: Out << 'w'; break; + case BuiltinType::Char8: + Out << "Du"; + break; case BuiltinType::Char16: Out << "Ds"; break; diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 1039ae8..e72804b 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1918,6 +1918,7 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers, Out << "$$T"; break; + case BuiltinType::Char8: case BuiltinType::Float16: mangleArtificalTagType(TTK_Struct, "_Float16", {"__clang"}); break; diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp index 8adaef1..eb807f0 100644 --- a/clang/lib/AST/NSAPI.cpp +++ b/clang/lib/AST/NSAPI.cpp @@ -436,6 +436,7 @@ 
NSAPI::getNSNumberFactoryMethodKind(QualType T) const { case BuiltinType::Void: case BuiltinType::WChar_U: case BuiltinType::WChar_S: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Int128: diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index a2a6077..571dc2a 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -1763,6 +1763,12 @@ bool Type::isWideCharType() const { return false; } +bool Type::isChar8Type() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() == BuiltinType::Char8; + return false; +} + bool Type::isChar16Type() const { if (const auto *BT = dyn_cast<BuiltinType>(CanonicalType)) return BT->getKind() == BuiltinType::Char16; @@ -1785,6 +1791,7 @@ bool Type::isAnyCharacterType() const { case BuiltinType::Char_U: case BuiltinType::UChar: case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Char_S: @@ -2419,6 +2426,7 @@ bool Type::isPromotableIntegerType() const { case BuiltinType::UShort: case BuiltinType::WChar_S: case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: return true; @@ -2655,6 +2663,8 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { case WChar_S: case WChar_U: return Policy.MSWChar ?
"__wchar_t" : "wchar_t"; + case Char8: + return "char8_t"; case Char16: return "char16_t"; case Char32: diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 0ac50b3..57349b4 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -317,6 +317,8 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const { case BuiltinType::Char_U: case BuiltinType::Char_S: return TST_char; + case BuiltinType::Char8: + return TST_char8; case BuiltinType::Char16: return TST_char16; case BuiltinType::Char32: diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp index dfaed26..2043970 100644 --- a/clang/lib/Analysis/PrintfFormatString.cpp +++ b/clang/lib/Analysis/PrintfFormatString.cpp @@ -647,6 +647,7 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, case BuiltinType::Bool: case BuiltinType::WChar_U: case BuiltinType::WChar_S: + case BuiltinType::Char8: // FIXME: Treat like 'char'? case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::UInt128: diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 6b01332..025104c 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -115,14 +115,15 @@ namespace { KEYNOOPENCL = 0x02000, WCHARSUPPORT = 0x04000, HALFSUPPORT = 0x08000, - KEYCONCEPTS = 0x10000, - KEYOBJC2 = 0x20000, - KEYZVECTOR = 0x40000, - KEYCOROUTINES = 0x80000, - KEYMODULES = 0x100000, - KEYCXX2A = 0x200000, + CHAR8SUPPORT = 0x10000, + KEYCONCEPTS = 0x20000, + KEYOBJC2 = 0x40000, + KEYZVECTOR = 0x80000, + KEYCOROUTINES = 0x100000, + KEYMODULES = 0x200000, + KEYCXX2A = 0x400000, KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX2A, - KEYALL = (0x3fffff & ~KEYNOMS18 & + KEYALL = (0x7fffff & ~KEYNOMS18 & ~KEYNOOPENCL) // KEYNOMS18 and KEYNOOPENCL are used to exclude. 
}; @@ -151,6 +152,7 @@ static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, if (LangOpts.Bool && (Flags & BOOLSUPPORT)) return KS_Enabled; if (LangOpts.Half && (Flags & HALFSUPPORT)) return KS_Enabled; if (LangOpts.WChar && (Flags & WCHARSUPPORT)) return KS_Enabled; + if (LangOpts.Char8 && (Flags & CHAR8SUPPORT)) return KS_Enabled; if (LangOpts.AltiVec && (Flags & KEYALTIVEC)) return KS_Enabled; if (LangOpts.OpenCL && (Flags & KEYOPENCL)) return KS_Enabled; if (!LangOpts.CPlusPlus && (Flags & KEYNOCXX)) return KS_Enabled; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 474018c..1220972 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -665,6 +665,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::SChar: Encoding = llvm::dwarf::DW_ATE_signed_char; break; + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: Encoding = llvm::dwarf::DW_ATE_UTF; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index ccb6df9..ce1fdf9 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -437,6 +437,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::ULongLong: case BuiltinType::WChar_S: case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: ResultType = llvm::IntegerType::get(getLLVMContext(), diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index f92d7ec..0e35633 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2706,6 +2706,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::LongDouble: case BuiltinType::Float16: case BuiltinType::Float128: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Int128: @@ -3567,7 +3568,8 @@ void 
ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) { getContext().UnsignedInt128Ty, getContext().HalfTy, getContext().FloatTy, getContext().DoubleTy, getContext().LongDoubleTy, getContext().Float128Ty, - getContext().Char16Ty, getContext().Char32Ty + getContext().Char8Ty, getContext().Char16Ty, + getContext().Char32Ty }; for (const QualType &FundamentalType : FundamentalTypes) EmitFundamentalRTTIDescriptor(FundamentalType, DLLExport); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 331b4be..5e5dfde 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2687,6 +2687,9 @@ static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T, CmdArgs.push_back("-fno-signed-char"); } + if (Args.hasFlag(options::OPT_fchar8__t, options::OPT_fno_char8__t, false)) + CmdArgs.push_back("-fchar8_t"); + if (const Arg *A = Args.getLastArg(options::OPT_fshort_wchar, options::OPT_fno_short_wchar)) { if (A->getOption().matches(options::OPT_fshort_wchar)) { diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 10ac392..c63f012 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -57,6 +57,7 @@ bool FormatToken::isSimpleTypeSpecifier() const { case tok::kw_bool: case tok::kw___underlying_type: case tok::annot_typename: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_typeof: diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 44f29ba..a6dc167 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2362,6 +2362,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.ImplicitModules = !Args.hasArg(OPT_fno_implicit_modules); Opts.CharIsSigned = Opts.OpenCL || !Args.hasArg(OPT_fno_signed_char); Opts.WChar = Opts.CPlusPlus && 
!Args.hasArg(OPT_fno_wchar); + Opts.Char8 = Args.hasArg(OPT_fchar8__t); if (const Arg *A = Args.getLastArg(OPT_fwchar_type_EQ)) { Opts.WCharSize = llvm::StringSwitch(A->getValue()) .Case("char", 1) diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 192862d..8a87b9f 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -559,6 +559,10 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_experimental_concepts", "1"); if (LangOpts.CoroutinesTS) Builder.defineMacro("__cpp_coroutines", "201703L"); + + // Potential future breaking changes. + if (LangOpts.Char8) + Builder.defineMacro("__cpp_char8_t", "201803"); } static void InitializePredefinedMacros(const TargetInfo &TI, @@ -939,6 +943,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI, InlineWidthBits)); DEFINE_LOCK_FREE_MACRO(BOOL, Bool); DEFINE_LOCK_FREE_MACRO(CHAR, Char); + if (LangOpts.Char8) + DEFINE_LOCK_FREE_MACRO(CHAR8_T, Char); // Treat char8_t like char. 
DEFINE_LOCK_FREE_MACRO(CHAR16_T, Char16); DEFINE_LOCK_FREE_MACRO(CHAR32_T, Char32); DEFINE_LOCK_FREE_MACRO(WCHAR_T, WChar); diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp index ee1c950..ba536c7 100644 --- a/clang/lib/Index/USRGeneration.cpp +++ b/clang/lib/Index/USRGeneration.cpp @@ -650,6 +650,8 @@ void USRGenerator::VisitType(QualType T) { c = 'b'; break; case BuiltinType::UChar: c = 'c'; break; + case BuiltinType::Char8: + c = 'u'; break; // FIXME: Check this doesn't collide case BuiltinType::Char16: c = 'q'; break; case BuiltinType::Char32: diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index d843182..b1ed0e1 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -363,7 +363,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, NumBits = TI.getChar16Width(); else if (Literal.isUTF32()) NumBits = TI.getChar32Width(); - else + else // char or char8_t NumBits = TI.getCharWidth(); // Set the width. 
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index fc0ca61..e0948b3 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -3587,6 +3587,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, isInvalid = DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec, DiagID, Policy); break; + case tok::kw_char8_t: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char8, Loc, PrevSpec, + DiagID, Policy); + break; case tok::kw_char16_t: isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec, DiagID, Policy); @@ -4585,6 +4589,7 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const { case tok::kw_void: case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: @@ -4661,6 +4666,7 @@ bool Parser::isTypeSpecifierQualifier() { case tok::kw_void: case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: @@ -4817,6 +4823,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_void: case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index ca5d3bc..1cc9856 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1224,6 +1224,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::annot_decltype: case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_bool: diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 8e39adf..276dea1 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1962,6 +1962,9 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) { case tok::kw_wchar_t: DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, 
PrevSpec, DiagID, Policy); break; + case tok::kw_char8_t: + DS.SetTypeSpecType(DeclSpec::TST_char8, Loc, PrevSpec, DiagID, Policy); + break; case tok::kw_char16_t: DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec, DiagID, Policy); break; diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index ebd6f0f..17ff9f9 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -1052,6 +1052,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { case tok::kw_class: case tok::kw_typename: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw__Decimal32: @@ -1523,6 +1524,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_bool: @@ -1614,6 +1616,7 @@ bool Parser::isCXXDeclarationSpecifierAType() { // simple-type-specifier case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_bool: diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index 2fad5a1..26d6202 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -329,6 +329,7 @@ bool Declarator::isDeclarationOfFunction() const { case TST_auto_type: case TST_bool: case TST_char: + case TST_char8: case TST_char16: case TST_char32: case TST_class: @@ -499,6 +500,7 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T, case DeclSpec::TST_void: return "void"; case DeclSpec::TST_char: return "char"; case DeclSpec::TST_wchar: return Policy.MSWChar ? 
"__wchar_t" : "wchar_t"; + case DeclSpec::TST_char8: return "char8_t"; case DeclSpec::TST_char16: return "char16_t"; case DeclSpec::TST_char32: return "char32_t"; case DeclSpec::TST_int: return "int"; @@ -1202,7 +1204,9 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { StorageClassSpec == SCS_auto) S.Diag(StorageClassSpecLoc, diag::warn_auto_storage_class) << FixItHint::CreateRemoval(StorageClassSpecLoc); - if (TypeSpecType == TST_char16 || TypeSpecType == TST_char32) + if (TypeSpecType == TST_char8) + S.Diag(TSTLoc, diag::warn_cxx17_compat_unicode_type); + else if (TypeSpecType == TST_char16 || TypeSpecType == TST_char32) S.Diag(TSTLoc, diag::warn_cxx98_compat_unicode_type) << (TypeSpecType == TST_char16 ? "char16_t" : "char32_t"); if (Constexpr_specified) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 5867610..732853d 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -148,6 +148,9 @@ bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const { case tok::kw_decltype: return getLangOpts().CPlusPlus; + case tok::kw_char8_t: + return getLangOpts().Char8; + default: break; } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 2cf1622..4339c9a 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -13187,6 +13187,7 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) { ParamType->isSpecificBuiltinType(BuiltinType::LongDouble) || Context.hasSameType(ParamType, Context.CharTy) || Context.hasSameType(ParamType, Context.WideCharTy) || + Context.hasSameType(ParamType, Context.Char8Ty) || Context.hasSameType(ParamType, Context.Char16Ty) || Context.hasSameType(ParamType, Context.Char32Ty)) { } else if (const PointerType *Ptr = ParamType->getAs()) { @@ -13247,10 +13248,12 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) { } QualType InnerType = PointeeType.getUnqualifiedType(); - // Only const char *, const 
wchar_t*, const char16_t*, and const char32_t* - // are allowed as the first parameter to a two-parameter function + // Only const char *, const wchar_t*, const char8_t*, const char16_t*, and + // const char32_t* are allowed as the first parameter to a two-parameter + // function if (!(Context.hasSameType(InnerType, Context.CharTy) || Context.hasSameType(InnerType, Context.WideCharTy) || + Context.hasSameType(InnerType, Context.Char8Ty) || Context.hasSameType(InnerType, Context.Char16Ty) || Context.hasSameType(InnerType, Context.Char32Ty))) { Diag((*Param)->getSourceRange().getBegin(), diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index b1ecbfa..58e70a4 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1535,6 +1535,8 @@ Sema::ActOnStringLiteral(ArrayRef StringToks, Scope *UDLScope) { CharTy = Context.getWideCharType(); Kind = StringLiteral::Wide; } else if (Literal.isUTF8()) { + if (getLangOpts().Char8) + CharTy = Context.Char8Ty; Kind = StringLiteral::UTF8; } else if (Literal.isUTF16()) { CharTy = Context.Char16Ty; @@ -3094,6 +3096,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { QualType Ty; if (Literal.isWide()) Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++. + else if (Literal.isUTF8() && getLangOpts().Char8) + Ty = Context.Char8Ty; // u8'x' -> char8_t when it exists. else if (Literal.isUTF16()) Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11. 
else if (Literal.isUTF32()) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index e44eaa5..be33326 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -49,6 +49,8 @@ enum StringInitFailureKind { SIF_NarrowStringIntoWideChar, SIF_WideStringIntoChar, SIF_IncompatWideStringIntoWideChar, + SIF_UTF8StringIntoPlainChar, + SIF_PlainStringIntoUTF8Char, SIF_Other }; @@ -77,12 +79,21 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, Context.getCanonicalType(AT->getElementType()).getUnqualifiedType(); switch (SL->getKind()) { - case StringLiteral::Ascii: case StringLiteral::UTF8: + // char8_t array can be initialized with a UTF-8 string. + if (ElemTy->isChar8Type()) + return SIF_None; + LLVM_FALLTHROUGH; + case StringLiteral::Ascii: // char array can be initialized with a narrow string. // Only allow char x[] = "foo"; not char x[] = L"foo"; if (ElemTy->isCharType()) - return SIF_None; + return (SL->getKind() == StringLiteral::UTF8 && + Context.getLangOpts().Char8) + ? 
SIF_UTF8StringIntoPlainChar + : SIF_None; + if (ElemTy->isChar8Type()) + return SIF_PlainStringIntoUTF8Char; if (IsWideCharCompatible(ElemTy, Context)) return SIF_NarrowStringIntoWideChar; return SIF_Other; @@ -94,7 +105,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, case StringLiteral::UTF16: if (Context.typesAreCompatible(Context.Char16Ty, ElemTy)) return SIF_None; - if (ElemTy->isCharType()) + if (ElemTy->isCharType() || ElemTy->isChar8Type()) return SIF_WideStringIntoChar; if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; @@ -102,7 +113,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, case StringLiteral::UTF32: if (Context.typesAreCompatible(Context.Char32Ty, ElemTy)) return SIF_None; - if (ElemTy->isCharType()) + if (ElemTy->isCharType() || ElemTy->isChar8Type()) return SIF_WideStringIntoChar; if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; @@ -110,7 +121,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, case StringLiteral::Wide: if (Context.typesAreCompatible(Context.getWideCharType(), ElemTy)) return SIF_None; - if (ElemTy->isCharType()) + if (ElemTy->isCharType() || ElemTy->isChar8Type()) return SIF_WideStringIntoChar; if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; @@ -3185,6 +3196,8 @@ bool InitializationSequence::isAmbiguous() const { case FK_NarrowStringIntoWideCharArray: case FK_WideStringIntoCharArray: case FK_IncompatWideStringIntoWideChar: + case FK_PlainStringIntoUTF8Char: + case FK_UTF8StringIntoPlainChar: case FK_AddressOfOverloadFailed: // FIXME: Could do better case FK_NonConstLValueReferenceBindingToTemporary: case FK_NonConstLValueReferenceBindingToBitfield: @@ -5362,6 +5375,12 @@ void InitializationSequence::InitializeFrom(Sema &S, case SIF_IncompatWideStringIntoWideChar: SetFailed(FK_IncompatWideStringIntoWideChar); return; + case 
SIF_PlainStringIntoUTF8Char: + SetFailed(FK_PlainStringIntoUTF8Char); + return; + case SIF_UTF8StringIntoPlainChar: + SetFailed(FK_UTF8StringIntoPlainChar); + return; case SIF_Other: break; } @@ -7591,6 +7610,17 @@ bool InitializationSequence::Diagnose(Sema &S, S.Diag(Kind.getLocation(), diag::err_array_init_incompat_wide_string_into_wchar); break; + case FK_PlainStringIntoUTF8Char: + S.Diag(Kind.getLocation(), + diag::err_array_init_plain_string_into_char8_t); + S.Diag(Args.front()->getLocStart(), + diag::note_array_init_plain_string_into_char8_t) + << FixItHint::CreateInsertion(Args.front()->getLocStart(), "u8"); + break; + case FK_UTF8StringIntoPlainChar: + S.Diag(Kind.getLocation(), + diag::err_array_init_utf8_string_into_char); + break; case FK_ArrayTypeMismatch: case FK_NonConstantArrayInit: S.Diag(Kind.getLocation(), @@ -8000,6 +8030,14 @@ void InitializationSequence::dump(raw_ostream &OS) const { OS << "incompatible wide string into wide char array"; break; + case FK_PlainStringIntoUTF8Char: + OS << "plain string literal into char8_t array"; + break; + + case FK_UTF8StringIntoPlainChar: + OS << "u8 string literal into char array"; + break; + case FK_ArrayTypeMismatch: OS << "array type mismatch"; break; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 64cd52c..4b58988 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7707,6 +7707,8 @@ class BuiltinOperatorOverloadBuilder { ArithmeticTypes.push_back(S.Context.BoolTy); ArithmeticTypes.push_back(S.Context.CharTy); ArithmeticTypes.push_back(S.Context.WCharTy); + if (S.Context.getLangOpts().Char8) + ArithmeticTypes.push_back(S.Context.Char8Ty); ArithmeticTypes.push_back(S.Context.Char16Ty); ArithmeticTypes.push_back(S.Context.Char32Ty); ArithmeticTypes.push_back(S.Context.SignedCharTy); diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index d98176a..04fdfae 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ 
b/clang/lib/Sema/SemaTemplate.cpp @@ -6771,11 +6771,11 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg, Expr *E; if (T->isAnyCharacterType()) { - // This does not need to handle u8 character literals because those are - // of type char, and so can also be covered by an ASCII character literal. CharacterLiteral::CharacterKind Kind; if (T->isWideCharType()) Kind = CharacterLiteral::Wide; + else if (T->isChar8Type() && getLangOpts().Char8) + Kind = CharacterLiteral::UTF8; else if (T->isChar16Type()) Kind = CharacterLiteral::UTF16; else if (T->isChar32Type()) diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 7aa0e31..37a9c26 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -822,6 +822,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { case TST_void: case TST_char: case TST_wchar: + case TST_char8: case TST_char16: case TST_char32: case TST_int: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 0e71047..a6491e5 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1277,6 +1277,11 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { Result = Context.getUnsignedWCharType(); } break; + case DeclSpec::TST_char8: + assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified && + "Unknown TSS value"); + Result = Context.Char8Ty; + break; case DeclSpec::TST_char16: assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified && "Unknown TSS value"); diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index 535aacb..54cea92 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -100,6 +100,9 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) { case BuiltinType::NullPtr: ID = PREDEF_TYPE_NULLPTR_ID; break; + case BuiltinType::Char8: + ID = PREDEF_TYPE_CHAR8_ID; + break; case 
BuiltinType::Char16: ID = PREDEF_TYPE_CHAR16_ID; break; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a1de22b..4b0220ab 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6841,6 +6841,9 @@ QualType ASTReader::GetType(TypeID ID) { case PREDEF_TYPE_NULLPTR_ID: T = Context.NullPtrTy; break; + case PREDEF_TYPE_CHAR8_ID: + T = Context.Char8Ty; + break; case PREDEF_TYPE_CHAR16_ID: T = Context.Char16Ty; break; diff --git a/clang/test/CodeGenCXX/char8_t.cpp b/clang/test/CodeGenCXX/char8_t.cpp new file mode 100644 index 0000000..e4dba58 --- /dev/null +++ b/clang/test/CodeGenCXX/char8_t.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -std=c++17 -emit-llvm -fchar8_t -triple x86_64-linux %s -o - | FileCheck %s + +// CHECK: define void @_Z1fDu( +void f(char8_t c) {} + +// CHECK: define void @_Z1gIiEvDTplplcvT__ELA4_KDuELDu114EE +template<typename T> void g(decltype(T() + u8"foo" + u8'r')) {} +template void g<int>(const char8_t*); diff --git a/clang/test/Lexer/char8_t.cpp b/clang/test/Lexer/char8_t.cpp new file mode 100644 index 0000000..20f820e --- /dev/null +++ b/clang/test/Lexer/char8_t.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -std=c++2a -verify %s +// RUN: %clang_cc1 -std=c++2a -verify %s -fchar8_t + +#if defined(__cpp_char8_t) && __is_identifier(char8_t) +#error char8_t is an identifier under -fchar8_t +#endif + +#if !defined(__cpp_char8_t) && !__is_identifier(char8_t) +#error char8_t is a keyword under -fno-char8_t +#endif + +char8_t c8t; +#ifndef __cpp_char8_t +// expected-error@-2 {{unknown type}} +#else +// expected-no-diagnostics +#endif diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index a7d12e2..352f08e 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -6,6 +6,7 @@ // RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -fsized-deallocation -fconcepts-ts -DCONCEPTS_TS=1 -verify %s // RUN: %clang_cc1 -fno-rtti 
-fno-threadsafe-statics -verify %s -DNO_EXCEPTIONS -DNO_RTTI -DNO_THREADSAFE_STATICS -fsized-deallocation // RUN: %clang_cc1 -fcoroutines-ts -DNO_EXCEPTIONS -DCOROUTINES -verify -fsized-deallocation %s +// RUN: %clang_cc1 -fchar8_t -DNO_EXCEPTIONS -DCHAR8_T -verify -fsized-deallocation %s // expected-no-diagnostics @@ -242,3 +243,9 @@ #if defined(COROUTINES) ? check(coroutines, 201703L, 201703L, 201703L, 201703L) : check(coroutines, 0, 0, 0, 0) #error "wrong value for __cpp_coroutines" #endif + +// --- not-yet-standard features -- + +#if defined(CHAR8_T) ? check(char8_t, 201803, 201803, 201803, 201803) : check(char8_t, 0, 0, 0, 0) +#error "wrong value for __cpp_char8_t" +#endif diff --git a/clang/test/SemaCXX/char8_t.cpp b/clang/test/SemaCXX/char8_t.cpp new file mode 100644 index 0000000..5eb3d70 --- /dev/null +++ b/clang/test/SemaCXX/char8_t.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fchar8_t -std=c++2a -verify %s + +char8_t a = u8'a'; +char8_t b[] = u8"foo"; +char8_t c = 'a'; +char8_t d[] = "foo"; // expected-error {{initializing 'char8_t' array with plain string literal}} expected-note {{add 'u8' prefix}} + +char e = u8'a'; +char f[] = u8"foo"; // expected-error {{initialization of char array with UTF-8 string literal is not permitted by '-fchar8_t'}} +char g = 'a'; +char h[] = "foo"; + +void disambig() { + char8_t (a) = u8'x'; +} + +void operator""_a(char); +void operator""_a(const char*, decltype(sizeof(0))); + +void test_udl1() { + int &x = u8'a'_a; // expected-error {{no matching literal operator}} + float &y = u8"a"_a; // expected-error {{no matching literal operator}} +} + +int &operator""_a(char8_t); +float &operator""_a(const char8_t*, decltype(sizeof(0))); + +void test_udl2() { + int &x = u8'a'_a; + float &y = u8"a"_a; +} + +template<typename E, typename T> void check(T &&t) { + using Check = E; + using Check = T; +} +void check_deduction() { + check<char8_t>(u8'a'); + check<const char8_t(&)[5]>(u8"a\u1000"); +} + +static_assert(sizeof(char8_t) == 1); +static_assert(char8_t(-1) > 0); 
+static_assert(u8"\u0080"[0] > 0); -- 2.7.4