From d0d2772379bd89f1dce3c456520272678cf4b966 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Fri, 21 Oct 2022 14:33:26 +0200 Subject: [PATCH] [Clang] Implement P2513 Implement P2513 This change allows initializing an array of unsigned char or char from u8 string literals. This was done both to support legacy code and for compatibility with C where char8_t will be a typedef for unsigned char. This is backported to C++20 as per WG21 guidance. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D136449 --- clang/docs/ReleaseNotes.rst | 6 ++- clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 +- clang/lib/Frontend/InitPreprocessor.cpp | 2 +- clang/lib/Sema/SemaInit.cpp | 17 ++++++-- clang/test/Lexer/cxx-features.cpp | 4 +- clang/test/SemaCXX/char8_t.cpp | 54 +++++++++++++++++++++--- clang/test/SemaCXX/cxx2a-compat.cpp | 5 +-- clang/www/cxx_status.html | 2 +- 8 files changed, 72 insertions(+), 22 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 68cee53..49ef53f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -557,6 +557,8 @@ C++2b Feature Support - Support label at end of compound statement (`P2324 `_). - Implemented `P1169R4: static operator() `_. +- Implemented "char8_t Compatibility and Portability Fix" (`P2513R3 `_). + This change was applied to C++20 as a Defect Report. CUDA/HIP Language Changes in Clang ---------------------------------- @@ -654,8 +656,8 @@ libclang the behavior of ``QualType::getNonReferenceType`` for ``CXType``. - Introduced the new function ``clang_CXXMethod_isDeleted``, which queries whether the method is declared ``= delete``. 
-- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, - ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and +- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, + ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and ``clang_Cursor_getTemplateArgumentUnsignedValue`` now work on struct, class, and partial template specialization cursors in addition to function cursors. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index b1d4757..8cf7378 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6868,8 +6868,8 @@ def err_array_init_plain_string_into_char8_t : Error< def note_array_init_plain_string_into_char8_t : Note< "add 'u8' prefix to form a 'char8_t' string literal">; def err_array_init_utf8_string_into_char : Error< - "%select{|ISO C++20 does not permit }0initialization of char array with " - "UTF-8 string literal%select{ is not permitted by '-fchar8_t'|}0">; + "initialization of %select{|signed }0char array with " + "UTF-8 string literal is not permitted by %select{'-fchar8_t'|C++20}1">; def warn_cxx20_compat_utf8_string : Warning< "type of UTF-8 string literal will change from array of const char to " "array of const char8_t in C++20">, InGroup, DefaultIgnore; diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 2273fb1..96b93dc 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -705,7 +705,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_named_character_escapes", "202207L"); if (LangOpts.Char8) - Builder.defineMacro("__cpp_char8_t", "201811L"); + Builder.defineMacro("__cpp_char8_t", "202207L"); 
Builder.defineMacro("__cpp_impl_destroying_delete", "201806L"); // TS features. diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index db5580c..7ebf699 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -81,10 +81,20 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, const QualType ElemTy = Context.getCanonicalType(AT->getElementType()).getUnqualifiedType(); + auto IsCharOrUnsignedChar = [](const QualType &T) { + const BuiltinType *BT = dyn_cast<BuiltinType>(T.getTypePtr()); + return BT && BT->isCharType() && BT->getKind() != BuiltinType::SChar; + }; + switch (SL->getKind()) { case StringLiteral::UTF8: // char8_t array can be initialized with a UTF-8 string. - if (ElemTy->isChar8Type()) + // - C++20 [dcl.init.string] (DR) + // Additionally, an array of char or unsigned char may be initialized + // by a UTF-8 string literal. + if (ElemTy->isChar8Type() || + (Context.getLangOpts().Char8 && + IsCharOrUnsignedChar(ElemTy.getCanonicalType()))) return SIF_None; [[fallthrough]]; case StringLiteral::Ordinary: @@ -9114,9 +9124,8 @@ bool InitializationSequence::Diagnose(Sema &S, << FixItHint::CreateInsertion(Args.front()->getBeginLoc(), "u8"); break; case FK_UTF8StringIntoPlainChar: - S.Diag(Kind.getLocation(), - diag::err_array_init_utf8_string_into_char) - << S.getLangOpts().CPlusPlus20; + S.Diag(Kind.getLocation(), diag::err_array_init_utf8_string_into_char) + << DestType->isSignedIntegerType() << S.getLangOpts().CPlusPlus20; break; case FK_ArrayTypeMismatch: case FK_NonConstantArrayInit: diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index ee52017..c12f2d2 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -66,9 +66,9 @@ #error "wrong value for __cpp_aggregate_paren_init" #endif -#if defined(CHAR8_T) ? check(char8_t, 201811, 201811, 201811, 201811, 201811, 201811) : \ +#if defined(CHAR8_T) ? 
check(char8_t, 202207, 202207, 202207, 202207, 202207, 202207) : \ defined(NO_CHAR8_T) ? check(char8_t, 0, 0, 0, 0, 0, 0) : \ - check(char8_t, 0, 0, 0, 0, 201811, 201811) + check(char8_t, 0, 0, 0, 0, 202207, 202207) #error "wrong value for __cpp_char8_t" #endif diff --git a/clang/test/SemaCXX/char8_t.cpp b/clang/test/SemaCXX/char8_t.cpp index f60a66d..5ffa550 100644 --- a/clang/test/SemaCXX/char8_t.cpp +++ b/clang/test/SemaCXX/char8_t.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fchar8_t -std=c++17 -verify %s -// RUN: %clang_cc1 -std=c++2a -verify %s +// RUN: %clang_cc1 -std=c++2a -verify=expected %s +// RUN: %clang_cc1 -std=c++2a -verify=expected -fno-signed-char %s + char8_t a = u8'a'; char8_t b[] = u8"foo"; @@ -7,15 +9,35 @@ char8_t c = 'a'; char8_t d[] = "foo"; // expected-error {{initializing 'char8_t' array with plain string literal}} expected-note {{add 'u8' prefix}} char e = u8'a'; -char f[] = u8"foo"; -#if __cplusplus <= 201703L -// expected-error@-2 {{initialization of char array with UTF-8 string literal is not permitted by '-fchar8_t'}} -#else -// expected-error@-4 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}} -#endif char g = 'a'; char h[] = "foo"; +unsigned char i[] = u8"foo"; +unsigned char j[] = { u8"foo" }; +char k[] = u8"foo"; +char l[] = { u8"foo" }; +signed char m[] = u8"foo"; // expected-error {{initialization of char array with UTF-8 string literal is not permitted}} +signed char n[] = { u8"foo" }; // expected-error {{cannot initialize an array element of type 'signed char' with an lvalue of type 'const char8_t[4]'}} + +const unsigned char* uptr = u8"foo"; // expected-error {{cannot initialize}} +const signed char* sptr = u8"foo"; // expected-error {{cannot initialize}} +const char* ptr = u8"foo"; // expected-error {{cannot initialize}} + +template <typename T> +void check_values() { + constexpr T c[] = {0, static_cast<T>(0xFF), 0x42}; + constexpr T a[] = u8"\x00\xFF\x42"; + + static_assert(a[0] == c[0]); + static_assert(a[1] 
== c[1]); + static_assert(a[2] == c[2]); +} + +void call_check_values() { + check_values<char>(); + check_values<unsigned char>(); +} + void disambig() { char8_t (a) = u8'x'; } @@ -48,3 +70,21 @@ void check_deduction() { static_assert(sizeof(char8_t) == 1); static_assert(char8_t(-1) > 0); static_assert(u8"\u0080"[0] > 0); + +namespace ambiguous { + +struct A { + char8_t s[10]; +}; +struct B { + char s[10]; +}; + +void f(A); // expected-note {{candidate}} +void f(B); // expected-note {{candidate}} + +int test() { + f({u8"foo"}); // expected-error {{call to 'f' is ambiguous}} +} + +} diff --git a/clang/test/SemaCXX/cxx2a-compat.cpp b/clang/test/SemaCXX/cxx2a-compat.cpp index 0e9eafd..4f20cf5 100644 --- a/clang/test/SemaCXX/cxx2a-compat.cpp +++ b/clang/test/SemaCXX/cxx2a-compat.cpp @@ -33,9 +33,8 @@ string u8str = u8"test" u8"test"; // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}} // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}} #else -// expected-error@-8 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}} -// expected-error@-8 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}} -// expected-error@-8 {{no viable conversion from 'const char8_t[9]' to 'string'}} +// expected-error@-7 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}} +// expected-error@-7 {{no viable conversion from 'const char8_t[9]' to 'string'}} #endif template diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index cbea6bb..d46e7bb 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1502,7 +1502,7 @@ C++20, informally referred to as C++2b.

char8_t Compatibility and Portability Fix P2513R3 - No + Clang 16 Relax requirements on wchar_t to match existing practices -- 2.7.4