C: Implement C2X N2653 char8_t and UTF-8 string literal changes
author Tom Honermann <tom@honermann.net>
Tue, 2 Aug 2022 18:36:01 +0000 (14:36 -0400)
committer Joseph Myers <joseph@codesourcery.com>
Mon, 8 Aug 2022 19:50:38 +0000 (19:50 +0000)
This patch implements the core language and compiler-dependent library
changes adopted for C2X via WG14 N2653.  The changes include:
- Change of type for UTF-8 string literals from array of const char to
  array of const char8_t (unsigned char).
- A new atomic_char8_t typedef.
- A new ATOMIC_CHAR8_T_LOCK_FREE macro defined in terms of the existing
  __GCC_ATOMIC_CHAR8_T_LOCK_FREE predefined macro.

gcc/ChangeLog:

* ginclude/stdatomic.h (atomic_char8_t,
ATOMIC_CHAR8_T_LOCK_FREE): New typedef and macro.

gcc/c/ChangeLog:

* c-parser.cc (c_parser_string_literal): Use char8_t as the type
of CPP_UTF8STRING when char8_t support is enabled.
* c-typeck.cc (digest_init): Allow initialization of an array
of character type by a string literal with type array of
char8_t.

gcc/c-family/ChangeLog:

* c-lex.cc (lex_string, lex_charconst): Use char8_t as the type
of CPP_UTF8CHAR and CPP_UTF8STRING when char8_t support is
enabled.
* c-opts.cc (c_common_post_options): Set flag_char8_t if
targeting C2x.

gcc/testsuite/ChangeLog:
* gcc.dg/atomic/c2x-stdatomic-lockfree-char8_t.c: New test.
* gcc.dg/atomic/gnu2x-stdatomic-lockfree-char8_t.c: New test.
* gcc.dg/c11-utf8str-type.c: New test.
* gcc.dg/c17-utf8str-type.c: New test.
* gcc.dg/c2x-utf8str-type.c: New test.
* gcc.dg/c2x-utf8str.c: New test.
* gcc.dg/gnu2x-utf8str-type.c: New test.
* gcc.dg/gnu2x-utf8str.c: New test.

13 files changed:
gcc/c-family/c-lex.cc
gcc/c-family/c-opts.cc
gcc/c/c-parser.cc
gcc/c/c-typeck.cc
gcc/ginclude/stdatomic.h
gcc/testsuite/gcc.dg/atomic/c2x-stdatomic-lockfree-char8_t.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/atomic/gnu2x-stdatomic-lockfree-char8_t.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/c11-utf8str-type.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/c17-utf8str-type.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/c2x-utf8str-type.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/c2x-utf8str.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/gnu2x-utf8str-type.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/gnu2x-utf8str.c [new file with mode: 0644]

index 8bfa4f4..0b6f94e 100644 (file)
@@ -1352,7 +1352,14 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
        default:
        case CPP_STRING:
        case CPP_UTF8STRING:
-         value = build_string (1, "");
+         if (type == CPP_UTF8STRING && flag_char8_t)
+           {
+             value = build_string (TYPE_PRECISION (char8_type_node)
+                                   / TYPE_PRECISION (char_type_node),
+                                   "");  /* char8_t is 8 bits */
+           }
+         else
+           value = build_string (1, "");
          break;
        case CPP_STRING16:
          value = build_string (TYPE_PRECISION (char16_type_node)
@@ -1425,9 +1432,7 @@ lex_charconst (const cpp_token *token)
     type = char16_type_node;
   else if (token->type == CPP_UTF8CHAR)
     {
-      if (!c_dialect_cxx ())
-       type = unsigned_char_type_node;
-      else if (flag_char8_t)
+      if (flag_char8_t)
         type = char8_type_node;
       else
         type = char_type_node;
index 4e14636..1cf119a 100644 (file)
@@ -1059,9 +1059,9 @@ c_common_post_options (const char **pfilename)
   if (flag_sized_deallocation == -1)
     flag_sized_deallocation = (cxx_dialect >= cxx14);
 
-  /* char8_t support is new in C++20.  */
+  /* char8_t support is implicitly enabled in C++20 and C2X.  */
   if (flag_char8_t == -1)
-    flag_char8_t = (cxx_dialect >= cxx20);
+    flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x;
 
   if (flag_extern_tls_init)
     {
index 92049d1..fa93959 100644 (file)
@@ -7447,7 +7447,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok)
        default:
        case CPP_STRING:
        case CPP_UTF8STRING:
-         value = build_string (1, "");
+         if (type == CPP_UTF8STRING && flag_char8_t)
+           {
+             value = build_string (TYPE_PRECISION (char8_type_node)
+                                   / TYPE_PRECISION (char_type_node),
+                                   "");  /* char8_t is 8 bits */
+           }
+         else
+           value = build_string (1, "");
          break;
        case CPP_STRING16:
          value = build_string (TYPE_PRECISION (char16_type_node)
@@ -7472,9 +7479,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok)
     {
     default:
     case CPP_STRING:
-    case CPP_UTF8STRING:
       TREE_TYPE (value) = char_array_type_node;
       break;
+    case CPP_UTF8STRING:
+      if (flag_char8_t)
+       TREE_TYPE (value) = char8_array_type_node;
+      else
+       TREE_TYPE (value) = char_array_type_node;
+      break;
     case CPP_STRING16:
       TREE_TYPE (value) = char16_array_type_node;
       break;
index 8514488..d37de2a 100644 (file)
@@ -8056,7 +8056,7 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype,
 
          if (char_array)
            {
-             if (typ2 != char_type_node)
+             if (typ2 != char_type_node && typ2 != char8_type_node)
                incompat_string_cst = true;
            }
          else if (!comptypes (typ1, typ2))
index bfcfdf6..9f2475b 100644 (file)
@@ -49,6 +49,9 @@ typedef _Atomic long atomic_long;
 typedef _Atomic unsigned long atomic_ulong;
 typedef _Atomic long long atomic_llong;
 typedef _Atomic unsigned long long atomic_ullong;
+#ifdef __CHAR8_TYPE__
+typedef _Atomic __CHAR8_TYPE__ atomic_char8_t;
+#endif
 typedef _Atomic __CHAR16_TYPE__ atomic_char16_t;
 typedef _Atomic __CHAR32_TYPE__ atomic_char32_t;
 typedef _Atomic __WCHAR_TYPE__ atomic_wchar_t;
@@ -97,6 +100,9 @@ extern void atomic_signal_fence (memory_order);
 
 #define ATOMIC_BOOL_LOCK_FREE          __GCC_ATOMIC_BOOL_LOCK_FREE
 #define ATOMIC_CHAR_LOCK_FREE          __GCC_ATOMIC_CHAR_LOCK_FREE
+#ifdef __GCC_ATOMIC_CHAR8_T_LOCK_FREE
+#define ATOMIC_CHAR8_T_LOCK_FREE       __GCC_ATOMIC_CHAR8_T_LOCK_FREE
+#endif
 #define ATOMIC_CHAR16_T_LOCK_FREE      __GCC_ATOMIC_CHAR16_T_LOCK_FREE
 #define ATOMIC_CHAR32_T_LOCK_FREE      __GCC_ATOMIC_CHAR32_T_LOCK_FREE
 #define ATOMIC_WCHAR_T_LOCK_FREE       __GCC_ATOMIC_WCHAR_T_LOCK_FREE
diff --git a/gcc/testsuite/gcc.dg/atomic/c2x-stdatomic-lockfree-char8_t.c b/gcc/testsuite/gcc.dg/atomic/c2x-stdatomic-lockfree-char8_t.c
new file mode 100644 (file)
index 0000000..1b692f5
--- /dev/null
@@ -0,0 +1,42 @@
+/* Test atomic_is_lock_free for char8_t.  */
+/* { dg-do run } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+
+#include <stdatomic.h>
+#include <stdint.h>
+
+extern void abort (void);
+
+_Atomic __CHAR8_TYPE__ ac8a;
+atomic_char8_t ac8t;
+
+#define CHECK_TYPE(MACRO, V1, V2)              \
+  do                                           \
+    {                                          \
+      int r1 = MACRO;                          \
+      int r2 = atomic_is_lock_free (&V1);      \
+      int r3 = atomic_is_lock_free (&V2);      \
+      if (r1 != 0 && r1 != 1 && r1 != 2)       \
+       abort ();                               \
+      if (r2 != 0 && r2 != 1)                  \
+       abort ();                               \
+      if (r3 != 0 && r3 != 1)                  \
+       abort ();                               \
+      if (r1 == 2 && r2 != 1)                  \
+       abort ();                               \
+      if (r1 == 2 && r3 != 1)                  \
+       abort ();                               \
+      if (r1 == 0 && r2 != 0)                  \
+       abort ();                               \
+      if (r1 == 0 && r3 != 0)                  \
+       abort ();                               \
+    }                                          \
+  while (0)
+
+int
+main ()
+{
+  CHECK_TYPE (ATOMIC_CHAR8_T_LOCK_FREE, ac8a, ac8t);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/atomic/gnu2x-stdatomic-lockfree-char8_t.c b/gcc/testsuite/gcc.dg/atomic/gnu2x-stdatomic-lockfree-char8_t.c
new file mode 100644 (file)
index 0000000..27a3cfe
--- /dev/null
@@ -0,0 +1,5 @@
+/* Test atomic_is_lock_free for char8_t with -std=gnu2x.  */
+/* { dg-do run } */
+/* { dg-options "-std=gnu2x -pedantic-errors" } */
+
+#include "c2x-stdatomic-lockfree-char8_t.c"
diff --git a/gcc/testsuite/gcc.dg/c11-utf8str-type.c b/gcc/testsuite/gcc.dg/c11-utf8str-type.c
new file mode 100644 (file)
index 0000000..8be9abb
--- /dev/null
@@ -0,0 +1,6 @@
+/* Test C11 UTF-8 string literal type.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c11" } */
+
+_Static_assert (_Generic (u8"text", char*: 1, default: 2) == 1, "UTF-8 string literals have an unexpected type");
+_Static_assert (_Generic (u8"x"[0], char:  1, default: 2) == 1, "UTF-8 string literal elements have an unexpected type");
diff --git a/gcc/testsuite/gcc.dg/c17-utf8str-type.c b/gcc/testsuite/gcc.dg/c17-utf8str-type.c
new file mode 100644 (file)
index 0000000..515c6db
--- /dev/null
@@ -0,0 +1,6 @@
+/* Test C17 UTF-8 string literal type.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c17" } */
+
+_Static_assert (_Generic (u8"text", char*: 1, default: 2) == 1, "UTF-8 string literals have an unexpected type");
+_Static_assert (_Generic (u8"x"[0], char:  1, default: 2) == 1, "UTF-8 string literal elements have an unexpected type");
diff --git a/gcc/testsuite/gcc.dg/c2x-utf8str-type.c b/gcc/testsuite/gcc.dg/c2x-utf8str-type.c
new file mode 100644 (file)
index 0000000..ebdde97
--- /dev/null
@@ -0,0 +1,6 @@
+/* Test C2X UTF-8 string literal type.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c2x" } */
+
+_Static_assert (_Generic (u8"text", unsigned char*: 1, default: 2) == 1, "UTF-8 string literals have an unexpected type");
+_Static_assert (_Generic (u8"x"[0], unsigned char:  1, default: 2) == 1, "UTF-8 string literal elements have an unexpected type");
diff --git a/gcc/testsuite/gcc.dg/c2x-utf8str.c b/gcc/testsuite/gcc.dg/c2x-utf8str.c
new file mode 100644 (file)
index 0000000..2e4c392
--- /dev/null
@@ -0,0 +1,34 @@
+/* Test initialization by UTF-8 string literal in C2X.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=c2x" } */
+
+typedef __CHAR8_TYPE__  char8_t;
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+typedef __WCHAR_TYPE__  wchar_t;
+
+/* Test that char, signed char, unsigned char, and char8_t arrays can be
+   initialized by a UTF-8 string literal.  */
+const char cbuf1[] = u8"text";
+const char cbuf2[] = { u8"text" };
+const signed char scbuf1[] = u8"text";
+const signed char scbuf2[] = { u8"text" };
+const unsigned char ucbuf1[] = u8"text";
+const unsigned char ucbuf2[] = { u8"text" };
+const char8_t c8buf1[] = u8"text";
+const char8_t c8buf2[] = { u8"text" };
+
+/* Test that a diagnostic is issued for attempted initialization of
+   other character types by a UTF-8 string literal.  */
+const char16_t c16buf1[] = u8"text";           /* { dg-error "from a string literal with type array of .unsigned char." } */
+const char16_t c16buf2[] = { u8"text" };       /* { dg-error "from a string literal with type array of .unsigned char." } */
+const char32_t c32buf1[] = u8"text";           /* { dg-error "from a string literal with type array of .unsigned char." } */
+const char32_t c32buf2[] = { u8"text" };       /* { dg-error "from a string literal with type array of .unsigned char." } */
+const wchar_t wbuf1[] = u8"text";              /* { dg-error "from a string literal with type array of .unsigned char." } */
+const wchar_t wbuf2[] = { u8"text" };          /* { dg-error "from a string literal with type array of .unsigned char." } */
+
+/* Test that char8_t arrays can be initialized by an ordinary string
+   literal.  */
+const char8_t c8buf3[] = "text";
+const char8_t c8buf4[] = { "text" };
diff --git a/gcc/testsuite/gcc.dg/gnu2x-utf8str-type.c b/gcc/testsuite/gcc.dg/gnu2x-utf8str-type.c
new file mode 100644 (file)
index 0000000..efe16ff
--- /dev/null
@@ -0,0 +1,5 @@
+/* Test C2X UTF-8 string literal type with -std=gnu2x.  */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu2x" } */
+
+#include "c2x-utf8str-type.c"
diff --git a/gcc/testsuite/gcc.dg/gnu2x-utf8str.c b/gcc/testsuite/gcc.dg/gnu2x-utf8str.c
new file mode 100644 (file)
index 0000000..f3719ea
--- /dev/null
@@ -0,0 +1,34 @@
+/* Test initialization by UTF-8 string literal in C2X with -std=gnu2x.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu2x" } */
+
+typedef __CHAR8_TYPE__  char8_t;
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+typedef __WCHAR_TYPE__  wchar_t;
+
+/* Test that char, signed char, unsigned char, and char8_t arrays can be
+   initialized by a UTF-8 string literal.  */
+const char cbuf1[] = u8"text";
+const char cbuf2[] = { u8"text" };
+const signed char scbuf1[] = u8"text";
+const signed char scbuf2[] = { u8"text" };
+const unsigned char ucbuf1[] = u8"text";
+const unsigned char ucbuf2[] = { u8"text" };
+const char8_t c8buf1[] = u8"text";
+const char8_t c8buf2[] = { u8"text" };
+
+/* Test that a diagnostic is issued for attempted initialization of
+   other character types by a UTF-8 string literal.  */
+const char16_t c16buf1[] = u8"text";           /* { dg-error "from a string literal with type array of .unsigned char." } */
+const char16_t c16buf2[] = { u8"text" };       /* { dg-error "from a string literal with type array of .unsigned char." } */
+const char32_t c32buf1[] = u8"text";           /* { dg-error "from a string literal with type array of .unsigned char." } */
+const char32_t c32buf2[] = { u8"text" };       /* { dg-error "from a string literal with type array of .unsigned char." } */
+const wchar_t wbuf1[] = u8"text";              /* { dg-error "from a string literal with type array of .unsigned char." } */
+const wchar_t wbuf2[] = { u8"text" };          /* { dg-error "from a string literal with type array of .unsigned char." } */
+
+/* Test that char8_t arrays can be initialized by an ordinary string
+   literal.  */
+const char8_t c8buf3[] = "text";
+const char8_t c8buf4[] = { "text" };