case OPT_finput_charset_:
cpp_opts->input_charset = arg;
+ cpp_opts->cpp_input_charset_explicit = 1;
break;
case OPT_ftemplate_depth_:
lang_hooks.preprocess_options (parse_in);
cpp_post_options (parse_in);
init_global_opts_from_cpp (&global_options, cpp_get_options (parse_in));
+ /* For C++23 and explicit -finput-charset=UTF-8, turn on -Winvalid-utf8
+ by default and make it a pedwarn unless -Wno-invalid-utf8. */
+ if (cxx_dialect >= cxx23
+ && cpp_opts->cpp_input_charset_explicit
+ && strcmp (cpp_opts->input_charset, "UTF-8") == 0
+ && (cpp_opts->cpp_warn_invalid_utf8
+ || !global_options_set.x_warn_invalid_utf8))
+ {
+ global_options.x_warn_invalid_utf8 = 1;
+ cpp_opts->cpp_warn_invalid_utf8 = cpp_opts->cpp_pedantic ? 2 : 1;
+ }
/* Let diagnostics infrastructure know how to convert input files the same
way libcpp will do it, namely using the configured input charset and
C ObjC C++ ObjC++ CPP(warn_invalid_pch) CppReason(CPP_W_INVALID_PCH) Var(cpp_warn_invalid_pch) Init(0) Warning
Warn about PCH files that are found but not used.
+Winvalid-utf8
+C ObjC C++ ObjC++ CPP(cpp_warn_invalid_utf8) CppReason(CPP_W_INVALID_UTF8) Var(warn_invalid_utf8) Init(0) Warning
+Warn about invalid UTF-8 characters.
+
Wjump-misses-init
C ObjC Var(warn_jump_misses_init) Warning LangEnabledby(C ObjC,Wc++-compat)
Warn when a jump misses a variable initialization.
-Winfinite-recursion @gol
-Winit-self -Winline -Wno-int-conversion -Wint-in-bool-context @gol
-Wno-int-to-pointer-cast -Wno-invalid-memory-model @gol
--Winvalid-pch -Wjump-misses-init -Wlarger-than=@var{byte-size} @gol
--Wlogical-not-parentheses -Wlogical-op -Wlong-long @gol
--Wno-lto-type-mismatch -Wmain -Wmaybe-uninitialized @gol
+-Winvalid-pch -Winvalid-utf8 -Wjump-misses-init @gol
+-Wlarger-than=@var{byte-size} -Wlogical-not-parentheses -Wlogical-op @gol
+-Wlong-long -Wno-lto-type-mismatch -Wmain -Wmaybe-uninitialized @gol
-Wmemset-elt-size -Wmemset-transposed-args @gol
-Wmisleading-indentation -Wmissing-attributes -Wmissing-braces @gol
-Wmissing-field-initializers -Wmissing-format-attribute @gol
Warn if a precompiled header (@pxref{Precompiled Headers}) is found in
the search path but cannot be used.
+@item -Winvalid-utf8
+@opindex Winvalid-utf8
+@opindex Wno-invalid-utf8
+Warn if an invalid UTF-8 character is found.
+This warning is on by default for C++23 if @option{-finput-charset=UTF-8}
+is used and is turned into an error with @option{-pedantic-errors}.
+
@item -Wlong-long
@opindex Wlong-long
@opindex Wno-long-long
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+
+// a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" }
+// a\80a { dg-warning "invalid UTF-8 character <80>" }
+// a¿a { dg-warning "invalid UTF-8 character <bf>" }
+// aÀa { dg-warning "invalid UTF-8 character <c0>" }
+// aÁa { dg-warning "invalid UTF-8 character <c1>" }
+// aõa { dg-warning "invalid UTF-8 character <f5>" }
+// aÿa { dg-warning "invalid UTF-8 character <ff>" }
+// aÂa { dg-warning "invalid UTF-8 character <c2>" }
+// aàa { dg-warning "invalid UTF-8 character <e0>" }
+// aà\80¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+// aà\9f\80a { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+// aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" }
+// aì\80a { dg-warning "invalid UTF-8 character <ec><80>" }
+// aa { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+// að\80\80\80a { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+// að\8f¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+// aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+// aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+// { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+/* a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" } */
+/* a\80a { dg-warning "invalid UTF-8 character <80>" } */
+/* a¿a { dg-warning "invalid UTF-8 character <bf>" } */
+/* aÀa { dg-warning "invalid UTF-8 character <c0>" } */
+/* aÁa { dg-warning "invalid UTF-8 character <c1>" } */
+/* aõa { dg-warning "invalid UTF-8 character <f5>" } */
+/* aÿa { dg-warning "invalid UTF-8 character <ff>" } */
+/* aÂa { dg-warning "invalid UTF-8 character <c2>" } */
+/* aàa { dg-warning "invalid UTF-8 character <e0>" } */
+/* aà\80¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" } */
+/* aà\9f\80a { dg-warning "invalid UTF-8 character <e0><9f><80>" } */
+/* aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" } */
+/* aì\80a { dg-warning "invalid UTF-8 character <ec><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <ed><a0><80>" } */
+/* að\80\80\80a { dg-warning "invalid UTF-8 character <f0><80><80><80>" } */
+/* að\8f¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" } */
+/* { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 } */
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target { c || c++11 } } }
+// { dg-require-effective-target wchar }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+// { dg-additional-options "-std=gnu99" { target c } }
+
+#ifndef __cplusplus
+#include <wchar.h>
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+char32_t a = U'\80'; // { dg-warning "invalid UTF-8 character <80>" }
+char32_t b = U'¿'; // { dg-warning "invalid UTF-8 character <bf>" }
+char32_t c = U'À'; // { dg-warning "invalid UTF-8 character <c0>" }
+char32_t d = U'Á'; // { dg-warning "invalid UTF-8 character <c1>" }
+char32_t e = U'õ'; // { dg-warning "invalid UTF-8 character <f5>" }
+char32_t f = U'ÿ'; // { dg-warning "invalid UTF-8 character <ff>" }
+char32_t g = U'Â'; // { dg-warning "invalid UTF-8 character <c2>" }
+char32_t h = U'à'; // { dg-warning "invalid UTF-8 character <e0>" }
+char32_t i = U'à\80¿'; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+char32_t j = U'à\9f\80'; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+char32_t k = U'à¿'; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+char32_t l = U'ì\80'; // { dg-warning "invalid UTF-8 character <ec><80>" }
+char32_t m = U''; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+char32_t n = U'ð\80\80\80'; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+char32_t o = U'ð\8f¿¿'; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+char32_t p = U''; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+char32_t q = U''; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char32_t *A = U"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+const char32_t *B = U"\80"; // { dg-warning "invalid UTF-8 character <80>" }
+const char32_t *C = U"¿"; // { dg-warning "invalid UTF-8 character <bf>" }
+const char32_t *D = U"À"; // { dg-warning "invalid UTF-8 character <c0>" }
+const char32_t *E = U"Á"; // { dg-warning "invalid UTF-8 character <c1>" }
+const char32_t *F = U"õ"; // { dg-warning "invalid UTF-8 character <f5>" }
+const char32_t *G = U"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" }
+const char32_t *H = U"Â"; // { dg-warning "invalid UTF-8 character <c2>" }
+const char32_t *I = U"à"; // { dg-warning "invalid UTF-8 character <e0>" }
+const char32_t *J = U"à\80¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char32_t *K = U"à\9f\80"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char32_t *L = U"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char32_t *M = U"ì\80"; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char32_t *N = U""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char32_t *O = U"ð\80\80\80"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char32_t *P = U"ð\8f¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char32_t *Q = U""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char32_t *R = U""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char32_t *A1 = UR"(\80߿ࠀ𐀀)"; // { dg-bogus "invalid UTF-8 character" }
+const char32_t *B1 = UR"(\80)"; // { dg-warning "invalid UTF-8 character <80>" }
+const char32_t *C1 = UR"(¿)"; // { dg-warning "invalid UTF-8 character <bf>" }
+const char32_t *D1 = UR"(À)"; // { dg-warning "invalid UTF-8 character <c0>" }
+const char32_t *E1 = UR"(Á)"; // { dg-warning "invalid UTF-8 character <c1>" }
+const char32_t *F1 = UR"(õ)"; // { dg-warning "invalid UTF-8 character <f5>" }
+const char32_t *G1 = UR"(ÿ)"; // { dg-warning "invalid UTF-8 character <ff>" }
+const char32_t *H1 = UR"(Â)"; // { dg-warning "invalid UTF-8 character <c2>" }
+const char32_t *I1 = UR"(à)"; // { dg-warning "invalid UTF-8 character <e0>" }
+const char32_t *J1 = UR"(à\80¿)"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char32_t *K1 = UR"(à\9f\80)"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char32_t *L1 = UR"(à¿)"; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char32_t *M1 = UR"(ì\80)"; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char32_t *N1 = UR"()"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char32_t *O1 = UR"(ð\80\80\80)"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char32_t *P1 = UR"(ð\8f¿¿)"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char32_t *Q1 = UR"()"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char32_t *R1 = UR"()"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char *A2 = u8"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+const char *B2 = u8"\80"; // { dg-warning "invalid UTF-8 character <80>" }
+const char *C2 = u8"¿"; // { dg-warning "invalid UTF-8 character <bf>" }
+const char *D2 = u8"À"; // { dg-warning "invalid UTF-8 character <c0>" }
+const char *E2 = u8"Á"; // { dg-warning "invalid UTF-8 character <c1>" }
+const char *F2 = u8"õ"; // { dg-warning "invalid UTF-8 character <f5>" }
+const char *G2 = u8"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" }
+const char *H2 = u8"Â"; // { dg-warning "invalid UTF-8 character <c2>" }
+const char *I2 = u8"à"; // { dg-warning "invalid UTF-8 character <e0>" }
+const char *J2 = u8"à\80¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char *K2 = u8"à\9f\80"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char *L2 = u8"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char *M2 = u8"ì\80"; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char *N2 = u8""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char *O2 = u8"ð\80\80\80"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char *P2 = u8"ð\8f¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char *Q2 = u8""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char *R2 = u8""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+
+#define I(x)
+I(\80߿ࠀ𐀀) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
+I(\80) // { dg-warning "invalid UTF-8 character <80>" }
+I(¿) // { dg-warning "invalid UTF-8 character <bf>" }
+I(À) // { dg-warning "invalid UTF-8 character <c0>" }
+I(Á) // { dg-warning "invalid UTF-8 character <c1>" }
+I(õ) // { dg-warning "invalid UTF-8 character <f5>" }
+I(ÿ) // { dg-warning "invalid UTF-8 character <ff>" }
+I(Â) // { dg-warning "invalid UTF-8 character <c2>" }
+I(à) // { dg-warning "invalid UTF-8 character <e0>" }
+I(à\80¿) // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+I(à\9f\80) // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+I(à¿) // { dg-warning "invalid UTF-8 character <e0><bf>" }
+I(ì\80) // { dg-warning "invalid UTF-8 character <ec><80>" }
+I() // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+I(ð\80\80\80) // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+I(ð\8f¿¿) // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+I() // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c } }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
+I() // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c } }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8" }
+
+// a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" }
+// a\80a { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+// a¿a { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+// aÀa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+// aÁa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+// aõa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+// aÿa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+// aÂa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+// aàa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+// aà\80¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+// aà\9f\80a { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+// aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+// aì\80a { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+// að\80\80\80a { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+// að\8f¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+/* a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" } */
+/* a\80a { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } } */
+/* a¿a { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } } */
+/* aÀa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } } */
+/* aÁa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } } */
+/* aõa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } } */
+/* aÿa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } } */
+/* aÂa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } } */
+/* aàa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } } */
+/* aà\80¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
+/* aà\9f\80a { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
+/* aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
+/* aì\80a { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
+/* að\80\80\80a { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
+/* að\8f¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
+/* { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic" }
+
+#define I(x)
+I(\80߿ࠀ𐀀) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
+I(\80) // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+I(¿) // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+I(À) // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+I(Á) // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+I(õ) // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+I(ÿ) // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+I(Â) // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+I(à) // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+I(à\80¿) // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+I(à\9f\80) // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+I(à¿) // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+I(ì\80) // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+I(ð\80\80\80) // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+I(ð\8f¿¿) // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+I() // { dg-error "is not valid in an identifier" }
+I() // { dg-error "is not valid in an identifier" }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
+
+#define I(x)
+I(\80߿ࠀ𐀀) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
+I(\80) // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+I(¿) // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+I(À) // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+I(Á) // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+I(õ) // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+I(ÿ) // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+I(Â) // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+I(à) // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+I(à\80¿) // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+I(à\9f\80) // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+I(à¿) // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+I(ì\80) // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+I(ð\80\80\80) // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+I(ð\8f¿¿) // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+I() // { dg-error "is not valid in an identifier" }
+I() // { dg-error "is not valid in an identifier" }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
+
+#define I(x)
+I(\80߿ࠀ𐀀) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
+I(\80) // { dg-bogus "invalid UTF-8 character <80>" }
+I(¿) // { dg-bogus "invalid UTF-8 character <bf>" }
+I(À) // { dg-bogus "invalid UTF-8 character <c0>" }
+I(Á) // { dg-bogus "invalid UTF-8 character <c1>" }
+I(õ) // { dg-bogus "invalid UTF-8 character <f5>" }
+I(ÿ) // { dg-bogus "invalid UTF-8 character <ff>" }
+I(Â) // { dg-bogus "invalid UTF-8 character <c2>" }
+I(à) // { dg-bogus "invalid UTF-8 character <e0>" }
+I(à\80¿) // { dg-bogus "invalid UTF-8 character <e0><80><bf>" }
+I(à\9f\80) // { dg-bogus "invalid UTF-8 character <e0><9f><80>" }
+I(à¿) // { dg-bogus "invalid UTF-8 character <e0><bf>" }
+I(ì\80) // { dg-bogus "invalid UTF-8 character <ec><80>" }
+I() // { dg-bogus "invalid UTF-8 character <ed><a0><80>" }
+I(ð\80\80\80) // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" }
+I(ð\8f¿¿) // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" }
+I() // { dg-error "is not valid in an identifier" }
+I() // { dg-error "is not valid in an identifier" }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic" }
+
+// a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" }
+// a\80a { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+// a¿a { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+// aÀa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+// aÁa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+// aõa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+// aÿa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+// aÂa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+// aàa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+// aà\80¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+// aà\9f\80a { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+// aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+// aì\80a { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+// að\80\80\80a { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+// að\8f¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+/* a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" } */
+/* a\80a { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } } */
+/* a¿a { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } } */
+/* aÀa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } } */
+/* aÁa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } } */
+/* aõa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } } */
+/* aÿa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } } */
+/* aÂa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } } */
+/* aàa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } } */
+/* aà\80¿a { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
+/* aà\9f\80a { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
+/* aà¿a { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
+/* aì\80a { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
+/* að\80\80\80a { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
+/* að\8f¿¿a { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
+/* { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
+
+// a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" }
+// a\80a { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+// a¿a { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+// aÀa { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+// aÁa { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+// aõa { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+// aÿa { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+// aÂa { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+// aàa { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+// aà\80¿a { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+// aà\9f\80a { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+// aà¿a { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+// aì\80a { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+// að\80\80\80a { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+// að\8f¿¿a { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+// { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+/* a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" } */
+/* a\80a { dg-error "invalid UTF-8 character <80>" "" { target c++23 } } */
+/* a¿a { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } } */
+/* aÀa { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } } */
+/* aÁa { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } } */
+/* aõa { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } } */
+/* aÿa { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } } */
+/* aÂa { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } } */
+/* aàa { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } } */
+/* aà\80¿a { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
+/* aà\9f\80a { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
+/* aà¿a { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
+/* aì\80a { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
+/* að\80\80\80a { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
+/* að\8f¿¿a { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
+/* { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
+
+// a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" }
+// a\80a { dg-bogus "invalid UTF-8 character <80>" }
+// a¿a { dg-bogus "invalid UTF-8 character <bf>" }
+// aÀa { dg-bogus "invalid UTF-8 character <c0>" }
+// aÁa { dg-bogus "invalid UTF-8 character <c1>" }
+// aõa { dg-bogus "invalid UTF-8 character <f5>" }
+// aÿa { dg-bogus "invalid UTF-8 character <ff>" }
+// aÂa { dg-bogus "invalid UTF-8 character <c2>" }
+// aàa { dg-bogus "invalid UTF-8 character <e0>" }
+// aà\80¿a { dg-bogus "invalid UTF-8 character <e0><80><bf>" }
+// aà\9f\80a { dg-bogus "invalid UTF-8 character <e0><9f><80>" }
+// aà¿a { dg-bogus "invalid UTF-8 character <e0><bf>" }
+// aì\80a { dg-bogus "invalid UTF-8 character <ec><80>" }
+// aa { dg-bogus "invalid UTF-8 character <ed><a0><80>" }
+// að\80\80\80a { dg-bogus "invalid UTF-8 character <f0><80><80><80>" }
+// að\8f¿¿a { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" }
+// aa { dg-bogus "invalid UTF-8 character <f4><90><80><80>" }
+// aa { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" }
+// { dg-bogus "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+/* a\80߿ࠀ𐀀a { dg-bogus "invalid UTF-8 character" } */
+/* a\80a { dg-bogus "invalid UTF-8 character <80>" } */
+/* a¿a { dg-bogus "invalid UTF-8 character <bf>" } */
+/* aÀa { dg-bogus "invalid UTF-8 character <c0>" } */
+/* aÁa { dg-bogus "invalid UTF-8 character <c1>" } */
+/* aõa { dg-bogus "invalid UTF-8 character <f5>" } */
+/* aÿa { dg-bogus "invalid UTF-8 character <ff>" } */
+/* aÂa { dg-bogus "invalid UTF-8 character <c2>" } */
+/* aàa { dg-bogus "invalid UTF-8 character <e0>" } */
+/* aà\80¿a { dg-bogus "invalid UTF-8 character <e0><80><bf>" } */
+/* aà\9f\80a { dg-bogus "invalid UTF-8 character <e0><9f><80>" } */
+/* aà¿a { dg-bogus "invalid UTF-8 character <e0><bf>" } */
+/* aì\80a { dg-bogus "invalid UTF-8 character <ec><80>" } */
+/* aa { dg-bogus "invalid UTF-8 character <ed><a0><80>" } */
+/* að\80\80\80a { dg-bogus "invalid UTF-8 character <f0><80><80><80>" } */
+/* að\8f¿¿a { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" } */
+/* aa { dg-bogus "invalid UTF-8 character <f4><90><80><80>" } */
+/* aa { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" } */
+/* { dg-bogus "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 } */
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8" }
+
+char32_t a = U'\80'; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+char32_t b = U'¿'; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+char32_t c = U'À'; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+char32_t d = U'Á'; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+char32_t e = U'õ'; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+char32_t f = U'ÿ'; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+char32_t g = U'Â'; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+char32_t h = U'à'; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+char32_t i = U'à\80¿'; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+char32_t j = U'à\9f\80'; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+char32_t k = U'à¿'; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+char32_t l = U'ì\80'; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+char32_t m = U''; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+char32_t n = U'ð\80\80\80'; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+char32_t o = U'ð\8f¿¿'; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+char32_t p = U''; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+char32_t q = U''; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A = U"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+auto B = U"\80"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C = U"¿"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D = U"À"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E = U"Á"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F = U"õ"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G = U"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H = U"Â"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I = U"à"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J = U"à\80¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K = U"à\9f\80"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L = U"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M = U"ì\80"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N = U""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O = U"ð\80\80\80"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P = U"ð\8f¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q = U""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R = U""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A1 = UR"(\80߿ࠀ𐀀)"; // { dg-bogus "invalid UTF-8 character" }
+auto B1 = UR"(\80)"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C1 = UR"(¿)"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D1 = UR"(À)"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E1 = UR"(Á)"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F1 = UR"(õ)"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G1 = UR"(ÿ)"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H1 = UR"(Â)"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I1 = UR"(à)"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J1 = UR"(à\80¿)"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K1 = UR"(à\9f\80)"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L1 = UR"(à¿)"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M1 = UR"(ì\80)"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N1 = UR"()"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O1 = UR"(ð\80\80\80)"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P1 = UR"(ð\8f¿¿)"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q1 = UR"()"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R1 = UR"()"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A2 = u8"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8"\80"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C2 = u8"¿"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D2 = u8"À"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E2 = u8"Á"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F2 = u8"õ"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G2 = u8"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H2 = u8"Â"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I2 = u8"à"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J2 = u8"à\80¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K2 = u8"à\9f\80"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L2 = u8"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M2 = u8"ì\80"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N2 = u8""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O2 = u8"ð\80\80\80"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P2 = u8"ð\8f¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q2 = u8""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R2 = u8""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic" }
+
+char32_t a = U'\80'; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+char32_t b = U'¿'; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+char32_t c = U'À'; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+char32_t d = U'Á'; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+char32_t e = U'õ'; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+char32_t f = U'ÿ'; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+char32_t g = U'Â'; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+char32_t h = U'à'; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+char32_t i = U'à\80¿'; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+char32_t j = U'à\9f\80'; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+char32_t k = U'à¿'; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+char32_t l = U'ì\80'; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+char32_t m = U''; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+char32_t n = U'ð\80\80\80'; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+char32_t o = U'ð\8f¿¿'; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+char32_t p = U''; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+char32_t q = U''; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A = U"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+auto B = U"\80"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C = U"¿"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D = U"À"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E = U"Á"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F = U"õ"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G = U"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H = U"Â"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I = U"à"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J = U"à\80¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K = U"à\9f\80"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L = U"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M = U"ì\80"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N = U""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O = U"ð\80\80\80"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P = U"ð\8f¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q = U""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R = U""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A1 = UR"(\80߿ࠀ𐀀)"; // { dg-bogus "invalid UTF-8 character" }
+auto B1 = UR"(\80)"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C1 = UR"(¿)"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D1 = UR"(À)"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E1 = UR"(Á)"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F1 = UR"(õ)"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G1 = UR"(ÿ)"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H1 = UR"(Â)"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I1 = UR"(à)"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J1 = UR"(à\80¿)"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K1 = UR"(à\9f\80)"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L1 = UR"(à¿)"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M1 = UR"(ì\80)"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N1 = UR"()"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O1 = UR"(ð\80\80\80)"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P1 = UR"(ð\8f¿¿)"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q1 = UR"()"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R1 = UR"()"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A2 = u8"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8"\80"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C2 = u8"¿"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D2 = u8"À"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E2 = u8"Á"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F2 = u8"õ"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G2 = u8"ÿ"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H2 = u8"Â"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I2 = u8"à"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J2 = u8"à\80¿"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K2 = u8"à\9f\80"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L2 = u8"à¿"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M2 = u8"ì\80"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N2 = u8""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O2 = u8"ð\80\80\80"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P2 = u8"ð\8f¿¿"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q2 = u8""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R2 = u8""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
+
+char32_t a = U'\80'; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+char32_t b = U'¿'; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+char32_t c = U'À'; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+char32_t d = U'Á'; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+char32_t e = U'õ'; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+char32_t f = U'ÿ'; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+char32_t g = U'Â'; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+char32_t h = U'à'; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+char32_t i = U'à\80¿'; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+char32_t j = U'à\9f\80'; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+char32_t k = U'à¿'; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+char32_t l = U'ì\80'; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+char32_t m = U''; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+char32_t n = U'ð\80\80\80'; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+char32_t o = U'ð\8f¿¿'; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+char32_t p = U''; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+char32_t q = U''; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A = U"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+auto B = U"\80"; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C = U"¿"; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D = U"À"; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E = U"Á"; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F = U"õ"; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G = U"ÿ"; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H = U"Â"; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I = U"à"; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J = U"à\80¿"; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K = U"à\9f\80"; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L = U"à¿"; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M = U"ì\80"; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N = U""; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O = U"ð\80\80\80"; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P = U"ð\8f¿¿"; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q = U""; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R = U""; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A1 = UR"(\80߿ࠀ𐀀)"; // { dg-bogus "invalid UTF-8 character" }
+auto B1 = UR"(\80)"; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C1 = UR"(¿)"; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D1 = UR"(À)"; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E1 = UR"(Á)"; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F1 = UR"(õ)"; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G1 = UR"(ÿ)"; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H1 = UR"(Â)"; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I1 = UR"(à)"; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J1 = UR"(à\80¿)"; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K1 = UR"(à\9f\80)"; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L1 = UR"(à¿)"; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M1 = UR"(ì\80)"; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N1 = UR"()"; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O1 = UR"(ð\80\80\80)"; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P1 = UR"(ð\8f¿¿)"; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q1 = UR"()"; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R1 = UR"()"; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A2 = u8"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8"\80"; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C2 = u8"¿"; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D2 = u8"À"; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E2 = u8"Á"; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F2 = u8"õ"; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G2 = u8"ÿ"; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H2 = u8"Â"; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I2 = u8"à"; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J2 = u8"à\80¿"; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K2 = u8"à\9f\80"; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L2 = u8"à¿"; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M2 = u8"ì\80"; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N2 = u8""; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O2 = u8"ð\80\80\80"; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P2 = u8"ð\8f¿¿"; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q2 = u8""; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R2 = u8""; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
+
+char32_t a = U'\80'; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
+char32_t b = U'¿'; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
+char32_t c = U'À'; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
+char32_t d = U'Á'; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
+char32_t e = U'õ'; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
+char32_t f = U'ÿ'; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
+char32_t g = U'Â'; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
+char32_t h = U'à'; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
+char32_t i = U'à\80¿'; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+char32_t j = U'à\9f\80'; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+char32_t k = U'à¿'; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+char32_t l = U'ì\80'; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+char32_t m = U''; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+char32_t n = U'ð\80\80\80'; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+char32_t o = U'ð\8f¿¿'; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+char32_t p = U''; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+char32_t q = U''; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A = U"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+auto B = U"\80"; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C = U"¿"; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D = U"À"; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E = U"Á"; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F = U"õ"; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G = U"ÿ"; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H = U"Â"; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I = U"à"; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J = U"à\80¿"; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K = U"à\9f\80"; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L = U"à¿"; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M = U"ì\80"; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N = U""; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O = U"ð\80\80\80"; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P = U"ð\8f¿¿"; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q = U""; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R = U""; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A1 = UR"(\80߿ࠀ𐀀)"; // { dg-bogus "invalid UTF-8 character" }
+auto B1 = UR"(\80)"; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C1 = UR"(¿)"; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D1 = UR"(À)"; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E1 = UR"(Á)"; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F1 = UR"(õ)"; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G1 = UR"(ÿ)"; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H1 = UR"(Â)"; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I1 = UR"(à)"; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J1 = UR"(à\80¿)"; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K1 = UR"(à\9f\80)"; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L1 = UR"(à¿)"; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M1 = UR"(ì\80)"; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N1 = UR"()"; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O1 = UR"(ð\80\80\80)"; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P1 = UR"(ð\8f¿¿)"; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q1 = UR"()"; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R1 = UR"()"; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A2 = u8"\80߿ࠀ𐀀"; // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8"\80"; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C2 = u8"¿"; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D2 = u8"À"; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E2 = u8"Á"; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F2 = u8"õ"; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G2 = u8"ÿ"; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H2 = u8"Â"; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I2 = u8"à"; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J2 = u8"à\80¿"; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K2 = u8"à\9f\80"; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L2 = u8"à¿"; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M2 = u8"ì\80"; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N2 = u8""; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O2 = u8"ð\80\80\80"; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P2 = u8"ð\8f¿¿"; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q2 = u8""; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R2 = u8""; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
--- /dev/null
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8" }
+
+#define I(x)
+I(\80߿ࠀ𐀀) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
+I(\80) // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+I(¿) // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+I(À) // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+I(Á) // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+I(õ) // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+I(ÿ) // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+I(Â) // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+I(à) // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+I(à\80¿) // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+I(à\9f\80) // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+I(à¿) // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+I(ì\80) // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+I(ð\80\80\80) // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+I(ð\8f¿¿) // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+I() // { dg-error "is not valid in an identifier" }
+I() // { dg-error "is not valid in an identifier" }
case, no diagnostic is emitted, and the return value of FALSE should cause
a new token to be formed.
- Unlike _cpp_valid_ucn, this will never be called when lexing a string; only
- a potential identifier, or a CPP_OTHER token. NST is unused in the latter
- case.
+ _cpp_valid_utf8 can be called when lexing a potential identifier or a
+ CPP_OTHER token, or for the purposes of the -Winvalid-utf8 warning in
+ string or character literals. NST is unused when not in a potential
+ identifier.
As in _cpp_valid_ucn, IDENTIFIER_POS is 0 when not in an identifier, 1 for
the start of an identifier, or 2 otherwise. */
cpp_bidirectional_level. */
unsigned char cpp_warn_bidirectional;
+ /* Nonzero if libcpp should warn about invalid UTF-8 characters in comments.
+ 2 if the warning should be a pedwarn. */
+ unsigned char cpp_warn_invalid_utf8;
+
+ /* True if -finput-charset= option has been used explicitly. */
+ bool cpp_input_charset_explicit;
+
/* Dependency generation. */
struct
{
CPP_W_CXX11_COMPAT,
CPP_W_CXX20_COMPAT,
CPP_W_EXPANSION_TO_DEFINED,
- CPP_W_BIDIRECTIONAL
+ CPP_W_BIDIRECTIONAL,
+ CPP_W_INVALID_UTF8
};
/* Callback for header lookup for HEADER, which is the name of a
CPP_OPTION (pfile, ext_numeric_literals) = 1;
CPP_OPTION (pfile, warn_date_time) = 0;
CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
+ CPP_OPTION (pfile, cpp_warn_invalid_utf8) = 0;
+ CPP_OPTION (pfile, cpp_input_charset_explicit) = 0;
/* Default CPP arithmetic to something sensible for the host for the
benefit of dumb users like fix-header. */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
+/* ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive. */
+#define UCS_LIMIT 0x10FFFF
+
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
bidi::on_char (kind, ucn_p, loc);
}
+static const cppchar_t utf8_continuation = 0x80; /* Lowest continuation byte.  */
+static const cppchar_t utf8_signifier = 0xC0; /* Lowest candidate lead byte.  */
+
+/* Diagnose, under -Winvalid-utf8, the invalid UTF-8 sequence that begins
+   at PFILE->buffer->cur.  The diagnostic quotes the first byte and, when
+   that byte is at least utf8_signifier, up to three following bytes that
+   lie in [utf8_continuation, utf8_signifier).  It is issued as a pedwarn
+   when CPP_PEDANTIC holds and cpp_warn_invalid_utf8 is 2, and as a
+   CPP_W_INVALID_UTF8 warning otherwise.  Return a pointer just past the
+   quoted bytes.
+
+   NOTE(review): the lookahead presumably relies on the buffer ending in
+   a byte below utf8_continuation (such as the '\n' the callers test
+   for) -- confirm.  */
+
+static const uchar *
+_cpp_warn_invalid_utf8 (cpp_reader *pfile)
+{
+  cpp_buffer *buf = pfile->buffer;
+  const uchar *seq = buf->cur;
+  const location_t line = pfile->line_table->highest_line;
+  unsigned int nbytes = 1;
+
+  /* A byte below utf8_signifier is reported on its own; otherwise absorb
+     at most three continuation bytes that follow it.  */
+  if (seq[0] >= utf8_signifier)
+    while (nbytes < 4
+           && seq[nbytes] >= utf8_continuation
+           && seq[nbytes] < utf8_signifier)
+      nbytes++;
+
+  if (CPP_PEDANTIC (pfile)
+      && CPP_OPTION (pfile, cpp_warn_invalid_utf8) == 2)
+    switch (nbytes)
+      {
+      case 1:
+        cpp_error_with_line (pfile, CPP_DL_PEDWARN, line,
+                             CPP_BUF_COL (buf),
+                             "invalid UTF-8 character <%x>",
+                             seq[0]);
+        break;
+      case 2:
+        cpp_error_with_line (pfile, CPP_DL_PEDWARN, line,
+                             CPP_BUF_COL (buf),
+                             "invalid UTF-8 character <%x><%x>",
+                             seq[0], seq[1]);
+        break;
+      case 3:
+        cpp_error_with_line (pfile, CPP_DL_PEDWARN, line,
+                             CPP_BUF_COL (buf),
+                             "invalid UTF-8 character <%x><%x><%x>",
+                             seq[0], seq[1], seq[2]);
+        break;
+      default:
+        cpp_error_with_line (pfile, CPP_DL_PEDWARN, line,
+                             CPP_BUF_COL (buf),
+                             "invalid UTF-8 character <%x><%x><%x><%x>",
+                             seq[0], seq[1], seq[2], seq[3]);
+        break;
+      }
+  else
+    switch (nbytes)
+      {
+      case 1:
+        cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8, line,
+                               CPP_BUF_COL (buf),
+                               "invalid UTF-8 character <%x>",
+                               seq[0]);
+        break;
+      case 2:
+        cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8, line,
+                               CPP_BUF_COL (buf),
+                               "invalid UTF-8 character <%x><%x>",
+                               seq[0], seq[1]);
+        break;
+      case 3:
+        cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8, line,
+                               CPP_BUF_COL (buf),
+                               "invalid UTF-8 character <%x><%x><%x>",
+                               seq[0], seq[1], seq[2]);
+        break;
+      default:
+        cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8, line,
+                               CPP_BUF_COL (buf),
+                               "invalid UTF-8 character <%x><%x><%x><%x>",
+                               seq[0], seq[1], seq[2], seq[3]);
+        break;
+      }
+
+  return seq + nbytes;
+}
+
+/* Shared helper for *skip_*_comment and lex*_string.  C is the byte at
+   CUR[-1] and has its most significant bit set.  If WARN_BIDI_P and C is
+   bidi::utf8_start, run the -Wbidi-chars* check on the sequence starting
+   at CUR - 1.  If WARN_INVALID_UTF8_P is false, return CUR unchanged.
+   Otherwise, when C is at least utf8_signifier and _cpp_valid_utf8
+   accepts the sequence with a value no greater than UCS_LIMIT, return
+   the position _cpp_valid_utf8 left off at; in every other case reset
+   PFILE->buffer->cur to CUR - 1, emit the -Winvalid-utf8 diagnostic and
+   return the pointer past the diagnosed bytes.  Either way the result
+   is the first character that should be processed next.  */
+
+static inline const uchar *
+_cpp_handle_multibyte_utf8 (cpp_reader *pfile, uchar c,
+                            const uchar *cur, bool warn_bidi_p,
+                            bool warn_invalid_utf8_p)
+{
+  const uchar *start = cur - 1;
+
+  /* The start of a UTF-8 encoding might be a bidirectional control
+     character.  */
+  if (warn_bidi_p && c == bidi::utf8_start)
+    {
+      location_t loc;
+      bidi::kind kind = get_bidi_utf8 (pfile, start, &loc);
+      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+    }
+
+  if (warn_invalid_utf8_p)
+    {
+      /* Only a byte of at least utf8_signifier can open a sequence.  */
+      if (c >= utf8_signifier)
+        {
+          cppchar_t value;
+          const uchar *end = start;
+          if (_cpp_valid_utf8 (pfile, &end, pfile->buffer->rlimit, 0,
+                               NULL, &value)
+              && value <= UCS_LIMIT)
+            return end;
+        }
+
+      /* _cpp_warn_invalid_utf8 reads the offending bytes from
+         buffer->cur, so point it at C first.  */
+      pfile->buffer->cur = start;
+      return _cpp_warn_invalid_utf8 (pfile);
+    }
+
+  return cur;
+}
+
/* Skip a C-style block comment. We find the end of the comment by
seeing if an asterisk is before every '/' we encounter. Returns
nonzero if comment terminated by EOF, zero otherwise.
const uchar *cur = buffer->cur;
uchar c;
const bool warn_bidi_p = pfile->warn_bidi_p ();
+ const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
+ const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
cur++;
if (*cur == '/')
cur = buffer->cur;
}
- /* If this is a beginning of a UTF-8 encoding, it might be
- a bidirectional control character. */
- else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
- {
- location_t loc;
- bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
- maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
- }
+ else if (__builtin_expect (c >= utf8_continuation, 0)
+ && warn_bidi_or_invalid_utf8_p)
+ cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
+ warn_invalid_utf8_p);
}
buffer->cur = cur;
cpp_buffer *buffer = pfile->buffer;
location_t orig_line = pfile->line_table->highest_line;
const bool warn_bidi_p = pfile->warn_bidi_p ();
+ const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
+ const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
- if (!warn_bidi_p)
+ if (!warn_bidi_or_invalid_utf8_p)
while (*buffer->cur != '\n')
buffer->cur++;
- else
+ else if (!warn_invalid_utf8_p)
{
while (*buffer->cur != '\n'
&& *buffer->cur != bidi::utf8_start)
maybe_warn_bidi_on_close (pfile, buffer->cur);
}
}
+ else
+ {
+ while (*buffer->cur != '\n')
+ {
+ if (*buffer->cur < utf8_continuation)
+ {
+ buffer->cur++;
+ continue;
+ }
+ buffer->cur
+ = _cpp_handle_multibyte_utf8 (pfile, *buffer->cur, buffer->cur + 1,
+ warn_bidi_p, warn_invalid_utf8_p);
+ }
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, buffer->cur);
+ }
_cpp_process_line_notes (pfile, true);
return orig_line != pfile->line_table->highest_line;
}
}
-static const cppchar_t utf8_signifier = 0xC0;
-
/* Returns TRUE if the sequence starting at buffer->cur is valid in
an identifier. FIRST is TRUE if this starts an identifier. */
{
const uchar *pos = base;
const bool warn_bidi_p = pfile->warn_bidi_p ();
+ const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
+ const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
/* 'tis a pity this information isn't passed down from the lexer's
initial categorization of the token. */
pos = base = pfile->buffer->cur;
note = &pfile->buffer->notes[pfile->buffer->cur_note];
}
- else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
- && warn_bidi_p)
- {
- location_t loc;
- bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
- maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
- }
+ else if (__builtin_expect ((unsigned char) c >= utf8_continuation, 0)
+ && warn_bidi_or_invalid_utf8_p)
+ pos = _cpp_handle_multibyte_utf8 (pfile, c, pos, warn_bidi_p,
+ warn_invalid_utf8_p);
}
if (warn_bidi_p)
terminator = '>', type = CPP_HEADER_NAME;
const bool warn_bidi_p = pfile->warn_bidi_p ();
+ const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
+ const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
for (;;)
{
cppchar_t c = *cur++;
}
else if (c == '\0')
saw_NUL = true;
- else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
- {
- location_t loc;
- bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
- maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
- }
+ else if (__builtin_expect (c >= utf8_continuation, 0)
+ && warn_bidi_or_invalid_utf8_p)
+ cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
+ warn_invalid_utf8_p);
}
if (saw_NUL && !pfile->state.skipping)
default:
{
const uchar *base = --buffer->cur;
+ static int no_warn_cnt;
/* Check for an extended identifier ($ or UCN or UTF-8). */
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
const uchar *pstr = base;
cppchar_t s;
if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
- buffer->cur = pstr;
+ {
+ if (s > UCS_LIMIT && CPP_OPTION (pfile, cpp_warn_invalid_utf8))
+ {
+ buffer->cur = base;
+ _cpp_warn_invalid_utf8 (pfile);
+ }
+ buffer->cur = pstr;
+ }
+ else if (CPP_OPTION (pfile, cpp_warn_invalid_utf8))
+ {
+ buffer->cur = base;
+ const uchar *end = _cpp_warn_invalid_utf8 (pfile);
+ buffer->cur = base + 1;
+ no_warn_cnt = end - buffer->cur;
+ }
+ }
+ else if (c >= utf8_continuation
+ && CPP_OPTION (pfile, cpp_warn_invalid_utf8))
+ {
+ if (no_warn_cnt)
+ --no_warn_cnt;
+ else
+ {
+ buffer->cur = base;
+ _cpp_warn_invalid_utf8 (pfile);
+ buffer->cur = base + 1;
+ }
}
create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
break;