From 88501dc74911b00186298fe1fffe8dfb1f09b1c1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 4 Aug 2022 10:26:31 -0700 Subject: [PATCH] [Sema] -Wformat: support C23 format specifier %b %B MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Close #56885: WG14 N2630 added %b to fprintf/fscanf and recommended %B for fprintf. This patch teaches -Wformat %b for the printf/scanf family of functions and %B for the printf family of functions. glibc 2.35 and latest Android bionic added %b/%B printf support. From https://www.openwall.com/lists/libc-coord/2022/07/ no scanf support is available yet. Like GCC, we don't test library support. GCC 12 -Wformat -pedantic emits a warning: > warning: ISO C17 does not support the ‘%b’ gnu_printf format [-Wformat=] The behavior is not ported. Note: `freebsd_kernel_printf` uses %b differently. Reviewed By: aaron.ballman, dim, enh Differential Revision: https://reviews.llvm.org/D131057 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/AST/FormatString.h | 6 +++++- clang/lib/AST/FormatString.cpp | 6 ++++++ clang/lib/AST/PrintfFormatString.cpp | 25 +++++++++++++++++-------- clang/lib/AST/ScanfFormatString.cpp | 2 ++ clang/test/Sema/format-strings-fixit.c | 4 ++++ clang/test/Sema/format-strings-scanf.c | 5 +++++ clang/test/Sema/format-strings.c | 8 +++++++- clang/test/SemaObjC/format-strings-objc.m | 2 +- 9 files changed, 50 insertions(+), 11 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 85d360e..b9729a6 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -69,6 +69,9 @@ Improvements to Clang's diagnostics `Issue 50055: `_. - Clang will now check compile-time determinable string literals as format strings. This fixes `Issue 55805: `_. +- ``-Wformat`` now recognizes ``%b`` for the ``printf``/``scanf`` family of + functions and ``%B`` for the ``printf`` family of functions. Fixes + `Issue 56885: `_. Non-comprehensive list of changes in this release ------------------------------------------------- diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index d793338..0f1f6b1 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -127,8 +127,12 @@ public: dArg, DArg, // Apple extension iArg, + // C23 conversion specifiers. + bArg, + BArg, + IntArgBeg = dArg, - IntArgEnd = iArg, + IntArgEnd = BArg, oArg, OArg, // Apple extension diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index 1a1dddc..1a76343 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -624,6 +624,8 @@ analyze_format_string::LengthModifier::toString() const { const char *ConversionSpecifier::toString() const { switch (kind) { + case bArg: return "b"; + case BArg: return "B"; case dArg: return "d"; case DArg: return "D"; case iArg: return "i"; @@ -753,6 +755,8 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target, case LengthModifier::AsSizeT: case LengthModifier::AsPtrDiff: switch (CS.getKind()) { + case ConversionSpecifier::bArg: + case ConversionSpecifier::BArg: case ConversionSpecifier::dArg: case ConversionSpecifier::DArg: case ConversionSpecifier::iArg: @@ -908,6 +912,8 @@ bool FormatSpecifier::hasStandardLengthModifier() const { bool FormatSpecifier::hasStandardConversionSpecifier( const LangOptions &LangOpt) const { switch (CS.getKind()) { + case ConversionSpecifier::bArg: + case ConversionSpecifier::BArg: case ConversionSpecifier::cArg: case ConversionSpecifier::dArg: case ConversionSpecifier::iArg: diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index d972c12..4f5fbdc 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -326,6 +326,14 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, case 's': k = ConversionSpecifier::sArg; break; case 'u': k = ConversionSpecifier::uArg; break; case 'x': k = ConversionSpecifier::xArg; break; + // C23. + case 'b': + if (isFreeBSDKPrintf) + k = ConversionSpecifier::FreeBSDbArg; // int followed by char * + else + k = ConversionSpecifier::bArg; + break; + case 'B': k = ConversionSpecifier::BArg; break; // POSIX specific. case 'C': k = ConversionSpecifier::CArg; break; case 'S': k = ConversionSpecifier::SArg; break; @@ -337,11 +345,6 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, case '@': k = ConversionSpecifier::ObjCObjArg; break; // Glibc specific. case 'm': k = ConversionSpecifier::PrintErrno; break; - // FreeBSD kernel specific. - case 'b': - if (isFreeBSDKPrintf) - k = ConversionSpecifier::FreeBSDbArg; // int followed by char * - break; case 'r': if (isFreeBSDKPrintf) k = ConversionSpecifier::FreeBSDrArg; // int @@ -961,8 +964,10 @@ bool PrintfSpecifier::hasValidAlternativeForm() const { if (!HasAlternativeForm) return true; - // Alternate form flag only valid with the oxXaAeEfFgG conversions + // Alternate form flag only valid with the bBoxXaAeEfFgG conversions switch (CS.getKind()) { + case ConversionSpecifier::bArg: + case ConversionSpecifier::BArg: case ConversionSpecifier::oArg: case ConversionSpecifier::OArg: case ConversionSpecifier::xArg: @@ -988,8 +993,10 @@ bool PrintfSpecifier::hasValidLeadingZeros() const { if (!HasLeadingZeroes) return true; - // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions + // Leading zeroes flag only valid with the bBdiouxXaAeEfFgG conversions switch (CS.getKind()) { + case ConversionSpecifier::bArg: + case ConversionSpecifier::BArg: case ConversionSpecifier::dArg: case ConversionSpecifier::DArg: case ConversionSpecifier::iArg: @@ -1080,8 +1087,10 @@ bool PrintfSpecifier::hasValidPrecision() const { if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) return true; - // Precision is only valid with the diouxXaAeEfFgGsP conversions + // Precision is only valid with the bBdiouxXaAeEfFgGsP conversions switch (CS.getKind()) { + case ConversionSpecifier::bArg: + case ConversionSpecifier::BArg: case ConversionSpecifier::dArg: case ConversionSpecifier::DArg: case ConversionSpecifier::iArg: diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp index 7679f02..73454a2 100644 --- a/clang/lib/AST/ScanfFormatString.cpp +++ b/clang/lib/AST/ScanfFormatString.cpp @@ -161,6 +161,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, default: break; case '%': k = ConversionSpecifier::PercentArg; break; + case 'b': k = ConversionSpecifier::bArg; break; case 'A': k = ConversionSpecifier::AArg; break; case 'E': k = ConversionSpecifier::EArg; break; case 'F': k = ConversionSpecifier::FArg; break; @@ -267,6 +268,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { llvm_unreachable("Unsupported LengthModifier Type"); // Unsigned int. + case ConversionSpecifier::bArg: case ConversionSpecifier::oArg: case ConversionSpecifier::OArg: case ConversionSpecifier::uArg: diff --git a/clang/test/Sema/format-strings-fixit.c b/clang/test/Sema/format-strings-fixit.c index e6d638b..64614e1 100644 --- a/clang/test/Sema/format-strings-fixit.c +++ b/clang/test/Sema/format-strings-fixit.c @@ -21,6 +21,7 @@ void test(void) { printf("%s", (int) 123); printf("abc%0f", "testing testing 123"); printf("%u", (long) -12); + printf("%b", (long) -13); printf("%p", 123); printf("%c\n", "x"); printf("%c\n", 1.23); @@ -160,6 +161,7 @@ void test2(int intSAParm[static 2]) { scanf("%f", (my_int_type*)&intVar); // Preserve the original formatting. + scanf("%b", &longVar); scanf("%o", &longVar); scanf("%u", &longVar); scanf("%x", &longVar); @@ -179,6 +181,7 @@ void test2(int intSAParm[static 2]) { // CHECK: printf("%d", (int) 123); // CHECK: printf("abc%s", "testing testing 123"); // CHECK: printf("%ld", (long) -12); +// CHECK: printf("%ld", (long) -13); // CHECK: printf("%d", 123); // CHECK: printf("%s\n", "x"); // CHECK: printf("%f\n", 1.23); @@ -246,6 +249,7 @@ void test2(int intSAParm[static 2]) { // CHECK: scanf("%ju", (my_uintmax_type*)&uIntmaxVar); // CHECK: scanf("%td", (my_ptrdiff_type*)&ptrdiffVar); // CHECK: scanf("%d", (my_int_type*)&intVar); +// CHECK: scanf("%ld", &longVar); // CHECK: scanf("%lo", &longVar); // CHECK: scanf("%lu", &longVar); // CHECK: scanf("%lx", &longVar); diff --git a/clang/test/Sema/format-strings-scanf.c b/clang/test/Sema/format-strings-scanf.c index 7b864bfd..d6e4031 100644 --- a/clang/test/Sema/format-strings-scanf.c +++ b/clang/test/Sema/format-strings-scanf.c @@ -40,9 +40,11 @@ void test(const char *s, int *i) { scanf("%0d", i); // expected-warning{{zero field width in scanf format string is unused}} scanf("%00d", i); // expected-warning{{zero field width in scanf format string is unused}} scanf("%d%[asdfasdfd", i, s); // expected-warning{{no closing ']' for '%[' in scanf format string}} + scanf("%B", i); // expected-warning{{invalid conversion specifier 'B'}} unsigned short s_x; scanf ("%" "hu" "\n", &s_x); // no-warning + scanf("%hb", &s_x); scanf("%y", i); // expected-warning{{invalid conversion specifier 'y'}} scanf("%%"); // no-warning scanf("%%%1$d", i); // no-warning @@ -57,6 +59,7 @@ void test(const char *s, int *i) { scanf("%s", (signed char*)0); // no-warning scanf("%s", (unsigned char*)0); // no-warning scanf("%hhu", (signed char*)0); // no-warning + scanf("%hhb", (signed char*)0); // no-warning } void bad_length_modifiers(char *s, void *p, wchar_t *ws, long double *ld) { @@ -193,6 +196,7 @@ void test_qualifiers(const int *cip, volatile int* vip, void test_size_types(void) { size_t s = 0; scanf("%zu", &s); // No warning. + scanf("%zb", &s); double d1 = 0.; scanf("%zu", &d1); // expected-warning-re{{format specifies type 'size_t *' (aka '{{.+}}') but the argument has type 'double *'}} @@ -213,6 +217,7 @@ void test_size_types(void) { void test_ptrdiff_t_types(void) { __UNSIGNED_PTRDIFF_TYPE__ p1 = 0; scanf("%tu", &p1); // No warning. + scanf("%tb", &p1); double d1 = 0.; scanf("%tu", &d1); // expected-warning-re{{format specifies type 'unsigned ptrdiff_t *' (aka '{{.+}}') but the argument has type 'double *'}} diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c index 85052d5..919bd74 100644 --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -199,7 +199,7 @@ void check_writeback_specifier(void) void check_invalid_specifier(FILE* fp, char *buf) { - printf("%s%lb%d","unix",10,20); // expected-warning {{invalid conversion specifier 'b'}} expected-warning {{data argument not used by format string}} + printf("%s%lv%d","unix",10,20); // expected-warning {{invalid conversion specifier 'v'}} expected-warning {{data argument not used by format string}} fprintf(fp,"%%%l"); // expected-warning {{incomplete format specifier}} sprintf(buf,"%%%%%ld%d%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} expected-warning {{invalid conversion specifier ';'}} expected-warning {{data argument not used by format string}} @@ -304,6 +304,7 @@ void test10(int x, float f, int i, long long lli) { printf("%qp", (void *)0); // expected-warning{{length modifier 'q' results in undefined behavior or no effect with 'p' conversion specifier}} printf("hhX %hhX", (unsigned char)10); // no-warning printf("llX %llX", (long long) 10); // no-warning + printf("%llb %llB", (long long) 10, (long long) 10); // no-warning // This is fine, because there is an implicit conversion to an int. printf("%d", (unsigned char) 10); // no-warning printf("%d", (long long) 10); // expected-warning{{format specifies type 'int' but the argument has type 'long long'}} @@ -429,6 +430,7 @@ void bug7377_bad_length_mod_usage(void) { // Bad flag usage printf("%#p", (void *) 0); // expected-warning{{flag '#' results in undefined behavior with 'p' conversion specifier}} printf("%0d", -1); // no-warning + printf("%0b%0B", -1u, -1u); // no-warning printf("%-p", (void *) 0); // no-warning #if !defined(__ANDROID__) && !defined(__Fuchsia__) printf("%#n", (int *) 0); // expected-warning{{flag '#' results in undefined behavior with 'n' conversion specifier}} @@ -499,6 +501,7 @@ void rdar8332221(va_list ap, int *x, long *y) { void pr8641(void) { printf("%#x\n", 10); printf("%#X\n", 10); + printf("%#b %#15.8B\n", 10, 10u); } void posix_extensions(void) { @@ -508,6 +511,8 @@ void posix_extensions(void) { printf("%'i\n", 123456789); // no-warning printf("%'f\n", (float) 1.0); // no-warning printf("%'p\n", (void*) 0); // expected-warning{{results in undefined behavior with 'p' conversion specifier}} + printf("%'b\n", 123456789); // expected-warning{{results in undefined behavior with 'b' conversion specifier}} + printf("%'B\n", 123456789); // expected-warning{{results in undefined behavior with 'B' conversion specifier}} } // PR8486 @@ -540,6 +545,7 @@ void check_char(unsigned char x, signed char y) { printf("%hhi", x); // no-warning printf("%c", x); // no-warning printf("%hhu", y); // no-warning + printf("%hhb %hhB", x, x); // no-warning } // Test suppression of individual warnings. diff --git a/clang/test/SemaObjC/format-strings-objc.m b/clang/test/SemaObjC/format-strings-objc.m index 71f22c0..51638f9 100644 --- a/clang/test/SemaObjC/format-strings-objc.m +++ b/clang/test/SemaObjC/format-strings-objc.m @@ -56,7 +56,7 @@ int printf(const char * restrict, ...) ; void check_nslog(unsigned k) { NSLog(@"%d%%", k); // no-warning - NSLog(@"%s%lb%d", "unix", 10, 20); // expected-warning {{invalid conversion specifier 'b'}} expected-warning {{data argument not used by format string}} + NSLog(@"%s%lv%d", "unix", 10, 20); // expected-warning {{invalid conversion specifier 'v'}} expected-warning {{data argument not used by format string}} } // Check type validation -- 2.7.4