From dd230efe703f34678ce52280e50238abf908aaa1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 28 Aug 2023 00:49:49 -0700 Subject: [PATCH] [sanitizer] Intercept glibc 2.38 __isoc23_* functions `strtol("0b1", 0, 0)` can be (pre-C23) 0 or (C23) 1. `sscanf("0b10", "%i", &x)` is similar. glibc 2.38 introduced `__isoc23_strtol` and `__isoc23_scanf` family functions for binary compatibility. When `_ISOC2X_SOURCE` is defined (implied by `_GNU_SOURCE`) or `__STDC_VERSION__ > 201710L`, `__GLIBC_USE_ISOC2X` is defined to 1 and these `__isoc23_*` symbols are used. Add `__isoc23_` versions for the following interceptors: * sanitizer_common_interceptors.inc implements strtoimax/strtoumax. Remove incorrect FIXME about https://github.com/google/sanitizers/issues/321 * asan_interceptors.cpp implements just strtol and strtoll. The default `replace_str` mode checks `nptr` is readable and `endptr` is writable. atoi reuses the existing strtol interceptor. * msan_interceptors.cpp implements strtol family functions and their `_l` versions. Tested by lib/msan/tests/msan_test.cpp * sanitizer_common_interceptors.inc implements scanf family functions. The strtol family functions are spreaded, which is not great, but the patch (intended for release/17.x) does not attempt to address the issue. Add symbols to lib/sanitizer_common/symbolizer/scripts/global_symbols.txt to support both glibc pre-2.38 and 2.38. When build bots migrate to glibc 2.38+, we will lose test coverage for non-isoc23 versions since the existing C++ unittests imply `_GNU_SOURCE`. Add test/sanitizer_common/TestCases/{strtol.c,scanf.c}. They catch msan false positive in the absence of the interceptors. Fix https://github.com/llvm/llvm-project/issues/64388 Fix https://github.com/llvm/llvm-project/issues/64946 Link: https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00010.html ("The GNU C Library version 2.38 is now available") Reviewed By: #sanitizers, vitalybuka, mgorny Differential Revision: https://reviews.llvm.org/D158943 (cherry picked from commit ad7e2501000da2494860f06a306dfe8c08cc07c3) --- compiler-rt/lib/asan/asan_interceptors.cpp | 50 ++++++++------- compiler-rt/lib/msan/msan_interceptors.cpp | 37 +++++++++++ .../sanitizer_common_interceptors.inc | 73 +++++++++++++++++----- .../symbolizer/scripts/global_symbols.txt | 7 +++ .../test/sanitizer_common/TestCases/scanf.c | 24 +++++++ .../test/sanitizer_common/TestCases/strtol.c | 61 ++++++++++++++++++ 6 files changed, 214 insertions(+), 38 deletions(-) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/scanf.c create mode 100644 compiler-rt/test/sanitizer_common/TestCases/strtol.c diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index df879b1..5158e99 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -588,19 +588,34 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) { return REAL(strncpy)(to, from, size); } -INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) { - void *ctx; - ASAN_INTERCEPTOR_ENTER(ctx, strtol); - ENSURE_ASAN_INITED(); - if (!flags()->replace_str) { - return REAL(strtol)(nptr, endptr, base); - } +template +static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr, + char **endptr, int base) + -> decltype(real(nullptr, nullptr, 0)) { + if (!flags()->replace_str) + return real(nptr, endptr, base); char *real_endptr; - long result = REAL(strtol)(nptr, &real_endptr, base); + auto res = real(nptr, &real_endptr, base); StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return result; + return res; } +# define INTERCEPTOR_STRTO_BASE(ret_type, func) \ + INTERCEPTOR(ret_type, func, const char *nptr, char **endptr, int base) { \ + void *ctx; \ + ASAN_INTERCEPTOR_ENTER(ctx, func); \ + ENSURE_ASAN_INITED(); \ + return StrtolImpl(ctx, REAL(func), nptr, endptr, base); \ + } + +INTERCEPTOR_STRTO_BASE(long, strtol) +INTERCEPTOR_STRTO_BASE(long long, strtoll) + +# if SANITIZER_GLIBC +INTERCEPTOR_STRTO_BASE(long, __isoc23_strtol) +INTERCEPTOR_STRTO_BASE(long long, __isoc23_strtoll) +# endif + INTERCEPTOR(int, atoi, const char *nptr) { void *ctx; ASAN_INTERCEPTOR_ENTER(ctx, atoi); @@ -639,19 +654,6 @@ INTERCEPTOR(long, atol, const char *nptr) { return result; } -INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) { - void *ctx; - ASAN_INTERCEPTOR_ENTER(ctx, strtoll); - ENSURE_ASAN_INITED(); - if (!flags()->replace_str) { - return REAL(strtoll)(nptr, endptr, base); - } - char *real_endptr; - long long result = REAL(strtoll)(nptr, &real_endptr, base); - StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return result; -} - INTERCEPTOR(long long, atoll, const char *nptr) { void *ctx; ASAN_INTERCEPTOR_ENTER(ctx, atoll); @@ -752,6 +754,10 @@ void InitializeAsanInterceptors() { ASAN_INTERCEPT_FUNC(atoll); ASAN_INTERCEPT_FUNC(strtol); ASAN_INTERCEPT_FUNC(strtoll); +# if SANITIZER_GLIBC + ASAN_INTERCEPT_FUNC(__isoc23_strtol); + ASAN_INTERCEPT_FUNC(__isoc23_strtoll); +# endif // Intecept jump-related functions. ASAN_INTERCEPT_FUNC(longjmp); diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index f5e0d3c..9cb65d5 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -464,6 +464,25 @@ INTERCEPTORS_STRTO_BASE(long long, wcstoll, wchar_t) INTERCEPTORS_STRTO_BASE(unsigned long, wcstoul, wchar_t) INTERCEPTORS_STRTO_BASE(unsigned long long, wcstoull, wchar_t) +#if SANITIZER_GLIBC +INTERCEPTORS_STRTO(double, __isoc23_strtod, char) +INTERCEPTORS_STRTO(float, __isoc23_strtof, char) +INTERCEPTORS_STRTO(long double, __isoc23_strtold, char) +INTERCEPTORS_STRTO_BASE(long, __isoc23_strtol, char) +INTERCEPTORS_STRTO_BASE(long long, __isoc23_strtoll, char) +INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_strtoul, char) +INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_strtoull, char) +INTERCEPTORS_STRTO_BASE(u64, __isoc23_strtouq, char) + +INTERCEPTORS_STRTO(double, __isoc23_wcstod, wchar_t) +INTERCEPTORS_STRTO(float, __isoc23_wcstof, wchar_t) +INTERCEPTORS_STRTO(long double, __isoc23_wcstold, wchar_t) +INTERCEPTORS_STRTO_BASE(long, __isoc23_wcstol, wchar_t) +INTERCEPTORS_STRTO_BASE(long long, __isoc23_wcstoll, wchar_t) +INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_wcstoul, wchar_t) +INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_wcstoull, wchar_t) +#endif + #if SANITIZER_NETBSD #define INTERCEPT_STRTO(func) \ INTERCEPT_FUNCTION(func); \ @@ -1748,6 +1767,24 @@ void InitializeInterceptors() { INTERCEPT_STRTO(wcstoul); INTERCEPT_STRTO(wcstoll); INTERCEPT_STRTO(wcstoull); +#ifdef SANITIZER_GLIBC + INTERCEPT_STRTO(__isoc23_strtod); + INTERCEPT_STRTO(__isoc23_strtof); + INTERCEPT_STRTO(__isoc23_strtold); + INTERCEPT_STRTO(__isoc23_strtol); + INTERCEPT_STRTO(__isoc23_strtoul); + INTERCEPT_STRTO(__isoc23_strtoll); + INTERCEPT_STRTO(__isoc23_strtoull); + INTERCEPT_STRTO(__isoc23_strtouq); + INTERCEPT_STRTO(__isoc23_wcstod); + INTERCEPT_STRTO(__isoc23_wcstof); + INTERCEPT_STRTO(__isoc23_wcstold); + INTERCEPT_STRTO(__isoc23_wcstol); + INTERCEPT_STRTO(__isoc23_wcstoul); + INTERCEPT_STRTO(__isoc23_wcstoll); + INTERCEPT_STRTO(__isoc23_wcstoull); +#endif + #ifdef SANITIZER_NLDBL_VERSION INTERCEPT_FUNCTION_VER(vswprintf, SANITIZER_NLDBL_VERSION); INTERCEPT_FUNCTION_VER(swprintf, SANITIZER_NLDBL_VERSION); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 299561b..0e563fa 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -1491,6 +1491,16 @@ VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap) INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap) VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap) + +INTERCEPTOR(int, __isoc23_vscanf, const char *format, va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vscanf, false, format, ap) + +INTERCEPTOR(int, __isoc23_vsscanf, const char *str, const char *format, + va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vsscanf, false, str, format, ap) + +INTERCEPTOR(int, __isoc23_vfscanf, void *stream, const char *format, va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vfscanf, false, stream, format, ap) #endif // SANITIZER_INTERCEPT_ISOC99_SCANF INTERCEPTOR(int, scanf, const char *format, ...) @@ -1511,6 +1521,15 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format) INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...) FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format) + +INTERCEPTOR(int, __isoc23_scanf, const char *format, ...) +FORMAT_INTERCEPTOR_IMPL(__isoc23_scanf, __isoc23_vscanf, format) + +INTERCEPTOR(int, __isoc23_fscanf, void *stream, const char *format, ...) +FORMAT_INTERCEPTOR_IMPL(__isoc23_fscanf, __isoc23_vfscanf, stream, format) + +INTERCEPTOR(int, __isoc23_sscanf, const char *str, const char *format, ...) +FORMAT_INTERCEPTOR_IMPL(__isoc23_sscanf, __isoc23_vsscanf, str, format) #endif #endif @@ -1534,7 +1553,13 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format) COMMON_INTERCEPT_FUNCTION(__isoc99_fscanf); \ COMMON_INTERCEPT_FUNCTION(__isoc99_vscanf); \ COMMON_INTERCEPT_FUNCTION(__isoc99_vsscanf); \ - COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); + COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_scanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_sscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_fscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vsscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vfscanf); #else #define INIT_ISOC99_SCANF #endif @@ -3539,30 +3564,26 @@ UNUSED static inline void StrtolFixAndCheck(void *ctx, const char *nptr, (real_endptr - nptr) + 1 : 0); } - #if SANITIZER_INTERCEPT_STRTOIMAX -INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base); - // FIXME: under ASan the call below may write to freed memory and corrupt - // its metadata. See - // https://github.com/google/sanitizers/issues/321. +template +static ALWAYS_INLINE auto StrtoimaxImpl(void *ctx, Fn real, const char *nptr, + char **endptr, int base) + -> decltype(real(nullptr, nullptr, 0)) { char *real_endptr; - INTMAX_T res = REAL(strtoimax)(nptr, &real_endptr, base); + auto res = real(nptr, &real_endptr, base); StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); return res; } +INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(strtoimax), nptr, endptr, base); +} INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base); - // FIXME: under ASan the call below may write to freed memory and corrupt - // its metadata. See - // https://github.com/google/sanitizers/issues/321. - char *real_endptr; - UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base); - StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return res; + return StrtoimaxImpl(ctx, REAL(strtoumax), nptr, endptr, base); } #define INIT_STRTOIMAX \ @@ -3572,6 +3593,25 @@ INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) { #define INIT_STRTOIMAX #endif +#if SANITIZER_INTERCEPT_STRTOIMAX && SANITIZER_GLIBC +INTERCEPTOR(INTMAX_T, __isoc23_strtoimax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoimax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(__isoc23_strtoimax), nptr, endptr, base); +} +INTERCEPTOR(UINTMAX_T, __isoc23_strtoumax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoumax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(__isoc23_strtoumax), nptr, endptr, base); +} + +# define INIT_STRTOIMAX_C23 \ + COMMON_INTERCEPT_FUNCTION(__isoc23_strtoimax); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_strtoumax); +#else +# define INIT_STRTOIMAX_C23 +#endif + #if SANITIZER_INTERCEPT_MBSTOWCS INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) { void *ctx; @@ -10304,6 +10344,7 @@ static void InitializeCommonInterceptors() { INIT_GETCWD; INIT_GET_CURRENT_DIR_NAME; INIT_STRTOIMAX; + INIT_STRTOIMAX_C23; INIT_MBSTOWCS; INIT_MBSNRTOWCS; INIT_WCSTOMBS; diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt index 509e3f1..819cfca 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt @@ -34,6 +34,13 @@ __interceptor_pthread_setspecific w __interceptor_read w __interceptor_realpath w __isinf U +__isoc23_sscanf U +__isoc23_strtol U +__isoc23_strtoll U +__isoc23_strtoll_l U +__isoc23_strtoull U +__isoc23_strtoull_l U +__isoc23_vsscanf U __isoc99_sscanf U __isoc99_vsscanf U __moddi3 U diff --git a/compiler-rt/test/sanitizer_common/TestCases/scanf.c b/compiler-rt/test/sanitizer_common/TestCases/scanf.c new file mode 100644 index 0000000..a7f35c2 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/scanf.c @@ -0,0 +1,24 @@ +// RUN: %clang -std=c17 %s -o %t && %run %t +/// Test __isoc23_* for glibc 2.38+. +// RUN: %clang -std=c23 %s -o %t && %run %t + +#include +#include +#include + +int test_vsscanf(const char *buf, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + int ret = vsscanf(buf, fmt, ap); + va_end(ap); + return ret; +} + +int main(int argc, char **argv) { + int x, y; + assert(sscanf("42", "%d", &x) == 1); + assert(x == 42); + assert(test_vsscanf("42", "%d", &y) == 1); + assert(y == 42); + return 0; +} diff --git a/compiler-rt/test/sanitizer_common/TestCases/strtol.c b/compiler-rt/test/sanitizer_common/TestCases/strtol.c new file mode 100644 index 0000000..9947cde --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/strtol.c @@ -0,0 +1,61 @@ +// RUN: %clang -std=c17 %s -o %t && %run %t +/// Test __isoc23_* for glibc 2.38+. +// RUN: %clang -std=c23 %s -o %t && %run %t + +#include +#include +#include +#include +#include + +#define TESTL(func) \ + { \ + char *end; \ + long l = (long)func("42", &end, 0); \ + assert(l == 42); \ + assert(*end == '\0'); \ + } + +#define TESTF(func) \ + { \ + char *end; \ + long l = (long)func("42", &end); \ + assert(l == 42); \ + assert(*end == '\0'); \ + } + +#define WTESTL(func) \ + { \ + wchar_t *end; \ + long l = (long)func(L"42", &end, 0); \ + assert(l == 42); \ + assert(*end == L'\0'); \ + } + +#define WTESTF(func) \ + { \ + wchar_t *end; \ + long l = (long)func(L"42", &end); \ + assert(l == 42); \ + assert(*end == '\0'); \ + } + +int main() { + TESTL(strtol); + TESTL(strtoll); + TESTL(strtoimax); + TESTL(strtoul); + TESTL(strtoull); + TESTL(strtoumax); + TESTF(strtof); + TESTF(strtod); + TESTF(strtold); + + WTESTL(wcstol); + WTESTL(wcstoll); + WTESTL(wcstoul); + WTESTL(wcstoull); + WTESTF(wcstof); + WTESTF(wcstod); + WTESTF(wcstold); +} -- 2.7.4