From 4ab77b9f28207b04901dc986624e37f288769903 Mon Sep 17 00:00:00 2001 From: Alex Shlyapnikov Date: Thu, 23 Mar 2017 21:39:52 +0000 Subject: [PATCH] [asan] Add an interceptor for strtok Summary: This change addresses https://github.com/google/sanitizers/issues/766. I tested the change with make check-asan and the newly added test case. Reviewers: ygribov, kcc, alekseyshl Subscribers: kubamracek, llvm-commits Patch by mrigger Differential Revision: https://reviews.llvm.org/D30384 llvm-svn: 298650 --- .../sanitizer_common_interceptors.inc | 47 +++++++++ .../lib/sanitizer_common/sanitizer_flags.inc | 3 + .../sanitizer_platform_interceptors.h | 1 + compiler-rt/test/asan/TestCases/strtok.c | 108 +++++++++++++++++++++ 4 files changed, 159 insertions(+) create mode 100644 compiler-rt/test/asan/TestCases/strtok.c diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 75026e0..8e5734273 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -495,6 +495,52 @@ INTERCEPTOR(char*, strcasestr, const char *s1, const char *s2) { #define INIT_STRCASESTR #endif +#if SANITIZER_INTERCEPT_STRTOK + +INTERCEPTOR(char*, strtok, char *str, const char *delimiters) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, strtok, str, delimiters); + if (!common_flags()->intercept_strtok) { + return REAL(strtok)(str, delimiters); + } + if (common_flags()->strict_string_checks) { + // If strict_string_checks is enabled, we check the whole first argument + // string on the first call (strtok saves this string in a static buffer + // for subsequent calls). We do not need to check strtok's result. + // As the delimiters can change, we check them every call. 
+ if (str != nullptr) { + COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1); + } + COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters, + REAL(strlen)(delimiters) + 1); + return REAL(strtok)(str, delimiters); + } else { + // However, when strict_string_checks is disabled we cannot check the + // whole string on the first call. Instead, we check the result string + // which is guaranteed to be a NULL-terminated substring of the first + // argument. We also conservatively check one character of str and the + // delimiters. + if (str != nullptr) { + COMMON_INTERCEPTOR_READ_STRING(ctx, str, 1); + } + COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters, 1); + char *result = REAL(strtok)(str, delimiters); + if (result != nullptr) { + COMMON_INTERCEPTOR_READ_RANGE(ctx, result, REAL(strlen)(result) + 1); + } else if (str != nullptr) { + // No token was found, so it's safe to assume that the entire str was + // scanned. + COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1); + } + return result; + } +} + +#define INIT_STRTOK COMMON_INTERCEPT_FUNCTION(strtok) +#else +#define INIT_STRTOK +#endif + #if SANITIZER_INTERCEPT_MEMMEM DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, uptr called_pc, const void *s1, SIZE_T len1, const void *s2, @@ -6079,6 +6125,7 @@ static void InitializeCommonInterceptors() { INIT_STRCHRNUL; INIT_STRRCHR; INIT_STRSPN; + INIT_STRTOK; INIT_STRPBRK; INIT_MEMSET; INIT_MEMMOVE; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc index 1306c72..f111369 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc @@ -193,6 +193,9 @@ COMMON_FLAG(bool, intercept_strstr, true, COMMON_FLAG(bool, intercept_strspn, true, "If set, uses custom wrappers for strspn and strcspn function " "to find more errors.") +COMMON_FLAG(bool, intercept_strtok, true, + "If set, uses a custom wrapper for the strtok function " + 
"to find more errors.") COMMON_FLAG(bool, intercept_strpbrk, true, "If set, uses custom wrappers for strpbrk function " "to find more errors.") diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 43ec558..e565575 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -74,6 +74,7 @@ #define SANITIZER_INTERCEPT_STRCMP 1 #define SANITIZER_INTERCEPT_STRSTR 1 #define SANITIZER_INTERCEPT_STRCASESTR SI_NOT_WINDOWS +#define SANITIZER_INTERCEPT_STRTOK 1 #define SANITIZER_INTERCEPT_STRCHR 1 #define SANITIZER_INTERCEPT_STRCHRNUL SI_UNIX_NOT_MAC #define SANITIZER_INTERCEPT_STRRCHR 1 diff --git a/compiler-rt/test/asan/TestCases/strtok.c b/compiler-rt/test/asan/TestCases/strtok.c new file mode 100644 index 0000000..7aa263a --- /dev/null +++ b/compiler-rt/test/asan/TestCases/strtok.c @@ -0,0 +1,108 @@ +// RUN: %clang_asan %s -o %t + +// Test overflows with strict_string_checks + +// RUN: %env_asan_opts=strict_string_checks=true not %run %t test1 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK1 +// RUN: %env_asan_opts=intercept_strtok=false %run %t test1 2>&1 +// RUN: %env_asan_opts=strict_string_checks=true not %run %t test2 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK2 +// RUN: %env_asan_opts=intercept_strtok=false %run %t test2 2>&1 +// RUN: %env_asan_opts=strict_string_checks=true not %run %t test3 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK3 +// RUN: %env_asan_opts=intercept_strtok=false %run %t test3 2>&1 +// RUN: %env_asan_opts=strict_string_checks=true %run %t test4 2>&1 +// RUN: %env_asan_opts=intercept_strtok=false %run %t test4 2>&1 + +// Test overflows with !strict_string_checks +// RUN: %env_asan_opts=strict_string_checks=false not %run %t test5 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK5 +// RUN: %env_asan_opts=intercept_strtok=false %run 
%t test5 2>&1 +// RUN: %env_asan_opts=strict_string_checks=false not %run %t test6 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK6 +// RUN: %env_asan_opts=intercept_strtok=false %run %t test6 2>&1 + + +#include <assert.h> +#include <string.h> +#include <sanitizer/asan_interface.h> + +// Check that we find overflows in the delimiters on the first call +// with strict_string_checks. +void test1() { + char *token; + char s[4] = "abc"; + char token_delimiter[2] = "b"; + __asan_poison_memory_region ((char *)&token_delimiter[1], 2); + token = strtok(s, token_delimiter); + // CHECK1:'token_delimiter' <== Memory access at offset {{[0-9]+}} partially overflows this variable + assert(strcmp(token, "a") == 0); +} + +// Check that we find overflows in the delimiters on the second call (str == NULL) +// with strict_string_checks. +void test2() { + char *token; + char s[4] = "abc"; + char token_delimiter[2] = "b"; + token = strtok(s, token_delimiter); + assert(strcmp(token, "a") == 0); + __asan_poison_memory_region ((char *)&token_delimiter[1], 2); + token = strtok(NULL, token_delimiter); + // CHECK2:'token_delimiter' <== Memory access at offset {{[0-9]+}} partially overflows this variable + assert(strcmp(token, "c") == 0); +} + +// Check that we find overflows in the string (only on the first call) with strict_string_checks. +void test3() { + char *token; + char s[4] = "abc"; + char token_delimiter[2] = "b"; + __asan_poison_memory_region ((char *)&s[3], 2); + token = strtok(s, token_delimiter); + // CHECK3:'s' <== Memory access at offset {{[0-9]+}} partially overflows this variable + assert(token == s); +} + +// Check that we do not crash when strtok returns NULL with strict_string_checks. +void test4() { + char *token; + char s[] = ""; + char token_delimiter[] = "a"; + token = strtok(s, token_delimiter); + assert(token == NULL); +} + +// Check that we find overflows in the string (only on the first call) with !strict_string_checks. 
+void test5() { + char *token; + char s[4] = "abc"; + char token_delimiter[2] = "d"; + __asan_poison_memory_region ((char *)&s[2], 2); + __asan_poison_memory_region ((char *)&token_delimiter[1], 2); + token = strtok(s, token_delimiter); + // CHECK5:'s' <== Memory access at offset {{[0-9]+}} partially overflows this variable + assert(token == s); +} + +// Check that we find overflows in the delimiters (only on the first call) with !strict_string_checks. +void test6() { + char *token; + char s[4] = "abc"; + char token_delimiter[1] = {'d'}; + __asan_poison_memory_region ((char *)&token_delimiter[1], 2); + token = strtok(s, &token_delimiter[1]); + // CHECK6:'token_delimiter' <== Memory access at offset {{[0-9]+}} overflows this variable + assert(strcmp(token, "abc") == 0); +} + +int main(int argc, char **argv) { + if (argc != 2) return 1; + if (!strcmp(argv[1], "test1")) test1(); + if (!strcmp(argv[1], "test2")) test2(); + if (!strcmp(argv[1], "test3")) test3(); + if (!strcmp(argv[1], "test4")) test4(); + if (!strcmp(argv[1], "test5")) test5(); + if (!strcmp(argv[1], "test6")) test6(); + return 0; +} -- 2.7.4