From 9d2c0fad59745bf67aa6471e8c9e96c351f0de59 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 3 Feb 2022 16:21:27 -0500 Subject: [PATCH] analyzer: fixes to memcpy [PR103872] PR analyzer/103872 reports a failure of gcc.dg/analyzer/pr103526.c on riscv64-unknown-elf-gcc. The issue is that I wrote the test on x86_64 where a memcpy in the test is optimized to a write to a read/write pair, whereas due to alignment differences the analyzer can see it as a memcpy call, revealing problems with the analyzer's implementation of memcpy. This patch reimplements region_model::impl_call_memcpy in terms of a get_store_value followed by a set_value, fixing the issue. gcc/analyzer/ChangeLog: PR analyzer/103872 * region-model-impl-calls.cc (region_model::impl_call_memcpy): Reimplement in terms of a get_store_value followed by a set_value. gcc/testsuite/ChangeLog: PR analyzer/103872 * gcc.dg/analyzer/memcpy-1.c: Add alternate versions of test cases in which the calls to memcpy are hidden from the optimizer. Add further test cases. * gcc.dg/analyzer/taint-size-1.c: Add test coverage for memcpy with tainted size. Signed-off-by: David Malcolm --- gcc/analyzer/region-model-impl-calls.cc | 28 +++--- gcc/testsuite/gcc.dg/analyzer/memcpy-1.c | 125 +++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/analyzer/taint-size-1.c | 9 ++ 3 files changed, 148 insertions(+), 14 deletions(-) diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index e959297..95d9921 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -491,29 +491,29 @@ region_model::impl_call_malloc (const call_details &cd) } /* Handle the on_call_pre part of "memcpy" and "__builtin_memcpy". */ +// TODO: complain about overlapping src and dest. void region_model::impl_call_memcpy (const call_details &cd) { - const svalue *dest_sval = cd.get_arg_svalue (0); + const svalue *dest_ptr_sval = cd.get_arg_svalue (0); + const svalue *src_ptr_sval = cd.get_arg_svalue (1); const svalue *num_bytes_sval = cd.get_arg_svalue (2); - const region *dest_reg = deref_rvalue (dest_sval, cd.get_arg_tree (0), + const region *dest_reg = deref_rvalue (dest_ptr_sval, cd.get_arg_tree (0), cd.get_ctxt ()); + const region *src_reg = deref_rvalue (src_ptr_sval, cd.get_arg_tree (1), + cd.get_ctxt ()); - cd.maybe_set_lhs (dest_sval); - - if (tree num_bytes = num_bytes_sval->maybe_get_constant ()) - { - /* "memcpy" of zero size is a no-op. */ - if (zerop (num_bytes)) - return; - } - - check_region_for_write (dest_reg, cd.get_ctxt ()); + cd.maybe_set_lhs (dest_ptr_sval); - /* Otherwise, mark region's contents as unknown. */ - mark_region_as_unknown (dest_reg, cd.get_uncertainty ()); + const region *sized_src_reg + = m_mgr->get_sized_region (src_reg, NULL_TREE, num_bytes_sval); + const region *sized_dest_reg + = m_mgr->get_sized_region (dest_reg, NULL_TREE, num_bytes_sval); + const svalue *src_contents_sval + = get_store_value (sized_src_reg, cd.get_ctxt ()); + set_value (sized_dest_reg, src_contents_sval, cd.get_ctxt ()); } /* Handle the on_call_pre part of "memset" and "__builtin_memset". */ diff --git a/gcc/testsuite/gcc.dg/analyzer/memcpy-1.c b/gcc/testsuite/gcc.dg/analyzer/memcpy-1.c index f120eac..a9368d3 100644 --- a/gcc/testsuite/gcc.dg/analyzer/memcpy-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/memcpy-1.c @@ -1,6 +1,16 @@ #include #include "analyzer-decls.h" +/* Function for thwarting expansion of memcpy by optimizer. */ + +typedef void * (*memcpy_t) (void *dst, const void *src, size_t n); + +static memcpy_t __attribute__((noinline)) +get_memcpy (void) +{ + return memcpy; +} + void *test_1 (void *dst, void *src, size_t n) { void *result = memcpy (dst, src, n); @@ -15,6 +25,14 @@ void *test_1a (void *dst, void *src, size_t n) return result; } +void *test_1b (void *dst, void *src, size_t n) +{ + memcpy_t fn = get_memcpy (); + void *result = fn (dst, src, n); + __analyzer_eval (result == dst); /* { dg-warning "TRUE" } */ + return result; +} + void test_2 (int i) { int j; @@ -29,6 +47,14 @@ void test_2a (int i) __analyzer_eval (i == j); /* { dg-warning "TRUE" } */ } +void test_2b (int i) +{ + int j; + memcpy_t fn = get_memcpy (); + fn (&j, &i, sizeof (int)); + __analyzer_eval (i == j); /* { dg-warning "TRUE" } */ +} + void test_3 (void *src, size_t n) { char buf[40], other[40]; @@ -41,3 +67,102 @@ void test_3 (void *src, size_t n) __analyzer_eval (buf[0] == 'a'); /* { dg-warning "UNKNOWN" } */ __analyzer_eval (other[0] == 'b'); /* { dg-warning "TRUE" } */ } + +void test_3b (void *src, size_t n) +{ + char buf[40], other[40]; + memcpy_t fn = get_memcpy (); + buf[0] = 'a'; + other[0] = 'b'; + __analyzer_eval (buf[0] == 'a'); /* { dg-warning "TRUE" } */ + __analyzer_eval (other[0] == 'b'); /* { dg-warning "TRUE" } */ + + fn (buf, src, n); + __analyzer_eval (buf[0] == 'a'); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (other[0] == 'b'); /* { dg-warning "TRUE" } */ +} + +/* Overwriting a zeroed buffer, then memcpy of the result. */ + +void test_4 (int a, int b) +{ + int src[1024]; + int dst[1024]; + memset (src, 0, sizeof (src)); + src[42] = a; + src[100] = b; + __analyzer_eval (src[0] == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (src[42] == a); /* { dg-warning "TRUE" } */ + __analyzer_eval (src[100] == b); /* { dg-warning "TRUE" } */ + __analyzer_eval (src[1023] == 0); /* { dg-warning "TRUE" } */ + + memcpy (dst, src, sizeof (src)); + __analyzer_eval (dst[0] == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (dst[42] == a); /* { dg-warning "TRUE" } */ + __analyzer_eval (dst[100] == b); /* { dg-warning "TRUE" } */ + __analyzer_eval (dst[1023] == 0); /* { dg-warning "TRUE" } */ +} + +void test_4b (int a, int b) +{ + int src[1024]; + int dst[1024]; + memcpy_t fn = get_memcpy (); + memset (src, 0, sizeof (src)); + src[42] = a; + src[100] = b; + __analyzer_eval (src[0] == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (src[42] == a); /* { dg-warning "TRUE" } */ + __analyzer_eval (src[100] == b); /* { dg-warning "TRUE" } */ + __analyzer_eval (src[1023] == 0); /* { dg-warning "TRUE" } */ + + fn (dst, src, sizeof (src)); + __analyzer_eval (dst[0] == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (dst[42] == a); /* { dg-warning "TRUE" } */ + __analyzer_eval (dst[100] == b); /* { dg-warning "TRUE" } */ + __analyzer_eval (dst[1023] == 0); /* { dg-warning "TRUE" } */ +} + +/* Populating a buffer from an unknown buffer. */ + +void test_5 (void *src, size_t sz) +{ + char dst[1024]; + memcpy (dst, src, sizeof (dst)); + __analyzer_eval (dst[0] == 0); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (dst[1023] == 0); /* { dg-warning "UNKNOWN" } */ +} + +void test_5b (void *src, size_t sz) +{ + char dst[1024]; + memcpy_t fn = get_memcpy (); + fn (dst, src, sizeof (dst)); + __analyzer_eval (dst[0] == 0); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (dst[1023] == 0); /* { dg-warning "UNKNOWN" } */ +} + +/* Zero-sized memcpy. */ + +void test_6 (void *dst, void *src) +{ + memcpy (dst, src, 0); +} + +void test_6b (void *dst, void *src) +{ + memcpy_t fn = get_memcpy (); + fn (dst, src, 0); +} + +/* memcpy to string literal. */ + +void test_7 (void *src, size_t sz) +{ + memcpy ((void *)"hello world", src, sz); /* { dg-warning "write to string literal" } */ +} + +void test_7b (void *src, size_t sz) +{ + memcpy ((void *)"hello world", src, sz); /* { dg-warning "write to string literal" } */ +} diff --git a/gcc/testsuite/gcc.dg/analyzer/taint-size-1.c b/gcc/testsuite/gcc.dg/analyzer/taint-size-1.c index 64c9c26..0b166f7 100644 --- a/gcc/testsuite/gcc.dg/analyzer/taint-size-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/taint-size-1.c @@ -12,6 +12,7 @@ struct foo }; char buf[100]; +char buf2[100]; /* memset with tainted size. */ @@ -30,3 +31,11 @@ void test_1 (FILE *f) // TOOD: better messages for state changes } } + +/* memcpy with tainted size. */ + +void __attribute__((tainted_args)) +test_2 (size_t sz) +{ + memcpy (buf, buf2, sz); /* { dg-warning "use of attacker-controlled value 'sz' as size without upper-bounds checking" } */ +} -- 2.7.4