From 623bc0276849d48ada5a7a2e3e94bd79de42c3db Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 17 Aug 2020 16:35:10 -0400 Subject: [PATCH] analyzer: consider initializers for globals [PR96651] PR analyzer/96651 reports a false positive in which a global that can't have been touched yet is checked in "main". The analyzer fails to reject code paths in which the initial value of the global makes the path condition impossible. This patch detects cases where the code path begins at the entrypoint of "main", and extracts values from initializers for globals that can't have been touched yet, rather than using a symbolic "INIT_VAL(REG)", fixing the false positive. gcc/analyzer/ChangeLog: PR analyzer/96651 * region-model.cc (region_model::called_from_main_p): New. (region_model::get_store_value): Move handling for globals into... (region_model::get_initial_value_for_global): ...this new function, and add logic for extracting values from decl initializers. * region-model.h (decl_region::get_svalue_for_constructor): New decl. (decl_region::get_svalue_for_initializer): New decl. (region_model::called_from_main_p): New decl. (region_model::get_initial_value_for_global): New. * region.cc (decl_region::maybe_get_constant_value): Move logic for getting an svalue from a CONSTRUCTOR node to... (decl_region::get_svalue_for_constructor): ...this new function. (decl_region::get_svalue_for_initializer): New. * store.cc (get_svalue_for_ctor_val): Rewrite in terms of region_model::get_rvalue. * store.h (binding_cluster::get_map): New accessor. gcc/testsuite/ChangeLog: PR analyzer/96651 * gcc.dg/analyzer/pr96651-1.c: New test. * gcc.dg/analyzer/pr96651-2.c: New test. --- gcc/analyzer/region-model.cc | 82 ++++++++++++++++++++++++++++--- gcc/analyzer/region-model.h | 6 +++ gcc/analyzer/region.cc | 57 ++++++++++++++++----- gcc/analyzer/store.cc | 12 ++--- gcc/analyzer/store.h | 2 + gcc/testsuite/gcc.dg/analyzer/pr96651-1.c | 22 +++++++++ gcc/testsuite/gcc.dg/analyzer/pr96651-2.c | 72 +++++++++++++++++++++++++++ 7 files changed, 224 insertions(+), 29 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96651-1.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96651-2.c diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index c3d9ca7..5b08e48 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1204,6 +1204,76 @@ region_model::get_rvalue (tree expr, region_model_context *ctxt) return get_rvalue (path_var (expr, get_stack_depth () - 1), ctxt); } +/* Return true if this model is on a path with "main" as the entrypoint + (as opposed to one in which we're merely analyzing a subset of the + path through the code). */ + +bool +region_model::called_from_main_p () const +{ + if (!m_current_frame) + return false; + /* Determine if the oldest stack frame in this model is for "main". */ + const frame_region *frame0 = get_frame_at_index (0); + gcc_assert (frame0); + return id_equal (DECL_NAME (frame0->get_function ()->decl), "main"); +} + +/* Subroutine of region_model::get_store_value for when REG is (or is within) + a global variable that hasn't been touched since the start of this path + (or was implicitly touched due to a call to an unknown function). */ + +const svalue * +region_model::get_initial_value_for_global (const region *reg) const +{ + /* Get the decl that REG is for (or is within). */ + const decl_region *base_reg + = reg->get_base_region ()->dyn_cast_decl_region (); + gcc_assert (base_reg); + tree decl = base_reg->get_decl (); + + /* Special-case: to avoid having to explicitly update all previously + untracked globals when calling an unknown fn, they implicitly have + an unknown value if an unknown call has occurred, unless this is + static to-this-TU and hasn't escaped. Globals that have escaped + are explicitly tracked, so we shouldn't hit this case for them. */ + if (m_store.called_unknown_fn_p () && TREE_PUBLIC (decl)) + return m_mgr->get_or_create_unknown_svalue (reg->get_type ()); + + /* If we are on a path from the entrypoint from "main" and we have a + global decl defined in this TU that hasn't been touched yet, then + the initial value of REG can be taken from the initialization value + of the decl. */ + if (called_from_main_p () && !DECL_EXTERNAL (decl)) + { + /* Get the initializer value for base_reg. */ + const svalue *base_reg_init + = base_reg->get_svalue_for_initializer (m_mgr); + gcc_assert (base_reg_init); + if (reg == base_reg) + return base_reg_init; + else + { + /* Get the value for REG within base_reg_init. */ + binding_cluster c (base_reg); + c.bind (m_mgr->get_store_manager (), base_reg, base_reg_init, + BK_direct); + const svalue *sval + = c.get_any_binding (m_mgr->get_store_manager (), reg); + if (sval) + { + if (reg->get_type ()) + sval = m_mgr->get_or_create_cast (reg->get_type (), + sval); + return sval; + } + } + } + + /* Otherwise, return INIT_VAL(REG). */ + return m_mgr->get_or_create_initial_value (reg); +} + /* Get a value for REG, looking it up in the store, or otherwise falling back to "initial" or "unknown" values. */ @@ -1256,14 +1326,10 @@ region_model::get_store_value (const region *reg) const would have returned UNKNOWN, and we would already have returned that above). */ - /* Special-case: to avoid having to explicitly update all previously - untracked globals when calling an unknown fn, we instead change - the default here so we implicitly have an unknown value for such - regions. */ - if (m_store.called_unknown_fn_p ()) - if (reg->get_base_region ()->get_parent_region ()->get_kind () - == RK_GLOBALS) - return m_mgr->get_or_create_unknown_svalue (reg->get_type ()); + /* Handle globals. */ + if (reg->get_base_region ()->get_parent_region ()->get_kind () + == RK_GLOBALS) + return get_initial_value_for_global (reg); return m_mgr->get_or_create_initial_value (reg); } diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 3d044bf..79d739e 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -1870,6 +1870,9 @@ public: int get_stack_depth () const; const svalue *maybe_get_constant_value (region_model_manager *mgr) const; + const svalue *get_svalue_for_constructor (tree ctor, + region_model_manager *mgr) const; + const svalue *get_svalue_for_initializer (region_model_manager *mgr) const; private: tree m_decl; @@ -2701,6 +2704,9 @@ class region_model void record_dynamic_extents (const region *reg, const svalue *size_in_bytes); + bool called_from_main_p () const; + const svalue *get_initial_value_for_global (const region *reg) const; + /* Storing this here to avoid passing it around everywhere. */ region_model_manager *const m_mgr; diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index 770e2cb..c3dc8cd 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -886,20 +886,53 @@ decl_region::maybe_get_constant_value (region_model_manager *mgr) const && DECL_IN_CONSTANT_POOL (m_decl) && DECL_INITIAL (m_decl) && TREE_CODE (DECL_INITIAL (m_decl)) == CONSTRUCTOR) - { - tree ctor = DECL_INITIAL (m_decl); - gcc_assert (!TREE_CLOBBER_P (ctor)); + return get_svalue_for_constructor (DECL_INITIAL (m_decl), mgr); + return NULL; +} - /* Create a binding map, applying ctor to it, using this - decl_region as the base region when building child regions - for offset calculations. */ - binding_map map; - map.apply_ctor_to_region (this, ctor, mgr); +/* Get an svalue for CTOR, a CONSTRUCTOR for this region's decl. */ - /* Return a compound svalue for the map we built. */ - return mgr->get_or_create_compound_svalue (get_type (), map); - } - return NULL; +const svalue * +decl_region::get_svalue_for_constructor (tree ctor, + region_model_manager *mgr) const +{ + gcc_assert (!TREE_CLOBBER_P (ctor)); + + /* Create a binding map, applying ctor to it, using this + decl_region as the base region when building child regions + for offset calculations. */ + binding_map map; + map.apply_ctor_to_region (this, ctor, mgr); + + /* Return a compound svalue for the map we built. */ + return mgr->get_or_create_compound_svalue (get_type (), map); +} + +/* For use on decl_regions for global variables. + + Get an svalue for the initial value of this region at entry to + "main" (either based on DECL_INITIAL, or implicit initialization to + zero. */ + +const svalue * +decl_region::get_svalue_for_initializer (region_model_manager *mgr) const +{ + tree init = DECL_INITIAL (m_decl); + if (!init) + { + /* Implicit initialization to zero; use a compound_svalue for it. */ + binding_cluster c (this); + c.zero_fill_region (mgr->get_store_manager (), this); + return mgr->get_or_create_compound_svalue (TREE_TYPE (m_decl), + c.get_map ()); + } + + if (TREE_CODE (init) == CONSTRUCTOR) + return get_svalue_for_constructor (init, mgr); + + /* Reuse the get_rvalue logic from region_model. */ + region_model m (mgr); + return m.get_rvalue (path_var (init, 0), NULL); } /* class field_region : public region. */ diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index 5af86d0..d854f4e 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -396,15 +396,9 @@ get_subregion_within_ctor (const region *parent_reg, tree index, static const svalue * get_svalue_for_ctor_val (tree val, region_model_manager *mgr) { - if (TREE_CODE (val) == ADDR_EXPR) - { - gcc_assert (TREE_CODE (TREE_OPERAND (val, 0)) == STRING_CST); - const string_region *str_reg - = mgr->get_region_for_string (TREE_OPERAND (val, 0)); - return mgr->get_ptr_svalue (TREE_TYPE (val), str_reg); - } - gcc_assert (CONSTANT_CLASS_P (val)); - return mgr->get_or_create_constant_svalue (val); + /* Reuse the get_rvalue logic from region_model. */ + region_model m (mgr); + return m.get_rvalue (path_var (val, 0), NULL); } /* Bind values from CONSTRUCTOR to this map, relative to diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index 16bad03..bc9dc2e 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -451,6 +451,8 @@ public: iterator_t begin () const { return m_map.begin (); } iterator_t end () const { return m_map.end (); } + const binding_map &get_map () const { return m_map; } + private: const svalue *get_any_value (const binding_key *key) const; void get_overlapping_bindings (store_manager *mgr, const region *reg, diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96651-1.c b/gcc/testsuite/gcc.dg/analyzer/pr96651-1.c new file mode 100644 index 0000000..2f3a9b4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr96651-1.c @@ -0,0 +1,22 @@ +#include +#include +#include + +static int a; + +int main(void) +{ + char *src = NULL; + char buf[128]; + + /* "a" can't have been touched yet, and thus + is implicitly zero. */ + switch (a) { + case 1: + strcpy(buf, src); /* { dg-bogus "NULL" } */ + break; + case 0: + strcpy(buf, "hello"); + } + printf("%s\n", buf); +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96651-2.c b/gcc/testsuite/gcc.dg/analyzer/pr96651-2.c new file mode 100644 index 0000000..249a32b --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr96651-2.c @@ -0,0 +1,72 @@ +#include "analyzer-decls.h" + +extern void unknown_fn (void *, void *); + +static int a; +static int b = 42; +int c; +int d = 17; +struct { int x; int y; char rgb[3]; } s = {5, 10, {0x80, 0x40, 0x20}}; +void *e = &d; + +extern struct _IO_FILE *stderr; + +/* If we're not on a direct path from "main", we know nothing about + the values of globals. */ + +void test (void) +{ + __analyzer_eval (a == 0); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (b == 42); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (c == 0); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (d == 17); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (s.rgb[2] == 0x20); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (e == &d); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (stderr == 0); /* { dg-warning "UNKNOWN" } */ +} + +static void __attribute__((noinline)) +called_from_main (void) +{ + /* When accessed from main, the vars still have their initializer values. */ + __analyzer_eval (a == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (b == 42); /* { dg-warning "TRUE" } */ + __analyzer_eval (c == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (d == 17); /* { dg-warning "TRUE" } */ + __analyzer_eval (s.rgb[2] == 0x20); /* { dg-warning "TRUE" } */ + __analyzer_eval (e == &d); /* { dg-warning "TRUE" } */ + /* ...apart from those defined in a different TU (or that were inited + before "main"). */ + __analyzer_eval (stderr == 0); /* { dg-warning "UNKNOWN" } */ +} + +int main (void) +{ + /* When accessed from main, the vars still have their initializer values. */ + __analyzer_eval (a == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (b == 42); /* { dg-warning "TRUE" } */ + __analyzer_eval (c == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (d == 17); /* { dg-warning "TRUE" } */ + __analyzer_eval (s.rgb[2] == 0x20); /* { dg-warning "TRUE" } */ + __analyzer_eval (e == &d); /* { dg-warning "TRUE" } */ + /* ...apart from those defined in a different TU (or that were inited + before "main"). */ + __analyzer_eval (stderr == 0); /* { dg-warning "UNKNOWN" } */ + + called_from_main (); + + unknown_fn (&a, &c); + + /* "a" escaped above and so could have been written to. */ + __analyzer_eval (a == 0); /* { dg-warning "UNKNOWN" } */ + /* "b" doesn't escape and is static, and so must still have its + initial value. */ + __analyzer_eval (b == 42); /* { dg-warning "TRUE" } */ + /* The other globals are non-static and so have implicitly escaped, + and so could have been written to. */ + __analyzer_eval (c == 0); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (d == 17); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (s.rgb[2] == 0x20); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (e == &d); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (stderr == 0); /* { dg-warning "UNKNOWN" } */ +} -- 2.7.4