Create a Corpus Group API extension
authorDodji Seketeli <dodji@redhat.com>
Thu, 30 Mar 2017 10:51:32 +0000 (12:51 +0200)
committerDodji Seketeli <dodji@redhat.com>
Mon, 3 Jul 2017 15:45:36 +0000 (17:45 +0200)
To support the upcoming analysis of the Linux kernel and its modules,
we need a way to represent a union of corpora.  The first corpus
loaded would be the one representing the vmlinux binary.  Subsequent
corpora loaded would be those representing the modules.

This patch provides the new abigail::ir::corpus_group type that
represents such a corpus group.

* include/abg-corpus.h (corpus::{find_translation_unit,
get_type_per_loc_map}): Declare new member functions.
(corpus::get_architecture_name): Make this member function const.
(corpus::{is_empty, get_sorted_fun_symbols, get_sorted_var_symbols,
get_functions, get_variables, get_unreferenced_function_symbols,
get_unreferenced_variable_symbols}): Make these member functions
virtual.
(class corpus_group): Declare a new type.
* include/abg-fwd.h (corpus_sptr, corpus_group_sptr)
(string_tu_map_type, istring_var_decl_ptr_map_type)
(istring_function_decl_ptr_map_type): Define new typedefs.
* src/abg-corpus-priv.h (corpus_priv::{path_tu_map,
type_per_loc_map_}): Add new data members.
* src/abg-corpus.cc (corpus::add): Complete the function comment.
Assert that at most one translation unit of a given path can be
added to the corpus.
(corpus::{find_translation_unit, get_type_per_loc_map}): Define
new member functions.
(corpus::{get_architecture_name}): Make this member function
const.
(struct corpus_group::priv): Define new type.
(corpus_group::{corpus_group, ~corpus_group, add_corpus,
get_corpora, is_empty, get_functions, get_variables,
get_var_symbol_map, get_fun_symbol_map, get_sorted_fun_symbols,
get_sorted_var_symbols, get_unreferenced_function_symbols,
get_unreferenced_variable_symbols}): Define member functions of
the new corpus_group type.

Signed-off-by: Dodji Seketeli <dodji@redhat.com>
include/abg-corpus.h
include/abg-fwd.h
src/abg-corpus-priv.h
src/abg-corpus.cc

index c6fd5baee719765bbac5ad7371b1b1a62243ccee..2c09cdac09a1c80af5bb04a30df5e5e56bd755c5 100644 (file)
@@ -93,6 +93,9 @@ public:
   const translation_units&
   get_translation_units() const;
 
+  const translation_unit_sptr
+  find_translation_unit(const string &path) const;
+
   void
   drop_translation_units();
 
@@ -102,6 +105,12 @@ public:
   const type_maps&
   get_types() const;
 
+  type_maps&
+  get_type_per_loc_map();
+
+  const type_maps&
+  get_type_per_loc_map() const;
+
   origin
   get_origin() const;
 
@@ -127,12 +136,12 @@ public:
   set_soname(const string&);
 
   const string&
-  get_architecture_name();
+  get_architecture_name() const;
 
   void
   set_architecture_name(const string&);
 
-  bool
+  virtual bool
   is_empty() const;
 
   bool
@@ -162,7 +171,7 @@ public:
   const string_elf_symbols_map_type&
   get_undefined_fun_symbol_map() const;
 
-  const elf_symbols&
+  virtual const elf_symbols&
   get_sorted_fun_symbols() const;
 
   const elf_symbols&
@@ -180,7 +189,7 @@ public:
   const string_elf_symbols_map_type&
   get_undefined_var_symbol_map() const;
 
-  const elf_symbols&
+  virtual const elf_symbols&
   get_sorted_var_symbols() const;
 
   const elf_symbols&
@@ -206,7 +215,7 @@ public:
   const elf_symbol_sptr
   lookup_variable_symbol(const elf_symbol& symbol) const;
 
-  const functions&
+  virtual const functions&
   get_functions() const;
 
   const vector<function_decl*>*
@@ -215,16 +224,16 @@ public:
   void
   sort_functions();
 
-  const variables&
+  virtual const variables&
   get_variables() const;
 
   void
   sort_variables();
 
-  const elf_symbols&
+  virtual const elf_symbols&
   get_unreferenced_function_symbols() const;
 
-  const elf_symbols&
+  virtual const elf_symbols&
   get_unreferenced_variable_symbols() const;
 
   vector<string>&
@@ -325,6 +334,65 @@ public:
   maybe_add_var_to_exported_vars(var_decl*);
 }; //corpus::exported_decls_builder
 
+/// Abstraction of a group of corpora.
+///
+/// A corpus group is a union of corpora.  It provides a unified view
+/// of a set of corpora: it lets you get the set of functions,
+/// variables and symbols that are defined and exported by the
+/// corpora that were added to the group with add_corpus().
+class corpus_group : public corpus
+{
+  struct priv;
+  typedef shared_ptr<priv> priv_sptr;
+
+  priv_sptr priv_;
+
+  // Forbid copy: the copy constructor is declared private on purpose.
+  corpus_group(const corpus_group&);
+
+public:
+  typedef vector<corpus_sptr> corpora_type;
+
+  corpus_group(ir::environment*, const string&);
+
+  virtual ~corpus_group();
+
+  void add_corpus(const corpus_sptr&);
+
+  const corpora_type&
+  get_corpora() const;
+
+  virtual bool
+  is_empty() const;
+
+  virtual const corpus::functions&
+  get_functions() const;
+
+  virtual const corpus::variables&
+  get_variables() const;
+
+  const string_elf_symbols_map_type&
+  get_var_symbol_map() const;
+
+  const string_elf_symbols_map_type&
+  get_fun_symbol_map() const;
+
+  virtual const elf_symbols&
+  get_sorted_fun_symbols() const;
+
+  virtual const elf_symbols&
+  get_sorted_var_symbols() const;
+
+  virtual const elf_symbols&
+  get_unreferenced_function_symbols() const;
+
+  virtual const elf_symbols&
+  get_unreferenced_variable_symbols() const;
+
+  bool
+  operator==(const corpus_group&) const; // NOTE(review): no definition appears in this patch -- confirm one exists.
+}; // end class corpus_group
+
 }// end namespace ir
 }//end namespace abigail
 #endif //__ABG_CORPUS_H__
index 8767913d4781208a42b77be950e5e97a266ab883..bba5be6149b6e82d183be3ed129097c2fae65991 100644 (file)
@@ -31,6 +31,7 @@
 #include <vector>
 #include <string>
 #include <tr1/functional>
+#include <tr1/unordered_map>
 #include <typeinfo>
 #include <utility> // for std::rel_ops, at least.
 #include <ostream>
@@ -66,6 +67,7 @@ namespace abigail
 // Inject some types.
 using std::tr1::shared_ptr;
 using std::tr1::weak_ptr;
+using std::tr1::unordered_map;
 using std::string;
 using std::vector;
 
@@ -78,6 +80,10 @@ namespace ir
 // Forward declarations for corpus.
 
 class corpus;
+typedef shared_ptr<corpus> corpus_sptr;
+
+class corpus_group;
+typedef shared_ptr<corpus_group> corpus_group_sptr;
 
 // Forward declarations for ir.
 
@@ -121,6 +127,9 @@ class translation_unit;
 typedef shared_ptr<translation_unit> translation_unit_sptr;
 /// Convenience typedef for a vector of @ref translation_unit_sptr.
 typedef std::vector<translation_unit_sptr> translation_units;
+/// Convenience typedef for a map that associates a string to a
+/// translation unit.
+typedef unordered_map<string, translation_unit_sptr> string_tu_map_type;
 
 class decl_base;
 
@@ -213,6 +222,10 @@ class var_decl;
 /// Convenience typedef for a shared pointer on a @ref var_decl
 typedef shared_ptr<var_decl> var_decl_sptr;
 
+typedef unordered_map<interned_string,
+                     var_decl*,
+                     hash_interned_string> istring_var_decl_ptr_map_type;
+
 class scope_decl;
 
 /// Convenience typedef for a shared pointer on a @ref scope_decl.
@@ -223,6 +236,10 @@ class function_decl;
 /// Convenience typedef for a shared pointer on a @ref function_decl
 typedef shared_ptr<function_decl> function_decl_sptr;
 
+typedef unordered_map<interned_string,
+                     function_decl*,
+                     hash_interned_string> istring_function_decl_ptr_map_type;
+
 class method_decl;
 
 typedef shared_ptr<method_decl> method_decl_sptr;
index 120aaca0b2aeeffc4517329934d77b7a193ca145..d6b6659d5e42fb29fc86a7b36d4bb64127f7cb2d 100644 (file)
@@ -690,6 +690,7 @@ struct corpus::priv
   string                                       soname;
   string                                       architecture_name;
   translation_units                            members;
+  string_tu_map_type                           path_tu_map;
   vector<function_decl*>                       fns;
   vector<var_decl*>                            vars;
   string_elf_symbols_map_sptr                  var_symbol_map;
@@ -714,6 +715,7 @@ struct corpus::priv
   // data member.  Otherwise, the lookup must be made by looking into
   // the type maps of each translation unit.
   type_maps                                    types_;
+  type_maps                                    type_per_loc_map_;
 
 private:
   priv();
@@ -732,7 +734,7 @@ public:
   type_maps&
   get_types();
 
-   const type_maps&
+  const type_maps&
   get_types() const;
 }; // end struct corpus::priv
 
index 7e3c27fa4b27607d34f0854cdba44fcc200067b0..f40c9546d18f525494e4e1f7b2d9eb05f680946c 100644 (file)
@@ -1,6 +1,6 @@
 // -*- mode: C++ -*-
 //
-// Copyright (C) 2013-2016 Red Hat, Inc.
+// Copyright (C) 2013-2017 Red Hat, Inc.
 //
 // This file is part of the GNU Application Binary Interface Generic
 // Analysis and Instrumentation Library (libabigail).  This library is
@@ -525,6 +525,10 @@ corpus::set_environment(environment* e) const
 /// the corpus are going to be serialized on disk in the file
 /// associated to the current corpus.
 ///
+/// Note that two translation units with the same path (as returned by
+/// translation_unit::get_path) cannot be added to the same @ref
+/// corpus.  If that happens, the library aborts.
+///
 /// @param tu the new translation unit to add.
 void
 corpus::add(const translation_unit_sptr tu)
@@ -535,6 +539,14 @@ corpus::add(const translation_unit_sptr tu)
   assert(tu->get_environment() == get_environment());
 
   priv_->members.push_back(tu);
+  if (!tu->get_path().empty())
+    {
+      // Update the path -> translation_unit map.
+      string_tu_map_type::const_iterator i =
+       priv_->path_tu_map.find(tu->get_path());
+      assert(i == priv_->path_tu_map.end());
+      priv_->path_tu_map[tu->get_path()] = tu;
+    }
 
   tu->set_corpus(this);
 }
@@ -546,6 +558,23 @@ const translation_units&
 corpus::get_translation_units() const
 {return priv_->members;}
 
+/// Find the translation unit of this corpus that has a given path.
+///
+/// @param path the path to look up in the path -> translation unit map.
+///
+/// @return the translation unit found, if any.  Otherwise, return
+/// a nil (default-constructed) @ref translation_unit_sptr.
+const translation_unit_sptr
+corpus::find_translation_unit(const string &path) const
+{
+  string_tu_map_type::const_iterator i =
+    priv_->path_tu_map.find(path);
+
+  if (i == priv_->path_tu_map.end())
+    return translation_unit_sptr();
+  return i->second;
+}
+
 /// Erase the translation units contained in this in-memory object.
 ///
 /// Note that the on-disk archive file that contains the serialized
@@ -569,6 +598,32 @@ const type_maps&
 corpus::get_types() const
 {return priv_->types_;}
 
+/// Get the maps that associate a location string to a certain kind of
+/// type.  This is the const overload.
+///
+/// The location string is the result of invoking the function
+/// abigail::ir::location::expand().  It has the form
+/// "file.c:4:1", with 'file.c' being the file name, '4' being the
+/// line number and '1' being the column number.
+///
+/// @return a const reference to the maps.
+const type_maps&
+corpus::get_type_per_loc_map() const
+{return priv_->type_per_loc_map_;}
+
+/// Get the maps that associate a location string to a certain kind of
+/// type.  This is the non-const overload.
+///
+/// The location string is the result of invoking the function
+/// abigail::ir::location::expand().  It has the form
+/// "file.c:4:1", with 'file.c' being the file name, '4' being the
+/// line number and '1' being the column number.
+///
+/// @return a modifiable reference to the maps.
+type_maps&
+corpus::get_type_per_loc_map()
+{return priv_->type_per_loc_map_;}
+
 /// Getter for the origin of the corpus.
 ///
 /// @return the origin of the corpus.
@@ -660,7 +715,7 @@ corpus::set_soname(const string& soname)
 ///
 /// @return the architecture name string.
 const string&
-corpus::get_architecture_name()
+corpus::get_architecture_name() const
 {return priv_->architecture_name;}
 
 /// Setter for the architecture name of the corpus.
@@ -1426,5 +1481,337 @@ corpus::get_exported_decls_builder() const
 }
 
 // </corpus stuff>
+
+// <corpus_group stuff>
+
+/// Type of the private data of @ref corpus_group.
+struct corpus_group::priv
+{
+  corpora_type                 corpora; // the corpora added by add_corpus()
+  istring_function_decl_ptr_map_type fns_map; // function ID -> function; de-duplicates fns
+  vector<function_decl*>       fns; // cache returned by get_functions()
+  istring_var_decl_ptr_map_type vars_map; // variable ID -> variable; de-duplicates vars
+  vector<var_decl*>            vars; // cache returned by get_variables()
+  string_elf_symbols_map_type  var_symbol_map; // cache returned by get_var_symbol_map()
+  string_elf_symbols_map_type  fun_symbol_map; // cache returned by get_fun_symbol_map()
+  elf_symbols                  sorted_var_symbols; // cache returned by get_sorted_var_symbols()
+  elf_symbols                  sorted_fun_symbols; // cache returned by get_sorted_fun_symbols()
+  unordered_map<string, elf_symbol_sptr> unrefed_fun_symbol_map; // symbol ID string -> symbol
+  elf_symbols                  unrefed_fun_symbols; // cache of unreferenced function symbols
+  unordered_map<string, elf_symbol_sptr> unrefed_var_symbol_map; // symbol ID string -> symbol
+  elf_symbols                  unrefed_var_symbols; // cache of unreferenced variable symbols
+
+  priv()
+  {}
+}; // end struct corpus_group::priv
+
+/// Constructor of @ref corpus_group; @p env and @p path are forwarded to @ref corpus.
+corpus_group::corpus_group(environment* env, const string& path = "")
+  : corpus(env, path), priv_(new priv)
+{}
+
+/// Destructor of the @ref corpus_group type.
+corpus_group::~corpus_group()
+{}
+
+/// Add a new corpus to the current @ref corpus_group.  A nil corpus is
+/// ignored; a mismatching environment or architecture name asserts.
+/// @param corp the new corpus to add.
+void
+corpus_group::add_corpus(const corpus_sptr& corp)
+{
+  if (!corp)
+    return;
+
+  // Ensure the new environment matches the current one.
+  if (const environment* cur_env = get_environment())
+    {
+      if (environment* corp_env = corp->get_environment())
+       assert(cur_env == corp_env);
+    }
+  else
+    set_environment(corp->get_environment());
+
+  // Ensure the new architecture name matches the current one; the
+  string cur_arch = get_architecture_name(),
+    corp_arch = corp->get_architecture_name();
+  if (cur_arch.empty())
+    set_architecture_name(corp_arch); // first non-empty name becomes the group's.
+  else
+    assert(cur_arch == corp_arch);
+
+  priv_->corpora.push_back(corp); // NOTE(review): results already cached by the getters are not reset here.
+}
+
+/// Getter of the vector of corpora held by the current @ref
+/// corpus_group.
+///
+/// @return the vector of corpora.
+const corpus_group::corpora_type&
+corpus_group::get_corpora() const
+{return priv_->corpora;}
+
+/// Test if the current corpus group is empty.
+///
+/// @return true iff no corpus has been added to the current group.
+bool
+corpus_group::is_empty() const
+{return get_corpora().empty();}
+
+/// Get the functions exported by the corpora of the current corpus
+/// group.
+///
+/// While the cache is empty, this function walks the corpora of the
+/// group and caches the functions they export, keeping one per ID.
+///
+/// Once the cache is non-empty, invocations just return it.
+///
+/// @return the exported functions.
+const corpus::functions&
+corpus_group::get_functions() const
+{
+  if (priv_->fns.empty())
+    for (corpora_type::const_iterator i = get_corpora().begin();
+        i != get_corpora().end();
+        ++i)
+      {
+       corpus_sptr c = *i;
+       for (corpus::functions::const_iterator f = c->get_functions().begin();
+            f != c->get_functions().end();
+            ++f)
+         {
+           interned_string fid = (*f)->get_id();
+           istring_function_decl_ptr_map_type::const_iterator j =
+             priv_->fns_map.find(fid);
+
+           if (j != priv_->fns_map.end())
+             // Don't cache the same function twice ...
+             continue;
+
+           priv_->fns_map[fid] = *f;
+           // Really cache the function now.
+           priv_->fns.push_back(*f);
+         }
+      }
+
+  return priv_->fns;
+}
+
+/// Get the global variables exported by the corpora of the current
+/// corpus group.
+///
+/// While the cache is empty, this function walks the corpora of the
+/// group and caches the variables they export, keeping one per ID.
+/// Once the cache is non-empty, invocations just return it.
+///
+/// @return the exported variables.
+const corpus::variables&
+corpus_group::get_variables() const
+{
+  if (priv_->vars.empty())
+    for (corpora_type::const_iterator i = get_corpora().begin();
+        i != get_corpora().end();
+        ++i)
+      {
+       corpus_sptr c = *i;
+       for (corpus::variables::const_iterator v = c->get_variables().begin();
+            v != c->get_variables().end();
+            ++v)
+         {
+           interned_string vid = (*v)->get_id();
+           istring_var_decl_ptr_map_type::const_iterator j =
+             priv_->vars_map.find(vid);
+
+           if (j != priv_->vars_map.end())
+             // Don't cache the same variable twice ...
+             continue;
+
+           priv_->vars_map[vid] = *v;
+           // Really cache the variable now.
+           priv_->vars.push_back(*v);
+         }
+      }
+
+  return priv_->vars;
+}
+
+/// Get the symbols of the global variables exported by the corpora of
+/// the current @ref corpus_group.  The map is filled while it is empty.
+///
+/// @return the symbols of the global variables exported by the corpora.
+const string_elf_symbols_map_type&
+corpus_group::get_var_symbol_map() const
+{
+  if (priv_->var_symbol_map.empty())
+    for (corpora_type::const_iterator i = get_corpora().begin();
+        i != get_corpora().end();
+        ++i)
+      priv_->var_symbol_map.insert((*i)->get_var_symbol_map().begin(),
+                                    (*i)->get_var_symbol_map().end());
+
+  return priv_->var_symbol_map;
+}
+
+/// Get the symbols of the global functions exported by the corpora of
+/// the current @ref corpus_group.  The map is filled while it is empty.
+///
+/// @return the symbols of the global functions exported by the corpora.
+const string_elf_symbols_map_type&
+corpus_group::get_fun_symbol_map() const
+{
+  if (priv_->fun_symbol_map.empty())
+    for (corpora_type::const_iterator i = get_corpora().begin();
+        i != get_corpora().end();
+        ++i)
+      priv_->fun_symbol_map.insert((*i)->get_fun_symbol_map().begin(),
+                                  (*i)->get_fun_symbol_map().end());
+
+  return priv_->fun_symbol_map;
+}
+
+/// Get a sorted vector of the symbols of the functions exported by
+/// the corpora of the current group.  The vector is built when empty.
+///
+/// @return the sorted vector of the exported function symbols.
+const elf_symbols&
+corpus_group::get_sorted_fun_symbols() const
+{
+  if (priv_->sorted_fun_symbols.empty()
+      && !get_fun_symbol_map().empty())
+    {
+      for (corpora_type::const_iterator i = get_corpora().begin();
+          i != get_corpora().end();
+          ++i)
+       {
+         corpus_sptr c = *i;
+         for (string_elf_symbols_map_type::const_iterator j =
+                c->get_fun_symbol_map().begin();
+              j != c->get_fun_symbol_map().end(); // walk up to end(), not begin()
+              ++j)
+           priv_->sorted_fun_symbols.insert(priv_->sorted_fun_symbols.end(),
+                                            j->second.begin(),
+                                            j->second.end());
+       }
+      comp_elf_symbols_functor comp;
+      std::sort(priv_->sorted_fun_symbols.begin(),
+               priv_->sorted_fun_symbols.end(),
+               comp);
+    }
+
+  return priv_->sorted_fun_symbols;
+}
+
+/// Get a sorted vector of the symbols of the variables exported by
+/// the corpora of the current group.  The vector is built when empty.
+///
+/// @return the sorted vector of the exported variable symbols.
+const elf_symbols&
+corpus_group::get_sorted_var_symbols() const
+{
+  if (priv_->sorted_var_symbols.empty()
+      && !get_var_symbol_map().empty())
+    {
+      for (corpora_type::const_iterator i = get_corpora().begin();
+          i != get_corpora().end();
+          ++i)
+       {
+         corpus_sptr c = *i;
+         for (string_elf_symbols_map_type::const_iterator j =
+                c->get_var_symbol_map().begin();
+              j != c->get_var_symbol_map().end(); // walk up to end(), not begin()
+              ++j)
+           priv_->sorted_var_symbols.insert(priv_->sorted_var_symbols.end(),
+                                            j->second.begin(),
+                                            j->second.end());
+       }
+      comp_elf_symbols_functor comp;
+      std::sort(priv_->sorted_var_symbols.begin(),
+               priv_->sorted_var_symbols.end(),
+               comp);
+    }
+
+  return priv_->sorted_var_symbols;
+}
+
+/// Get the set of function symbols not referenced by any debug info,
+/// from all the corpora of the current corpus group.
+///
+/// While the cache is empty, this function walks all the corpora of
+/// this corpus group and caches the unreferenced symbols they
+/// export, one per symbol ID string.  It then returns the cache.
+///
+/// Once the cache is non-empty, this function just returns the
+/// cached symbols.
+///
+/// @return the unreferenced symbols.
+const elf_symbols&
+corpus_group::get_unreferenced_function_symbols() const
+{
+  if (priv_->unrefed_fun_symbols.empty())
+    for (corpora_type::const_iterator i = get_corpora().begin();
+        i != get_corpora().end();
+        ++i)
+      {
+       corpus_sptr c = *i;
+       for (elf_symbols::const_iterator e =
+              c->get_unreferenced_function_symbols().begin();
+            e != c->get_unreferenced_function_symbols().end();
+            ++e)
+         {
+           string sym_id = (*e)->get_id_string();
+           unordered_map<string, elf_symbol_sptr>::const_iterator j =
+             priv_->unrefed_fun_symbol_map.find(sym_id);
+           if (j != priv_->unrefed_fun_symbol_map.end())
+             continue;
+
+           priv_->unrefed_fun_symbol_map[sym_id] = *e;
+           priv_->unrefed_fun_symbols.push_back(*e);
+         }
+      }
+
+  return priv_->unrefed_fun_symbols;
+}
+
+/// Get the set of variable symbols not referenced by any debug info,
+/// from all the corpora of the current corpus group.
+///
+/// While the cache is empty, this function walks all the corpora of
+/// this corpus group and caches the unreferenced symbols they
+/// export, one per symbol ID string.  It then returns the cache.
+///
+/// Once the cache is non-empty, this function just returns the
+/// cached symbols.
+///
+/// @return the unreferenced symbols.
+const elf_symbols&
+corpus_group::get_unreferenced_variable_symbols() const
+{
+  if (priv_->unrefed_var_symbols.empty())
+    for (corpora_type::const_iterator i = get_corpora().begin();
+        i != get_corpora().end();
+        ++i)
+      {
+       corpus_sptr c = *i;
+       for (elf_symbols::const_iterator e =
+              c->get_unreferenced_variable_symbols().begin();
+            e != c->get_unreferenced_variable_symbols().end();
+            ++e)
+         {
+           string sym_id = (*e)->get_id_string();
+           unordered_map<string, elf_symbol_sptr>::const_iterator j =
+             priv_->unrefed_var_symbol_map.find(sym_id);
+           if (j != priv_->unrefed_var_symbol_map.end())
+             continue;
+
+           priv_->unrefed_var_symbol_map[sym_id] = *e;
+           priv_->unrefed_var_symbols.push_back(*e);
+         }
+      }
+
+  return priv_->unrefed_var_symbols;
+}
+
+// </corpus_group stuff>
+
 }// end namespace ir
 }// end namespace abigail