3 // Copyright (C) 2013-2019 Red Hat, Inc.
5 // This file is part of the GNU Application Binary Interface Generic
6 // Analysis and Instrumentation Library (libabigail). This library is
7 // free software; you can redistribute it and/or modify it under the
8 // terms of the GNU Lesser General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option) any
12 // This library is distributed in the hope that it will be useful, but
13 // WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // General Lesser Public License for more details.
17 // You should have received a copy of the GNU Lesser General Public
18 // License along with this program; see the file COPYING-LGPLV3. If
19 // not, see <http://www.gnu.org/licenses/>.
21 // Author: Dodji Seketeli
25 /// This file contains the definitions of the entry points to
26 /// de-serialize an instance of @ref abigail::corpus from a file in
27 /// elf format, containing dwarf information.
30 #include <sys/types.h>
39 #include <elfutils/libdwfl.h>
48 #include "abg-cxx-compat.h"
49 #include "abg-ir-priv.h"
50 #include "abg-suppression-priv.h"
51 #include "abg-corpus-priv.h"
53 #include "abg-internal.h"
54 // <headers defining libabigail's API go under here>
55 ABG_BEGIN_EXPORT_DECLARATIONS
57 #include "abg-dwarf-reader.h"
58 #include "abg-sptr-utils.h"
59 #include "abg-tools-utils.h"
61 ABG_END_EXPORT_DECLARATIONS
62 // </headers defining libabigail's API>
65 #define UINT64_MAX 0xffffffffffffffff
75 /// The namespace for the DWARF reader.
76 namespace dwarf_reader
79 using abg_compat::dynamic_pointer_cast;
80 using abg_compat::static_pointer_cast;
81 using abg_compat::unordered_map;
82 using abg_compat::unordered_set;
87 /// Where a DIE comes from. For instance, a DIE can come from the main
88 /// debug info section, the alternate debug info section or from the
89 /// type unit section.
92 NO_DEBUG_INFO_DIE_SOURCE,
93 PRIMARY_DEBUG_INFO_DIE_SOURCE,
94 ALT_DEBUG_INFO_DIE_SOURCE,
96 NUMBER_OF_DIE_SOURCES, // This one must always be the latest
100 /// Prefix increment operator for @ref die_source.
///
/// The operand is advanced by adding one to its integer value and
/// casting the result back to die_source.
102 /// @param source the die_source to increment.
103 /// @return the incremented source.
105 operator++(die_source& source)
// NOTE(review): no range check is visible here, so incrementing a
// value equal to NUMBER_OF_DIE_SOURCES would yield a value that
// names no enumerator -- confirm callers stop before that.
107 source = static_cast<die_source>(source + 1);
111 /// A functor used by @ref dwfl_sptr.
115 operator()(Dwfl* dwfl)
117 };//end struct dwfl_deleter
119 /// A convenience typedef for a shared pointer to a Dwfl.
120 typedef shared_ptr<Dwfl> dwfl_sptr;
122 /// A convenience typedef for a vector of Dwarf_Off.
123 typedef vector<Dwarf_Off> dwarf_offsets_type;
125 /// Convenience typedef for a map which key is the offset of a dwarf
126 /// die and which value is the corresponding artefact.
127 typedef unordered_map<Dwarf_Off, type_or_decl_base_sptr> die_artefact_map_type;
129 /// Convenience typedef for a map which key is the offset of a dwarf
130 /// die, (given by dwarf_dieoffset()) and which value is the
131 /// corresponding class_decl.
132 typedef unordered_map<Dwarf_Off, class_decl_sptr> die_class_map_type;
134 /// Convenience typedef for a map which key is the offset of a dwarf
135 /// die, (given by dwarf_dieoffset()) and which value is the
136 /// corresponding class_or_union_sptr.
137 typedef unordered_map<Dwarf_Off, class_or_union_sptr> die_class_or_union_map_type;
139 /// Convenience typedef for a map which key is the offset of a dwarf die
140 /// and which value is the corresponding function_decl.
141 typedef unordered_map<Dwarf_Off, function_decl_sptr> die_function_decl_map_type;
143 /// Convenience typedef for a map which key is the offset of a dwarf
144 /// die and which value is the corresponding function_type.
145 typedef unordered_map<Dwarf_Off, function_type_sptr> die_function_type_map_type;
147 /// Convenience typedef for a map which key is the offset of a
148 /// DW_TAG_compile_unit and the value is the corresponding @ref
149 /// translation_unit_sptr.
150 typedef unordered_map<Dwarf_Off, translation_unit_sptr> die_tu_map_type;
152 /// Convenience typedef for a map which key is the offset of a DIE and
153 /// the value is the corresponding qualified name of the DIE.
154 typedef unordered_map<Dwarf_Off, interned_string> die_istring_map_type;
156 /// Convenience typedef for a map which key is an interned_string and
157 /// which value is a vector of offsets.
158 typedef unordered_map<interned_string,
160 hash_interned_string>
161 istring_dwarf_offsets_map_type;
163 /// Convenience typedef for a map which key is an elf address and
164 /// which value is an elf_symbol_sptr.
165 typedef unordered_map<GElf_Addr, elf_symbol_sptr> addr_elf_symbol_sptr_map_type;
167 /// Convenience typedef for a set of ELF addresses.
168 typedef unordered_set<GElf_Addr> address_set_type;
170 typedef unordered_set<interned_string, hash_interned_string> istring_set_type;
172 /// Convenience typedef for a shared pointer to an @ref address_set_type.
173 typedef shared_ptr<address_set_type> address_set_sptr;
175 /// Convenience typedef for a shared pointer to an
176 /// addr_elf_symbol_sptr_map_type.
177 typedef shared_ptr<addr_elf_symbol_sptr_map_type> addr_elf_symbol_sptr_map_sptr;
179 /// Convenience typedef for a map that associates an @ref
180 /// interned_string to a @ref function_type_sptr.
181 typedef unordered_map<interned_string,
183 hash_interned_string> istring_fn_type_map_type;
185 /// Convenience typedef for a stack containing the scopes up to the
186 /// current point in the abigail Internal Representation (aka IR) tree
187 /// that is being built.
188 typedef stack<scope_decl*> scope_stack_type;
190 /// Convenience typedef for a map which key is a dwarf offset. The
191 /// value is also a dwarf offset.
192 typedef unordered_map<Dwarf_Off, Dwarf_Off> offset_offset_map_type;
194 /// Convenience typedef for a map which key is a string and which
195 /// value is a vector of smart pointer to a class.
196 typedef unordered_map<string, classes_type> string_classes_map;
198 /// The abstraction of the place where a partial unit has been
199 /// imported. This is what the DW_TAG_imported_unit DIE expresses.
201 /// This type thus contains:
202 /// - the offset to which the partial unit is imported
203 /// - where the imported unit comes from (a @ref die_source)
204 /// - the offset of the imported partial unit DIE, the offset of the
///   child DIE obtained from it with dwarf_child(), and the offset of
///   the CU DIE obtained from that child with dwarf_diecu().
205 struct imported_unit_point
207 Dwarf_Off offset_of_import;
208 // The die_source below tells where the imported unit comes from,
209 // e.g. the primary or the alternate debug info file.
210 die_source imported_unit_die_source;
211 Dwarf_Off imported_unit_die_off;
212 Dwarf_Off imported_unit_cu_off;
213 Dwarf_Off imported_unit_child_off;
215 /// Default constructor for the type @ref imported_unit_point.
///
/// All offsets are value-initialized and the DIE source is set to
/// PRIMARY_DEBUG_INFO_DIE_SOURCE.
216 imported_unit_point ()
217 : offset_of_import(),
218 imported_unit_die_source(PRIMARY_DEBUG_INFO_DIE_SOURCE),
219 imported_unit_die_off(),
220 imported_unit_cu_off(),
221 imported_unit_child_off()
224 /// Constructor of the type @ref imported_unit_point.
///
/// Only the import offset is recorded; the other offsets are
/// value-initialized and the DIE source is set to
/// PRIMARY_DEBUG_INFO_DIE_SOURCE.
226 /// @param import_off the offset of the point at which the unit has
/// been imported.
228 imported_unit_point (Dwarf_Off import_off)
229 : offset_of_import(import_off),
230 imported_unit_die_source(PRIMARY_DEBUG_INFO_DIE_SOURCE),
231 imported_unit_die_off(),
232 imported_unit_cu_off(),
233 imported_unit_child_off()
236 /// Constructor of the type @ref imported_unit_point.
///
/// Besides recording its arguments, this computes the offsets of
/// the imported DIE, of its child DIE and of that child's CU DIE.
238 /// @param import_off the offset of the point at which the unit has
/// been imported.
241 /// @param from where the imported DIE comes from.
243 /// @param imported_die the die of the unit that has been imported.
244 imported_unit_point (Dwarf_Off import_off,
245 const Dwarf_Die& imported_die,
247 : offset_of_import(import_off),
248 imported_unit_die_source(from),
249 imported_unit_die_off(dwarf_dieoffset
250 (const_cast<Dwarf_Die*>(&imported_die))),
251 imported_unit_cu_off(),
252 imported_unit_child_off()
254 Dwarf_Die imported_unit_child;
// NOTE(review): the result of dwarf_child() does not appear to be
// checked; if the call does not succeed, imported_unit_child would
// be read below without having been filled in -- confirm.
256 dwarf_child(const_cast<Dwarf_Die*>(&imported_die),
257 &imported_unit_child);
258 imported_unit_child_off =
259 dwarf_dieoffset(const_cast<Dwarf_Die*>(&imported_unit_child));
261 Dwarf_Die cu_die_memory;
// The CU DIE is looked up from the child DIE, not from imported_die
// itself.
// NOTE(review): cu_die is handed to dwarf_dieoffset() without a
// visible null check -- confirm dwarf_diecu() cannot fail here.
264 cu_die = dwarf_diecu(const_cast<Dwarf_Die*>(&imported_unit_child),
265 &cu_die_memory, 0, 0);
266 imported_unit_cu_off = dwarf_dieoffset(cu_die);
268 }; // struct imported_unit_point
270 /// Convenience typedef for a vector of @ref imported_unit_point.
271 typedef vector<imported_unit_point> imported_unit_points_type;
273 /// Convenience typedef for a map which key is a Dwarf_Off and which value is a vector of @ref imported_unit_point.
274 typedef unordered_map<Dwarf_Off, imported_unit_points_type>
275 tu_die_imported_unit_points_map_type;
277 /// "Less than" operator for instances of @ref imported_unit_point
///
/// Only the offset_of_import data members of the two operands are
/// compared.
280 /// @param l the left hand side operand of the "Less than" operator.
282 /// @param r the right hand side operand of the "Less than" operator.
284 /// @return true iff @p l is less than @p r.
286 operator<(const imported_unit_point& l, const imported_unit_point& r)
287 {return l.offset_of_import < r.offset_of_import;}
290 add_symbol_to_map(const elf_symbol_sptr& sym,
291 string_elf_symbols_map_type& map);
294 find_symbol_table_section(Elf* elf_handle, Elf_Scn*& section);
297 get_symbol_versionning_sections(Elf* elf_handle,
298 Elf_Scn*& versym_section,
299 Elf_Scn*& verdef_section,
300 Elf_Scn*& verneed_section);
303 get_parent_die(const read_context& ctxt,
304 const Dwarf_Die* die,
305 Dwarf_Die& parent_die,
306 size_t where_offset);
309 get_scope_die(const read_context& ctxt,
310 const Dwarf_Die* die,
312 Dwarf_Die& scope_die);
315 die_is_anonymous(const Dwarf_Die* die);
318 die_is_type(const Dwarf_Die* die);
321 die_is_decl(const Dwarf_Die* die);
324 die_is_namespace(const Dwarf_Die* die);
327 die_is_unspecified(Dwarf_Die* die);
330 die_is_void_type(Dwarf_Die* die);
333 die_is_pointer_type(const Dwarf_Die* die);
336 pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die* die);
339 die_is_reference_type(const Dwarf_Die* die);
342 die_is_pointer_or_reference_type(const Dwarf_Die* die);
345 die_is_pointer_reference_or_typedef_type(const Dwarf_Die* die);
348 die_is_class_type(const Dwarf_Die* die);
351 die_is_qualified_type(const Dwarf_Die* die);
354 die_is_function_type(const Dwarf_Die *die);
357 die_has_object_pointer(const Dwarf_Die* die,
358 Dwarf_Die& object_pointer);
361 die_this_pointer_from_object_pointer(Dwarf_Die* die,
362 Dwarf_Die& this_pointer);
365 die_this_pointer_is_const(Dwarf_Die* die);
368 die_object_pointer_is_for_const_method(Dwarf_Die* die);
371 die_is_at_class_scope(const read_context& ctxt,
372 const Dwarf_Die* die,
374 Dwarf_Die& class_scope_die);
376 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
379 bool& is_tls_address);
381 static translation_unit::language
382 dwarf_language_to_tu_language(size_t l);
385 die_unsigned_constant_attribute(const Dwarf_Die* die,
390 die_signed_constant_attribute(const Dwarf_Die*die,
395 die_constant_attribute(const Dwarf_Die *die,
397 array_type_def::subrange_type::bound_value &value);
400 die_attribute_has_form(const Dwarf_Die* die,
405 form_is_DW_FORM_strx(unsigned form);
408 die_attribute_is_signed(const Dwarf_Die* die, unsigned attr_name);
411 die_attribute_is_unsigned(const Dwarf_Die* die, unsigned attr_name);
414 die_attribute_has_no_signedness(const Dwarf_Die* die, unsigned attr_name);
417 die_address_attribute(Dwarf_Die* die, unsigned attr_name, Dwarf_Addr& result);
420 die_name(const Dwarf_Die* die);
423 die_location(const read_context& ctxt, const Dwarf_Die* die);
426 die_location_address(Dwarf_Die* die,
428 bool& is_tls_address);
431 die_die_attribute(const Dwarf_Die* die,
434 bool look_thru_abstract_origin = true);
437 get_internal_anonymous_die_prefix_name(const Dwarf_Die *die);
440 build_internal_anonymous_die_name(const string &base_name,
441 size_t anonymous_type_index);
445 get_internal_anonymous_die_name(Dwarf_Die *die,
446 size_t anonymous_type_index);
449 die_qualified_type_name(const read_context& ctxt,
450 const Dwarf_Die* die,
454 die_qualified_decl_name(const read_context& ctxt,
455 const Dwarf_Die* die,
459 die_qualified_name(const read_context& ctxt,
460 const Dwarf_Die* die,
464 die_qualified_type_name_empty(const read_context& ctxt,
465 const Dwarf_Die* die, size_t where,
466 string &qualified_name);
469 die_return_and_parm_names_from_fn_type_die(const read_context& ctxt,
470 const Dwarf_Die* die,
473 string &return_type_name,
475 vector<string>& parm_names,
480 die_function_signature(const read_context& ctxt,
481 const Dwarf_Die *die,
482 size_t where_offset);
485 die_peel_qual_ptr(Dwarf_Die *die, Dwarf_Die& peeled_die);
488 die_function_type_is_method_type(const read_context& ctxt,
489 const Dwarf_Die *die,
491 Dwarf_Die& object_pointer_die,
492 Dwarf_Die& class_die,
496 die_pretty_print_type(read_context& ctxt,
497 const Dwarf_Die* die,
498 size_t where_offset);
501 die_pretty_print_decl(read_context& ctxt,
502 const Dwarf_Die* die,
503 size_t where_offset);
506 die_pretty_print(read_context& ctxt,
507 const Dwarf_Die* die,
508 size_t where_offset);
511 maybe_canonicalize_type(const Dwarf_Die* die,
515 maybe_canonicalize_type(const type_base_sptr& t,
519 maybe_canonicalize_type(const Dwarf_Die* die,
520 const type_base_sptr& t,
524 get_default_array_lower_bound(translation_unit::language l);
527 find_lower_bound_in_imported_unit_points(const imported_unit_points_type&,
529 imported_unit_points_type::const_iterator&);
531 static array_type_def::subrange_sptr
532 build_subrange_type(read_context& ctxt,
533 const Dwarf_Die* die,
535 bool associate_type_to_die = true);
538 build_subranges_from_array_type_die(read_context& ctxt,
539 const Dwarf_Die* die,
540 array_type_def::subranges_type& subranges,
542 bool associate_type_to_die = true);
545 compare_dies(const read_context& ctxt,
546 const Dwarf_Die *l, const Dwarf_Die *r,
547 bool update_canonical_dies_on_the_fly);
549 /// Convert an elf symbol type (given by the ELF{32,64}_ST_TYPE
550 /// macros) into an elf_symbol::type value.
552 /// Note that this function aborts when given an unexpected value.
554 /// @param stt the symbol type value to convert.
556 /// @return the converted value.
557 static elf_symbol::type
558 stt_to_elf_symbol_type(unsigned char stt)
// The result starts out as NOTYPE_TYPE.
// NOTE(review): each assignment below presumably sits under its own
// label selecting on stt -- confirm against the full function.
560 elf_symbol::type t = elf_symbol::NOTYPE_TYPE;
565 t = elf_symbol::NOTYPE_TYPE;
568 t = elf_symbol::OBJECT_TYPE;
571 t = elf_symbol::FUNC_TYPE;
574 t = elf_symbol::SECTION_TYPE;
577 t = elf_symbol::FILE_TYPE;
580 t = elf_symbol::COMMON_TYPE;
583 t = elf_symbol::TLS_TYPE;
586 t = elf_symbol::GNU_IFUNC_TYPE;
589 // An unknown value that probably ought to be supported? Let's
590 // abort right here rather than yielding garbage.
591 ABG_ASSERT_NOT_REACHED;
597 /// Convert an elf symbol binding (given by the ELF{32,64}_ST_BIND
598 /// macros) into an elf_symbol::binding value.
600 /// Note that this function aborts when given an unexpected value.
602 /// @param stb the symbol binding value to convert.
604 /// @return the converted value.
605 static elf_symbol::binding
606 stb_to_elf_symbol_binding(unsigned char stb)
// The result starts out as GLOBAL_BINDING.
// NOTE(review): each assignment below presumably sits under its own
// label selecting on stb -- confirm against the full function.
608 elf_symbol::binding b = elf_symbol::GLOBAL_BINDING;
613 b = elf_symbol::LOCAL_BINDING;
616 b = elf_symbol::GLOBAL_BINDING;
619 b = elf_symbol::WEAK_BINDING;
622 b = elf_symbol::GNU_UNIQUE_BINDING;
// Reached for a binding value none of the assignments above handle.
625 ABG_ASSERT_NOT_REACHED;
632 /// Convert an ELF symbol visibility given by the symbols ->st_other
633 /// data member as returned by the GELF_ST_VISIBILITY macro into a
634 /// elf_symbol::visibility value.
///
/// Like the sibling type and binding converters, this ends in
/// ABG_ASSERT_NOT_REACHED for a value it does not handle.
636 /// @param stv the value of the ->st_other data member of the ELF
/// symbol.
639 /// @return the converted elf_symbol::visibility value.
640 static elf_symbol::visibility
641 stv_to_elf_symbol_visibility(unsigned char stv)
// The result starts out as DEFAULT_VISIBILITY.
// NOTE(review): each assignment below presumably sits under its own
// label selecting on stv -- confirm against the full function.
644 elf_symbol::visibility v = elf_symbol::DEFAULT_VISIBILITY;
649 v = elf_symbol::DEFAULT_VISIBILITY;
652 v = elf_symbol::INTERNAL_VISIBILITY;
655 v = elf_symbol::HIDDEN_VISIBILITY;
658 v = elf_symbol::PROTECTED_VISIBILITY;
661 ABG_ASSERT_NOT_REACHED;
667 /// Convert the value of the e_machine field of GElf_Ehdr into a
668 /// string. This is to get a string representing the architecture of
669 /// the elf file at hand.
///
/// Known values map to a fixed "elf-<vendor>-<arch>" style string.
/// For the remaining values a string made of
/// "elf-unknown-arch-value-" followed by @p e_machine is built with
/// an ostringstream.
671 /// @param e_machine the value of GElf_Ehdr::e_machine.
673 /// @return the string representation of GElf_Ehdr::e_machine.
675 e_machine_to_string(GElf_Half e_machine)
// NOTE(review): each assignment below presumably sits under its own
// label selecting on e_machine -- confirm against the full function.
681 result = "elf-no-arch";
684 result = "elf-att-we-32100";
687 result = "elf-sun-sparc";
690 result = "elf-intel-80386";
693 result = "elf-motorola-68k";
696 result = "elf-motorola-88k";
699 result = "elf-intel-80860";
702 result = "elf-mips-r3000-be";
705 result = "elf-ibm-s370";
708 result = "elf-mips-r3000-le";
711 result = "elf-hp-parisc";
714 result = "elf-fujitsu-vpp500";
717 result = "elf-sun-sparc-v8plus";
720 result = "elf-intel-80960";
723 result = "elf-powerpc";
726 result = "elf-powerpc-64";
729 result = "elf-ibm-s390";
732 result = "elf-nec-v800";
735 result = "elf-fujitsu-fr20";
738 result = "elf-trw-rh32";
741 result = "elf-motorola-rce";
747 result = "elf-digital-alpha";
750 result = "elf-hitachi-sh";
753 result = "elf-sun-sparc-v9-64";
756 result = "elf-siemens-tricore";
759 result = "elf-argonaut-risc-core";
762 result = "elf-hitachi-h8-300";
765 result = "elf-hitachi-h8-300h";
768 result = "elf-hitachi-h8s";
771 result = "elf-hitachi-h8-500";
774 result = "elf-intel-ia-64";
777 result = "elf-stanford-mips-x";
780 result = "elf-motorola-coldfire";
783 result = "elf-motorola-68hc12";
786 result = "elf-fujitsu-mma";
789 result = "elf-siemens-pcp";
792 result = "elf-sony-ncpu";
795 result = "elf-denso-ndr1";
798 result = "elf-motorola-starcore";
801 result = "elf-toyota-me16";
804 result = "elf-stm-st100";
807 result = "elf-alc-tinyj";
810 result = "elf-amd-x86_64";
813 result = "elf-sony-pdsp";
816 result = "elf-siemens-fx66";
819 result = "elf-stm-st9+";
822 result = "elf-stm-st7";
825 result = "elf-motorola-68hc16";
828 result = "elf-motorola-68hc11";
831 result = "elf-motorola-68hc08";
834 result = "elf-motorola-68hc05";
837 result = "elf-sg-svx";
840 result = "elf-stm-st19";
843 result = "elf-digital-vax";
846 result = "elf-axis-cris";
849 result = "elf-infineon-javelin";
852 result = "elf-firepath";
855 result = "elf-lsi-zsp";
858 result = "elf-don-knuth-mmix";
861 result = "elf-harvard-huany";
864 result = "elf-sitera-prism";
867 result = "elf-atmel-avr";
// NOTE(review): "fujistu" below looks like a misspelling of "fujitsu"
// (compare the fr20, vpp500 and mma entries above). The string is
// produced at runtime, so correcting it changes the output of this
// function -- confirm nothing depends on it before touching it.
870 result = "elf-fujistu-fr30";
873 result = "elf-mitsubishi-d10v";
876 result = "elf-mitsubishi-d30v";
879 result = "elf-nec-v850";
882 result = "elf-mitsubishi-m32r";
885 result = "elf-matsushita-mn10300";
888 result = "elf-matsushita-mn10200";
891 result = "elf-picojava";
894 result = "elf-openrisc-32";
897 result = "elf-arc-a5";
900 result = "elf-tensilica-xtensa";
// The three entries below are only compiled in when the matching
// HAVE_EM_*_MACRO configuration macro is defined.
903 #ifdef HAVE_EM_AARCH64_MACRO
905 result = "elf-arm-aarch64";
909 #ifdef HAVE_EM_TILEPRO_MACRO
911 result = "elf-tilera-tilepro";
915 #ifdef HAVE_EM_TILEGX_MACRO
917 result = "elf-tilera-tilegx";
922 result = "elf-last-arch-number";
925 result = "elf-non-official-alpha";
// Fallback: embed the numeric e_machine value in the string so that
// an unrecognised architecture is still identifiable.
929 std::ostringstream o;
930 o << "elf-unknown-arch-value-" << e_machine;
938 /// The kind of ELF hash table found by the function
939 /// find_hash_table_section_index.
942 NO_HASH_TABLE_KIND = 0,
943 SYSV_HASH_TABLE_KIND,
947 /// Get the index of the hash table section.
///
/// Sections of type SHT_HASH and SHT_GNU_HASH are considered.
949 /// @param elf_handle the elf handle to use.
951 /// @param ht_section_index this is set to the index of the hash
952 /// table section (as given by elf_ndxscn()) when one is found.
954 /// @param symtab_section_index this is set to the sh_link of the hash
955 /// table section header, i.e. the symbol table the hash table refers to.
///
/// @return the kind of hash table found, or NO_HASH_TABLE_KIND if
/// none was found.
956 static hash_table_kind
957 find_hash_table_section_index(Elf* elf_handle,
958 size_t& ht_section_index,
959 size_t& symtab_section_index)
962 return NO_HASH_TABLE_KIND;
964 GElf_Shdr header_mem, *section_header;
965 bool found_sysv_ht = false, found_gnu_ht = false;
966 for (Elf_Scn* section = elf_nextscn(elf_handle, 0);
968 section = elf_nextscn(elf_handle, section))
// NOTE(review): the result of gelf_getshdr() is dereferenced right
// below without a null check, whereas find_section() does test its
// header against NULL -- confirm this is safe.
970 section_header= gelf_getshdr(section, &header_mem);
971 if (section_header->sh_type != SHT_HASH
972 && section_header->sh_type != SHT_GNU_HASH)
// Both output indices are overwritten for every hash section seen.
// NOTE(review): no early exit is visible, so when a binary carries
// both kinds of hash table the indices would describe the last one
// encountered -- confirm.
975 ht_section_index = elf_ndxscn(section);
976 symtab_section_index = section_header->sh_link;
978 if (section_header->sh_type == SHT_HASH)
979 found_sysv_ht = true;
980 else if (section_header->sh_type == SHT_GNU_HASH)
// GNU_HASH_TABLE_KIND is tested for before SYSV_HASH_TABLE_KIND
// (presumably guarded by found_gnu_ht), so it takes precedence.
985 return GNU_HASH_TABLE_KIND;
986 else if (found_sysv_ht)
987 return SYSV_HASH_TABLE_KIND;
989 return NO_HASH_TABLE_KIND;
992 /// Find the symbol table.
994 /// If we are looking at a relocatable or executable file, this
995 /// function will return the .symtab symbol table (of type
996 /// SHT_SYMTAB). But if we are looking at a DSO it returns the
997 /// .dynsym symbol table (of type SHT_DYNSYM).
///
/// In both cases the other table is used as a fallback when the
/// preferred one is absent.
999 /// @param elf_handle the elf handle to consider.
1001 /// @param symtab the symbol table found.
1003 /// @return true iff the symbol table is found.
1005 find_symbol_table_section(Elf* elf_handle, Elf_Scn*& symtab)
1007 Elf_Scn* section = 0, *dynsym = 0, *sym_tab = 0;
1008 while ((section = elf_nextscn(elf_handle, section)) != 0)
1010 GElf_Shdr header_mem, *header;
// NOTE(review): header is dereferenced below without a null check --
// confirm gelf_getshdr() cannot fail here.
1011 header = gelf_getshdr(section, &header_mem);
1012 if (header->sh_type == SHT_DYNSYM)
1014 else if (header->sh_type == SHT_SYMTAB)
1018 if (dynsym || sym_tab)
// NOTE(review): elf_header is dereferenced below without a null
// check -- confirm gelf_getehdr() cannot fail here.
1021 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle, &eh_mem);
1022 if (elf_header->e_type == ET_REL
1023 || elf_header->e_type == ET_EXEC)
// Relocatable or executable file: prefer SHT_SYMTAB, fall back to
// SHT_DYNSYM.
1024 symtab = sym_tab ? sym_tab : dynsym;
// Otherwise: prefer SHT_DYNSYM, fall back to SHT_SYMTAB.
1026 symtab = dynsym ? dynsym : sym_tab;
1032 /// Find the index (in the section headers table) of the symbol table
/// section, as selected by find_symbol_table_section().
1035 /// If we are looking at a relocatable or executable file, this
1036 /// function will return the index for the .symtab symbol table (of
1037 /// type SHT_SYMTAB). But if we are looking at a DSO it returns the
1038 /// index for the .dynsym symbol table (of type SHT_DYNSYM).
1040 /// @param elf_handle the elf handle to use.
1042 /// @param symtab_index the index of the symbol_table, that was found.
1044 /// @return true iff the symbol table section index was found.
1046 find_symbol_table_section_index(Elf* elf_handle,
1047 size_t& symtab_index)
1049 Elf_Scn* section = 0;
1050 if (!find_symbol_table_section(elf_handle, section))
// The section has been found (presumably the failure case returned
// above); translate it into its index in the section header table.
1053 symtab_index = elf_ndxscn(section);
1057 /// Find and return a section by its name and its type.
///
/// Sections are walked in order; those whose header cannot be read
/// or whose type differs from @p section_type are skipped, and the
/// name of each remaining one is compared to @p name.
1059 /// @param elf_handle the elf handle to use.
1061 /// @param name the name of the section.
1063 /// @param section_type the type of the section. This is the
1064 /// Elf32_Shdr::sh_type (or Elf64_Shdr::sh_type) data member.
1065 /// Examples of values of this parameter are SHT_PROGBITS or SHT_NOBITS.
1067 /// @return the section found, or nil if none was found.
1069 find_section(Elf* elf_handle, const string& name, Elf64_Word section_type)
1071 size_t section_header_string_index = 0;
// NOTE(review): the second argument below reads "§ion_header_string_index",
// which looks like a mis-encoded "&section_header_string_index" (the
// variable declared just above). As written it is not valid C++ --
// restore the "&sect" characters.
1072 if (elf_getshdrstrndx (elf_handle, §ion_header_string_index) < 0)
1075 Elf_Scn* section = 0;
1076 GElf_Shdr header_mem, *header;
1077 while ((section = elf_nextscn(elf_handle, section)) != 0)
1079 header = gelf_getshdr(section, &header_mem);
1080 if (header == NULL || header->sh_type != section_type)
// Section names live in the section header string table; sh_name is
// the offset of this section's name inside it.
1083 const char* section_name =
1084 elf_strptr(elf_handle, section_header_string_index, header->sh_name);
1085 if (section_name && name == section_name)
1092 /// Test if the ELF binary denoted by a given ELF handle is a Linux
/// Kernel module.
///
/// The test requires both a ".modinfo" section and a
/// ".gnu.linkonce.this_module" section to be found by find_section().
1095 /// @param elf_handle the ELF handle to consider.
1097 /// @return true iff the binary denoted by @p elf_handle is a Linux
/// Kernel module.
1100 binary_is_linux_kernel_module(Elf *elf_handle)
1102 return (find_section(elf_handle, ".modinfo", SHT_PROGBITS)
1103 && find_section(elf_handle,
1104 ".gnu.linkonce.this_module",
1108 /// Test if the ELF binary denoted by a given ELF handle is a Linux
1109 /// Kernel binary (either vmlinux or a kernel module).
///
/// The binary qualifies if find_section() finds a
/// "__ksymtab_strings" section in it, or if
/// binary_is_linux_kernel_module() returns true for it.
1111 /// @param elf_handle the ELF handle to consider.
1113 /// @return true iff the binary denoted by @p elf_handle is a Linux
/// Kernel binary.
1116 binary_is_linux_kernel(Elf *elf_handle)
1118 return (find_section(elf_handle,
1119 "__ksymtab_strings",
1121 || binary_is_linux_kernel_module(elf_handle));
1124 /// Find and return the .text section.
1126 /// @param elf_handle the elf handle to use.
1128 /// @return the .text section found, that is, the result of
/// find_section() for a section named ".text" of type SHT_PROGBITS.
1130 find_text_section(Elf* elf_handle)
1131 {return find_section(elf_handle, ".text", SHT_PROGBITS);}
1133 /// Find and return the .bss section.
1135 /// @param elf_handle the elf handle to use.
1137 /// @return the .bss section found, that is, the result of
/// find_section() for a section named ".bss" of type SHT_NOBITS.
1139 find_bss_section(Elf* elf_handle)
1140 {return find_section(elf_handle, ".bss", SHT_NOBITS);}
1142 /// Find and return the .rodata section.
1144 /// @param elf_handle the elf handle to use.
1146 /// @return the .rodata section found, that is, the result of
/// find_section() for a section named ".rodata" of type SHT_PROGBITS.
1148 find_rodata_section(Elf* elf_handle)
1149 {return find_section(elf_handle, ".rodata", SHT_PROGBITS);}
1151 /// Find and return the .data section.
1153 /// @param elf_handle the elf handle to use.
1155 /// @return the .data section found, that is, the result of
/// find_section() for a section named ".data" of type SHT_PROGBITS.
1157 find_data_section(Elf* elf_handle)
1158 {return find_section(elf_handle, ".data", SHT_PROGBITS);}
1160 /// Find and return the .data1 section.
1162 /// @param elf_handle the elf handle to use.
1164 /// @return the .data1 section found, that is, the result of
/// find_section() for a section named ".data1" of type SHT_PROGBITS.
1166 find_data1_section(Elf* elf_handle)
1167 {return find_section(elf_handle, ".data1", SHT_PROGBITS);}
1169 /// Find the __ksymtab_strings section of a Linux kernel binary.
///
/// @param elf_handle the ELF handle of the binary to consider.
///
1172 /// @return the __ksymtab_strings section of the linux kernel
1173 /// binary denoted by @p elf_handle, or nil if such a section could
/// not be found.
1176 find_ksymtab_strings_section(Elf *elf_handle)
// NOTE(review): binary_is_linux_kernel() itself calls find_section()
// for "__ksymtab_strings", so for a kernel binary that carries the
// section the lookup below is performed a second time.
1178 if (binary_is_linux_kernel(elf_handle))
1179 return find_section(elf_handle, "__ksymtab_strings", SHT_PROGBITS);
1183 /// Get the address at which a given binary is loaded in memory.
///
/// The address computed is the lowest p_vaddr found among the
/// program headers of type PT_LOAD.
1185 /// @param elf_handle the elf handle for the binary to consider.
1187 /// @param load_address the address where the binary is loaded. This
1188 /// is set by the function iff it returns true.
1190 /// @return true if the function could get the binary load address
1191 /// and assign @p load_address to it.
1193 get_binary_load_address(Elf *elf_handle,
1194 GElf_Addr &load_address)
// NOTE(review): elf_header is dereferenced on the next line without a
// null check -- confirm gelf_getehdr() cannot fail here.
1197 GElf_Ehdr *elf_header = gelf_getehdr(elf_handle, &eh_mem);
// NOTE(review): e_phnum is read directly; for files with a very
// large number of program headers the ELF header stores PN_XNUM
// there and elf_getphdrnum() gives the real count -- confirm whether
// that case matters here.
1198 size_t num_segments = elf_header->e_phnum;
1199 GElf_Phdr *program_header = 0;
1201 bool found_loaded_segment = false;
1204 for (unsigned i = 0; i < num_segments; ++i)
1206 program_header = gelf_getphdr(elf_handle, i, &ph_mem);
// Program headers that cannot be read, or that are not PT_LOAD, are
// ignored.
1207 if (program_header && program_header->p_type == PT_LOAD)
// The first PT_LOAD segment seeds the result.
1209 if (!found_loaded_segment)
1211 result = program_header->p_vaddr;
1212 found_loaded_segment = true;
1215 if (program_header->p_vaddr < result)
1216 // The resulting load address we want is the lowest
1217 // load address of all the loaded segments.
1218 result = program_header->p_vaddr;
// load_address is only written when at least one PT_LOAD segment was
// seen.
1222 if (found_loaded_segment)
1224 load_address = result;
1230 /// Find the file name of the alternate debug info file.
///
/// The name is read from the data of the SHT_PROGBITS section named
/// ".gnu_debugaltlink": a NUL-terminated file name, followed by the
/// bytes this function treats as the build-id.
1232 /// @param elf_module the elf module to consider.
1234 /// @param alt_file_name out parameter. Is set to the file name of the alternate
1235 /// debug info file, iff this function returns true.
1237 /// @return true iff the location of the alternate debug info file was
/// found.
1240 find_alt_debug_info_link(Dwfl_Module *elf_module,
1241 string &alt_file_name)
// NOTE(review): dwarf is passed on to dwarf_getelf() without a
// visible null check -- confirm dwfl_module_getdwarf() cannot return
// null here (e.g. for a module without debug info).
1244 Dwarf *dwarf = dwfl_module_getdwarf(elf_module, &bias);
1245 Elf *elf = dwarf_getelf(dwarf);
1246 GElf_Ehdr ehmem, *elf_header;
1247 elf_header = gelf_getehdr(elf, &ehmem);
1249 Elf_Scn* section = 0;
1250 while ((section = elf_nextscn(elf, section)) != 0)
1252 GElf_Shdr header_mem, *header;
// NOTE(review): header is dereferenced below without a null check.
1253 header = gelf_getshdr(section, &header_mem);
1254 if (header->sh_type != SHT_PROGBITS)
// NOTE(review): the string table index is taken from e_shstrndx
// here, whereas find_section() obtains it with elf_getshdrstrndx() --
// confirm the two are interchangeable for the files handled.
1257 const char *section_name = elf_strptr(elf,
1258 elf_header->e_shstrndx,
1263 size_t buildid_len = 0;
1264 if (section_name != 0
1265 && strcmp(section_name, ".gnu_debugaltlink") == 0)
1267 Elf_Data *data = elf_getdata(section, 0);
1268 if (data != 0 && data->d_size != 0)
// The section data starts with the NUL-terminated file name.
1270 alt_name = (char*) data->d_buf;
// NOTE(review): memchr() returns null when the data holds no NUL
// byte; end_of_alt_name is used in pointer arithmetic below without
// a visible check for that -- confirm.
1271 char *end_of_alt_name =
1272 (char *) memchr(alt_name, '\0', data->d_size);
// Whatever follows the terminating NUL is counted as the build-id.
1273 buildid_len = data->d_size - (end_of_alt_name - alt_name + 1);
1274 if (buildid_len == 0)
1276 buildid = end_of_alt_name + 1;
// Both the file name and the build-id must have been found.
1282 if (buildid == 0 || alt_name == 0)
1285 alt_file_name = alt_name;
1292 /// Find alternate debuginfo file of a given "link" under a set of
1293 /// root directories.
1295 /// The link is a string that is read by the function
1296 /// find_alt_debug_info_link(). That link is a path that is relative
1297 /// to a given debug info file, e.g, "../../../.dwz/something.debug".
1298 /// It designates the alternate debug info file associated to a given
1299 /// debug info file.
1301 /// This function will thus try to find the .dwz/something.debug file
1302 /// under some given root directories.
1304 /// @param root_dirs the set of root directories to look from.
1306 /// @param alt_file_name a relative path to the alternate debug info
1307 /// file to look for.
1309 /// @param alt_file_path the resulting absolute path to the alternate
1310 /// debuginfo path denoted by @p alt_file_name and found under one of
1311 /// the directories in @p root_dirs. This is set iff the function
/// returns true.
1314 /// @return true iff the function found the alternate debuginfo file.
// NOTE(review): root_dirs is declared as a const vector taken by
// value, so the vector is copied on every call; a const reference
// looks sufficient -- confirm before changing the signature.
1316 find_alt_debug_info_path(const vector<char**> root_dirs,
1317 const string &alt_file_name,
1318 string &alt_file_path)
1320 if (alt_file_name.empty())
// Presumably strips the leading "../" components of the link so that
// what remains can be searched for under each root directory --
// verify against tools_utils::trim_leading_string().
1323 string altfile_name = tools_utils::trim_leading_string(alt_file_name, "../");
1325 for (vector<char**>::const_iterator i = root_dirs.begin();
1326 i != root_dirs.end();
// Each element is a char**; it is dereferenced twice to get at the
// directory path. NOTE(review): no null check is visible on either
// level -- confirm callers never store null pointers here.
1328 if (tools_utils::find_file_under_dir(**i, altfile_name, alt_file_path))
1334 /// Return the alternate debug info associated to a given main debug
/// info file.
1337 /// @param elf_module the elf module to consider.
1339 /// @param debug_root_dirs a set of root debuginfo directories under
1340 /// which to look for the alternate debuginfo file.
1342 /// @param alt_file_name output parameter. This is set to the file
1343 /// path of the alternate debug info file associated to @p elf_module.
1344 /// This is set iff the function returns a non-null result.
1346 /// @param alt_fd the file descriptor used to access the alternate
1347 /// debug info. If this parameter is set by the function, then the
1348 /// caller needs to close it, otherwise the file descriptor is going
1349 /// to be leaked. Note however that on recent versions of elfutils
1350 /// where libdw.h contains the function dwarf_getalt(), this parameter
1351 /// is set to 0, so it doesn't need to be closed.
1353 /// Note that the alternate debug info file is a DWARF extension as of
1354 /// DWARF 4 and is described at
1355 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
1357 /// @return the alternate debuginfo, or null. If @p alt_fd is
1358 /// non-zero, then the caller of this function needs to call
1359 /// dwarf_end() on the returned alternate debuginfo pointer,
1360 /// otherwise, it's going to be leaked.
1362 find_alt_debug_info(Dwfl_Module *elf_module,
1363 const vector<char**> debug_root_dirs,
1364 string& alt_file_name,
1367 if (elf_module == 0)
// NOTE(review): the boolean result of find_alt_debug_info_link() is
// not used on this line; alt_file_name is simply left as that
// function set it.
1371 find_alt_debug_info_link(elf_module, alt_file_name);
1373 #ifdef LIBDW_HAS_DWARF_GETALT
1374 // We are on recent versions of elfutils where the function
1375 // dwarf_getalt exists, so let's use it.
1376 Dwarf_Addr bias = 0;
1377 Dwarf* dwarf = dwfl_module_getdwarf(elf_module, &bias);
1378 result = dwarf_getalt(dwarf);
1381 // We are on an old version of elfutils where the function
1382 // dwarf_getalt doesn't exist yet, so let's open code its
1385 const char *file_name = 0;
1386 void **user_data = 0;
1387 Dwarf_Addr low_addr = 0;
1390 file_name = dwfl_module_info(elf_module, &user_data,
1391 &low_addr, 0, 0, 0, 0, 0);
1393 alt_fd = dwfl_standard_find_debuginfo(elf_module, user_data,
1394 file_name, low_addr,
1395 alt_name, file_name,
1398 result = dwarf_begin(alt_fd, DWARF_C_READ);
1403 // So we didn't find the alternate debuginfo file from the
1404 // information that is in the debuginfo file associated to
1405 // elf_module. Maybe the alternate debuginfo file is located
1406 // under one of the directories in debug_root_dirs. So let's
1408 string alt_file_path;
1409 if (!find_alt_debug_info_path(debug_root_dirs,
1414 // If we reach this point it means we have found the path to the
1415 // alternate debuginfo file and it's in alt_file_path. So let's
1416 // open it and read it.
// NOTE(review): fd is a local; it is not visibly stored into alt_fd
// or closed in the lines shown, so the caller may have no way to
// release it -- confirm who owns this descriptor.
1417 int fd = open(alt_file_path.c_str(), O_RDONLY);
1420 result = dwarf_begin(fd, DWARF_C_READ);
// When available, register the freshly opened alternate debug info
// with the main Dwarf handle of the module.
1422 #ifdef LIBDW_HAS_DWARF_GETALT
1423 Dwarf_Addr bias = 0;
1424 Dwarf* dwarf = dwfl_module_getdwarf(elf_module, &bias);
1425 dwarf_setalt(dwarf, result);
1432 /// Compare a symbol name against another name, possibly demangling
1433 /// the symbol name before performing the comparison.
1435 /// @param symbol_name the symbol name to take into account.
1437 /// @param name the second name to take into account.
1439 /// @param demangle if true, demangle @p symbol_name and compare the
1440 /// result of the demangling with @p name.
1442 /// @return true iff @p symbol_name (or its demangled form) equals @p name.
1444 compare_symbol_name(const string& symbol_name,
1450 string m = demangle_cplus_mangled_name(symbol_name); // demangled form of the symbol name
1453 return symbol_name == name; // plain comparison of the two names
1456 /// Return the SHT_GNU_versym, SHT_GNU_verdef and SHT_GNU_verneed
1457 /// sections that are involved in symbol versionning.
1459 /// @param elf_handle the elf handle to use.
1461 /// @param versym_section the SHT_GNU_versym section found. If the
1462 /// section wasn't found, this is set to nil.
1464 /// @param verdef_section the SHT_GNU_verdef section found. If the
1465 /// section wasn't found, this is set to nil.
1467 /// @param verneed_section the SHT_GNU_verneed section found. If the
1468 /// section wasn't found, this is set to nil.
1470 /// @return true iff at least one of the sections where found.
1472 get_symbol_versionning_sections(Elf* elf_handle,
1473 Elf_Scn*& versym_section,
1474 Elf_Scn*& verdef_section,
1475 Elf_Scn*& verneed_section)
1477 Elf_Scn* section = NULL;
1479 Elf_Scn* versym = NULL, *verdef = NULL, *verneed = NULL;
1481 while ((section = elf_nextscn(elf_handle, section)) != NULL)
1483 GElf_Shdr* h = gelf_getshdr(section, &mem);
1484 if (h->sh_type == SHT_GNU_versym)
1486 else if (h->sh_type == SHT_GNU_verdef)
1488 else if (h->sh_type == SHT_GNU_verneed)
1492 if (versym || verdef || verneed)
1494 // At least one the versionning sections was found. Return it.
1495 versym_section = versym;
1496 verdef_section = verdef;
1497 verneed_section = verneed;
1504 /// Get the version definition (from the SHT_GNU_verdef section) of a
1505 /// given symbol represented by a pointer to GElf_Versym.
1507 /// @param elf_handle the elf handle to use.
1509 /// @param versym the symbol to get the version definition for.
1511 /// @param verdef_section the SHT_GNU_verdef section.
1513 /// @param version the resulting version definition. This is set iff
1514 /// the function returns true.
1516 /// @return true upon successful completion, false otherwise.
1518 get_version_definition_for_versym(Elf* elf_handle,
1519 GElf_Versym* versym,
1520 Elf_Scn* verdef_section,
1521 elf_symbol::version& version)
// NOTE(review): unlike get_version_needed_for_versym(), no argument
// is checked against null in the lines visible here.
1523 Elf_Data* verdef_data = elf_getdata(verdef_section, NULL);
1524 GElf_Verdef verdef_mem;
1525 GElf_Verdef* verdef = gelf_getverdef(verdef_data, 0, &verdef_mem); // first entry, at offset 0
1526 size_t vd_offset = 0;
1528 for (;; vd_offset += verdef->vd_next)
// Only the low 15 bits of the versym entry are compared with the
// index of the version definition; bit 0x8000 is tested separately
// further down.
1532 if (verdef->vd_ndx == (*versym & 0x7fff))
1533 // Found the version of the symbol.
// Otherwise move to the next definition; a vd_next of zero means
// there is none left.
1535 vd_offset += verdef->vd_next;
1536 verdef = (verdef->vd_next == 0
1538 : gelf_getverdef(verdef_data, vd_offset, &verdef_mem));
// The name of the version is reached through the auxiliary entry
// located vd_aux bytes after the start of the definition.
// NOTE(review): 'verdaux' and 'verdef_section_header' are
// dereferenced below without a null check.
1543 GElf_Verdaux verdaux_mem;
1544 GElf_Verdaux *verdaux = gelf_getverdaux(verdef_data,
1545 vd_offset + verdef->vd_aux,
1547 GElf_Shdr header_mem;
1548 GElf_Shdr* verdef_section_header = gelf_getshdr(verdef_section,
// sh_link of the verdef section is used as the index of the string
// table that holds the version names.
1550 size_t verdef_stridx = verdef_section_header->sh_link;
1551 version.str(elf_strptr(elf_handle, verdef_stridx, verdaux->vda_name));
// A versym entry with bit 0x8000 set is recorded as a non-default
// version.
1552 if (*versym & 0x8000)
1553 version.is_default(false);
1555 version.is_default(true);
1558 if (!verdef || verdef->vd_next == 0)
1564 /// Get the version needed (from the SHT_GNU_verneed section) to
1565 /// resolve an undefined symbol represented by a pointer to
1568 /// @param elf_handle the elf handle to use.
1570 /// @param versym the symbol to get the needed version for.
1572 /// @param verneed_section the SHT_GNU_verneed section.
1574 /// @param version the resulting needed version. This is set iff
1575 /// the function returns true.
1577 /// @return true upon successful completion, false otherwise.
1579 get_version_needed_for_versym(Elf* elf_handle,
1580 GElf_Versym* versym,
1581 Elf_Scn* verneed_section,
1582 elf_symbol::version& version)
1584 if (versym == 0 || elf_handle == 0 || verneed_section == 0)
1587 size_t vn_offset = 0;
1588 Elf_Data* verneed_data = elf_getdata(verneed_section, NULL);
1589 GElf_Verneed verneed_mem;
1590 GElf_Verneed* verneed = gelf_getverneed(verneed_data, 0, &verneed_mem); // first entry, at offset 0
1592 for (;verneed; vn_offset += verneed->vn_next)
1594 size_t vna_offset = vn_offset;
1595 GElf_Vernaux vernaux_mem;
// The auxiliary entry is located vn_aux bytes after the start of
// the current verneed entry.
1596 GElf_Vernaux *vernaux = gelf_getvernaux(verneed_data,
1597 vn_offset + verneed->vn_aux,
// NOTE(review): in the lines visible here this loop advances
// 'verneed' and 'vna_offset' but never re-reads 'vernaux', so only
// the first auxiliary entry is ever compared with *versym --
// confirm this is intended.
1599 for (;vernaux != 0 && verneed;)
1601 if (vernaux->vna_other == *versym)
1602 // Found the version of the symbol.
1604 vna_offset += verneed->vn_next;
1605 verneed = (verneed->vn_next == 0
1607 : gelf_getverneed(verneed_data, vna_offset, &verneed_mem));
1610 if (verneed != 0 && vernaux != 0 && vernaux->vna_other == *versym)
1612 GElf_Shdr header_mem;
1613 GElf_Shdr* verneed_section_header = gelf_getshdr(verneed_section,
// sh_link of the verneed section is used as the index of the
// string table that holds the version names.
// NOTE(review): 'verneed_section_header' is not checked for null.
1615 size_t verneed_stridx = verneed_section_header->sh_link;
1616 version.str(elf_strptr(elf_handle,
1618 vernaux->vna_name));
// A versym entry with bit 0x8000 set is recorded as a non-default
// version.
1619 if (*versym & 0x8000)
1620 version.is_default(false);
1622 version.is_default(true);
1626 if (!verneed || verneed->vn_next == 0)
1632 /// Return the version for a symbol that is at a given index in its
1633 /// SHT_SYMTAB section.
1635 /// @param elf_handle the elf handle to use.
1637 /// @param symbol_index the index of the symbol to consider.
1639 /// @param get_def_version if this is true, it means that we want
1640 /// the version for a defined symbol; in that case, the version is
1641 /// looked for in a section of type SHT_GNU_verdef. Otherwise, if
1642 /// this parameter is false, this means that we want the version for
1643 /// an undefined symbol; in that case, the version is the needed one
1644 /// for the symbol to be resolved; so the version is looked for in a
1645 /// section of type SHT_GNU_verneed.
1647 /// @param version the version found for symbol at @p symbol_index.
1649 /// @return true iff a version was found for symbol at index @p
1652 get_version_for_symbol(Elf* elf_handle,
1653 size_t symbol_index,
1654 bool get_def_version,
1655 elf_symbol::version& version)
1657 Elf_Scn *versym_section = NULL,
1658 *verdef_section = NULL,
1659 *verneed_section = NULL;
1661 if (!get_symbol_versionning_sections(elf_handle,
// Fetch the versym entry that sits at the same index as the symbol;
// both pointers stay null when the versym section is missing.
1667 GElf_Versym versym_mem;
1668 Elf_Data* versym_data = (versym_section)
1669 ? elf_getdata(versym_section, NULL)
1671 GElf_Versym* versym = (versym_data)
1672 ? gelf_getversym(versym_data, symbol_index, &versym_mem)
1675 if (versym == 0 || *versym <= 1)
1676 // I got these values from the code of readelf.c in elfutils.
1677 // Apparently, if the symbol version entry has these values, the
1678 // symbol must be discarded. This is not documented in the
1679 // official specification.
1682 if (get_def_version)
1684 if (*versym == 0x8001)
1685 // I got this value from the code of readelf.c in elfutils
1686 // too. It's not really documented in the official
// Defined symbol: the version comes from the verdef section.
1691 && get_version_definition_for_versym(elf_handle, versym,
1692 verdef_section, version))
// Undefined symbol: the version comes from the verneed section.
1698 && get_version_needed_for_versym(elf_handle, versym,
1699 verneed_section, version))
1706 /// Lookup a symbol using the SysV ELF hash table.
1708 /// Note that this function hasn't been tested. So it hasn't been
1709 /// debugged yet. IOW, it is not known to work. Or rather, it's
1710 /// almost like it's surely doesn't work ;-)
1712 /// Use it at your own risks. :-)
1714 ///@parm env the environment we are operating from.
1716 /// @param elf_handle the elf_handle to use.
1718 /// @param sym_name the symbol name to look for.
1720 /// @param ht_index the index (in the section headers table) of the
1721 /// hash table section to use.
1723 /// @param sym_tab_index the index (in the section headers table) of
1724 /// the symbol table to use.
1726 /// @param demangle if true, demangle @p sym_name before comparing it
1727 /// to names from the symbol table.
1729 /// @param syms_found a vector of symbols found with the name @p
1730 /// sym_name. table.
1732 lookup_symbol_from_sysv_hash_tab(const environment* env,
1734 const string& sym_name,
1736 size_t sym_tab_index,
1738 vector<elf_symbol_sptr>& syms_found)
1740 Elf_Scn* sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
1741 ABG_ASSERT(sym_tab_section);
1743 Elf_Data* sym_tab_data = elf_getdata(sym_tab_section, 0);
1744 ABG_ASSERT(sym_tab_data);
1746 GElf_Shdr sheader_mem;
1747 GElf_Shdr* sym_tab_section_header = gelf_getshdr(sym_tab_section,
1749 Elf_Scn* hash_section = elf_getscn(elf_handle, ht_index);
1750 ABG_ASSERT(hash_section);
1752 // Poke at the different parts of the hash table and get them ready
1754 unsigned long hash = elf_hash(sym_name.c_str());
1755 Elf_Data* ht_section_data = elf_getdata(hash_section, 0);
1756 Elf32_Word* ht_data = reinterpret_cast<Elf32_Word*>(ht_section_data->d_buf);
1757 size_t nb_buckets = ht_data[0];
1758 size_t nb_chains = ht_data[1];
1760 if (nb_buckets == 0)
1761 // An empty hash table. Not sure if that is possible, but it
1762 // would mean an empty table of exported symbols.
1765 //size_t nb_chains = ht_data[1];
1766 Elf32_Word* ht_buckets = &ht_data[2];
1767 Elf32_Word* ht_chains = &ht_buckets[nb_buckets];
1769 // Now do the real work.
1770 size_t bucket = hash % nb_buckets;
1771 size_t symbol_index = ht_buckets[bucket];
1774 const char* sym_name_str;
1776 elf_symbol::type sym_type;
1777 elf_symbol::binding sym_binding;
1778 elf_symbol::visibility sym_visibility;
1780 Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
1781 size_t strings_ndx = strings_section
1782 ? elf_ndxscn(strings_section)
1787 ABG_ASSERT(gelf_getsym(sym_tab_data, symbol_index, &symbol));
1788 sym_name_str = elf_strptr(elf_handle,
1789 sym_tab_section_header->sh_link,
1792 && compare_symbol_name(sym_name_str, sym_name, demangle))
1794 sym_type = stt_to_elf_symbol_type(GELF_ST_TYPE(symbol.st_info));
1795 sym_binding = stb_to_elf_symbol_binding(GELF_ST_BIND(symbol.st_info));
1797 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(symbol.st_other));
1798 sym_size = symbol.st_size;
1799 elf_symbol::version ver;
1800 if (get_version_for_symbol(elf_handle, symbol_index,
1801 /*get_def_version=*/true, ver))
1802 ABG_ASSERT(!ver.str().empty());
1803 elf_symbol_sptr symbol_found =
1804 elf_symbol::create(env,
1810 symbol.st_shndx != SHN_UNDEF,
1811 symbol.st_shndx == SHN_COMMON,
1812 ver, sym_visibility,
1813 symbol.st_shndx == strings_ndx);
1814 syms_found.push_back(symbol_found);
1817 symbol_index = ht_chains[symbol_index];
1818 } while (symbol_index != STN_UNDEF || symbol_index >= nb_chains);
1823 /// Get the size of the elf class, in bytes.
1825 /// @param elf_handle the elf handle to use.
1827 /// @return the size computed.
1829 get_elf_class_size_in_bytes(Elf* elf_handle)
1834 ABG_ASSERT(gelf_getehdr(elf_handle, &hdr));
1835 int c = hdr.e_ident[EI_CLASS]; // the class byte of the ELF identification array
1846 ABG_ASSERT_NOT_REACHED; // an ELF class that is not handled above is treated as a fatal error
1852 /// Get a given word of a bloom filter, referred to by the index of
1853 /// the word. The word size depends on the current elf class and this
1854 /// function abstracts that nicely.
1856 /// @param elf_handle the elf handle to use.
1858 /// @param bloom_filter the bloom filter to consider.
1860 /// @param index the index of the bloom filter word to return.
1862 bloom_word_at(Elf* elf_handle,
1863 Elf32_Word* bloom_filter,
1866 GElf_Word result = 0;
1868 ABG_ASSERT(gelf_getehdr(elf_handle, &h));
1870 c = h.e_ident[EI_CLASS]; // the class byte selects how the filter is indexed below
1875 result = bloom_filter[index];
// NOTE(review): this cast targets GElf_Word*, like 'result' above;
// if GElf_Word is a 32-bit type (to be confirmed in gelf.h) this
// branch indexes the filter exactly like the branch above and
// cannot yield a 64-bit word.
1879 GElf_Word* f= reinterpret_cast<GElf_Word*>(bloom_filter);
1890 /// The abstraction of the gnu elf hash table.
1892 /// The members of this struct are explained at
1893 /// - https://sourceware.org/ml/binutils/2006-10/msg00377.html
1894 /// - https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections.
1898 Elf32_Word* buckets; // start of the buckets; set up right after the bloom filter
1900 size_t first_sym_index; // read from the second word of the hash section
1903 Elf32_Word* bloom_filter; // start of the bloom filter, i.e. the fifth word of the hash section
1906 Elf_Scn* sym_tab_section; // the symbol table section this hash table is about
1907 GElf_Shdr sym_tab_section_header; // copy of the header of sym_tab_section
1921 }; // end struct gnu_ht
1923 /// Setup the members of the gnu hash table.
1925 /// @param elf_handle a handle on the elf file to use.
1927 /// @param ht_index the index (into the elf section headers table) of
1928 /// the hash table section to use.
1930 /// @param sym_tab_index the index (into the elf section headers
1931 /// table) of the symbol table the gnu hash table is about.
1933 /// @param ht the resulting hash table.
1935 /// @return true iff the hash table @p ht could be setup.
1937 setup_gnu_ht(Elf* elf_handle,
1939 size_t sym_tab_index,
1942 ht.sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
1943 ABG_ASSERT(ht.sym_tab_section);
1944 ABG_ASSERT(gelf_getshdr(ht.sym_tab_section, &ht.sym_tab_section_header));
// Number of entries of the symbol table: section size divided by
// the size of one entry.
// NOTE(review): sh_entsize is not checked against zero before the
// division.
1946 ht.sym_tab_section_header.sh_size / ht.sym_tab_section_header.sh_entsize;
1947 Elf_Scn* hash_section = elf_getscn(elf_handle, ht_index);
1948 ABG_ASSERT(hash_section);
1950 // Poke at the different parts of the hash table and get them ready
// NOTE(review): 'ht_section_data' is dereferenced without a null
// check.
1952 Elf_Data* ht_section_data = elf_getdata(hash_section, 0);
1953 Elf32_Word* ht_data = reinterpret_cast<Elf32_Word*>(ht_section_data->d_buf);
// The first four 32-bit words of the section are read as: number of
// buckets, index of the first symbol, number of bloom filter words
// and bloom filter shift.
1955 ht.nb_buckets = ht_data[0];
1956 if (ht.nb_buckets == 0)
1957 // An empty hash table. Not sure if that is possible, but it
1958 // would mean an empty table of exported symbols.
1960 ht.first_sym_index = ht_data[1];
1961 // The number of words used by the bloom filter. A size of a word
1963 ht.bf_nwords = ht_data[2];
1964 // The shift used by the bloom filter code.
1965 ht.shift = ht_data[3];
1966 // The data of the bloom filter proper.
1967 ht.bloom_filter = &ht_data[4];
1968 // The size of the bloom filter in 4 bytes word. This is going to
1969 // be used to index the 'bloom_filter' above, which is of type
1970 // Elf32_Word*; thus we need that bf_size be expressed in 4 bytes
1972 ht.bf_size = (get_elf_class_size_in_bytes(elf_handle) / 4) * ht.bf_nwords;
1973 // The buckets of the hash table.
1974 ht.buckets = ht.bloom_filter + ht.bf_size;
1975 // The chain of the hash table.
1976 ht.chain = ht.buckets + ht.nb_buckets;
1981 /// Look into the symbol tables of the underlying elf file and find
1982 /// the symbol we are being asked.
1984 /// This function uses the GNU hash table for the symbol lookup.
1986 /// The reference for the implementation of this function can be
1988 /// - https://sourceware.org/ml/binutils/2006-10/msg00377.html
1989 /// - https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections.
1991 /// @param elf_handle the elf handle to use.
1993 /// @param sym_name the name of the symbol to look for.
1995 /// @param ht_index the index of the hash table header to use.
1997 /// @param sym_tab_index the index of the symbol table header to use
1998 /// with this hash table.
2000 /// @param demangle if true, demangle the names read from the symbol
/// table before comparing them to @p sym_name.
2002 /// @param syms_found the vector of symbols found with the name @p
2005 /// @return true if a symbol was actually found.
2007 lookup_symbol_from_gnu_hash_tab(const environment* env,
2009 const string& sym_name,
2011 size_t sym_tab_index,
2013 vector<elf_symbol_sptr>& syms_found)
2016 if (!setup_gnu_ht(elf_handle, ht_index, sym_tab_index, ht))
2019 // Now do the real work.
2021 // Compute bloom hashes (GNU hash and second bloom specific hashes).
2022 size_t h1 = elf_gnu_hash(sym_name.c_str());
2023 size_t h2 = h1 >> ht.shift;
2024 // The size of one of the words used in the bloom
2026 int c = get_elf_class_size_in_bytes(elf_handle) * 8; // word size in bits
2027 int n = (h1 / c) % ht.bf_nwords; // index of the bloom filter word to test
// NOTE(review): 'bitmask' is an unsigned char, so every bit above
// bit 7 of the computed mask is dropped, and '1 << k' is an int
// shift although k can be as large as c - 1. The mask looks like it
// should be as wide as a bloom filter word; fixing it also requires
// bloom_word_at() to return a word of that width.
2028 unsigned char bitmask = (1 << (h1 % c)) | (1 << (h2 % c));
2030 // Test if the symbol is *NOT* present in this ELF file.
2031 if ((bloom_word_at(elf_handle, ht.bloom_filter, n) & bitmask) != bitmask)
2034 size_t i = ht.buckets[h1 % ht.nb_buckets]; // index of the first symbol of the bucket
2038 Elf32_Word stop_word, *stop_wordp;
2039 elf_symbol::version ver;
2041 const char* sym_name_str;
2044 elf_symbol::type sym_type;
2045 elf_symbol::binding sym_binding;
2046 elf_symbol::visibility sym_visibility;
2047 Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
2048 size_t strings_ndx = strings_section
2049 ? elf_ndxscn(strings_section)
2052 // Let's walk the hash table and record the versions of all the
2053 // symbols whose name equals sym_name.
// The chain array is indexed relative to the first symbol covered
// by the hash table, hence the subtraction of first_sym_index.
2054 for (i = ht.buckets[h1 % ht.nb_buckets],
2055 stop_wordp = &ht.chain[i - ht.first_sym_index];
2058 < ht.chain + (ht.sym_count - ht.first_sym_index));
2061 stop_word = *stop_wordp;
// Bit 0 of a chain word is the end-of-chain marker, so it is
// ignored when comparing hash values.
2062 if ((stop_word & ~ 1)!= (h1 & ~1))
2063 // A given bucket can reference several hashes. Here we
2064 // stumbled across a hash value different from the one we are
2065 // looking for. Let's keep walking.
2068 ABG_ASSERT(gelf_getsym(elf_getdata(ht.sym_tab_section, 0),
2070 sym_name_str = elf_strptr(elf_handle,
2071 ht.sym_tab_section_header.sh_link,
2074 && compare_symbol_name(sym_name_str, sym_name, demangle))
2076 // So we found a symbol (in the symbol table) that equals
2077 // sym_name. Now lets try to get its version and record it.
2078 sym_type = stt_to_elf_symbol_type(GELF_ST_TYPE(symbol.st_info));
2079 sym_binding = stb_to_elf_symbol_binding(GELF_ST_BIND(symbol.st_info));
2081 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(symbol.st_other));
2083 if (get_version_for_symbol(elf_handle, i,
2084 /*get_def_version=*/true,
2086 ABG_ASSERT(!ver.str().empty());
2088 elf_symbol_sptr symbol_found =
2089 elf_symbol::create(env, i,
2092 sym_type, sym_binding,
2093 symbol.st_shndx != SHN_UNDEF,
2094 symbol.st_shndx == SHN_COMMON,
2095 ver, sym_visibility,
2096 symbol.st_shndx == strings_ndx);
2097 syms_found.push_back(symbol_found);
2102 // The last bit of the stop_word is 1. That means we need to
2103 // stop here. We reached the end of the chain of values
2104 // referenced by the hash bucket.
2110 /// Look into the symbol tables of the underlying elf file and find
2111 /// the symbol we are being asked.
2113 /// This function uses the elf hash table (be it the GNU hash table or
2114 /// the sysv hash table) for the symbol lookup.
2116 /// @param env the environment we are operating from.
2118 /// @param elf_handle the elf handle to use.
2120 /// @param ht_kind the kind of hash table to use. This is returned by
2121 /// the function find_hash_table_section_index.
2123 /// @param ht_index the index (in the section headers table) of the
2124 /// hash table section to use.
2126 /// @param symtab_index the index (in section headers table) of the
2127 /// symbol table to use with this hash table.
2129 /// @param symbol_name the name of the symbol to look for.
2131 /// @param demangle forwarded as is to the hash-table-specific lookup.
2133 /// @param syms_found the symbols that were actually found with the
2134 /// name @p symbol_name.
2136 /// @return true iff the function found the symbol from the elf hash
2139 lookup_symbol_from_elf_hash_tab(const environment* env,
2141 hash_table_kind ht_kind,
2143 size_t symtab_index,
2144 const string& symbol_name,
2146 vector<elf_symbol_sptr>& syms_found)
// Nothing can be looked up without an elf handle or with an empty
// name.
2148 if (elf_handle == 0 || symbol_name.empty())
2151 if (ht_kind == NO_HASH_TABLE_KIND)
// Dispatch to the lookup routine that matches the kind of hash
// table.
2154 if (ht_kind == SYSV_HASH_TABLE_KIND)
2155 return lookup_symbol_from_sysv_hash_tab(env,
2156 elf_handle, symbol_name,
2161 else if (ht_kind == GNU_HASH_TABLE_KIND)
2162 return lookup_symbol_from_gnu_hash_tab(env,
2163 elf_handle, symbol_name,
2171 /// Lookup a symbol from the symbol table directly.
2174 /// @param env the environment we are operating from.
2176 /// @param elf_handle the elf handle to use.
2178 /// @param sym_name the name of the symbol to look up.
2180 /// @param sym_tab_index the index (in the section headers table) of
2181 /// the symbol table section.
2183 /// @param demangle if true, demangle the names found in the symbol
2184 /// table before comparing them with @p sym_name.
2186 /// @param syms_found the vector to which every symbol whose name
2188 /// matches @p sym_name is appended. Each appended symbol carries
2190 /// the type, binding, visibility and
2192 /// version read from the symbol table.
2194 /// @return true iff the symbol was found.
2196 lookup_symbol_from_symtab(const environment* env,
2198 const string& sym_name,
2199 size_t sym_tab_index,
2201 vector<elf_symbol_sptr>& syms_found)
2203 // TODO: read all of the symbol table, store it in memory in a data
2204 // structure that associates each symbol with its versions and in
2205 // which lookups of a given symbol is fast.
2206 Elf_Scn* sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
2207 ABG_ASSERT(sym_tab_section);
2209 GElf_Shdr header_mem;
2210 GElf_Shdr * sym_tab_header = gelf_getshdr(sym_tab_section,
// Number of entries of the symbol table: section size divided by
// the size of one entry.
// NOTE(review): neither 'sym_tab_header' nor a zero sh_entsize is
// guarded against in the lines visible here.
2213 size_t symcount = sym_tab_header->sh_size / sym_tab_header->sh_entsize;
2214 Elf_Data* symtab = elf_getdata(sym_tab_section, NULL);
2217 elf_symbol::version ver;
2219 Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
2220 size_t strings_ndx = strings_section
2221 ? elf_ndxscn(strings_section)
// Linear scan of every entry of the symbol table.
2224 for (size_t i = 0; i < symcount; ++i)
// NOTE(review): 'sym' is used below without checking that
// gelf_getsym() succeeded.
2227 sym = gelf_getsym(symtab, i, &sym_mem);
2228 name_str = elf_strptr(elf_handle,
2229 sym_tab_header->sh_link,
2232 if (name_str && compare_symbol_name(name_str, sym_name, demangle))
2234 elf_symbol::type sym_type =
2235 stt_to_elf_symbol_type(GELF_ST_TYPE(sym->st_info));
2236 elf_symbol::binding sym_binding =
2237 stb_to_elf_symbol_binding(GELF_ST_BIND(sym->st_info));
2238 elf_symbol::visibility sym_visibility =
2239 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(sym->st_other));
2240 bool sym_is_defined = sym->st_shndx != SHN_UNDEF;
2241 bool sym_is_common = sym->st_shndx == SHN_COMMON;
// A defined symbol gets its version from the version definitions,
// an undefined one from the needed versions.
2243 if (get_version_for_symbol(elf_handle, i,
2244 /*get_def_version=*/sym_is_defined,
2246 ABG_ASSERT(!ver.str().empty());
2247 elf_symbol_sptr symbol_found =
2248 elf_symbol::create(env, i, sym->st_size,
2250 sym_binding, sym_is_defined,
2251 sym_is_common, ver, sym_visibility,
2252 sym->st_shndx == strings_ndx);
2253 syms_found.push_back(symbol_found);
2264 /// Look into the symbol tables of the underlying elf file and see
2265 /// if we find a given symbol.
2267 /// @param env the environment we are operating from.
2269 /// @param symbol_name the name of the symbol to look for.
2271 /// @param demangle if true, try to demangle the symbol name found in
2272 /// the symbol table before comparing it to @p symbol_name.
2274 /// @param syms_found the list of symbols found, with the name @p
2277 /// The symbols appended to @p syms_found are created by the lookup
2278 /// routine this function delegates to: the hash table based one when
2279 /// the file has a hash table section,
2281 /// and the plain symbol table scan otherwise.
2283 /// The type, binding, visibility and version of each symbol found are
2284 /// carried by the elf_symbol objects themselves; this function has no
2285 /// separate out parameters for them.
2287 /// Nothing is looked up when the file has neither a hash table
2288 /// section nor a symbol table section.
2290 /// @return true iff a symbol with the name @p symbol_name was found.
2292 lookup_symbol_from_elf(const environment* env,
2294 const string& symbol_name,
2296 vector<elf_symbol_sptr>& syms_found)
2298 size_t hash_table_index = 0, symbol_table_index = 0;
2299 hash_table_kind ht_kind = NO_HASH_TABLE_KIND;
2302 ht_kind = find_hash_table_section_index(elf_handle,
2304 symbol_table_index);
// No hash table: fall back to scanning the symbol table directly.
2306 if (ht_kind == NO_HASH_TABLE_KIND)
2308 if (!find_symbol_table_section_index(elf_handle, symbol_table_index))
2311 return lookup_symbol_from_symtab(env,
// Otherwise use the hash table that was found.
2319 return lookup_symbol_from_elf_hash_tab(env,
2329 /// Look into the symbol tables of the underlying elf file and see if
2330 /// we find a given public (global or weak) symbol of function type.
2332 /// @param env the environment we are operating from.
2334 /// @param elf_handle the elf handle to use for the query.
2336 /// @param symbol_name the function symbol to look for.
2338 /// @param func_syms the vector of public function symbols found, if
2341 /// @return true iff the symbol was found.
2343 lookup_public_function_symbol_from_elf(const environment* env,
2345 const string& symbol_name,
2346 vector<elf_symbol_sptr>& func_syms)
2348 vector<elf_symbol_sptr> syms_found;
// Names are compared as is: no demangling is requested.
2351 if (lookup_symbol_from_elf(env, elf_handle, symbol_name,
2352 /*demangle=*/false, syms_found))
2354 for (vector<elf_symbol_sptr>::const_iterator i = syms_found.begin();
2355 i != syms_found.end();
2358 elf_symbol::type type = (*i)->get_type();
2359 elf_symbol::binding binding = (*i)->get_binding();
// Keep the symbols of function, GNU ifunc or common type whose
// binding is global or weak.
2361 if ((type == elf_symbol::FUNC_TYPE
2362 || type == elf_symbol::GNU_IFUNC_TYPE
2363 || type == elf_symbol::COMMON_TYPE)
2364 && (binding == elf_symbol::GLOBAL_BINDING
2365 || binding == elf_symbol::WEAK_BINDING))
2367 func_syms.push_back(*i);
2376 /// Look into the symbol tables of the underlying elf file and see if
2377 /// we find a given public (global or weak) symbol of variable type.
2379 /// @param env the environment we are operating from.
2381 /// @param elf the elf handle to use for the query.
2383 /// @param symname the variable symbol to look for.
2385 /// @param var_syms the vector of public variable symbols found, if any.
2387 /// @return true iff symbol @p symname was found.
2389 lookup_public_variable_symbol_from_elf(const environment* env,
2391 const string& symname,
2392 vector<elf_symbol_sptr>& var_syms)
2394 vector<elf_symbol_sptr> syms_found;
// Names are compared as is: no demangling is requested.
2397 if (lookup_symbol_from_elf(env, elf, symname, /*demangle=*/false, syms_found))
2399 for (vector<elf_symbol_sptr>::const_iterator i = syms_found.begin();
2400 i != syms_found.end();
// Keep the variable symbols whose binding is global or weak.
2402 if ((*i)->is_variable()
2403 && ((*i)->get_binding() == elf_symbol::GLOBAL_BINDING
2404 || (*i)->get_binding() == elf_symbol::WEAK_BINDING))
2406 var_syms.push_back(*i);
2414 /// Get data tag information of an ELF file by looking up into its
2417 /// @param elf the elf handle to use for the query.
2419 /// @param data_tag data tag to look for in dynamic segment
2420 /// @param dt_tag_data vector of found information for a given @p data_tag
2422 /// @return true iff data tag @p data_tag was found
2425 lookup_data_tag_from_dynamic_segment(Elf* elf,
2426 Elf64_Sxword data_tag,
2427 vector<string>& dt_tag_data)
2429 size_t num_prog_headers = 0;
2431 if (elf_getphdrnum(elf, &num_prog_headers) < 0)
2434 // Cycle through each program header.
2435 for (size_t i = 0; i < num_prog_headers; ++i)
2438 GElf_Phdr *phdr = gelf_getphdr(elf, i, &phdr_mem);
// Only the PT_DYNAMIC program header is of interest.
2439 if (phdr == NULL || phdr->p_type != PT_DYNAMIC)
2442 // Poke at the dynamic segment like a section, so that we can
2443 // get its section header information; also we'd like to read
2444 // the data of the segment by using elf_getdata() but that
2445 // function needs a Elf_Scn data structure to act on.
2446 // Elfutils doesn't really have any particular function to
2447 // access segment data, other than the functions used to
2448 // access section data.
2449 Elf_Scn *dynamic_section = gelf_offscn(elf, phdr->p_offset);
2451 GElf_Shdr *dynamic_section_header = gelf_getshdr(dynamic_section,
2453 if (dynamic_section_header == NULL
2454 || dynamic_section_header->sh_type != SHT_DYNAMIC)
2457 // Get data of the dynamic segment (seen as a section).
2458 Elf_Data *data = elf_getdata(dynamic_section, NULL);
2462 // Get the index of the section headers string table.
2463 size_t string_table_index = 0;
2464 ABG_ASSERT (elf_getshdrstrndx(elf, &string_table_index) >= 0);
2466 size_t dynamic_section_header_entry_size = gelf_fsize(elf,
// sh_link of the dynamic section designates the section holding the
// strings of its entries; its header must be readable.
2472 gelf_getshdr(elf_getscn(elf,
2473 dynamic_section_header->sh_link),
2475 ABG_ASSERT(link != NULL);
2477 size_t num_dynamic_section_entries =
2478 dynamic_section_header->sh_size / dynamic_section_header_entry_size;
2480 // Now walk through all the DT_* data tags that are in the
2482 for (size_t j = 0; j < num_dynamic_section_entries; ++j)
2484 GElf_Dyn dynamic_section_mem;
// NOTE(review): this 'dynamic_section' (a GElf_Dyn*) shadows the
// Elf_Scn* of the same name declared above, and it is dereferenced
// without checking that gelf_getdyn() succeeded.
2485 GElf_Dyn *dynamic_section = gelf_getdyn(data,
2487 &dynamic_section_mem);
2488 if (dynamic_section->d_tag == data_tag)
// The value of the entry is used as an offset into the string
// table designated by sh_link.
// NOTE(review): the result of elf_strptr() is converted to a
// std::string without a null check.
2490 dt_tag_data.push_back(elf_strptr(elf,
2491 dynamic_section_header->sh_link,
2492 dynamic_section->d_un.d_val));
2500 /// Convert the type of ELF file into @ref elf_type.
2502 /// @param elf the elf handle to use for the query.
2504 /// @return the @ref elf_type for a given elf type.
2506 elf_file_type(Elf* elf)
2509 GElf_Ehdr *header = gelf_getehdr (elf, &ehdr_mem);
2510 vector<string> dt_debug_data;
// NOTE(review): 'header' is dereferenced without checking that
// gelf_getehdr() succeeded.
2512 switch (header->e_type)
// A file whose dynamic segment carries a DT_DEBUG tag is reported
// as a position independent executable rather than as a DSO.
2515 if (lookup_data_tag_from_dynamic_segment(elf, DT_DEBUG, dt_debug_data))
2516 return ELF_TYPE_PI_EXEC;
2518 return ELF_TYPE_DSO;
2520 return ELF_TYPE_EXEC;
2522 return ELF_TYPE_RELOCATABLE;
2524 return ELF_TYPE_UNKNOWN;
2528 // ---------------------------------------
2529 // <location expression evaluation types>
2530 // ---------------------------------------
2532 /// An abstraction of a value representing the result of the
2533 /// evaluation of a dwarf expression. This is abstraction represents
2534 /// a partial view on the possible values because we are only
2535 /// interested in extracting the latest and longuest constant
2536 /// sub-expression of a given dwarf expression.
2540 int64_t const_value_;
2548 expr_result(bool is_const)
2549 : is_const_(is_const),
2553 explicit expr_result(int64_t v)
2558 /// @return true if the value is a constant. Otherwise, return
2559 /// false, meaning the value represents a quantity for which we need
2560 /// inferior (a running program) state to determine the value.
2566 /// @param f a flag saying if the value is set to a constant or not.
2571 /// Get the current constant value iff this represents a
2574 /// @param value the out parameter. Is set to the constant value of
2575 /// the @ref expr_result. This is set iff the function return true.
2577 ///@return true if this has a constant value, false otherwise.
2579 const_value(int64_t& value)
2583 value = const_value_;
2589 /// Getter of the constant value of the current @ref expr_result.
2591 /// Note that the current @ref expr_result must be constant,
2592 /// otherwise the current process is aborted.
2594 /// @return the constant value of the current @ref expr_result.
2598 ABG_ASSERT(is_const());
2599 return const_value_;
2602 operator int64_t() const
2603 {return const_value();}
2606 operator=(const int64_t v)
2613 operator==(const expr_result& o) const
2614 {return const_value_ == o.const_value_ && is_const_ == o.is_const_;}
2617 operator>=(const expr_result& o) const
2618 {return const_value_ >= o.const_value_;}
2621 operator<=(const expr_result& o) const
2622 {return const_value_ <= o.const_value_;}
2625 operator>(const expr_result& o) const
2626 {return const_value_ > o.const_value_;}
2629 operator<(const expr_result& o) const
2630 {return const_value_ < o.const_value_;}
2633 operator+(const expr_result& v) const
2635 expr_result r(*this);
2636 r.const_value_ += v.const_value_;
2637 r.is_const_ = r.is_const_ && v.is_const_;
2642 operator+=(int64_t v)
2649 operator-(const expr_result& v) const
2651 expr_result r(*this);
2652 r.const_value_ -= v.const_value_;
2653 r.is_const_ = r.is_const_ && v.is_const_;
2658 operator%(const expr_result& v) const
2660 expr_result r(*this);
2661 r.const_value_ %= v.const_value_;
2662 r.is_const_ = r.is_const_ && v.is_const();
2667 operator*(const expr_result& v) const
2669 expr_result r(*this);
2670 r.const_value_ *= v.const_value_;
2671 r.is_const_ = r.is_const_ && v.is_const();
2676 operator|(const expr_result& v) const
2678 expr_result r(*this);
2679 r.const_value_ |= v.const_value_;
2680 r.is_const_ = r.is_const_ && v.is_const_;
2685 operator^(const expr_result& v) const
2687 expr_result r(*this);
2688 r.const_value_ ^= v.const_value_;
2689 r.is_const_ = r.is_const_ && v.is_const_;
2694 operator>>(const expr_result& v) const
2696 expr_result r(*this);
2697 r.const_value_ = r.const_value_ >> v.const_value_;
2698 r.is_const_ = r.is_const_ && v.is_const_;
2703 operator<<(const expr_result& v) const
2705 expr_result r(*this);
2706 r.const_value_ = r.const_value_ << v.const_value_;
2707 r.is_const_ = r.is_const_ && v.is_const_;
2714 expr_result r(*this);
2715 r.const_value_ = ~r.const_value_;
2722 expr_result r(*this);
2723 r.const_value_ = -r.const_value_;
2730 expr_result r = *this;
2731 r.const_value_ = std::abs(static_cast<long double>(r.const_value()));
2736 operator&(const expr_result& o)
2738 expr_result r(*this);
2739 r.const_value_ &= o.const_value_;
2740 r.is_const_ = r.is_const_ && o.is_const_;
2745 operator/(const expr_result& o)
2747 expr_result r(*this);
2748 r.is_const_ = r.is_const_ && o.is_const_;
2749 return r.const_value() / o.const_value();
2751 };// class end expr_result;
2753 /// A class that implements a stack of @ref expr_result, to be used in
2754 /// the engine evaluating DWARF expressions.
2755 class expr_result_stack_type
2757 vector<expr_result> elems_;
2761 expr_result_stack_type()
2762 {elems_.reserve(4);}
2765 operator[](unsigned i)
2767 unsigned s = elems_.size();
2769 return elems_[s - 1 -i];
2773 operator[](unsigned i) const
2774 {return const_cast<expr_result_stack_type*>(this)->operator[](i);}
2778 {return elems_.size();}
2780 vector<expr_result>::reverse_iterator
2782 {return elems_.rbegin();}
2784 const vector<expr_result>::reverse_iterator
2786 {return const_cast<expr_result_stack_type*>(this)->begin();}
2788 vector<expr_result>::reverse_iterator
2790 {return elems_.rend();}
2792 const vector<expr_result>::reverse_iterator
2794 {return const_cast<expr_result_stack_type*>(this)->end();}
2798 {return elems_.back();}
2802 {return const_cast<expr_result_stack_type*>(this)->front();}
2805 push_front(expr_result e)
2806 {elems_.push_back(e);}
2811 expr_result r = front();
2817 erase(vector<expr_result>::reverse_iterator i)
2818 {elems_.erase(--i.base());}
2823 }; // end class expr_result_stack_type
2825 /// Abstraction of the evaluation context of a dwarf expression.
2826 struct dwarf_expr_eval_context
2829 expr_result_stack_type stack;
2830 // Is set to true if the result of the expression that got evaluated
2831 // is a TLS address.
2834 dwarf_expr_eval_context()
2835 : accum(/*is_const=*/false),
2838 stack.push_front(expr_result(true));
2845 stack.push_front(expr_result(true));
2846 accum = expr_result(false);
2847 set_tls_addr = false;
2850 /// Set a flag to tell that the result of the expression that got
2851 /// evaluated is a TLS address.
2853 /// @param f true iff the result of the expression that got
2854 /// evaluated is a TLS address, false otherwise.
2856 set_tls_address(bool f)
2859 /// Getter for the flag that tells if the result of the expression
2860 /// that got evaluated is a TLS address.
2862 /// @return true iff the result of the expression that got evaluated
2863 /// is a TLS address.
2865 set_tls_address() const
2866 {return set_tls_addr;}
2871 expr_result r = stack.front();
2877 push(const expr_result& v)
2878 {stack.push_front(v);}
2879 };//end class dwarf_expr_eval_context
2881 // ---------------------------------------
2882 // </location expression evaluation types>
2883 // ---------------------------------------
2885 /// An enum for the different kinds of linux kernel specific symbol
2887 enum kernel_symbol_table_kind
2889 /// This is for an undefined kind of kernel symbol table.
2890 KERNEL_SYMBOL_TABLE_KIND_UNDEFINED,
2892 /// The __ksymtab symbol table.
2893 KERNEL_SYMBOL_TABLE_KIND_KSYMTAB,
2895 /// The __ksymtab_gpl symbol table.
2896 KERNEL_SYMBOL_TABLE_KIND_KSYMTAB_GPL
2899 /// An enum which specifies the format of the kernel symbol table
2900 /// (__ksymtab or __ksymtab_gpl).
2903 /// This enumerator means that no __ksymtab format has been
2905 UNDEFINED_KSYMTAB_FORMAT,
2907 /// Before Linux v4.19, the format of the __ksymtab (and the
2908 /// __ksymtab_gpl) section was the following.
2910 /// It's an array of entries. Each entry describes a symbol. Each
2911 /// entry is made of two words. Each is of the word size of the
2912 /// architecture. (8-bytes on a 64 bits arch and 4-bytes on a 32
2913 /// bits arch) The first word is the address of a symbol. The
2914 /// second one is the address of a static global variable symbol
2915 /// which value is the string representing the symbol name. That
2916 /// string is in the __ksymtab_strings section.
2918 /// So we are mostly interested in the symbol address part of each
2921 /// Thus this enumerator means that we have the pre v4.19 __ksymtab
2923 PRE_V4_19_KSYMTAB_FORMAT,
2925 /// Since Linux v4.19, the format of the __ksymtab section has
2926 /// changed. The commit that changed it is
2927 /// https://github.com/torvalds/linux/commit/7290d58095712a89f845e1bca05334796dd49ed2.
2929 /// The __ksymtab and __ksymtab_gpl sections each are an array of
2930 /// entries. Each entry describes a symbol. Each entry is made of
2931 /// two words. Each word is 4-bytes length. The first word is the
2932 /// 'place-relative' address of a symbol. The second one is the
2933 /// 'place-relative' address of a static global variable symbol
2934 /// which value is the string representing the symbol name. That
2935 /// string is in the __ksymtab_strings section.
2937 /// Below is the description of what a "place-relative address"
2938 /// means. For that, we are going to define the meaning of four
2939 /// values: 'N', 'S', 'O', and 'A'.
2941 /// *** 'N' and 'O' ***
2942 /// Suppose 'N' is the value of the number stored at offset 'O' (big
2943 /// oh, not zero) in the __ksymtab section.
2946 /// That N designates a symbol in the symtab section which value is
2947 /// S. So S is the symbol value (in the .symtab symbol table)
2948 /// referred to by the number N found at offset 'O'.
2951 /// Also, suppose the __ksymtab section will be loaded at memory
2952 /// address A, as indicated by the 'address' field of the section
2953 /// header describing the __ksymtab section.
2955 /// So here is the formula that gives us S, from N:
2959 /// Storing addresses this way does away with the need to have
2960 /// relocations for the __ksymtab section. So in effect, vmlinux
2961 /// binaries implementing this new format of __ksymtab won't have
2962 /// any .rela__ksymtab relocation section for the __ksymtab section
2963 /// in particular (nor any relocation section at all).
2966 /// Note that we are mostly interested in the symbol address part of
2968 V4_19_KSYMTAB_FORMAT
2969 }; // end enum ksymtab_format
2971 /// The context used to build ABI corpus from debug info in DWARF
2974 /// This context is to be created by create_read_context(). It's then
2975 /// passed to all the routines that read specific dwarf bits as they
2976 /// get some important data from it.
2978 /// When a new data member is added to this context, it must be
2979 /// initialized by the read_context::initialize() function. So please
2987 bool load_in_linux_kernel_mode;
2988 bool load_all_types;
2989 bool ignore_symbol_table;
2995 load_in_linux_kernel_mode(),
2997 ignore_symbol_table(),
3001 };// read_context::options_type
3003 /// A set of containers that contains one container per kind of @ref
3004 /// die_source. This allows to associate DIEs to things, depending
3005 /// on the source of the DIE.
3006 template <typename ContainerType>
3007 class die_source_dependant_container_set
3009 ContainerType primary_debug_info_container_;
3010 ContainerType alt_debug_info_container_;
3011 ContainerType type_unit_container_;
3015 /// Getter for the container associated to DIEs coming from a
3016 /// given @ref die_source.
3018 /// @param source the die_source for which we want the container.
3020 /// @return the container that associates DIEs coming from @p
3021 /// source to something.
3023 get_container(die_source source)
3025 ContainerType *result = 0;
3028 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
3029 result = &primary_debug_info_container_;
3031 case ALT_DEBUG_INFO_DIE_SOURCE:
3032 result = &alt_debug_info_container_;
3034 case TYPE_UNIT_DIE_SOURCE:
3035 result = &type_unit_container_;
3037 case NO_DEBUG_INFO_DIE_SOURCE:
3038 case NUMBER_OF_DIE_SOURCES:
3039 ABG_ASSERT_NOT_REACHED;
3044 /// Getter for the container associated to DIEs coming from a
3045 /// given @ref die_source.
3047 /// @param source the die_source for which we want the container.
3049 /// @return the container that associates DIEs coming from @p
3050 /// source to something.
3051 const ContainerType&
3052 get_container(die_source source) const
3054 return const_cast<die_source_dependant_container_set*>(this)->
3055 get_container(source);
3058 /// Getter for the container associated to DIEs coming from the
3059 /// same source as a given DIE.
3061 /// @param ctxt the read context to consider.
3063 /// @param die the DIE which should have the same source as the
3064 /// source of the container we want.
3066 /// @return the container that associates DIEs coming from the
3067 /// same source as @p die.
3069 get_container(const read_context& ctxt, const Dwarf_Die *die)
3071 die_source source = NO_DEBUG_INFO_DIE_SOURCE;
3072 ABG_ASSERT(ctxt.get_die_source(die, source));
3073 return get_container(source);
3076 /// Getter for the container associated to DIEs coming from the
3077 /// same source as a given DIE.
3079 /// @param ctxt the read context to consider.
3081 /// @param die the DIE which should have the same source as the
3082 /// source of the container we want.
3084 /// @return the container that associates DIEs coming from the
3085 /// same source as @p die.
3086 const ContainerType&
3087 get_container(const read_context& ctxt, const Dwarf_Die *die) const
3089 return const_cast<die_source_dependant_container_set*>(this)->
3090 get_container(ctxt, die);
3093 /// Clear the container set.
3097 primary_debug_info_container_.clear();
3098 alt_debug_info_container_.clear();
3099 type_unit_container_.clear();
3101 }; // end die_dependant_container_set
3103 suppr::suppressions_type supprs_;
3104 unsigned short dwarf_version_;
3105 Dwfl_Callbacks offline_callbacks_;
3106 // The set of directories under which to look for debug info.
3107 vector<char**> debug_info_root_paths_;
3110 // The alternate debug info. Alternate debug info sections are a
3111 // DWARF extension as of DWARF4 and are described at
3112 // http://www.dwarfstd.org/ShowIssue.php?issue=120604.1. Below are
3113 // the file descriptor used to access the alternate debug info
3114 // sections, and the representation of the DWARF debug info. Both
3115 // need to be freed after we are done using them, with fclose and
3119 string alt_debug_info_path_;
3120 // The address range of the offline elf file we are looking at.
3121 Dwfl_Module* elf_module_;
3122 mutable Elf* elf_handle_;
3124 mutable Elf_Scn* bss_section_;
3125 mutable Elf_Scn* text_section_;
3126 mutable Elf_Scn* rodata_section_;
3127 mutable Elf_Scn* data_section_;
3128 mutable Elf_Scn* data1_section_;
3129 mutable Elf_Scn* symtab_section_;
3130 // The "Official procedure descriptor section, aka .opd", used in
3131 // ppc64 elf v1 binaries. This section contains the procedure
3132 // descriptors on that platform.
3133 Elf_Scn* opd_section_;
3134 /// The format of the special __ksymtab section from the linux
3136 mutable ksymtab_format ksymtab_format_;
3137 /// The size of one entry of the __ksymtab section.
3138 mutable size_t ksymtab_entry_size_;
3139 /// The number of entries in the __ksymtab section.
3140 mutable size_t nb_ksymtab_entries_;
3141 /// The number of entries in the __ksymtab_gpl section.
3142 mutable size_t nb_ksymtab_gpl_entries_;
3143 /// The special __ksymtab and __ksymtab_gpl sections from linux
3144 /// kernel or module binaries. The former is used to store
3145 /// references to symbols exported using the EXPORT_SYMBOL macro
3146 /// from the linux kernel. The latter is used to store references
3147 /// to symbols exported using the EXPORT_SYMBOL_GPL macro from the
3149 Elf_Scn* ksymtab_section_;
3150 Elf_Scn* ksymtab_reloc_section_;
3151 Elf_Scn* ksymtab_gpl_section_;
3152 Elf_Scn* ksymtab_gpl_reloc_section_;
3153 Elf_Scn* ksymtab_strings_section_;
3154 Elf_Scn* versym_section_;
3155 Elf_Scn* verdef_section_;
3156 Elf_Scn* verneed_section_;
3157 bool symbol_versionning_sections_loaded_;
3158 bool symbol_versionning_sections_found_;
3159 Dwarf_Die* cur_tu_die_;
3160 mutable dwarf_expr_eval_context dwarf_expr_eval_context_;
3161 // A set of maps (one per kind of die source) that associates a decl
3162 // string representation with the DIEs (offsets) representing that
3164 mutable die_source_dependant_container_set<istring_dwarf_offsets_map_type>
3165 decl_die_repr_die_offsets_maps_;
3166 // A set of maps (one per kind of die source) that associates a type
3167 // string representation with the DIEs (offsets) representing that
3169 mutable die_source_dependant_container_set<istring_dwarf_offsets_map_type>
3170 type_die_repr_die_offsets_maps_;
3171 mutable die_source_dependant_container_set<die_istring_map_type>
3172 die_qualified_name_maps_;
3173 mutable die_source_dependant_container_set<die_istring_map_type>
3174 die_pretty_repr_maps_;
3175 mutable die_source_dependant_container_set<die_istring_map_type>
3176 die_pretty_type_repr_maps_;
3177 // A set of maps (one per kind of die source) that associates the
3178 // offset of a decl die to its corresponding decl artifact.
3179 mutable die_source_dependant_container_set<die_artefact_map_type>
3180 decl_die_artefact_maps_;
3181 // A set of maps (one per kind of die source) that associates the
3182 // offset of a type die to its corresponding type artifact.
3183 mutable die_source_dependant_container_set<die_artefact_map_type>
3184 type_die_artefact_maps_;
3185 /// A set of vectors (one per kind of die source) that associates
3186 /// the offset of a type DIE to the offset of its canonical DIE.
3187 mutable die_source_dependant_container_set<offset_offset_map_type>
3188 canonical_type_die_offsets_;
3189 /// A set of vectors (one per kind of die source) that associates
3190 /// the offset of a decl DIE to the offset of its canonical DIE.
3191 mutable die_source_dependant_container_set<offset_offset_map_type>
3192 canonical_decl_die_offsets_;
3193 /// A map that associates a function type representations to
3194 /// function types, inside a translation unit.
3195 mutable istring_fn_type_map_type per_tu_repr_to_fn_type_maps_;
3197 die_class_or_union_map_type die_wip_classes_map_;
3198 die_class_or_union_map_type alternate_die_wip_classes_map_;
3199 die_class_or_union_map_type type_unit_die_wip_classes_map_;
3200 die_function_type_map_type die_wip_function_types_map_;
3201 die_function_type_map_type alternate_die_wip_function_types_map_;
3202 die_function_type_map_type type_unit_die_wip_function_types_map_;
3203 die_function_decl_map_type die_function_with_no_symbol_map_;
3204 vector<Dwarf_Off> types_to_canonicalize_;
3205 vector<Dwarf_Off> alt_types_to_canonicalize_;
3206 vector<Dwarf_Off> type_unit_types_to_canonicalize_;
3207 vector<type_base_sptr> extra_types_to_canonicalize_;
3208 string_classes_map decl_only_classes_map_;
3209 die_tu_map_type die_tu_map_;
3210 corpus_group_sptr cur_corpus_group_;
3211 corpus_sptr cur_corpus_;
3212 translation_unit_sptr cur_tu_;
3213 scope_decl_sptr nil_scope_;
3214 scope_stack_type scope_stack_;
3215 offset_offset_map_type primary_die_parent_map_;
3216 // A map that associates each tu die to a vector of unit import
3217 // points, in the main debug info
3218 tu_die_imported_unit_points_map_type tu_die_imported_unit_points_map_;
3219 // A map that associates each tu die to a vector of unit import
3220 // points, in the alternate debug info
3221 tu_die_imported_unit_points_map_type alt_tu_die_imported_unit_points_map_;
3222 tu_die_imported_unit_points_map_type type_units_tu_die_imported_unit_points_map_;
3223 // A DIE -> parent map for DIEs coming from the alternate debug info
3225 offset_offset_map_type alternate_die_parent_map_;
3226 offset_offset_map_type type_section_die_parent_map_;
3227 list<var_decl_sptr> var_decls_to_add_;
3228 addr_elf_symbol_sptr_map_sptr fun_addr_sym_map_;
3229 // On PPC64, the function entry point address is different from the
3230 // GElf_Sym::st_value value, which is the address of the descriptor
3231 // of the function. The map below thus associates the address of
3232 // the entry point to the function symbol. If we are not on ppc64,
3233 // then this map ought to be empty. Only the fun_addr_sym_map_ is
3234 // used in that case. On ppc64, though, both maps are used.
3235 addr_elf_symbol_sptr_map_sptr fun_entry_addr_sym_map_;
3236 string_elf_symbols_map_sptr fun_syms_;
3237 addr_elf_symbol_sptr_map_sptr var_addr_sym_map_;
3238 string_elf_symbols_map_sptr var_syms_;
3239 string_elf_symbols_map_sptr undefined_fun_syms_;
3240 string_elf_symbols_map_sptr undefined_var_syms_;
3241 address_set_sptr linux_exported_fn_syms_;
3242 address_set_sptr linux_exported_var_syms_;
3243 address_set_sptr linux_exported_gpl_fn_syms_;
3244 address_set_sptr linux_exported_gpl_var_syms_;
3245 vector<string> dt_needed_;
3247 string elf_architecture_;
3248 corpus::exported_decls_builder* exported_decls_builder_;
3249 options_type options_;
3254 /// Constructor of read_context.
3256 /// @param elf_path the path to the elf file the context is to be
3259 /// @param debug_info_root_paths a vector of pointers to the path to
3260 /// the root directory under which the debug info is to be found for
3261 /// @p elf_path. Leave this empty if the debug info is not in a
3264 /// @param environment the environment used by the current context.
3265 /// This environment contains resources needed by the reader and by
3266 /// the types and declarations that are to be created later. Note
3267 /// that ABI artifacts that are to be compared all need to be
3268 /// created within the same environment.
3270 /// Please also note that the life time of this environment object
3271 /// must be greater than the life time of the resulting @ref
3272 /// read_context, as the context uses resources that are allocated in
3273 /// the environment.
3275 /// @param load_all_types if set to false only the types that are
3276 /// reachable from publicly exported declarations (of functions and
3277 /// variables) are read. If set to true then all types found in the
3278 /// debug information are loaded.
3280 /// @param linux_kernel_mode if set to true, then consider the special
3281 /// linux kernel symbol tables when determining if a symbol is
3282 /// exported or not.
3283 read_context(const string& elf_path,
3284 const vector<char**>& debug_info_root_paths,
3285 ir::environment* environment,
3286 bool load_all_types,
3287 bool linux_kernel_mode)
3289 initialize(elf_path, debug_info_root_paths, environment,
3290 load_all_types, linux_kernel_mode);
3293 /// Initializer of read_context.
3295 /// @param elf_path the path to the elf file the context is to be
3298 /// @param debug_info_root_paths a vector of pointers to the path to
3299 /// the root directory under which the debug info is to be found for
3300 /// @p elf_path. Leave this empty if the debug info is not in a
3303 /// @param environment the environment used by the current context.
3304 /// This environment contains resources needed by the reader and by
3305 /// the types and declarations that are to be created later. Note
3306 /// that ABI artifacts that are to be compared all need to be
3307 /// created within the same environment.
3309 /// Please also note that the life time of this environment object
3310 /// must be greater than the life time of the resulting @ref
3311 /// read_context, as the context uses resources that are allocated in
3312 /// the environment.
3314 /// @param load_all_types if set to false only the types that are
3315 /// reachable from publicly exported declarations (of functions and
3316 /// variables) are read. If set to true then all types found in the
3317 /// debug information are loaded.
3319 /// @param linux_kernel_mode if set to true, then consider the
3320 /// special linux kernel symbol tables when determining if a symbol
3321 /// is exported or not.
3323 initialize(const string& elf_path,
3324 const vector<char**>& debug_info_root_paths,
3325 ir::environment* environment,
3326 bool load_all_types,
3327 bool linux_kernel_mode)
3336 elf_path_ = elf_path;
3339 rodata_section_ = 0;
3342 symtab_section_ = 0;
3344 ksymtab_format_ = UNDEFINED_KSYMTAB_FORMAT;
3345 ksymtab_entry_size_ = 0;
3346 nb_ksymtab_entries_ = 0;
3347 nb_ksymtab_gpl_entries_ = 0;
3348 ksymtab_section_ = 0;
3349 ksymtab_reloc_section_ = 0;
3350 ksymtab_gpl_section_ = 0;
3351 ksymtab_gpl_reloc_section_ = 0;
3352 ksymtab_strings_section_ = 0;
3353 versym_section_ = 0;
3354 verdef_section_ = 0;
3355 verneed_section_ = 0;
3356 symbol_versionning_sections_loaded_ = 0;
3357 symbol_versionning_sections_found_ = 0;
3359 exported_decls_builder_ = 0;
3361 clear_alt_debug_info_data();
3364 decl_die_repr_die_offsets_maps_.clear();
3365 type_die_repr_die_offsets_maps_.clear();
3366 die_qualified_name_maps_.clear();
3367 die_pretty_repr_maps_.clear();
3368 die_pretty_type_repr_maps_.clear();
3369 decl_die_artefact_maps_.clear();
3370 type_die_artefact_maps_.clear();
3371 canonical_type_die_offsets_.clear();
3372 canonical_decl_die_offsets_.clear();
3373 die_wip_classes_map_.clear();
3374 alternate_die_wip_classes_map_.clear();
3375 type_unit_die_wip_classes_map_.clear();
3376 die_wip_function_types_map_.clear();
3377 alternate_die_wip_function_types_map_.clear();
3378 type_unit_die_wip_function_types_map_.clear();
3379 die_function_with_no_symbol_map_.clear();
3380 types_to_canonicalize_.clear();
3381 alt_types_to_canonicalize_.clear();
3382 type_unit_types_to_canonicalize_.clear();
3383 extra_types_to_canonicalize_.clear();
3384 decl_only_classes_map_.clear();
3385 die_tu_map_.clear();
3386 cur_corpus_group_.reset();
3387 cur_corpus_.reset();
3389 primary_die_parent_map_.clear();
3390 tu_die_imported_unit_points_map_.clear();
3391 alt_tu_die_imported_unit_points_map_.clear();
3392 type_units_tu_die_imported_unit_points_map_.clear();
3393 alternate_die_parent_map_.clear();
3394 type_section_die_parent_map_.clear();
3395 var_decls_to_add_.clear();
3396 fun_addr_sym_map_.reset();
3397 fun_entry_addr_sym_map_.reset();
3399 var_addr_sym_map_.reset();
3401 undefined_fun_syms_.reset();
3402 undefined_var_syms_.reset();
3403 linux_exported_fn_syms_.reset();
3404 linux_exported_var_syms_.reset();
3405 linux_exported_gpl_fn_syms_.reset();
3406 linux_exported_gpl_var_syms_.reset();
3409 elf_architecture_.clear();
3411 clear_per_translation_unit_data();
3413 memset(&offline_callbacks_, 0, sizeof(offline_callbacks_));
3414 create_default_dwfl(debug_info_root_paths);
3415 options_.env = environment;
3416 options_.load_in_linux_kernel_mode = linux_kernel_mode;
3417 options_.load_all_types = load_all_types;
3418 load_in_linux_kernel_mode(linux_kernel_mode);
3421 /// Clear the resources related to the alternate DWARF data.
3423 clear_alt_debug_info_data()
3431 dwarf_end(alt_dwarf_);
3434 alt_debug_info_path_.clear();
3438 /// Destructor of the @ref read_context type.
3441 clear_alt_debug_info_data();
3444 /// Clear the data that is relevant only for the current translation
3445 /// unit being read. The rest of the data is relevant for the
3446 /// entire ABI corpus.
3448 clear_per_translation_unit_data()
3450 while (!scope_stack().empty())
3451 scope_stack().pop();
3452 var_decls_to_re_add_to_tree().clear();
3453 per_tu_repr_to_fn_type_maps().clear();
3456 /// Clear the data that is relevant for the current corpus being
3459 clear_per_corpus_data()
3461 die_qualified_name_maps_.clear();
3462 die_pretty_repr_maps_.clear();
3463 die_pretty_type_repr_maps_.clear();
3464 clear_types_to_canonicalize();
3467 /// Getter of the options of the read context.
3469 /// @return the options of the read context.
3474 /// Getter of the options of the read context.
3476 /// @return the options of the read context.
3481 /// Setter of the options of the read context.
3483 /// @param o the new options of the read context.
3485 options(const options_type& o)
3488 /// Getter for the current environment.
3490 /// @return the current environment.
3491 const ir::environment*
3493 {return options_.env;}
3495 /// Getter for the current environment.
3497 /// @return the current environment.
3500 {return options_.env;}
3502 /// Setter for the current environment.
3504 /// @param env the new current environment.
3506 env(ir::environment* env)
3507 {options_.env = env;}
3509 /// Getter of the suppression specifications to be used during
3510 /// ELF/DWARF parsing.
3512 /// @return the suppression specifications.
3513 const suppr::suppressions_type&
3514 get_suppressions() const
3517 /// Getter of the suppression specifications to be used during
3518 /// ELF/DWARF parsing.
3520 /// @return the suppression specifications.
3521 suppr::suppressions_type&
3525 /// Getter for the callbacks of the Dwarf Front End library of
3526 /// elfutils that is used by this reader to read dwarf.
3528 /// @return the callbacks.
3529 const Dwfl_Callbacks*
3530 offline_callbacks() const
3531 {return &offline_callbacks_;}
3533 /// Getter for the callbacks of the Dwarf Front End library of
3534 /// elfutils that is used by this reader to read dwarf.
3535 /// @return the callbacks.
3538 {return &offline_callbacks_;}
3540 /// Constructor for a default Dwfl handle that knows how to load debug
3541 /// info from a library or executable elf file.
3543 /// @param debug_info_root_paths a vector of pointers to the root
3544 /// path under which to look for the debug info of the elf files
3545 /// that are later handled by the Dwfl. This is for cases where the
3546 /// debug info is split into a different file from the binary we
3547 /// want to inspect. On Red Hat compatible systems, this root path
3548 /// is usually /usr/lib/debug by default. If this argument is set
3549 /// to the empty set, then "./debug" and /usr/lib/debug will be
3550 /// searched for sub-directories containing the debug info file.
3551 /// Note that for now, elfutils wants this path to be absolute
3552 /// otherwise things just don't work and the debug info is not
3555 /// @return the constructed Dwfl handle.
3557 create_default_dwfl(const vector<char**>& debug_info_root_paths)
3559 offline_callbacks()->find_debuginfo = dwfl_standard_find_debuginfo;
3560 offline_callbacks()->section_address = dwfl_offline_section_address;
3561 offline_callbacks()->debuginfo_path =
3562 debug_info_root_paths.empty() ? 0 : debug_info_root_paths.front();
3563 handle_.reset(dwfl_begin(offline_callbacks()),
3565 debug_info_root_paths_ = debug_info_root_paths;
3569 dwarf_version() const
3570 {return dwarf_version_;}
3573 dwarf_version(unsigned short v)
3574 {dwarf_version_ = v;}
3576 /// Getter for a smart pointer to a handle on the dwarf front end
3577 /// library that we use to read dwarf.
3579 /// @return the dwfl handle.
3584 /// Setter for a smart pointer to a handle on the dwarf front end
3585 /// library that we use to read dwarf.
3587 /// @param h the new dwfl handle.
3589 dwfl_handle(dwfl_sptr& h)
3594 {return elf_module_;}
3596 /// Return the ELF descriptor for the binary we are analyzing.
3598 /// @return a pointer to the Elf descriptor representing the binary
3599 /// we are analyzing.
3603 if (elf_handle_ == 0)
3608 elf_handle_ = dwfl_module_getelf(elf_module(), &bias);
3614 /// Return the ELF descriptor used for DWARF access.
3616 /// This can be the same as read_context::elf_handle() above, if the
3617 /// DWARF info is in the same ELF file as the one of the binary we
3618 /// are analyzing. It is different if, e.g., the debug info is split
3619 /// from the ELF file we are analyzing.
3621 /// @return a pointer to the ELF descriptor used to access debug
3624 dwarf_elf_handle() const
3625 {return dwarf_getelf(dwarf());}
3627 /// Test if the debug information is in a separate ELF file wrt the
3628 /// main ELF file of the program (application or shared library) we
3631 /// @return true if the debug information is in a separate ELF file
3632 /// compared to the main ELF file of the program (application or
3633 /// shared library) that we are looking at.
3635 dwarf_is_splitted() const
3636 {return dwarf_elf_handle() != elf_handle();}
3638 /// Add paths to the set of paths under which to look for split
3639 /// debuginfo files.
3641 /// @param debug_info_root_paths the paths to add.
3643 add_debug_info_root_paths(const vector<char **>& debug_info_root_paths)
3645 debug_info_root_paths_.insert(debug_info_root_paths_.end(),
3646 debug_info_root_paths.begin(),
3647 debug_info_root_paths.end());
3650 /// Add a path to the set of paths under which to look for split
3651 /// debuginfo files.
3653 /// @param debug_info_root_path the path to add.
3655 add_debug_info_root_path(char** debug_info_root_path)
3656 {debug_info_root_paths_.push_back(debug_info_root_path);}
3658 /// Find the alternate debuginfo file associated to a given elf file.
3660 /// @param elf_module represents the elf file to consider.
3662 /// @param alt_file_name the resulting path to the alternate
3663 /// debuginfo file found. This is set iff the function returns a
3666 find_alt_debug_info(Dwfl_Module *elf_module,
3667 string& alt_file_name,
3671 result = dwarf_reader::find_alt_debug_info(elf_module,
3672 debug_info_root_paths_,
3673 alt_file_name, alt_fd);
3677 /// Load the debug info associated with an elf file that is at a
3680 /// @return a pointer to the DWARF debug info pointer upon
3681 /// successful debug info loading, NULL otherwise.
3692 dwfl_report_offline(dwfl_handle().get(),
3693 basename(const_cast<char*>(elf_path().c_str())),
3696 dwfl_report_end(dwfl_handle().get(), 0, 0);
3698 Dwarf_Addr bias = 0;
3699 dwarf_ = dwfl_module_getdwarf(elf_module_, &bias);
3700 // Look for split debuginfo files under multiple possible
3702 for (vector<char**>::const_iterator i = debug_info_root_paths_.begin();
3703 dwarf_ == 0 && i != debug_info_root_paths_.end();
3706 offline_callbacks()->debuginfo_path = *i;
3707 dwarf_ = dwfl_module_getdwarf(elf_module_, &bias);
3711 alt_dwarf_ = find_alt_debug_info(elf_module_,
3712 alt_debug_info_path_,
3718 /// Return the main debug info we are looking at.
3720 /// @return the main debug info.
3725 /// Return the alternate debug info we are looking at.
3727 /// Note that "alternate debug info sections" is a GNU extension as
3728 /// of DWARF4 and is described at
3729 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
3731 /// @return the alternate debug info.
3734 {return alt_dwarf_;}
3736 /// Return the correct debug info, depending on the DIE source we
3739 /// @param source the DIE source to consider.
3741 /// @return the right debug info, depending on @p source.
3743 dwarf_per_die_source(die_source source) const
3748 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
3749 case TYPE_UNIT_DIE_SOURCE:
3752 case ALT_DEBUG_INFO_DIE_SOURCE:
3753 result = alt_dwarf();
3755 case NO_DEBUG_INFO_DIE_SOURCE:
3756 case NUMBER_OF_DIE_SOURCES:
3757 ABG_ASSERT_NOT_REACHED;
3762 /// Return the path to the alternate debug info as contained in the
3763 /// .gnu_debugaltlink section of the main elf file.
3765 /// Note that "alternate debug info sections" is a GNU extension as
3766 /// of DWARF4 and is described at
3767 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
3769 /// @return the path to the alternate debug info file, or an empty
3770 /// path if no alternate debug info file is associated.
3772 alt_debug_info_path() const
3773 {return alt_debug_info_path_;}
3775 /// Return the path to the ELF path we are reading.
3777 /// @return the elf path.
3782 /// Return the bss section of the ELF file we are reading.
3784 /// The first time this function is called, the ELF file is scanned
3785 /// to look for the section we are looking for. Once the section is
3786 /// found, it's cached.
3788 /// Subsequent calls to this function just return the cached section.
3791 /// @return the bss section.
// The result of the scan is kept in bss_section_.
3796 bss_section_ = find_bss_section(elf_handle());
3797 return bss_section_;
3800 /// Return the text section of the ELF file we are reading.
3802 /// The first time this function is called, the ELF file is scanned
3803 /// to look for the section we are looking for. Once the section is
3804 /// found, it's cached.
3806 /// Subsequent calls to this function just return the cached section.
3809 /// @return the text section.
3811 text_section() const
// The result of the scan is kept in text_section_.
3814 text_section_ = find_text_section(elf_handle());
3815 return text_section_;
3818 /// Return the rodata section of the ELF file we are reading.
3820 /// The first time this function is called, the ELF file is scanned
3821 /// to look for the section we are looking for. Once the section is
3822 /// found, it's cached.
3824 /// Subsequent calls to this function just return the cached section.
3827 /// @return the rodata section.
3829 rodata_section() const
// Only scan the ELF file while nothing is cached. A null result is
// not remembered, so the scan is attempted again on the next call.
3831 if (!rodata_section_)
3832 rodata_section_ =find_rodata_section(elf_handle());
3833 return rodata_section_;
3836 /// Return the data section of the ELF file we are reading.
3838 /// The first time this function is called, the ELF file is scanned
3839 /// to look for the section we are looking for. Once the section is
3840 /// found, it's cached.
3842 /// Subsequent calls to this function just return the cached section.
3845 /// @return the data section.
3847 data_section() const
// The result of the scan is kept in data_section_.
3850 data_section_ = find_data_section(elf_handle());
3851 return data_section_;
3854 /// Return the data1 section of the ELF file we are reading.
3856 /// The first time this function is called, the ELF file is scanned
3857 /// to look for the section we are looking for. Once the section is
3858 /// found, it's cached.
3860 /// Subsequent calls to this function just return the cached section.
3863 /// @return the data1 section.
3865 data1_section() const
// Only scan the ELF file while nothing is cached. A null result is
// not remembered, so the scan is attempted again on the next call.
3867 if (!data1_section_)
3868 data1_section_ = find_data1_section(elf_handle());
3869 return data1_section_;
3874 {return cur_tu_die_;} // Getter body: yields the stored cur_tu_die_ pointer.
/// Setter of the current translation unit DIE pointer.
///
/// The pointer is stored as is in cur_tu_die_; no copy of the DIE is
/// made here.
///
/// @param cur_tu_die the new pointer to store.
3877 cur_tu_die(Dwarf_Die* cur_tu_die)
3878 {cur_tu_die_ = cur_tu_die;}
/// Getter of the DWARF expression evaluation context.
///
/// Note that this const member function hands out a non-const
/// reference to dwarf_expr_eval_context_.
///
/// @return a reference to dwarf_expr_eval_context_.
3880 dwarf_expr_eval_context&
3881 dwarf_expr_eval_ctxt() const
3882 {return dwarf_expr_eval_context_;}
3884 /// Getter (const overload) of the maps set that associates a
3885 /// representation of a decl DIE to a vector of offsets of DIEs having that representation.
3887 /// @return a const reference to the maps set that associates a representation of a decl
3888 /// DIE to a vector of offsets of DIEs having that representation.
3889 const die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
3890 decl_die_repr_die_offsets_maps() const
3891 {return decl_die_repr_die_offsets_maps_;}
3893 /// Getter (non-const overload) of the maps set that associates a
3894 /// representation of a decl DIE to a vector of offsets of DIEs having that representation.
3896 /// @return a mutable reference to the maps set that associates a representation of a decl
3897 /// DIE to a vector of offsets of DIEs having that representation.
3898 die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
3899 decl_die_repr_die_offsets_maps()
3900 {return decl_die_repr_die_offsets_maps_;}
3902 /// Getter (const overload) of the maps set that associates a representation of a type
3903 /// DIE to a vector of offsets of DIEs having that representation.
3905 /// @return a const reference to the maps set that associates a representation of a type
3906 /// DIE to a vector of offsets of DIEs having that representation.
3907 const die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
3908 type_die_repr_die_offsets_maps() const
3909 {return type_die_repr_die_offsets_maps_;}
3911 /// Getter (non-const overload) of the maps set that associates a representation of a type
3912 /// DIE to a vector of offsets of DIEs having that representation.
3914 /// @return a mutable reference to the maps set that associates a representation of a type
3915 /// DIE to a vector of offsets of DIEs having that representation.
3916 die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
3917 type_die_repr_die_offsets_maps()
3918 {return type_die_repr_die_offsets_maps_;}
3921 /// Compute the offset of the canonical DIE of a given DIE.
3923 /// @param die the DIE to consider.
3925 /// @param canonical_die_offset out parameter. This is set to the
3926 /// offset of the resulting canonical DIE that was computed.
3928 /// @param die_as_type if true, @p die is to be considered as a type.
3931 compute_canonical_die_offset(const Dwarf_Die *die,
3932 Dwarf_Off &canonical_die_offset,
3933 bool die_as_type) const
// Select the offset map to update: either the one for canonical type
// DIEs or the one for canonical decl DIEs.
3935 offset_offset_map_type &canonical_dies =
3937 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
3938 get_container(*this, die)
3939 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
3940 get_container(*this, die);
3942 Dwarf_Die canonical_die;
3943 compute_canonical_die(die, canonical_dies, canonical_die, die_as_type);
// Report the canonical DIE to the caller as an offset.
3945 canonical_die_offset = dwarf_dieoffset(&canonical_die);
3948 /// Compute (find) the canonical DIE of a given DIE.
3950 /// @param die the DIE to consider.
3952 /// @param canonical_dies the container in which the canonical DIEs are
3953 /// stored. The index of each element is the offset of the DIE we
3954 /// want the canonical DIE for. And the value of the element at
3955 /// that index is the canonical DIE offset we are looking for.
3957 /// @param canonical_die out parameter. This is set to the
3958 /// resulting canonical DIE that was computed.
3960 /// @param die_as_type if true, @p die is to be considered as a type.
3963 compute_canonical_die(const Dwarf_Die *die,
3964 offset_offset_map_type& canonical_dies,
3965 Dwarf_Die &canonical_die,
3966 bool die_as_type) const
// The source of the DIE must be known; failing to determine it is
// treated as an assertion failure.
3969 ABG_ASSERT(get_die_source(die, source));
3971 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
// Delegate to the overload that works on an (offset, source) pair.
3973 compute_canonical_die(die_offset, source,
3975 canonical_die, die_as_type);
3978 /// Compute (find) the canonical DIE of a given DIE.
3980 /// @param die_offset the offset of the DIE to consider.
3982 /// @param source the source of the DIE to consider.
3984 /// @param canonical_dies the container in which the canonical DIEs are
3985 /// stored. The index of each element is the offset of the DIE we
3986 /// want the canonical DIE for. And the value of the element at
3987 /// that index is the canonical DIE offset we are looking for.
3989 /// @param canonical_die out parameter. This is set to the
3990 /// resulting canonical DIE that was computed.
3992 /// @param die_as_type if true, the DIE is to be considered as a type.
3995 compute_canonical_die(Dwarf_Off die_offset,
3997 offset_offset_map_type& canonical_dies,
3998 Dwarf_Die &canonical_die,
3999 bool die_as_type) const
4001 // The map that associates the string representation of 'die'
4002 // with a vector of offsets of potentially equivalent DIEs.
4003 istring_dwarf_offsets_map_type& map =
4005 ? (const_cast<read_context*>(this)->
4006 type_die_repr_die_offsets_maps().get_container(source))
4007 : (const_cast<read_context*>(this)->
4008 decl_die_repr_die_offsets_maps().get_container(source));
// NOTE(review): this calls dwarf_offdie directly, whereas
// get_die_from_offset uses dwarf_offdie_types when the source is
// TYPE_UNIT_DIE_SOURCE. Confirm this function is never reached with
// a type-unit source, or route the lookup through
// get_die_from_offset.
4011 ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), die_offset, &die));
4013 // The variable 'name' is the string representation of 'die'.
4015 // Even if die_as_type is true -- which means that 'die' is said
4016 // to be considered as a type -- we always consider a
4017 // DW_TAG_subprogram DIE as a decl here, as far as its string
4018 // representation is concerned.
4019 interned_string name =
4021 ? get_die_pretty_type_representation(&die, /*where=*/0)
4022 : get_die_pretty_representation(&die, /*where=*/0);
4024 Dwarf_Off canonical_die_offset = 0;
4025 istring_dwarf_offsets_map_type::iterator i = map.find(name);
// Register die_offset as the only entry for 'name' and make the DIE
// its own canonical DIE.
4028 dwarf_offsets_type offsets;
4029 offsets.push_back(die_offset);
4030 map[name] = offsets;
4031 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
4032 get_die_from_offset(source, die_offset, &canonical_die);
4036 if (odr_is_relevant(&die))
4038 // ODR is relevant for this DIE. In this case, all types with
4039 // the same name are considered equivalent. So the array
4040 // i->second should only have one element. If not, then
4041 // the DIEs referenced in the array should all compare equal.
4042 // Otherwise, this is an ODR violation. In any case, return
4043 // the first element of the array.
4044 // ABG_ASSERT(i->second.size() == 1);
4045 canonical_die_offset = i->second.front();
4046 get_die_from_offset(source, canonical_die_offset, &canonical_die);
// NOTE(review): the offset recorded as canonical here is die_offset
// itself, although canonical_die was just set from
// canonical_die_offset. get_canonical_die records
// canonical_die_offset in its equivalent branch. Confirm which of
// the two is intended.
4047 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
// Compare 'die' against each DIE already recorded under the same
// representation; the first one that compares equal becomes the
// canonical DIE.
4051 Dwarf_Off cur_die_offset;
4052 Dwarf_Die potential_canonical_die;
4053 for (dwarf_offsets_type::const_iterator o = i->second.begin();
4054 o != i->second.end();
4057 cur_die_offset = *o;
4058 get_die_from_offset(source, cur_die_offset, &potential_canonical_die);
4059 if (compare_dies(*this, &die, &potential_canonical_die,
4060 /*update_canonical_dies_on_the_fly=*/false))
4062 canonical_die_offset = cur_die_offset;
4063 set_canonical_die_offset(canonical_dies, die_offset,
4064 canonical_die_offset);
4065 get_die_from_offset(source, canonical_die_offset, &canonical_die);
// Make 'die' its own canonical DIE and add it to the list of DIEs
// recorded for this representation.
4070 canonical_die_offset = die_offset;
4071 i->second.push_back(die_offset);
4072 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
4073 get_die_from_offset(source, canonical_die_offset, &canonical_die);
4076 /// Getter of the canonical DIE of a given DIE.
4078 /// @param die the DIE to consider.
4080 /// @param canonical_die output parameter. Is set to the resulting
4081 /// canonical die, if this function returns true.
4083 /// @param where the offset of the logical DIE we are supposed to be
4084 /// calling this function from. It is passed on to the helpers that compute the representation of @p die.
4087 /// @param die_as_type if set to yes, it means @p die is to be
4088 /// considered as a type DIE.
4090 /// @return true iff a canonical DIE was found for @p die.
4092 get_canonical_die(const Dwarf_Die *die,
4093 Dwarf_Die &canonical_die,
4095 bool die_as_type) const
4098 ABG_ASSERT(get_die_source(die, source));
4100 offset_offset_map_type &canonical_dies =
4102 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
4103 get_container(source)
4104 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
4105 get_container(source);
// A non-zero offset from get_canonical_die_offset means a canonical
// DIE is already recorded for this DIE; load it.
4107 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
4108 if (Dwarf_Off canonical_die_offset =
4109 get_canonical_die_offset(canonical_dies, die_offset))
4111 get_die_from_offset(source, canonical_die_offset, &canonical_die);
4115 // The map that associates the string representation of 'die'
4116 // with a vector of offsets of potentially equivalent DIEs.
4117 istring_dwarf_offsets_map_type& map =
4119 ? (const_cast<read_context*>(this)->
4120 type_die_repr_die_offsets_maps().get_container(*this, die))
4121 : (const_cast<read_context*>(this)->
4122 decl_die_repr_die_offsets_maps().get_container(*this, die));
4124 // The variable 'name' is the string representation of 'die'.
4126 // NOTE(review): the text below says a DW_TAG_subprogram DIE is
4127 // always considered as a decl, but the DW_TAG_subprogram test is
4128 // commented out in the condition that follows, so only die_as_type
4129 // decides which representation is used. Confirm and update.
4130 interned_string name =
4131 (die_as_type /*&& dwarf_tag(die) != DW_TAG_subprogram*/)
4132 ? get_die_pretty_type_representation(die, where)
4133 : get_die_pretty_representation(die, where);
4135 istring_dwarf_offsets_map_type::iterator i = map.find(name);
4139 if (odr_is_relevant(die))
4141 // ODR is relevant for this DIE. In this case, all types with
4142 // the same name are considered equivalent. So the array
4143 // i->second should only have one element. If not, then
4144 // the DIEs referenced in the array should all compare equal.
4145 // Otherwise, this is an ODR violation. In any case, return
4146 // the first element of the array.
4147 // ABG_ASSERT(i->second.size() == 1);
4148 Dwarf_Off canonical_die_offset = i->second.front();
4149 get_die_from_offset(source, canonical_die_offset, &canonical_die);
4150 set_canonical_die_offset(canonical_dies,
4152 canonical_die_offset);
// Compare 'die' against each DIE recorded under the same
// representation. canonical_die is used as the scratch DIE for every
// candidate.
4156 Dwarf_Off cur_die_offset;
4157 for (dwarf_offsets_type::const_iterator o = i->second.begin();
4158 o != i->second.end();
4161 cur_die_offset = *o;
4162 get_die_from_offset(source, cur_die_offset, &canonical_die);
4163 // compare die and canonical_die.
4164 if (compare_dies(*this, die, &canonical_die,
4165 /*update_canonical_dies_on_the_fly=*/true))
4167 set_canonical_die_offset(canonical_dies,
4177 /// Retrieve the canonical DIE of a given DIE.
4179 /// The canonical DIE is a DIE that is structurally equivalent to @p die.
4182 /// Note that this function caches the canonical DIE that was
4183 /// computed. Subsequent invocations of this function on the same
4184 /// DIE return the same cached DIE.
4186 /// @param die the DIE to get a canonical type for.
4188 /// @param canonical_die the resulting canonical DIE.
4190 /// @param where the offset of the logical DIE we are supposed to be
4191 /// calling this function from. It is passed on to the helpers that compute the representation of @p die.
4194 /// @param die_as_type if true, consider DIE is a type.
4196 /// @return true if an *existing* canonical DIE was found.
4197 /// Otherwise, @p die is considered as being a canonical DIE for
4198 /// itself. @p canonical_die is set to the canonical die in either case.
4201 get_or_compute_canonical_die(const Dwarf_Die* die,
4202 Dwarf_Die& canonical_die,
4204 bool die_as_type) const
4207 ABG_ASSERT(get_die_source(die, source));
4209 offset_offset_map_type &canonical_dies =
4211 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
4212 get_container(source)
4213 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
4214 get_container(source);
4216 Dwarf_Off initial_die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
// A non-zero offset from get_canonical_die_offset means a canonical
// DIE is already recorded for this DIE; load it.
4218 if (Dwarf_Off canonical_die_offset =
4219 get_canonical_die_offset(canonical_dies,
4220 initial_die_offset))
4222 get_die_from_offset(source, canonical_die_offset, &canonical_die);
4226 // The map that associates the string representation of 'die'
4227 // with a vector of offsets of potentially equivalent DIEs.
4228 istring_dwarf_offsets_map_type& map =
4230 ? (const_cast<read_context*>(this)->
4231 type_die_repr_die_offsets_maps().get_container(*this, die))
4232 : (const_cast<read_context*>(this)->
4233 decl_die_repr_die_offsets_maps().get_container(*this, die));
4235 // The variable 'name' is the string representation of 'die'.
4237 // Even if die_as_type is true -- which means that 'die' is said
4238 // to be considered as a type -- we always consider a
4239 // DW_TAG_subprogram DIE as a decl here, as far as its string
4240 // representation is concerned.
4241 interned_string name =
4243 ? get_die_pretty_type_representation(die, where)
4244 : get_die_pretty_representation(die, where);
4246 istring_dwarf_offsets_map_type::iterator i = map.find(name);
// Register the DIE as the only entry for 'name' and make it its own
// canonical DIE.
4249 dwarf_offsets_type offsets;
4250 offsets.push_back(initial_die_offset);
4251 map[name] = offsets;
4252 get_die_from_offset(source, initial_die_offset, &canonical_die);
4253 set_canonical_die_offset(canonical_dies,
4255 initial_die_offset);
4259 if (odr_is_relevant(die))
4261 // ODR is relevant for this DIE. In this case, all types with
4262 // the same name are considered equivalent. So the array
4263 // i->second should only have one element. If not, then
4264 // the DIEs referenced in the array should all compare equal.
4265 // Otherwise, this is an ODR violation. In any case, return
4266 // the first element of the array.
4267 // ABG_ASSERT(i->second.size() == 1);
4268 Dwarf_Off die_offset = i->second.front();
4269 get_die_from_offset(source, die_offset, &canonical_die);
4270 set_canonical_die_offset(canonical_dies,
4276 // walk i->second without any iterator (using a while loop rather
4277 // than a for loop) because compare_dies might add new content to
4278 // the end of the i->second vector during the walking.
// NOTE(review): the size 's' is read once, before the walk. Also, if
// compare_dies can insert new keys into 'map' itself, the iterator
// 'i' could be invalidated -- confirm.
4279 dwarf_offsets_type::size_type n = 0, s = i->second.size();
4282 Dwarf_Off die_offset = i->second[n];
4283 get_die_from_offset(source, die_offset, &canonical_die);
4284 // compare die and canonical_die.
4285 if (compare_dies(*this, die, &canonical_die,
4286 /*update_canonical_dies_on_the_fly=*/true))
4288 set_canonical_die_offset(canonical_dies,
4296 // We didn't find a canonical DIE for 'die'. So let's consider
4297 // that it is its own canonical DIE.
4298 get_die_from_offset(source, initial_die_offset, &canonical_die);
4299 i->second.push_back(initial_die_offset);
4300 set_canonical_die_offset(canonical_dies,
4302 initial_die_offset);
4307 /// Get the source of the DIE.
4309 /// The function returns an enumerator value saying if the DIE comes
4310 /// from the .debug_info section of the primary debug info file, the
4311 /// .debug_info section of the alternate debug info file, or the
4312 /// .debug_types section.
4314 /// @param die the DIE to get the source of.
4316 /// @param source out parameter. The function sets this parameter
4317 /// to the source of the DIE @p die iff it returns true.
4319 /// @return true iff the source of the DIE could be determined.
4322 get_die_source(const Dwarf_Die *die, die_source &source) const
// Forward to the overload that takes the DIE by reference.
4325 return get_die_source(*die, source);
4328 /// Get the source of the DIE.
4330 /// The function returns an enumerator value saying if the DIE comes
4331 /// from the .debug_info section of the primary debug info file, the
4332 /// .debug_info section of the alternate debug info file, or the
4333 /// .debug_types section.
4335 /// @param die the DIE to get the source of.
4337 /// @param source out parameter. The function sets this parameter
4338 /// to the source of the DIE @p die iff it returns true.
4340 /// @return true iff the source of the DIE could be determined.
4343 get_die_source(const Dwarf_Die &die, die_source &source) const
4347 uint8_t address_size = 0, offset_size = 0;
// Find the unit DIE that contains 'die'.
4348 if (!dwarf_diecu(const_cast<Dwarf_Die*>(&die),
4349 &cu_die, &address_size,
4353 Dwarf_Half version = 0;
4354 Dwarf_Off abbrev_offset = 0;
4355 uint64_t type_signature = 0;
4356 Dwarf_Off type_offset = 0;
// Get the DIE that describes the unit itself (cu_kind); its tag is
// what the source is derived from below.
4357 if (!dwarf_cu_die(cu_die.cu, &cu_kind,
4358 &version, &abbrev_offset,
4359 &address_size, &offset_size,
4360 &type_signature, &type_offset))
4363 int tag = dwarf_tag(&cu_kind);
// Compile units and partial units can belong to either the primary or
// the alternate debug info; the Dwarf handle of the unit tells which.
4365 if (tag == DW_TAG_compile_unit
4366 || tag == DW_TAG_partial_unit)
4368 Dwarf *die_dwarf = dwarf_cu_getdwarf(cu_die.cu);
4369 if (dwarf() == die_dwarf)
4370 source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
4371 else if (alt_dwarf() == die_dwarf)
4372 source = ALT_DEBUG_INFO_DIE_SOURCE;
// A unit whose Dwarf handle matches neither of the two is unexpected.
4374 ABG_ASSERT_NOT_REACHED;
// Type units have their own source.
4376 else if (tag == DW_TAG_type_unit)
4377 source = TYPE_UNIT_DIE_SOURCE;
4384 /// Getter for the DIE designated by an offset.
///
/// The lookup is asserted to succeed: an offset that cannot be
/// resolved for @p source trips ABG_ASSERT.
4386 /// @param source the source of the DIE to get.
4388 /// @param offset the offset of the DIE to get.
4390 /// @param die the resulting DIE. The pointer has to point to an
4391 /// allocated memory region.
4393 get_die_from_offset(die_source source, Dwarf_Off offset, Dwarf_Die *die) const
// DIEs from type units are looked up with dwarf_offdie_types; every
// other source goes through dwarf_offdie.
4395 if (source == TYPE_UNIT_DIE_SOURCE)
4396 ABG_ASSERT(dwarf_offdie_types(dwarf_per_die_source(source), offset, die));
4398 ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), offset, die));
4403 /// Add an entry to the relevant die->decl map.
4405 /// @param die the DIE to add to the map.
4407 /// @param decl the decl to consider.
4409 /// @param where_offset where in the DIE stream we logically are.
4411 /// @param do_associate_by_repr if true then this function
4412 /// associates the representation string of @p die with the
4413 /// declaration @p decl, in a corpus-wide manner. That is, in the
4414 /// entire current corpus, there is going to be just one declaration
4415 /// associated with a DIE of the string representation of @p die.
4417 /// NOTE(review): this comment used to describe a parameter named
4418 /// do_associate_by_repr_per_tu (per-translation-unit association
4419 /// of the representation string of @p die with @p decl). The
4420 /// signature below declares no such parameter, so that
4421 /// description was stale; confirm whether per-TU association is
4422 /// still supported elsewhere.
4424 associate_die_to_decl(Dwarf_Die* die,
4425 decl_base_sptr decl,
4426 size_t where_offset,
4427 bool do_associate_by_repr = false)
4430 ABG_ASSERT(get_die_source(die, source));
4432 die_artefact_map_type& m =
4433 decl_die_artefact_maps().get_container(source);
// When associating by representation, the map key is the offset of
// the canonical DIE of 'die' rather than the offset of 'die' itself.
4436 if (do_associate_by_repr)
4438 Dwarf_Die equiv_die;
4439 get_or_compute_canonical_die(die, equiv_die, where_offset,
4440 /*die_as_type=*/false);
4441 die_offset = dwarf_dieoffset(&equiv_die);
4444 die_offset = dwarf_dieoffset(die);
// Any decl previously stored under this offset is overwritten.
4446 m[die_offset] = decl;
4451 /// Lookup the decl for a given DIE.
4453 /// The lookup is delegated to lookup_artifact_from_die_offset, with
4454 /// the DIE considered as a decl (die_as_type is false).
4458 /// @param die_offset the offset of the DIE to consider.
4460 /// @param source where the DIE represented by @p die_offset comes from.
4463 /// Note that "alternate debug info sections" is a GNU extension as
4464 /// of DWARF4 and is described at
4465 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
4467 /// @return the resulting decl, or null if no decl is associated to
4468 /// the DIE represented by @p die_offset.
4470 lookup_decl_from_die_offset(Dwarf_Off die_offset, die_source source)
// is_decl narrows the artifact that was found down to a decl.
4472 decl_base_sptr result =
4473 is_decl(lookup_artifact_from_die_offset(die_offset, source,
4474 /*die_as_type=*/false));
4479 /// Get the qualified name of a given DIE.
4481 /// If the name of the DIE was already computed before just return
4482 /// that name from a cache. Otherwise, build the name and cache it.
4485 /// @param die the DIE to consider.
4487 /// @param where_offset where in the DIE stream we logically are.
4489 /// @return the interned string representing the qualified name of @p die.
4492 get_die_qualified_name(Dwarf_Die *die, size_t where_offset)
// The cache is keyed by DIE offset, in the container that matches the
// source of 'die'.
4495 die_istring_map_type& map =
4496 die_qualified_name_maps_.get_container(*this, die);
4498 size_t die_offset = dwarf_dieoffset(die);
4499 die_istring_map_type::const_iterator i = map.find(die_offset);
// Build the name, intern it and store it in the cache.
4503 read_context& ctxt = *const_cast<read_context*>(this);
4504 string qualified_name = die_qualified_name(ctxt, die, where_offset);
4505 interned_string istr = env()->intern(qualified_name);
4506 map[die_offset] = istr;
4513 /// Get the qualified name of a given DIE (const overload).
4515 /// This overload casts away the constness of the read context and
4516 /// forwards to the non-const overload, so it can update the name cache.
4519 /// @param die the DIE to consider.
4521 /// @param where_offset where in the DIE stream we logically are.
4523 /// @return the interned string representing the qualified name of @p die.
4526 get_die_qualified_name(Dwarf_Die *die, size_t where_offset) const
4528 return const_cast<read_context*>(this)->
4529 get_die_qualified_name(die, where_offset);
4532 /// Get the qualified name of a given DIE which is considered to be
4533 /// the DIE for a type.
4535 /// For instance, for a DW_TAG_subprogram DIE, this function
4536 /// computes the name of the function *type* that corresponds to the function.
4539 /// If the name of the DIE was already computed before just return
4540 /// that name from a cache. Otherwise, build the name and cache it.
///
/// An anonymous struct, class or union DIE is named "unnamed-at-"
/// followed by its expanded location, or by "noloc" when it has no
/// location.
4543 /// @param die the DIE to consider.
4545 /// @param where_offset where in the DIE stream we logically are.
4547 /// @return the interned string representing the qualified name of @p die.
4550 get_die_qualified_type_name(const Dwarf_Die *die, size_t where_offset) const
4554 // The name of the translation unit die is "".
4555 if (die == cur_tu_die())
4556 return env()->intern("");
// NOTE(review): this uses die_qualified_name_maps_, the same cache
// that get_die_qualified_name fills, keyed by the same DIE offset.
// Confirm that sharing one cache between decl names and type names
// is intended.
4558 die_istring_map_type& map =
4559 die_qualified_name_maps_.get_container(*const_cast<read_context*>(this),
4562 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
4563 die_istring_map_type::const_iterator i =
4564 map.find(die_offset);
4568 read_context& ctxt = *const_cast<read_context*>(this);
4569 string qualified_name;
4570 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// Anonymous aggregates have no name of their own, so one is
// synthesized from their source location.
4571 if ((tag == DW_TAG_structure_type
4572 || tag == DW_TAG_class_type
4573 || tag == DW_TAG_union_type)
4574 && die_is_anonymous(die))
4576 location l = die_location(*this, die);
4577 qualified_name = l ? l.expand() : "noloc";
4578 qualified_name = "unnamed-at-" + qualified_name;
4582 die_qualified_type_name(ctxt, die, where_offset);
// Intern the name and store it in the cache.
4584 interned_string istr = env()->intern(qualified_name);
4585 map[die_offset] = istr;
4592 /// Get the pretty representation of a DIE that represents a type.
4594 /// For instance, for the DW_TAG_subprogram, this function computes
4595 /// the pretty representation of the type of the function, not the
4596 /// pretty representation of the function declaration.
4598 /// Once the pretty representation is computed, it's stored in a
4599 /// cache. Subsequent invocations of this function on the same DIE
4600 /// will yield the cached name.
4602 /// @param die the DIE to consider.
4604 /// @param where_offset where in the DIE stream we logically are.
4606 /// @return the interned_string that represents the pretty representation of @p die as a type.
4609 get_die_pretty_type_representation(const Dwarf_Die *die,
4610 size_t where_offset) const
// The cache (die_pretty_type_repr_maps_) is keyed by DIE offset.
4613 die_istring_map_type& map =
4614 die_pretty_type_repr_maps_.get_container(*const_cast<read_context*>(this),
4617 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
4618 die_istring_map_type::const_iterator i = map.find(die_offset);
// Compute the representation, intern it and store it in the cache.
4622 read_context& ctxt = *const_cast<read_context*>(this);
4623 string pretty_representation =
4624 die_pretty_print_type(ctxt, die, where_offset);
4625 interned_string istr = env()->intern(pretty_representation);
4626 map[die_offset] = istr;
4633 /// Get the pretty representation of a DIE.
4635 /// Once the pretty representation is computed, it's stored in a
4636 /// cache. Subsequent invocations of this function on the same DIE
4637 /// will yield the cached name.
4639 /// @param die the DIE to consider.
4641 /// @param where_offset where in the DIE stream we logically are.
4643 /// @return the interned_string that represents the pretty representation of @p die.
4646 get_die_pretty_representation(const Dwarf_Die *die, size_t where_offset) const
// The cache (die_pretty_repr_maps_) is keyed by DIE offset.
4650 die_istring_map_type& map =
4651 die_pretty_repr_maps_.get_container(*const_cast<read_context*>(this),
4654 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
4655 die_istring_map_type::const_iterator i = map.find(die_offset);
// Compute the representation, intern it and store it in the cache.
4659 read_context& ctxt = *const_cast<read_context*>(this);
4660 string pretty_representation =
4661 die_pretty_print(ctxt, die, where_offset);
4662 interned_string istr = env()->intern(pretty_representation);
4663 map[die_offset] = istr;
4670 /// Lookup the artifact that was built to represent a type that has
4671 /// the same pretty representation as the type denoted by a given DIE.
4674 /// Note that the DIE must have previously been associated with the
4675 /// artifact using the functions associate_die_to_decl or
4676 /// associate_die_to_type.
4678 /// Also, note that the scope of the lookup is the current ABI corpus.
4681 /// @param die the DIE to consider.
4683 /// If the artifact found is a function declaration, its type is returned instead.
4685 /// @return the type artifact found.
4686 type_or_decl_base_sptr
4687 lookup_type_artifact_from_die(Dwarf_Die *die) const
4689 type_or_decl_base_sptr artifact =
4690 lookup_artifact_from_die(die, /*die_as_type=*/true);
// A function declaration stands for its function type here.
4691 if (function_decl_sptr fn = is_function_decl(artifact))
4692 return fn->get_type();
4696 /// Lookup the artifact that was built to represent a type or a
4697 /// declaration that has the same pretty representation as the type
4698 /// denoted by a given DIE.
4700 /// Note that the DIE must have previously been associated with the
4701 /// artifact using the functions associate_die_to_decl or
4702 /// associate_die_to_type.
4704 /// Also, note that the scope of the lookup is the current ABI corpus.
4707 /// @param die the DIE to consider.
4709 /// The lookup is keyed by the offset of the canonical DIE of @p die.
4711 /// @param die_as_type if true, it means the DIE is to be considered as a type.
4714 /// @return the artifact found.
4715 type_or_decl_base_sptr
4716 lookup_artifact_from_die(const Dwarf_Die *die, bool die_as_type = false) const
// An empty pointer is returned when get_or_compute_canonical_die
// returns false.
4718 Dwarf_Die equiv_die;
4719 if (!get_or_compute_canonical_die(die, equiv_die, /*where=*/0, die_as_type))
4720 return type_or_decl_base_sptr();
4722 const die_artefact_map_type& m =
4724 ? type_die_artefact_maps().get_container(*this, &equiv_die)
4725 : decl_die_artefact_maps().get_container(*this, &equiv_die);
4727 size_t die_offset = dwarf_dieoffset(&equiv_die);
4728 die_artefact_map_type::const_iterator i = m.find(die_offset);
4731 return type_or_decl_base_sptr();
4735 /// Lookup the artifact that was built to represent a type or a
4736 /// declaration that has the same pretty representation as the type
4737 /// denoted by the offset of a given DIE.
4739 /// Note that the DIE must have previously been associated with the
4740 /// artifact using either associate_die_to_decl or
4741 /// associate_die_to_type.
4743 /// Also, note that the scope of the lookup is the current ABI corpus.
4746 /// @param die_offset the offset of the DIE to consider.
4748 /// @param source the DIE source whose artifact map is searched.
4750 /// @param die_as_type if true, it means the DIE is to be considered as a type.
4753 /// @return the artifact found.
4754 type_or_decl_base_sptr
4755 lookup_artifact_from_die_offset(Dwarf_Off die_offset,
4757 bool die_as_type = false) const
// Search either the type map or the decl map of 'source'.
4759 const die_artefact_map_type& m =
4761 ? type_die_artefact_maps().get_container(source)
4762 : decl_die_artefact_maps().get_container(source);
4764 die_artefact_map_type::const_iterator i = m.find(die_offset);
4766 return type_or_decl_base_sptr();
4770 /// Get the language used to generate a given DIE.
///
/// The language is read from the DW_AT_language attribute of the
/// unit DIE that contains @p die.
4772 /// @param die the DIE to consider.
4774 /// @param lang the resulting language.
4776 /// @return true iff the language of the DIE was found.
4778 get_die_language(const Dwarf_Die *die, translation_unit::language &lang) const
// Every DIE is expected to have an enclosing unit DIE.
4781 ABG_ASSERT(dwarf_diecu(const_cast<Dwarf_Die*>(die), &cu_die, 0, 0));
4784 if (!die_unsigned_constant_attribute(&cu_die, DW_AT_language, l))
// Convert the DWARF language code to the translation_unit enumerator.
4787 lang = dwarf_language_to_tu_language(l);
4791 /// Test if a given DIE originates from a program written in the C language.
4794 /// @param die the DIE to consider.
4796 /// @return true iff @p die originates from a program in the C language.
4799 die_is_in_c(const Dwarf_Die *die) const
4801 translation_unit::language l = translation_unit::LANG_UNKNOWN;
4802 if (!get_die_language(die, l))
4804 return is_c_language(l);
4807 /// Test if a given DIE originates from a program written in the C++ language.
4810 /// @param die the DIE to consider.
4812 /// @return true iff @p die originates from a program in the C++ language.
4815 die_is_in_cplus_plus(const Dwarf_Die *die) const
4817 translation_unit::language l = translation_unit::LANG_UNKNOWN;
4818 if (!get_die_language(die, l))
4820 return is_cplus_plus_language(l);
4823 /// Test if a given DIE originates from a program written either in C or in C++.
4826 /// @param die the DIE to consider.
4828 /// @return true iff @p die originates from a program written either in C or in C++.
4831 die_is_in_c_or_cplusplus(const Dwarf_Die *die) const
4833 translation_unit::language l = translation_unit::LANG_UNKNOWN;
4834 if (!get_die_language(die, l))
4836 return (is_cplus_plus_language(l) || is_c_language(l));
4839 /// Check if we can assume the One Definition Rule[1] to be relevant
4840 /// for the current translation unit.
4842 /// [1]: https://en.wikipedia.org/wiki/One_Definition_Rule
4844 /// The answer is obtained by passing the language of the current
4845 /// translation unit to the overload that takes a language, which
4846 /// accepts C++, Java and Ada.
///
/// NOTE(review): cur_transl_unit() is dereferenced unconditionally;
/// confirm a current translation unit is always set when this is called.
4848 odr_is_relevant() const
4849 {return odr_is_relevant(cur_transl_unit()->get_language());}
4851 /// Check if we can assume the One Definition Rule[1] to be relevant
4852 /// for a given language.
4854 /// [1]: https://en.wikipedia.org/wiki/One_Definition_Rule
4856 /// At the moment this returns true if the language considered
4857 /// is C++, Java or Ada.
4859 odr_is_relevant(translation_unit::language l) const
// The ODR is considered relevant for these three language families only.
4861 return (is_cplus_plus_language(l)
4862 || is_java_language(l)
4863 || is_ada_language(l));
4866 /// Check if we can assume the One Definition Rule to be relevant
4867 /// for a given DIE.
4869 /// @param die_offset the offset of the DIE to consider, within @p source.
4871 /// @return true if the ODR is relevant for @p die.
4873 odr_is_relevant(Dwarf_Off die_offset, die_source source) const
// Get the DIE found at die_offset in the handle that
// dwarf_per_die_source() returns for 'source'; the lookup is asserted
// to succeed.
4876 ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), die_offset, &die));
// Then defer to the overload taking a DIE.
4877 return odr_is_relevant(&die);
4880 /// Check if we can assume the One Definition Rule to be relevant
4881 /// for a given DIE.
4883 /// @param die the DIE to consider.
4885 /// @return true if the ODR is relevant for @p die.
4887 odr_is_relevant(const Dwarf_Die *die) const
// 'lang' is left uninitialized here; it is only read below, after
// get_die_language() has succeeded.
4889 translation_unit::language lang;
// If the language of the DIE can't be determined, fall back to the
// answer for the current translation unit.
4890 if (!get_die_language(die, lang))
4891 return odr_is_relevant();
4893 return odr_is_relevant(lang);
4896 /// Getter for the maps set that associates a decl DIE offset to an
4899 /// @return the maps set that associates a decl DIE offset to an
// Non-const overload: returns a mutable reference to the
// decl_die_artefact_maps_ data member.
4901 die_source_dependant_container_set<die_artefact_map_type>&
4902 decl_die_artefact_maps()
4903 {return decl_die_artefact_maps_;}
4905 /// Getter for the maps set that associates a decl DIE offset to an
4908 /// @return the maps set that associates a decl DIE offset to an
// Const overload: returns a read-only reference to the
// decl_die_artefact_maps_ data member.
4910 const die_source_dependant_container_set<die_artefact_map_type>&
4911 decl_die_artefact_maps() const
4912 {return decl_die_artefact_maps_;}
4914 /// Getter for the maps set that associates a type DIE offset to an
4917 /// @return the maps set that associates a type DIE offset to an
// Non-const overload: returns a mutable reference to the
// type_die_artefact_maps_ data member.
4919 die_source_dependant_container_set<die_artefact_map_type>&
4920 type_die_artefact_maps()
4921 {return type_die_artefact_maps_;}
4923 /// Getter for the maps set that associates a type DIE offset to an
4926 /// @return the maps set that associates a type DIE offset to an
// Const overload: returns a read-only reference to the
// type_die_artefact_maps_ data member.
4928 const die_source_dependant_container_set<die_artefact_map_type>&
4929 type_die_artefact_maps() const
4930 {return type_die_artefact_maps_;}
4932 /// Getter of the maps that associates function type representations
4933 /// to function types, inside a translation unit.
4935 /// @return the maps that associates function type representations
4936 /// to function types, inside a translation unit.
// Non-const overload: returns a mutable reference to the
// per_tu_repr_to_fn_type_maps_ data member.
4937 istring_fn_type_map_type&
4938 per_tu_repr_to_fn_type_maps()
4939 {return per_tu_repr_to_fn_type_maps_;}
4941 /// Getter of the maps that associates function type representations
4942 /// to function types, inside a translation unit.
4944 /// @return the maps that associates function type representations
4945 /// to function types, inside a translation unit.
// Const overload: returns a read-only reference to the
// per_tu_repr_to_fn_type_maps_ data member.
4946 const istring_fn_type_map_type&
4947 per_tu_repr_to_fn_type_maps() const
4948 {return per_tu_repr_to_fn_type_maps_;}
4950 /// Associate the representation of a function type DIE to a given
4951 /// function type, inside the current translation unit.
4953 /// @param die the DIE to associate to the function type, using its
4956 /// @param fn_type the function type to associate to @p die.
4958 associate_die_repr_to_fn_type_per_tu(const Dwarf_Die *die,
4959 const function_type_sptr &fn_type)
// Guard: the DIE must be a function type DIE.
4961 if (!die_is_function_type(die))
// The map key is the pretty *type* representation of the DIE; it is
// asserted to be non-empty.
4964 interned_string repr =
4965 get_die_pretty_type_representation(die, /*where=*/0);
4966 ABG_ASSERT(!repr.empty());
// Assigning through operator[] replaces any function type that was
// already recorded under the same representation.
4968 per_tu_repr_to_fn_type_maps()[repr]= fn_type;
4971 /// Lookup the function type associated to a given function type
4972 /// DIE, in the current translation unit.
4974 /// @param die the DIE of function type to consider.
4976 /// @return the @ref function_type_sptr associated to @p die, or nil
4977 /// if no function_type is associated to @p die.
4979 lookup_fn_type_from_die_repr_per_tu(const Dwarf_Die *die)
// DIEs that are not function type DIEs have no entry: return nil.
4981 if (!die_is_function_type(die))
4982 return function_type_sptr();
// NOTE(review): the key is computed with get_die_pretty_representation()
// here, whereas associate_die_repr_to_fn_type_per_tu() stores entries
// under get_die_pretty_type_representation(); confirm that both yield
// the same string for function type DIEs.
4984 interned_string repr =
4985 get_die_pretty_representation(die, /*where=*/0);
4986 ABG_ASSERT(!repr.empty());
4988 istring_fn_type_map_type::const_iterator i =
4989 per_tu_repr_to_fn_type_maps().find(repr);
// Nothing recorded under this representation: return nil.
4991 if (i == per_tu_repr_to_fn_type_maps().end())
4992 return function_type_sptr();
4997 /// Set the canonical DIE offset of a given DIE.
4999 /// @param canonical_dies the map that holds canonical DIE offsets.
5001 /// @param die_offset the offset of the DIE to set the canonical DIE
5004 /// @param canonical_die_offset the canonical DIE offset to
5005 /// associate to @p die_offset.
5007 set_canonical_die_offset(offset_offset_map_type &canonical_dies,
5008 Dwarf_Off die_offset,
5009 Dwarf_Off canonical_die_offset) const
// Record the association, replacing any previous value stored for
// die_offset.
5011 canonical_dies[die_offset] = canonical_die_offset;}
5013 /// Set the canonical DIE offset of a given DIE.
5016 /// @param die_offset the offset of the DIE to set the canonical DIE
5019 /// @param source the source of the DIE denoted by @p die_offset.
5021 /// @param canonical_die_offset the canonical DIE offset to
5022 /// associate to @p die_offset.
5024 /// @param die_as_type if true, it means that @p die_offset has to
5025 /// be considered as a type.
5027 set_canonical_die_offset(Dwarf_Off die_offset,
5029 Dwarf_Off canonical_die_offset,
5030 bool die_as_type) const
// Canonical offsets of type DIEs and of decl DIEs live in two distinct
// container sets; pick the container of 'source' in one or the other.
// This method is const, hence the const_cast to get mutable access.
5032 offset_offset_map_type &canonical_dies =
5034 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
5035 get_container(source)
5036 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
5037 get_container(source);
// Delegate the actual recording to the overload taking the map.
5039 set_canonical_die_offset(canonical_dies,
5041 canonical_die_offset);
5044 /// Set the canonical DIE offset of a given DIE.
5047 /// @param die the DIE to set the canonical DIE for.
5049 /// @param canonical_die_offset the canonical DIE offset to
5050 /// associate to @p die.
5052 /// @param die_as_type if true, it means that @p die has to be
5053 /// considered as a type.
5055 set_canonical_die_offset(const Dwarf_Die *die,
5056 Dwarf_Off canonical_die_offset,
5057 bool die_as_type) const
// The source of the DIE must be known (asserted).
5060 ABG_ASSERT(get_die_source(die, source));
// dwarf_dieoffset() takes a non-const DIE pointer, hence the const_cast.
5062 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
// Delegate to the overload working on (offset, source).
5064 set_canonical_die_offset(die_offset, source,
5065 canonical_die_offset,
5069 /// Get the canonical DIE offset of a given DIE.
5071 /// @param canonical_dies the map that contains canonical DIE offsets.
5073 /// @param die_offset the offset of the DIE to consider.
5075 /// @return the canonical of the DIE denoted by @p die_offset, or
5076 /// zero if no canonical DIE was found.
5078 get_canonical_die_offset(offset_offset_map_type &canonical_dies,
5079 Dwarf_Off die_offset) const
// find() is used rather than operator[], so a lookup miss does not
// insert a new entry in the map.
5081 offset_offset_map_type::const_iterator it = canonical_dies.find(die_offset);
5082 if (it == canonical_dies.end())
5087 /// Get the canonical DIE offset of a given DIE.
5089 /// @param die_offset the offset of the DIE to consider.
5091 /// @param source the source of the DIE denoted by @p die_offset.
5093 /// @param die_as_type if true, it means that @p die_offset is to be considered
5096 /// @return the canonical of the DIE denoted by @p die_offset, or
5097 /// zero if no canonical DIE was found.
5099 get_canonical_die_offset(Dwarf_Off die_offset,
5101 bool die_as_type) const
// Pick the container of 'source' either among the canonical type DIE
// offsets or among the canonical decl DIE offsets.  get_container() is
// reached through a const_cast because this method is const.
5103 offset_offset_map_type &canonical_dies =
5105 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
5106 get_container(source)
5107 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
5108 get_container(source);
// The lookup itself is done by the overload taking the map.
5110 return get_canonical_die_offset(canonical_dies, die_offset);
5113 /// Associate a DIE (representing a type) to the type that it
5116 /// @param die the DIE to consider.
5118 /// @param type the type to associate the DIE to.
5120 /// @param where where in the DIE stream we logically are.
5122 associate_die_to_type(const Dwarf_Die *die,
5123 type_base_sptr type,
// The type is not keyed by the offset of 'die' itself, but by the offset
// of its canonical DIE, which is fetched (or computed) into equiv_die.
5129 Dwarf_Die equiv_die;
5130 get_or_compute_canonical_die(die, equiv_die, where, /*die_as_type=*/true);
// The container is selected from the canonical DIE as well.
5132 die_artefact_map_type& m =
5133 type_die_artefact_maps().get_container(*this, &equiv_die);
5135 size_t die_offset = dwarf_dieoffset(&equiv_die);
// Replaces any artefact previously recorded at that offset.
5136 m[die_offset] = type;
5139 /// Lookup the type associated to a given DIE.
5141 /// Note that the DIE must have been associated to type by a
5142 /// previous invocation of the function
5143 /// read_context::associate_die_to_type().
5145 /// @param die the DIE to consider.
5147 /// @return the type associated to the DIE or NULL if no type is
5148 /// associated to the DIE.
5150 lookup_type_from_die(const Dwarf_Die* die) const
// Look the DIE up as a type (die_as_type is true).
5152 type_or_decl_base_sptr artifact =
5153 lookup_artifact_from_die(die, /*die_as_type=*/true);
// If the artifact found is a function declaration, the result is the
// type of that function.
5154 if (function_decl_sptr fn = is_function_decl(artifact))
5155 return fn->get_type();
// Otherwise the artifact is returned viewed as a type, via is_type().
5156 return is_type(artifact);
5159 /// Lookup the type associated to a DIE at a given offset, from a
5162 /// Note that the DIE must have been associated to type by a
5163 /// previous invocation of the function
5164 /// read_context::associate_die_to_type().
5166 /// @param die_offset the offset of the DIE to consider.
5168 /// @param source the source of the DIE to consider.
5170 /// @return the type associated to the DIE or NULL if no type is
5171 /// associated to the DIE.
5173 lookup_type_from_die_offset(size_t die_offset, die_source source) const
5175 type_base_sptr result;
// First, search the artefacts recorded for the type DIEs of 'source'.
5176 const die_artefact_map_type& m =
5177 type_die_artefact_maps().get_container(source);
5178 die_artefact_map_type::const_iterator i = m.find(die_offset);
// An artefact that is a function declaration yields the type of that
// function; any other artefact is viewed as a type via is_type().
5181 if (function_decl_sptr fn = is_function_decl(i->second))
5182 return fn->get_type();
5183 result = is_type(i->second);
5188 // Maybe we are looking for a class type being constructed?
// Classes under construction are searched in the WIP classes map of
// 'source'.
5189 const die_class_or_union_map_type& m = die_wip_classes_map(source);
5190 die_class_or_union_map_type::const_iterator i = m.find(die_offset);
5198 // Maybe we are looking for a function type being constructed?
// Function types under construction are searched in the WIP function
// types map of 'source'.
5199 const die_function_type_map_type& m =
5200 die_wip_function_types_map(source);
5201 die_function_type_map_type::const_iterator i = m.find(die_offset);
5210 /// Getter of a map that associates a die that represents a
5211 /// class/struct with the declaration of the class, while the class
5212 /// is being constructed.
5214 /// @param source where the DIE is from.
5216 /// @return the map that associates a DIE to the class that is being
// Const overload: forwards to the non-const overload through a
// const_cast and returns its result as a const reference.
5218 const die_class_or_union_map_type&
5219 die_wip_classes_map(die_source source) const
5220 {return const_cast<read_context*>(this)->die_wip_classes_map(source);}
5222 /// Getter of a map that associates a die that represents a
5223 /// class/struct with the declaration of the class, while the class
5224 /// is being constructed.
5226 /// @param source where the DIE comes from.
5228 /// @return the map that associates a DIE to the class that is being
5230 die_class_or_union_map_type&
5231 die_wip_classes_map(die_source source)
// The alternate debug info and the type units each have a dedicated map.
5235 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5237 case ALT_DEBUG_INFO_DIE_SOURCE:
5238 return alternate_die_wip_classes_map_;
5239 case TYPE_UNIT_DIE_SOURCE:
5240 return type_unit_die_wip_classes_map_;
// Asking for the map of either of these two enumerators is treated as
// a programming error.
5241 case NO_DEBUG_INFO_DIE_SOURCE:
5242 case NUMBER_OF_DIE_SOURCES:
5243 ABG_ASSERT_NOT_REACHED;
// Reached when no case above returned.
5245 return die_wip_classes_map_;
5248 /// Getter for a map that associates a die (that represents a
5249 /// function type) with a function type, while the function type is
5250 /// being constructed (WIP == work in progress).
5252 /// @param source where the DIE comes from.
5254 /// @return the map of wip function types.
// Const overload: forwards to the non-const overload through a
// const_cast and returns its result as a const reference.
5255 const die_function_type_map_type&
5256 die_wip_function_types_map(die_source source) const
5257 {return const_cast<read_context*>(this)->die_wip_function_types_map(source);}
5259 /// Getter for a map that associates a die (that represents a
5260 /// function type) with a function type, while the function type is
5261 /// being constructed (WIP == work in progress).
5263 /// @param source where DIEs of the map come from.
5265 /// @return the map of wip function types.
5266 die_function_type_map_type&
5267 die_wip_function_types_map(die_source source)
// The alternate debug info and the type units each have a dedicated map.
5271 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5273 case ALT_DEBUG_INFO_DIE_SOURCE:
5274 return alternate_die_wip_function_types_map_;
5275 case TYPE_UNIT_DIE_SOURCE:
5276 return type_unit_die_wip_function_types_map_;
// Asking for the map of either of these two enumerators is treated as
// a programming error.
5277 case NO_DEBUG_INFO_DIE_SOURCE:
5278 case NUMBER_OF_DIE_SOURCES:
5279 ABG_ASSERT_NOT_REACHED;
// Reached when no case above returned.
5281 return die_wip_function_types_map_;
5284 /// Getter for a map that associates a die with a function decl
5285 /// which has a linkage name but no elf symbol yet.
5287 /// This is to fixup function decls with linkage names, but with no
5288 /// link to their underlying elf symbol. There are some DIEs like
5289 /// that in DWARF sometimes, especially when the compiler optimizes
5290 /// stuff aggressively.
// Returns a mutable reference to the die_function_with_no_symbol_map_
// data member.
5291 die_function_decl_map_type&
5292 die_function_decl_with_no_symbol_map()
5293 {return die_function_with_no_symbol_map_;}
5295 /// Return true iff a given offset is for the DIE of a class that is
5296 /// being built, but that is not fully built yet. WIP == "work in
5299 /// @param offset the DIE offset to consider.
5301 /// @param source where the DIEs of the map come from.
5303 /// @return true iff @p offset is the offset of the DIE of a class
5304 /// that is being currently built.
5306 is_wip_class_die_offset(Dwarf_Off offset, die_source source) const
// The offset designates a WIP class iff it has an entry in the map of
// WIP classes of 'source'.
5308 die_class_or_union_map_type::const_iterator i =
5309 die_wip_classes_map(source).find(offset);
5310 return (i != die_wip_classes_map(source).end());
5313 /// Return true iff a given offset is for the DIE of a function type
5314 /// that is being built at the moment, but is not fully built yet.
5315 /// WIP == work in progress.
5317 /// @param offset DIE offset to consider.
5319 /// @param source where the DIE comes from.
5321 /// @return true iff @p offset is the offset of the DIE of a
5322 /// function type that is being currently built.
5324 is_wip_function_type_die_offset(Dwarf_Off offset, die_source source) const
// The offset designates a WIP function type iff it has an entry in the
// map of WIP function types of 'source'.
5326 die_function_type_map_type::const_iterator i =
5327 die_wip_function_types_map(source).find(offset);
5328 return (i != die_wip_function_types_map(source).end());
5331 /// Getter for the map of declaration-only classes that are to be
5332 /// resolved to their definition classes by the end of the corpus
5335 /// @return a map of string -> vector of classes where the key is
5336 /// the fully qualified name of the class and the value is the
5337 /// vector of declaration-only classes.
// Const overload: returns a read-only reference to the
// decl_only_classes_map_ data member.
5338 const string_classes_map&
5339 declaration_only_classes() const
5340 {return decl_only_classes_map_;}
5342 /// Getter for the map of declaration-only classes that are to be
5343 /// resolved to their definition classes by the end of the corpus
5346 /// @return a map of string -> vector of classes where the key is
5347 /// the fully qualified name of the class and the value is the
5348 /// vector of declaration-only classes.
5350 declaration_only_classes()
// Non-const overload: callers can modify the returned map.
5351 {return decl_only_classes_map_;}
5353 /// If a given class is a declaration-only class then stash it on
5354 /// the side so that at the end of the corpus reading we can resolve
5355 /// it to its definition.
5357 /// @param klass the class to consider.
5359 maybe_schedule_declaration_only_class_for_resolution(class_decl_sptr& klass)
// Only declaration-only classes that have no definition yet are stashed.
5361 if (klass->get_is_declaration_only()
5362 && klass->get_definition_of_declaration() == 0)
// Declarations are grouped by qualified name: one vector per name.
5364 string qn = klass->get_qualified_name();
5365 string_classes_map::iterator record =
5366 declaration_only_classes().find(qn);
// No record for that name yet: operator[] creates it.
5367 if (record == declaration_only_classes().end())
5368 declaration_only_classes()[qn].push_back(klass);
// Append to the record found for that name.
5370 record->second.push_back(klass);
5374 /// Test if a given declaration-only class has been scheduled for
5375 /// resolution to a defined class.
5377 /// @param klass the class to consider for the test.
5379 /// @return true iff @p klass is a declaration-only class and if
5380 /// it's been scheduled for resolution to a defined class.
5382 is_decl_only_class_scheduled_for_resolution(class_decl_sptr& klass)
// The test is made on the qualified name only: it checks that the map
// has a record for that name, not that the record contains 'klass'.
5384 if (klass->get_is_declaration_only())
5385 return (declaration_only_classes().find(klass->get_qualified_name())
5386 != declaration_only_classes().end());
5391 /// Walk the declaration-only classes that have been found during
5392 /// the building of the corpus and resolve them to their definitions.
5394 resolve_declaration_only_classes()
// Names whose record is to be erased from the map once the walk below
// is done.
5396 vector<string> resolved_classes;
5398 for (string_classes_map::iterator i =
5399 declaration_only_classes().begin();
5400 i != declaration_only_classes().end();
// Check whether at least one class recorded under this name is still
// declaration-only with no definition.
5403 bool to_resolve = false;
5404 for (classes_type::iterator j = i->second.begin();
5405 j != i->second.end();
5407 if ((*j)->get_is_declaration_only()
5408 && ((*j)->get_definition_of_declaration() == 0))
5413 resolved_classes.push_back(i->first);
5417 // Now, for each decl-only class that has the current name
5418 // 'i->first', let's try to poke at the fully defined class
5419 // that is defined in the same translation unit as the
5422 // If we find one class (defined in the TU of the declaration)
5423 // that defines the declaration, then the declaration can be
5424 // resolved to that class.
5426 // If no defining class is found in the TU of the declaration,
5427 // then there are possibly three cases to consider:
5429 // 1/ There is exactly one class that defines the
5430 // declaration and that class is defined in another TU. In
5431 // this case, the declaration is resolved to that
5434 // 2/ There are more than one class that define that
5435 // declaration and none of them is defined in the TU of the
5436 // declaration. In this case, the declaration is left
5439 // 3/ No class defines the declaration. In this case, the
5440 // declaration is left unresolved.
5442 // So get the classes that might define the current
5443 // declarations which name is i->first.
5444 const type_base_wptrs_type *classes =
5445 lookup_class_types(i->first, *current_corpus());
// Candidate definitions, keyed by the absolute path of the translation
// unit that defines them.
5449 unordered_map<string, class_decl_sptr> per_tu_class_map;
5450 for (type_base_wptrs_type::const_iterator c = classes->begin();
5451 c != classes->end();
5454 class_decl_sptr klass = is_class_type(type_base_sptr(*c));
// Look through the declaration to the class it resolves to, if any;
// the result is then tested for still being declaration-only.
5457 klass = is_class_type(look_through_decl_only_class(klass));
5458 if (klass->get_is_declaration_only())
5461 string tu_path = klass->get_translation_unit()->get_absolute_path();
5462 if (tu_path.empty())
5465 // Build a map that associates the translation unit path
5466 // to the class (that potentially defines the declarations
5467 // that we consider) that are defined in that translation unit.
// Note that a later class with the same TU path replaces the earlier one.
5468 per_tu_class_map[tu_path] = klass;
5471 if (!per_tu_class_map.empty())
5473 // Walk the declarations to resolve and resolve them
5474 // either to the definitions that are in the same TU as
5475 // the declaration, or to the definition found elsewhere,
5476 // if there is only one such definition.
5477 for (classes_type::iterator j = i->second.begin();
5478 j != i->second.end();
5481 if ((*j)->get_is_declaration_only()
5482 && ((*j)->get_definition_of_declaration() == 0))
5485 (*j)->get_translation_unit()->get_absolute_path();
5486 unordered_map<string, class_decl_sptr>::const_iterator e =
5487 per_tu_class_map.find(tu_path);
// A definition exists in the TU of the declaration: use it.
5488 if (e != per_tu_class_map.end())
5489 (*j)->set_definition_of_declaration(e->second);
// Otherwise, use the definition found elsewhere only if it's the sole
// candidate.
5490 else if (per_tu_class_map.size() == 1)
5491 (*j)->set_definition_of_declaration
5492 (per_tu_class_map.begin()->second);
5495 resolved_classes.push_back(i->first);
5499 size_t num_decl_only_classes = declaration_only_classes().size(),
5500 num_resolved = resolved_classes.size();
5502 cerr << "resolved " << num_resolved
5503 << " class declarations out of "
5504 << num_decl_only_classes
// The erasure happens here, after the walk over the map is finished.
5507 for (vector<string>::const_iterator i = resolved_classes.begin();
5508 i != resolved_classes.end();
5510 declaration_only_classes().erase(*i);
// What remains in the map at this point was not resolved.
5512 for (string_classes_map::iterator i = declaration_only_classes().begin();
5513 i != declaration_only_classes().end();
// The header is emitted once, on the first remaining entry.
5518 if (i == declaration_only_classes().begin())
5519 cerr << "Here are the "
5520 << num_decl_only_classes - num_resolved
5521 << " unresolved class declarations:\n";
5523 cerr << " " << i->first << "\n";
5528 /// Some functions described by DWARF may have their linkage name
5529 /// set, but no link to their actual underlying elf symbol. When
5530 /// these are virtual member functions, comparing the enclosing type
5531 /// against another one which has its underlying symbol properly set
5532 /// might lead to spurious type changes.
5534 /// If the corpus contains a symbol with the same name as the
5535 /// linkage name of the function, then set up the link between the
5536 /// function and its underlying symbol.
5538 /// Note that for the moment, only virtual member functions are
5539 /// fixed up like this. This is because they really are the only
5540 /// kinds of functions that can affect types (in spurious ways).
5542 fixup_functions_with_no_symbols()
5544 corpus_sptr corp = current_corpus();
5548 die_function_decl_map_type &fns_with_no_symbol =
5549 die_function_decl_with_no_symbol_map();
5552 cerr << fns_with_no_symbol.size()
5553 << " functions to fixup, potentially\n";
5555 for (die_function_decl_map_type::iterator i = fns_with_no_symbol.begin();
5556 i != fns_with_no_symbol.end();
// Look in the corpus for a function symbol named like the linkage name
// of the function.
5558 if (elf_symbol_sptr sym =
5559 corp->lookup_function_symbol(i->second->get_linkage_name()))
// Only virtual member functions are expected in this map (asserted).
5561 ABG_ASSERT(is_member_function(i->second));
5562 ABG_ASSERT(get_member_function_is_virtual(i->second));
5563 i->second->set_symbol(sym);
5565 cerr << "fixed up '"
5566 << i->second->get_pretty_representation()
5567 << "' with symbol '"
5568 << sym->get_id_string()
// The map is emptied whether or not each function could be fixed up.
5572 fns_with_no_symbol.clear();
5575 /// Return a reference to the vector containing the offsets of the
5576 /// types that need late canonicalizing.
5578 /// @param source where the DIEs referred to by the offsets contained in
5579 /// the vector to return are from.
5581 types_to_canonicalize(die_source source)
// The alternate debug info and the type units each have a dedicated
// vector.
5585 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5587 case ALT_DEBUG_INFO_DIE_SOURCE:
5588 return alt_types_to_canonicalize_;
5589 case TYPE_UNIT_DIE_SOURCE:
5590 return type_unit_types_to_canonicalize_;
// Asking for the vector of either of these two enumerators is treated
// as a programming error.
5591 case NO_DEBUG_INFO_DIE_SOURCE:
5592 case NUMBER_OF_DIE_SOURCES:
5593 ABG_ASSERT_NOT_REACHED;
// Reached when no case above returned.
5595 return types_to_canonicalize_;
5598 /// Return a reference to the vector containing the offsets of the
5599 /// types that need late canonicalizing.
5601 /// @param source where the DIEs referred to by the offset in the
5602 /// returned vector are from.
// Const overload: forwards to the non-const overload through a
// const_cast and returns its result as a const reference.
5603 const vector<Dwarf_Off>&
5604 types_to_canonicalize(die_source source) const
5605 {return const_cast<read_context*>(this)->types_to_canonicalize(source);}
5607 /// Return a reference to the vector containing the types created
5608 /// during the binary analysis but that are not tied to a given
5611 /// @return reference to the vector containing the types created
5612 /// during the binary analysis but that are not tied to a given
// Unlike types_to_canonicalize(), this takes no DIE source: there is a
// single vector of extra types.
5614 const vector<type_base_sptr>&
5615 extra_types_to_canonicalize() const
5616 {return extra_types_to_canonicalize_;}
5618 /// Clear the containers holding types to canonicalize.
5620 clear_types_to_canonicalize()
// The three per-source vectors of DIE offsets ...
5622 types_to_canonicalize_.clear();
5623 alt_types_to_canonicalize_.clear();
5624 type_unit_types_to_canonicalize_.clear();
// ... and the vector of types that are not tied to a DIE.
5625 extra_types_to_canonicalize_.clear();
5628 /// Put the offset of a DIE representing a type on a side vector so
5629 /// that when the reading of the debug info of the current
5630 /// translation unit is done, we can get back to the type DIE and
5631 /// from there, to the type it's associated to, and then
5632 /// canonicalize it. This is what we call late canonicalization.
5634 /// @param die the type DIE to schedule for late type
5635 /// canonicalization.
5637 schedule_type_for_late_canonicalization(const Dwarf_Die *die)
// What gets scheduled is the offset of the canonical DIE of 'die'; that
// canonical DIE must already exist (asserted).
5642 Dwarf_Die equiv_die;
5643 ABG_ASSERT(get_canonical_die(die, equiv_die,
5645 /*die_as_type=*/true));
5647 ABG_ASSERT(get_die_source(&equiv_die, source));
5648 o = dwarf_dieoffset(&equiv_die);
// NOTE(review): the container is selected from 'die' whereas the offset
// and the source come from equiv_die; presumably both DIEs always have
// the same source -- confirm.
5650 const die_artefact_map_type& m =
5651 type_die_artefact_maps().get_container(*this, die);
// A type must already have been recorded at that offset (asserted).
5653 die_artefact_map_type::const_iterator i = m.find(o);
5654 ABG_ASSERT(i != m.end());
5656 // Then really do the scheduling.
5657 types_to_canonicalize(source).push_back(o);
5660 /// Types that were created but not tied to a particular DIE, must
5661 /// be scheduled for late canonicalization using this method.
5663 /// @param t the type to schedule for late canonicalization.
5665 schedule_type_for_late_canonicalization(const type_base_sptr &t)
// Appended as is: nothing here checks whether 't' is already in the
// vector.
5667 extra_types_to_canonicalize_.push_back(t);
5670 /// Canonicalize types which DIE offsets are stored in vectors on
5671 /// the side. This is a sub-routine of
5672 /// read_context::perform_late_type_canonicalizing().
5674 /// @param source where the DIE of the types to canonicalize are
5677 canonicalize_types_scheduled(die_source source)
5679 tools_utils::timer cn_timer;
5682 cerr << "going to canonicalize types";
5683 corpus_sptr c = current_corpus();
5685 cerr << " of corpus " << current_corpus()->get_path();
5686 cerr << " (DIEs source: " << source << ")\n";
// First, the types that were scheduled through the offset of their DIE,
// for this source.
5690 if (!types_to_canonicalize(source).empty())
5692 tools_utils::timer single_type_cn_timer;
5693 size_t total = types_to_canonicalize(source).size();
5695 cerr << total << " types to canonicalize\n";
5696 for (size_t i = 0; i < total; ++i)
// Get back from the scheduled DIE offset to the type associated to it.
5698 Dwarf_Off element = types_to_canonicalize(source)[i];
5700 lookup_type_from_die_offset(element, source);
5704 cerr << "canonicalizing type "
5705 << get_pretty_representation(t, false)
5706 << " [" << i + 1 << "/" << total << "]";
5707 if (corpus_sptr c = current_corpus())
5708 cerr << "@" << c->get_path();
5710 single_type_cn_timer.start();
5716 single_type_cn_timer.stop();
5717 cerr << ":" <<single_type_cn_timer << "\n";
5721 // Now canonicalize types that were created but not tied to
// NOTE(review): extra_types_to_canonicalize() is not keyed by DIE
// source, so this vector is walked on every call of this function,
// whatever 'source' is -- confirm that this is intended.
5723 if (!extra_types_to_canonicalize().empty())
5725 tools_utils::timer single_type_cn_timer;
5726 size_t total = extra_types_to_canonicalize().size();
5728 cerr << total << " extra types to canonicalize\n";
5730 for (vector<type_base_sptr>::const_iterator it =
5731 extra_types_to_canonicalize().begin();
5732 it != extra_types_to_canonicalize().end();
5737 cerr << "canonicalizing extra type "
5738 << get_pretty_representation(*it, false)
5739 << " [" << i << "/" << total << "]";
5740 if (corpus_sptr c = current_corpus())
5741 cerr << "@" << c->get_path();
5743 single_type_cn_timer.start();
5748 single_type_cn_timer.stop();
5750 << single_type_cn_timer
5760 cerr << "finished canonicalizing types";
5761 corpus_sptr c = current_corpus();
5763 cerr << " of corpus " << current_corpus()->get_path();
5764 cerr << " (DIEs source: "
5771 /// Compute the number of canonicalized and missed types in the late
5772 /// canonicalization phase.
5774 /// @param source where the DIEs of the canonicalized types are
5777 /// @param canonicalized the number of types that got canonicalized
5778 /// is added to the value already present in this parameter.
5780 /// @param missed the number of types scheduled for late
5781 /// canonicalization and which couldn't be canonicalized (for a
5782 /// reason) is added to the value already present in this parameter.
5784 add_late_canonicalized_types_stats(die_source source,
5785 size_t& canonicalized,
5786 size_t& missed) const
// Walk the DIE offsets that were scheduled for late canonicalization
// for 'source'.
5788 for (vector<Dwarf_Off>::const_iterator i =
5789 types_to_canonicalize(source).begin();
5790 i != types_to_canonicalize(source).end();
// NOTE(review): 't' is dereferenced right away, whereas
// lookup_type_from_die_offset() is documented as returning NULL when no
// type is associated to the DIE -- confirm that this can't happen for a
// scheduled offset.
5793 type_base_sptr t = lookup_type_from_die_offset(*i, source);
5794 if (t->get_canonical_type())
5801 /// Compute the number of canonicalized and missed types in the late
5802 /// canonicalization phase.
5804 /// @param canonicalized the number of types that got canonicalized
5805 /// is added to the value already present in this parameter.
5807 /// @param missed the number of types scheduled for late
5808 /// canonicalization and which couldn't be canonicalized (for a
5809 /// reason) is added to the value already present in this parameter.
5811 add_late_canonicalized_types_stats(size_t& canonicalized,
5812 size_t& missed) const
// Accumulate the stats of every DIE source, from
// PRIMARY_DEBUG_INFO_DIE_SOURCE up to (excluding) NUMBER_OF_DIE_SOURCES.
5814 for (die_source source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
5815 source < NUMBER_OF_DIE_SOURCES;
5817 add_late_canonicalized_types_stats(source, canonicalized, missed);
5820 /// Look at the types that need to be canonicalized after the
5821 /// translation unit has been constructed and canonicalize them.
5823 perform_late_type_canonicalizing()
// Canonicalize the scheduled types of each DIE source in turn.
5825 for (die_source source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
5826 source < NUMBER_OF_DIE_SOURCES;
5828 canonicalize_types_scheduled(source);
// Both counters start at zero because
// add_late_canonicalized_types_stats() adds to the values passed in.
5832 size_t num_canonicalized = 0, num_missed = 0, total = 0;
5833 add_late_canonicalized_types_stats(num_canonicalized,
5835 total = num_canonicalized + num_missed;
// NOTE(review): the percentages below divide by 'total'; no guard
// against total == 0 is visible here -- confirm that one exists.
5839 cerr << " # late canonicalized types: "
5840 << num_canonicalized
5841 << " (" << num_canonicalized * 100 / total << "%)\n"
5842 << " # missed canonicalization opportunities: "
5844 << " (" << num_missed * 100 / total << "%)\n";
// Const getter: returns a read-only reference to the die_tu_map_ data
// member.
5849 const die_tu_map_type&
5851 {return die_tu_map_;}
// Returns the die_tu_map_ data member.
5855 {return die_tu_map_;}
5857 /// Getter for the map that associates a translation unit DIE to the
5858 /// vector of imported unit points that it contains.
5860 /// @param source where the DIEs are from.
5862 /// @return the map.
// Const overload: forwards to the non-const overload through a
// const_cast and returns its result as a const reference.
5863 const tu_die_imported_unit_points_map_type&
5864 tu_die_imported_unit_points_map(die_source source) const
5865 {return const_cast<read_context*>(this)->tu_die_imported_unit_points_map(source);}
5867 /// Getter for the map that associates a translation unit DIE to the
5868 /// vector of imported unit points that it contains.
5870 /// @param source where the DIEs are from.
5872 /// @return the map.
5873 tu_die_imported_unit_points_map_type&
5874 tu_die_imported_unit_points_map(die_source source)
// The alternate debug info and the type units each have a dedicated map.
5878 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5880 case ALT_DEBUG_INFO_DIE_SOURCE:
5881 return alt_tu_die_imported_unit_points_map_;
5882 case TYPE_UNIT_DIE_SOURCE:
5883 return type_units_tu_die_imported_unit_points_map_;
5884 case NO_DEBUG_INFO_DIE_SOURCE:
5885 case NUMBER_OF_DIE_SOURCES:
5886 // Asking for the map of either of these two enumerators is a
// programming error: we cannot reach this point.
5887 ABG_ASSERT_NOT_REACHED;
// Reached when no case above returned.
5889 return tu_die_imported_unit_points_map_;
5892 /// Getter of the current corpus being constructed.
5894 /// @return the current corpus.
5896 current_corpus() const
// Const getter: returns the cur_corpus_ data member.
5897 {return cur_corpus_;}
5899 /// Getter of the current corpus being constructed.
5901 /// @return the current corpus.
// Returns the cur_corpus_ data member.
5904 {return cur_corpus_;}
5906 /// Setter of the current corpus being constructed.
5908 /// @param c the new corpus.
// Setter overload: takes the new corpus by const reference.
5910 current_corpus(const corpus_sptr& c)
5916 /// Reset the current corpus being constructed.
5918 /// This actually deletes the current corpus being constructed.
5920 reset_current_corpus()
// Only this context's smart pointer is reset.
5921 {cur_corpus_.reset();}
5923 /// Getter of the current corpus group being constructed.
5925 /// @return the current corpus group being constructed, if any, or
// Const getter: returns the cur_corpus_group_ data member by value.
5927 const corpus_group_sptr
5928 current_corpus_group() const
5929 {return cur_corpus_group_;}
5931 /// Getter of the current corpus group being constructed.
5933 /// @return the current corpus group being constructed, if any, or
5936 current_corpus_group()
// Non-const getter: returns the cur_corpus_group_ data member.
5937 {return cur_corpus_group_;}
5939 /// Setter of the current corpus group being constructed.
5941 /// @param g the new corpus group.
5943 current_corpus_group(const corpus_group_sptr& g)
// Store the new corpus group in the cur_corpus_group_ data member.
5946 cur_corpus_group_ = g;
5949 /// Test if there is a corpus group being built.
5951 /// @return true if there is a corpus group being built, false otherwise.
5953 has_corpus_group() const
// The explicit bool() conversion tests cur_corpus_group_ for being set.
5954 {return bool(cur_corpus_group_);}
5956 /// Return the main corpus from the current corpus group, if any.
5958 /// @return the main corpus of the current corpus group, if any, nil
5959 /// if no corpus group is being constructed.
5961 main_corpus_from_current_group()
// With a corpus group set, ask it for its main corpus ...
5963 if (cur_corpus_group_)
5964 return cur_corpus_group_->get_main_corpus();
// ... otherwise there is no main corpus: return nil.
5965 return corpus_sptr();
5968 /// Return the main corpus from the current corpus group, if any.
5970 /// @return the main corpus of the current corpus group, if any, nil
5971 /// if no corpus group is being constructed.
5973 main_corpus_from_current_group() const
5974 {return const_cast<read_context*>(this)->main_corpus_from_current_group();}
5976 /// Test if the current corpus being built is the main corpus of the
5977 /// current corpus group.
5979 /// @return return true iff the current corpus being built is the
5980 /// main corpus of the current corpus group.
5982 current_corpus_is_main_corpus_from_current_group() const
5984 corpus_sptr main_corpus = main_corpus_from_current_group();
5986 if (main_corpus && main_corpus.get() == cur_corpus_.get())
5992 /// Return true if the current corpus is part of a corpus group
5993 /// being built and if it's not the main corpus of the group.
5995 /// For instance, this would return true if we are loading a linux
5996 /// kernel *module* that is part of the current corpus group that is
5997 /// being built. In this case, it means we should re-use types
5998 /// coming from the "vmlinux" binary that is the main corpus of the
6001 /// @return the corpus group the current corpus belongs to, if the
6002 /// current corpus is part of a corpus group being built. Nil otherwise.
6004 should_reuse_type_from_corpus_group() const
6006 if (has_corpus_group() && is_c_language(cur_transl_unit()->get_language()))
6007 if (corpus_sptr main_corpus = main_corpus_from_current_group())
6008 if (!current_corpus_is_main_corpus_from_current_group())
6009 return current_corpus_group();
6011 return corpus_sptr();
6014 /// Get the map that associates each DIE to its parent DIE. This is
6015 /// for DIEs coming from the main debug info sections.
6017 /// @param source where the DIEs in the map come from.
6019 /// @return the DIE -> parent map.
6020 const offset_offset_map_type&
6021 die_parent_map(die_source source) const
6022 {return const_cast<read_context*>(this)->die_parent_map(source);}
6024 /// Get the map that associates each DIE to its parent DIE. This is
6025 /// for DIEs coming from the main debug info sections.
6027 /// @param source where the DIEs in the map come from.
6029 /// @return the DIE -> parent map.
6030 offset_offset_map_type&
6031 die_parent_map(die_source source)
6035 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
6037 case ALT_DEBUG_INFO_DIE_SOURCE:
6038 return alternate_die_parent_map_;
6039 case TYPE_UNIT_DIE_SOURCE:
6040 return type_section_die_parent_map();
6041 case NO_DEBUG_INFO_DIE_SOURCE:
6042 case NUMBER_OF_DIE_SOURCES:
6043 ABG_ASSERT_NOT_REACHED;
6045 return primary_die_parent_map_;
6048 const offset_offset_map_type&
6049 type_section_die_parent_map() const
6050 {return type_section_die_parent_map_;}
6052 offset_offset_map_type&
6053 type_section_die_parent_map()
6054 {return type_section_die_parent_map_;}
6056 /// Getter of the current translation unit.
6058 /// @return the current translation unit being constructed.
6059 const translation_unit_sptr&
6060 cur_transl_unit() const
6063 /// Getter of the current translation unit.
6065 /// @return the current translation unit being constructed.
6066 translation_unit_sptr&
6070 /// Setter of the current translation unit.
6072 /// @param tu the current translation unit being constructed.
6074 cur_transl_unit(translation_unit_sptr tu)
6080 /// Return the global scope of the current translation unit.
6082 /// @return the global scope of the current translation unit.
6083 const scope_decl_sptr&
6084 global_scope() const
6085 {return cur_transl_unit()->get_global_scope();}
6087 /// Return a scope that is nil.
6089 /// @return a scope that is nil.
6090 const scope_decl_sptr&
6092 {return nil_scope_;}
6094 const scope_stack_type&
6096 {return scope_stack_;}
6100 {return scope_stack_;}
6105 if (scope_stack().empty())
6107 if (cur_transl_unit())
6108 scope_stack().push(cur_transl_unit()->get_global_scope().get());
6110 return scope_stack().top();
6113 list<var_decl_sptr>&
6114 var_decls_to_re_add_to_tree()
6115 {return var_decls_to_add_;}
6117 /// Return the type of the current elf file.
6119 /// @return the type of the current elf file.
6123 return elf_file_type(elf_handle());
6126 /// The section containing the symbol table from the current ELF
6129 /// Note that after it's first invocation, this function caches the
6130 /// symbol table that it found. Subsequent invocations just return
6131 /// the cached symbol table section.
6133 /// @return the symbol table section if found
6135 find_symbol_table_section() const
6137 if (!symtab_section_)
6138 dwarf_reader::find_symbol_table_section(elf_handle(),
6139 const_cast<read_context*>(this)->symtab_section_);
6140 return symtab_section_;
6143 /// Return the "Official Procedure descriptors section." This
6144 /// section is named .opd, and is usually present only on PPC64
6147 /// @return the .opd section, if found. Return nil otherwise.
6149 find_opd_section() const
6152 const_cast<read_context*>(this)->opd_section_=
6153 find_section(elf_handle(), ".opd", SHT_PROGBITS);
6154 return opd_section_;
6157 /// Return the __ksymtab section of a linux kernel ELF file (either
6158 /// a vmlinux binary or a kernel module).
6160 /// @return the __ksymtab section if found, nil otherwise.
6162 find_ksymtab_section() const
6164 if (!ksymtab_section_)
6165 const_cast<read_context*>(this)->ksymtab_section_ =
6166 find_section(elf_handle(), "__ksymtab", SHT_PROGBITS);
6167 return ksymtab_section_;
6170 /// Return the .rel{a,}__ksymtab section of a linux kernel ELF file (either
6171 /// a vmlinux binary or a kernel module).
6173 /// @return the .rel{a,}__ksymtab section if found, nil otherwise.
6175 find_ksymtab_reloc_section() const
6177 if (!ksymtab_reloc_section_)
6179 Elf_Scn *sec = find_section(elf_handle(), ".rela__ksymtab", SHT_RELA);
6181 sec = find_section(elf_handle(), ".rel__ksymtab", SHT_REL);
6182 const_cast<read_context*>(this)->ksymtab_reloc_section_ = sec;
6184 return ksymtab_reloc_section_;
6187 /// Return the __ksymtab_gpl section of a linux kernel ELF file
6188 /// (either a vmlinux binary or a kernel module).
6190 /// @return the __ksymtab_gpl section if found, nil otherwise.
6192 find_ksymtab_gpl_section() const
6194 if (!ksymtab_gpl_section_)
6195 const_cast<read_context*>(this)->ksymtab_gpl_section_ =
6196 find_section(elf_handle(), "__ksymtab_gpl", SHT_PROGBITS);
6197 return ksymtab_gpl_section_;
6200 /// Return the .rel{a,}__ksymtab_gpl section of a linux kernel ELF file
6201 /// (either a vmlinux binary or a kernel module).
6203 /// @return the .rel{a,}__ksymtab_gpl section if found, nil otherwise.
6205 find_ksymtab_gpl_reloc_section() const
6207 if (!ksymtab_gpl_reloc_section_)
6209 Elf_Scn *sec = find_section(elf_handle(), ".rela__ksymtab_gpl", SHT_RELA);
6211 sec = find_section(elf_handle(), ".rel__ksymtab_gpl", SHT_REL);
6212 const_cast<read_context*>(this)->ksymtab_gpl_reloc_section_ = sec;
6214 return ksymtab_gpl_reloc_section_;
6217 /// Return the __ksymtab_strings section of a linux kernel ELF file
6218 /// (either a vmlinux binary or a kernel module).
6220 /// @return the __ksymtab_strings section if found, nil otherwise.
6222 find_ksymtab_strings_section() const
6224 if (!ksymtab_strings_section_)
6225 const_cast<read_context*>(this)->ksymtab_strings_section_ =
6226 dwarf_reader::find_ksymtab_strings_section(elf_handle());
6227 return ksymtab_strings_section_;
6230 /// Return either a __ksymtab or a __ksymtab_gpl section, in case
6231 /// only the __ksymtab_gpl exists.
6233 /// @return the __ksymtab section if it exists, or the
6234 /// __ksymtab_gpl; or NULL if neither is found.
6236 find_any_ksymtab_section() const
6238 Elf_Scn *result = find_ksymtab_section();
6240 result = find_ksymtab_gpl_section();
6244 /// Return either a .rel{a,}__ksymtab or a .rel{a,}__ksymtab_gpl section
6246 /// @return the .rel{a,}__ksymtab section if it exists, or the
6247 /// .rel{a,}__ksymtab_gpl; or NULL if neither is found.
6249 find_any_ksymtab_reloc_section() const
6251 Elf_Scn *result = find_ksymtab_reloc_section();
6253 result = find_ksymtab_gpl_reloc_section();
6257 /// Return the SHT_GNU_versym, SHT_GNU_verdef and SHT_GNU_verneed
6258 /// sections that are involved in symbol versionning.
6260 /// @param versym_section the SHT_GNU_versym section found.
6262 /// @param verdef_section the SHT_GNU_verdef section found.
6264 /// @param verneed_section the SHT_GNU_verneed section found.
6266 /// @return true iff the sections where found.
6268 get_symbol_versionning_sections(Elf_Scn*& versym_section,
6269 Elf_Scn*& verdef_section,
6270 Elf_Scn*& verneed_section)
6272 if (!symbol_versionning_sections_loaded_)
6274 symbol_versionning_sections_found_ =
6275 dwarf_reader::get_symbol_versionning_sections(elf_handle(),
6279 symbol_versionning_sections_loaded_ = true;
6282 versym_section = versym_section_;
6283 verdef_section = verdef_section_;
6284 verneed_section = verneed_section_;
6285 return symbol_versionning_sections_found_;
6288 /// Return the version for a symbol that is at a given index in its
6289 /// SHT_SYMTAB section.
6291 /// The first invocation of this function caches the results and
6292 /// subsequent invocations just return the cached results.
6294 /// @param symbol_index the index of the symbol to consider.
6296 /// @param get_def_version if this is true, it means that that we want
6297 /// the version for a defined symbol; in that case, the version is
6298 /// looked for in a section of type SHT_GNU_verdef. Otherwise, if
6299 /// this parameter is false, this means that we want the version for
6300 /// an undefined symbol; in that case, the version is the needed one
6301 /// for the symbol to be resolved; so the version is looked fo in a
6302 /// section of type SHT_GNU_verneed.
6304 /// @param version the version found for symbol at @p symbol_index.
6306 /// @return true iff a version was found for symbol at index @p
6309 get_version_for_symbol(size_t symbol_index,
6310 bool get_def_version,
6311 elf_symbol::version& version)
6313 Elf_Scn *versym_section = NULL,
6314 *verdef_section = NULL,
6315 *verneed_section = NULL;
6317 if (!get_symbol_versionning_sections(versym_section,
6322 GElf_Versym versym_mem;
6323 Elf_Data* versym_data = (versym_section)
6324 ? elf_getdata(versym_section, NULL)
6326 GElf_Versym* versym = (versym_data)
6327 ? gelf_getversym(versym_data, symbol_index, &versym_mem)
6330 if (versym == 0 || *versym <= 1)
6331 // I got these value from the code of readelf.c in elfutils.
6332 // Apparently, if the symbol version entry has these values, the
6333 // symbol must be discarded. This is not documented in the
6334 // official specification.
6337 if (get_def_version)
6339 if (*versym == 0x8001)
6340 // I got this value from the code of readelf.c in elfutils
6341 // too. It's not really documented in the official
6346 && get_version_definition_for_versym(elf_handle(), versym,
6347 verdef_section, version))
6353 && get_version_needed_for_versym(elf_handle(), versym,
6354 verneed_section, version))
6361 /// Look into the symbol tables of the underlying elf file and see
6362 /// if we find a given symbol.
6364 /// @param symbol_name the name of the symbol to look for.
6366 /// @param demangle if true, demangle the symbols found in the symbol
6369 /// @param syms the vector of symbols with the name @p symbol_name
6370 /// that were found.
6372 /// @return true iff the symbol was found.
6374 lookup_symbol_from_elf(const string& symbol_name,
6376 vector<elf_symbol_sptr>& syms) const
6378 return dwarf_reader::lookup_symbol_from_elf(env(),
6385 /// Lookup an elf symbol, referred to by its index, from the .symtab
6388 /// The resulting symbol returned is an instance of a GElf_Sym, from
6389 /// the libelf library.
6391 /// @param symbol_index the index of the symbol to look up.
6393 /// @param elf_sym out parameter. This is set to the resulting ELF
6394 /// symbol iff the function returns TRUE, meaning the symbol was
6397 /// @return TRUE iff the symbol was found.
6399 lookup_native_elf_symbol_from_index(size_t symbol_index, GElf_Sym &elf_sym)
6401 Elf_Scn* symtab_section = find_symbol_table_section();
6402 if (!symtab_section)
6405 Elf_Data* symtab = elf_getdata(symtab_section, 0);
6408 if (!gelf_getsym(symtab, symbol_index, &elf_sym))
6414 /// Given the index of a symbol into the symbol table of an ELF
6415 /// file, look the symbol up, build an instace of @ref elf_symbol
6418 /// @param symbol_index the index of the symbol into the symbol
6419 /// table of the current elf file.
6421 /// @return the elf symbol found or nil if none was found.
6423 lookup_elf_symbol_from_index(size_t symbol_index)
6426 elf_symbol_sptr result =
6427 lookup_elf_symbol_from_index(symbol_index, s);
6431 /// Lookup an ELF symbol given its index into the .symtab section.
6433 /// This function returns both the native symbol (from libelf) and
6434 /// the @p abigail::ir::elf_symbol instance, which is the
6435 /// libabigail-specific representation of the symbol.
6437 /// @param symbol_index the index of the symbol to look for.
6439 /// @param native_sym output parameter. This is set to the native
6440 /// ELF symbol found iff the function returns a non-nil value.
6442 /// @return an instance of libabigail::ir::elf_symbol representing
6443 /// the ELF symbol found, iff one was found. Otherwise, returns
6446 lookup_elf_symbol_from_index(size_t symbol_index,
6447 GElf_Sym &native_sym)
6449 if (!lookup_native_elf_symbol_from_index(symbol_index, native_sym))
6450 return elf_symbol_sptr();
6452 Elf_Scn* symtab_section = find_symbol_table_section();
6453 if (!symtab_section)
6454 return elf_symbol_sptr();
6456 GElf_Shdr header_mem;
6457 GElf_Shdr* symtab_sheader = gelf_getshdr(symtab_section,
6460 Elf_Data* symtab = elf_getdata(symtab_section, 0);
6463 bool sym_is_defined = native_sym.st_shndx != SHN_UNDEF;
6464 bool sym_is_common = native_sym.st_shndx == SHN_COMMON; // this occurs in
6467 const char* name_str = elf_strptr(elf_handle(),
6468 symtab_sheader->sh_link,
6469 native_sym.st_name);
6473 elf_symbol::version ver;
6474 get_version_for_symbol(symbol_index,
6478 elf_symbol::visibility vis =
6479 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(native_sym.st_other));
6481 Elf_Scn *strings_section = find_ksymtab_strings_section();
6482 size_t strings_ndx = strings_section
6483 ? elf_ndxscn(strings_section)
6486 elf_symbol_sptr sym =
6487 elf_symbol::create(env(), symbol_index, native_sym.st_size,
6488 name_str, stt_to_elf_symbol_type
6489 (GELF_ST_TYPE(native_sym.st_info)),
6490 stb_to_elf_symbol_binding
6491 (GELF_ST_BIND(native_sym.st_info)),
6492 sym_is_defined, sym_is_common, ver, vis,
6493 native_sym.st_shndx == strings_ndx);
6497 /// Read 8 bytes and convert their value into an uint64_t.
6499 /// @param bytes the array of bytes to read the next 8 bytes from.
6500 /// Note that this array must be at least 8 bytes long.
6502 /// @param result where to store the resuting uint64_t that was read.
6504 /// @param is_big_endian if true, read the 8 bytes in Big Endian
6505 /// mode, otherwise, read them in Little Endian.
6507 /// @param true if the 8 bytes could be read, false otherwise.
6509 read_uint64_from_array_of_bytes(const uint8_t *bytes,
6511 uint64_t &result) const
6513 return read_int_from_array_of_bytes(bytes, 8, is_big_endian, result);
6516 /// Read N bytes and convert their value into an integer type T.
6518 /// Note that N cannot be bigger than 8 for now. The type passed needs to be
6519 /// at least of the size of number_of_bytes.
6521 /// @param bytes the array of bytes to read the next 8 bytes from.
6522 /// Note that this array must be at least 8 bytes long.
6524 /// @param number_of_bytes the number of bytes to read. This number
6525 /// cannot be bigger than 8.
6527 /// @param is_big_endian if true, read the 8 bytes in Big Endian
6528 /// mode, otherwise, read them in Little Endian.
6530 /// @param result where to store the resuting integer that was read.
6533 /// @param true if the 8 bytes could be read, false otherwise.
6534 template<typename T>
6536 read_int_from_array_of_bytes(const uint8_t *bytes,
6537 unsigned char number_of_bytes,
6544 ABG_ASSERT(number_of_bytes <= 8);
6545 ABG_ASSERT(number_of_bytes <= sizeof(T));
6549 const uint8_t *cur = bytes;
6552 // In Big Endian, the most significant byte is at the lowest
6554 const uint8_t* msb = cur;
6557 // Now read the remaining least significant bytes.
6558 for (uint i = 1; i < number_of_bytes; ++i)
6559 res = (res << 8) | ((T)msb[i]);
6563 // In Little Endian, the least significant byte is at the
6565 const uint8_t* lsb = cur;
6567 // Now read the remaining most significant bytes.
6568 for (uint i = 1; i < number_of_bytes; ++i)
6569 res = res | (((T)lsb[i]) << i * 8);
6576 /// Lookup the address of the function entry point that corresponds
6577 /// to the address of a given function descriptor.
6579 /// On PPC64, a function pointer is the address of a function
6580 /// descriptor. Function descriptors are located in the .opd
6581 /// section. Each function descriptor is a triplet of three
6582 /// addresses, each one on 64 bits. Among those three address only
6583 /// the first one is of any interest to us: the address of the entry
6584 /// point of the function.
6586 /// This function returns the address of the entry point of the
6587 /// function whose descriptor's address is given.
6589 /// http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-DES
6591 /// https://www.ibm.com/developerworks/community/blogs/5894415f-be62-4bc0-81c5-3956e82276f3/entry/deeply_understand_64_bit_powerpc_elf_abi_function_descriptors?lang=en
6593 /// @param fn_desc_address the address of the function descriptor to
6596 /// @return the address of the entry point of the function whose
6597 /// descriptor has the address @p fn_desc_address. If there is no
6598 /// .opd section (e.g because we are not on ppc64) or more generally
6599 /// if the function descriptor could not be found then this function
6600 /// just returns the address of the fuction descriptor.
6602 lookup_ppc64_elf_fn_entry_point_address(GElf_Addr fn_desc_address) const
6605 return fn_desc_address;
6607 if (!elf_architecture_is_ppc64())
6608 return fn_desc_address;
6610 bool is_big_endian = elf_architecture_is_big_endian();
6612 Elf_Scn *opd_section = find_opd_section();
6614 return fn_desc_address;
6616 GElf_Shdr header_mem;
6617 // The section header of the .opd section.
6618 GElf_Shdr *opd_sheader = gelf_getshdr(opd_section, &header_mem);
6620 // The offset of the function descriptor entry, in the .opd
6622 size_t fn_desc_offset = fn_desc_address - opd_sheader->sh_addr;
6623 Elf_Data *elf_data = elf_rawdata(opd_section, 0);
6625 // Ensure that the opd_section has at least 8 bytes, starting from
6626 // the offset we want read the data from.
6627 if (elf_data->d_size <= fn_desc_offset + 8)
6628 return fn_desc_address;
6630 // A pointer to the data of the .opd section, that we can actually
6631 // do something with.
6632 uint8_t * bytes = (uint8_t*) elf_data->d_buf;
6634 // The resulting address we are looking for is going to be formed
6635 // in this variable.
6636 GElf_Addr result = 0;
6637 ABG_ASSERT(read_uint64_from_array_of_bytes(bytes + fn_desc_offset,
6638 is_big_endian, result));
6643 /// Given the address of the beginning of a function, lookup the
6644 /// symbol of the function, build an instance of @ref elf_symbol out
6645 /// of it and return it.
6647 /// @param symbol_start_addr the address of the beginning of the
6648 /// function to consider.
6650 /// @param sym the resulting symbol. This is set iff the function
6653 /// @return the elf symbol found at address @p symbol_start_addr, or
6654 /// nil if none was found.
6656 lookup_elf_fn_symbol_from_address(GElf_Addr symbol_start_addr) const
6658 addr_elf_symbol_sptr_map_type::const_iterator i,
6659 nil = fun_entry_addr_sym_map().end();
6661 if ((i = fun_entry_addr_sym_map().find(symbol_start_addr)) == nil)
6662 return elf_symbol_sptr();
6667 /// Given the address of a global variable, lookup the symbol of the
6668 /// variable, build an instance of @ref elf_symbol out of it and
6671 /// @param symbol_start_addr the address of the beginning of the
6672 /// variable to consider.
6674 /// @param the symbol found, iff the function returns true.
6676 /// @return the elf symbol found or nil if none was found.
6678 lookup_elf_var_symbol_from_address(GElf_Addr symbol_start_addr) const
6680 addr_elf_symbol_sptr_map_type::const_iterator i,
6681 nil = var_addr_sym_map().end();
6683 if ((i = var_addr_sym_map().find(symbol_start_addr)) == nil)
6684 return elf_symbol_sptr();
6689 /// Lookup an elf symbol, knowing its address.
6691 /// This function first looks for a function symbol having this
6692 /// address; if it doesn't find any, then it looks for a variable
6695 /// @param symbol_addr the address of the symbol of the symbol we
6696 /// are looking for. Note that the address is a relative offset
6697 /// starting from the beginning of the .text section. Addresses
6698 /// that are presen in the symbol table (the one named .symtab).
6700 /// @return the elf symbol if found, or nil otherwise.
6702 lookup_elf_symbol_from_address(GElf_Addr symbol_addr) const
6704 elf_symbol_sptr result = lookup_elf_fn_symbol_from_address(symbol_addr);
6706 result = lookup_elf_var_symbol_from_address(symbol_addr);
6710 /// Look in the symbol tables of the underying elf file and see if
6711 /// we find a symbol of a given name of function type.
6713 /// @param sym_name the name of the symbol to look for.
6715 /// @param syms the public function symbols that were found, with
6716 /// the name @p sym_name.
6718 /// @return true iff the symbol was found.
6720 lookup_public_function_symbol_from_elf(const string& sym_name,
6721 vector<elf_symbol_sptr>& syms)
6723 return dwarf_reader::lookup_public_function_symbol_from_elf(env(),
6729 /// Look in the symbol tables of the underying elf file and see if
6730 /// we find a symbol of a given name of variable type.
6732 /// @param sym_name the name of the symbol to look for.
6734 /// @param syms the variable symbols that were found, with the name
6737 /// @return true iff the symbol was found.
6739 lookup_public_variable_symbol_from_elf(const string& sym_name,
6740 vector<elf_symbol_sptr>& syms)
6742 return dwarf_reader::lookup_public_variable_symbol_from_elf(env(),
6748 /// Test if a given function symbol has been exported.
6750 /// @param symbol_address the address of the symbol we are looking
6751 /// for. Note that this address must be a relative offset from the
6752 /// beginning of the .text section, just like the kind of addresses
6753 /// that are present in the .symtab section.
6755 /// @returnthe elf symbol if found, or nil otherwise.
6757 function_symbol_is_exported(GElf_Addr symbol_address) const
6759 elf_symbol_sptr symbol = lookup_elf_fn_symbol_from_address(symbol_address);
6763 if (!symbol->is_public())
6764 return elf_symbol_sptr();
6766 address_set_sptr set;
6767 bool looking_at_linux_kernel_binary =
6768 load_in_linux_kernel_mode() && is_linux_kernel_binary();
6770 if (looking_at_linux_kernel_binary)
6772 if ((set = linux_exported_fn_syms()))
6774 if (set->find(symbol_address) != set->end())
6777 if ((set = linux_exported_gpl_fn_syms()))
6779 if (set->find(symbol_address) != set->end())
6782 return elf_symbol_sptr();
6788 /// Test if a given variable symbol has been exported.
6790 /// @param symbol_address the address of the symbol we are looking
6791 /// for. Note that this address must be a relative offset from the
6792 /// beginning of the .text section, just like the kind of addresses
6793 /// that are present in the .symtab section.
6795 /// @returnthe elf symbol if found, or nil otherwise.
6797 variable_symbol_is_exported(GElf_Addr symbol_address) const
6799 elf_symbol_sptr symbol = lookup_elf_var_symbol_from_address(symbol_address);
6803 if (!symbol->is_public())
6804 return elf_symbol_sptr();
6806 address_set_sptr set;
6807 bool looking_at_linux_kernel_binary =
6808 load_in_linux_kernel_mode() && is_linux_kernel_binary();
6810 if (looking_at_linux_kernel_binary)
6812 if ((set = linux_exported_var_syms()))
6814 if (set->find(symbol_address) != set->end())
6817 if ((set = linux_exported_gpl_var_syms()))
6819 if (set->find(symbol_address) != set->end())
6822 return elf_symbol_sptr();
6828 /// Getter for the map of function address -> symbol.
6830 /// @return the function address -> symbol map.
6831 const addr_elf_symbol_sptr_map_sptr
6832 fun_addr_sym_map_sptr() const
6834 maybe_load_symbol_maps();
6835 return fun_addr_sym_map_;
6838 /// Getter for the map of function address -> symbol.
6840 /// @return the function address -> symbol map.
6841 addr_elf_symbol_sptr_map_sptr
6842 fun_addr_sym_map_sptr()
6844 maybe_load_symbol_maps();
6845 return fun_addr_sym_map_;
6848 /// Getter for the map of function symbol address -> function symbol
6851 /// @return the map. Note that this initializes the map once when
6853 const addr_elf_symbol_sptr_map_type&
6854 fun_addr_sym_map() const
6856 maybe_load_symbol_maps();
6857 return *fun_addr_sym_map_;
6860 /// Getter for the map of function symbol address -> function symbol
6863 /// @return the map. Note that this initializes the map once when
6865 addr_elf_symbol_sptr_map_type&
6868 maybe_load_symbol_maps();
6869 return *fun_addr_sym_map_;
6872 /// Getter for a pointer to the map that associates the address of
6873 /// an entry point of a function with the symbol of that function.
6875 /// Note that on non-"PPC64 ELFv1" binaries, this map is the same as
6876 /// the one that assciates the address of a function with the symbol
6877 /// of that function.
6879 /// @return a pointer to the map that associates the address of an
6880 /// entry point of a function with the symbol of that function.
6881 addr_elf_symbol_sptr_map_sptr&
6882 fun_entry_addr_sym_map_sptr()
6884 if (!fun_entry_addr_sym_map_ && !fun_addr_sym_map_)
6885 maybe_load_symbol_maps();
6886 if (elf_architecture_is_ppc64())
6887 return fun_entry_addr_sym_map_;
6888 return fun_addr_sym_map_;
6891 /// Getter for a pointer to the map that associates the address of
6892 /// an entry point of a function with the symbol of that function.
6894 /// Note that on non-"PPC64 ELFv1" binaries, this map is the same as
6895 /// the one that assciates the address of a function with the symbol
6896 /// of that function.
6898 /// @return a pointer to the map that associates the address of an
6899 /// entry point of a function with the symbol of that function.
6900 const addr_elf_symbol_sptr_map_sptr&
6901 fun_entry_addr_sym_map_sptr() const
6902 {return const_cast<read_context*>(this)->fun_entry_addr_sym_map_sptr();}
6905 /// Getter for the map that associates the address of an entry point
6906 /// of a function with the symbol of that function.
6908 /// Note that on non-"PPC64 ELFv1" binaries, this map is the same as
6909 /// the one that assciates the address of a function with the symbol
6910 /// of that function.
6912 /// @return the map that associates the address of an entry point of
6913 /// a function with the symbol of that function.
6914 addr_elf_symbol_sptr_map_type&
6915 fun_entry_addr_sym_map()
6916 {return *fun_entry_addr_sym_map_sptr();}
6918 /// Getter for the map that associates the address of an entry point
6919 /// of a function with the symbol of that function.
6921 /// Note that on non-"PPC64 ELFv1" binaries, this map is the same as
6922 /// the one that assciates the address of a function with the symbol
6923 /// of that function.
6925 /// @return the map that associates the address of an entry point of
6926 /// a function with the symbol of that function.
6927 const addr_elf_symbol_sptr_map_type&
6928 fun_entry_addr_sym_map() const
6929 { return *fun_entry_addr_sym_map_sptr();}
6931 /// Getter for the map of function symbols (name -> sym).
6933 /// @return a shared pointer to the map of function symbols.
6934 const string_elf_symbols_map_sptr&
6935 fun_syms_sptr() const
6937 maybe_load_symbol_maps();
6941 /// Getter for the map of function symbols (name -> sym).
6943 /// @return a shared pointer to the map of function symbols.
6944 string_elf_symbols_map_sptr&
6947 maybe_load_symbol_maps();
6951 /// Getter for the map of function symbols (name -> sym).
6953 /// @return a reference to the map of function symbols.
6954 const string_elf_symbols_map_type&
6957 maybe_load_symbol_maps();
6961 /// Getter for the map of function symbols (name -> sym).
6963 /// @return a reference to the map of function symbols.
6964 string_elf_symbols_map_type&
6967 maybe_load_symbol_maps();
6971 /// Getter for the map of variable symbols (name -> sym)
6973 /// @return a shared pointer to the map of variable symbols.
6974 const string_elf_symbols_map_sptr
6975 var_syms_sptr() const
6977 maybe_load_symbol_maps();
6981 /// Getter for the map of variable symbols (name -> sym)
6983 /// @return a shared pointer to the map of variable symbols.
6984 string_elf_symbols_map_sptr
6987 maybe_load_symbol_maps();
6991 /// Getter for the map of variable symbols (name -> sym)
6993 /// @return a reference to the map of variable symbols.
6994 const string_elf_symbols_map_type&
6997 maybe_load_symbol_maps();
7001 /// Getter for the map of variable symbols (name -> sym)
7003 /// @return a reference to the map of variable symbols.
7004 string_elf_symbols_map_type&
7007 maybe_load_symbol_maps();
7011 /// Getter for the map of undefined function symbols (name -> vector
7014 /// @return a (smart) pointer to the map of undefined function
7016 const string_elf_symbols_map_sptr&
7017 undefined_fun_syms_sptr() const
7019 maybe_load_symbol_maps();
7020 return undefined_fun_syms_;
7023 /// Getter for the map of undefined function symbols (name -> vector
7026 /// @return a (smart) pointer to the map of undefined function
7028 string_elf_symbols_map_sptr&
7029 undefined_fun_syms_sptr()
7031 maybe_load_symbol_maps();
7032 return undefined_fun_syms_;
7035 /// Getter for the map of undefined function symbols (name -> vector
7038 /// @return a reference to the map of undefined function symbols.
7039 const string_elf_symbols_map_type&
7040 undefined_fun_syms() const
7042 maybe_load_symbol_maps();
7043 return *undefined_fun_syms_;
7046 /// Getter for the map of undefined function symbols (name -> vector
7049 /// @return a reference to the map of undefined function symbols.
7050 string_elf_symbols_map_type&
7051 undefined_fun_syms()
7053 maybe_load_symbol_maps();
7054 return *undefined_fun_syms_;
7057 /// Getter for the map of undefined variable symbols (name -> vector
7060 /// @return a (smart) pointer to the map of undefined variable
7062 const string_elf_symbols_map_sptr&
7063 undefined_var_syms_sptr() const
7065 maybe_load_symbol_maps();
7066 return undefined_var_syms_;
7069 /// Getter for the map of undefined variable symbols (name -> vector
7072 /// @return a (smart) pointer to the map of undefined variable
7074 string_elf_symbols_map_sptr&
7075 undefined_var_syms_sptr()
7077 maybe_load_symbol_maps();
7078 return undefined_var_syms_;
7081 /// Getter for the map of undefined variable symbols (name -> vector
7084 /// @return a reference to the map of undefined variable symbols.
7085 const string_elf_symbols_map_type&
7086 undefined_var_syms() const
7088 maybe_load_symbol_maps();
7089 return *undefined_var_syms_;
7092 /// Getter for the map of undefined variable symbols (name -> vector
7095 /// @return a reference to the map of undefined variable symbols.
7096 string_elf_symbols_map_type&
7097 undefined_var_syms()
7099 maybe_load_symbol_maps();
7100 return *undefined_var_syms_;
7103 /// Getter for the set of addresses of function symbols that are
7104 /// explicitely exported, for a linux kernel (module) binary. These
7105 /// are the addresses of function symbols present in the __ksymtab
7108 linux_exported_fn_syms()
7109 {return linux_exported_fn_syms_;}
7111 /// Getter for the set of addresses of functions that are
7112 /// explicitly exported, for a linux kernel (module) binary.  These
7113 /// are the addresses of function symbols present in the __ksymtab
/// section.
///
/// This const overload simply forwards to the non-const getter.
///
7116 /// @return the set of addresses of exported function symbols.
7117 const address_set_sptr&
7118 linux_exported_fn_syms() const
7119 {return const_cast<read_context*>(this)->linux_exported_fn_syms();}
7121 /// Create an empty set of addresses of functions exported from a
7122 /// linux kernel (module) binary, or return the one that already
/// exists.
///
7125 /// @return the set of addresses of exported function symbols.
7127 create_or_get_linux_exported_fn_syms()
// Allocate the set only on first use; later calls return the same one.
7129 if (!linux_exported_fn_syms_)
7130 linux_exported_fn_syms_.reset(new address_set_type);
7131 return linux_exported_fn_syms_;
7134 /// Getter for the set of addresses of variables that are
7135 /// explicitly exported, for a linux kernel (module) binary.  These
7136 /// are the addresses of variable symbols present in the __ksymtab
/// section.
///
/// Note that this getter does not allocate the set; it only returns
/// the data member as it currently is.
///
7139 /// @return the set of addresses of exported variable symbols.
7141 linux_exported_var_syms()
7142 {return linux_exported_var_syms_;}
7144 /// Getter for the set of addresses of variables that are
7145 /// explicitly exported, for a linux kernel (module) binary.  These
7146 /// are the addresses of variable symbols present in the __ksymtab
/// section.
///
/// This const overload simply forwards to the non-const getter.
///
7149 /// @return the set of addresses of exported variable symbols.
7150 const address_set_sptr&
7151 linux_exported_var_syms() const
7152 {return const_cast<read_context*>(this)->linux_exported_var_syms();}
7155 /// Create an empty set of addresses of variables exported from a
7156 /// linux kernel (module) binary, or return the one that already
/// exists.
///
7159 /// @return the set of addresses of exported variable symbols.
7161 create_or_get_linux_exported_var_syms()
// Allocate the set only on first use; later calls return the same one.
7163 if (!linux_exported_var_syms_)
7164 linux_exported_var_syms_.reset(new address_set_type);
7165 return linux_exported_var_syms_;
7169 /// Getter for the set of addresses of function symbols that are
7170 /// explicitly exported as GPL, for a linux kernel (module) binary.
7171 /// These are the addresses of function symbols present in the
7172 /// __ksymtab_gpl section.
///
/// Note that this getter does not allocate the set; it only returns
/// the data member as it currently is.
///
/// @return the set of addresses of GPL-exported function symbols.
7174 linux_exported_gpl_fn_syms()
7175 {return linux_exported_gpl_fn_syms_;}
7177 /// Getter for the set of addresses of function symbols that are
7178 /// explicitly exported as GPL, for a linux kernel (module) binary.
7179 /// These are the addresses of function symbols present in the
7180 /// __ksymtab_gpl section.
///
/// This const overload simply forwards to the non-const getter.
///
/// @return the set of addresses of GPL-exported function symbols.
7181 const address_set_sptr&
7182 linux_exported_gpl_fn_syms() const
7183 {return const_cast<read_context*>(this)->linux_exported_gpl_fn_syms();}
7185 /// Create an empty set of addresses of GPL functions exported from
7186 /// a linux kernel (module) binary, or return the one that already
/// exists.
///
7189 /// @return the set of addresses of exported function symbols.
7191 create_or_get_linux_exported_gpl_fn_syms()
// Allocate the set only on first use; later calls return the same one.
7193 if (!linux_exported_gpl_fn_syms_)
7194 linux_exported_gpl_fn_syms_.reset(new address_set_type);
7195 return linux_exported_gpl_fn_syms_;
7198 /// Getter for the set of addresses of variable symbols that are
7199 /// explicitly exported as GPL, for a linux kernel (module) binary.
7200 /// These are the addresses of variable symbols present in the
7201 /// __ksymtab_gpl section.
///
/// Note that this getter does not allocate the set; it only returns
/// the data member as it currently is.
///
/// @return the set of addresses of GPL-exported variable symbols.
7203 linux_exported_gpl_var_syms()
7204 {return linux_exported_gpl_var_syms_;}
7206 /// Getter for the set of addresses of variable symbols that are
7207 /// explicitly exported as GPL, for a linux kernel (module) binary.
7208 /// These are the addresses of variable symbols present in the
7209 /// __ksymtab_gpl section.
///
/// This const overload simply forwards to the non-const getter.
///
/// @return the set of addresses of GPL-exported variable symbols.
7210 const address_set_sptr&
7211 linux_exported_gpl_var_syms() const
7212 {return const_cast<read_context*>(this)->linux_exported_gpl_var_syms();}
7214 /// Create an empty set of addresses of GPL variables exported from
7215 /// a linux kernel (module) binary, or return the one that already
/// exists.
///
7218 /// @return the set of addresses of exported variable symbols.
7220 create_or_get_linux_exported_gpl_var_syms()
// Allocate the set only on first use; later calls return the same one.
7222 if (!linux_exported_gpl_var_syms_)
7223 linux_exported_gpl_var_syms_.reset(new address_set_type);
7224 return linux_exported_gpl_var_syms_;
7227 /// Getter for the ELF dt_needed tag.
///
/// @return a reference to the vector of strings stored in the
/// dt_needed_ data member.
7228 const vector<string>&
7230 {return dt_needed_;}
7232 /// Getter for the ELF dt_soname tag.
///
/// @return the value of the dt_soname_ data member.
7235 {return dt_soname_;}
7237 /// Getter for the ELF architecture of the current file.
///
/// @return the value of the elf_architecture_ data member.
7239 elf_architecture() const
7240 {return elf_architecture_;}
7242 /// Return the size of a word for the current architecture.
///
/// The size is derived from the EI_CLASS byte of the ELF header
/// (ELFCLASS32 versus ELFCLASS64).
///
7243 /// @return the size of a word.  NOTE(review): presumably expressed
/// in bytes -- confirm.
7245 architecture_word_size() const
7247 unsigned char word_size = 0;
// NOTE(review): gelf_getehdr() returns NULL on failure and the
// result is dereferenced below without a check -- confirm that
// callers always have a valid ELF handle at this point.
7249 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
7250 if (elf_header->e_ident[EI_CLASS] == ELFCLASS32)
7252 else if (elf_header->e_ident[EI_CLASS] == ELFCLASS64)
// Any other ELF class is unexpected.
7255 ABG_ASSERT_NOT_REACHED;
7259 /// Test if the architecture of the current binary is ppc64.
///
/// The test compares the e_machine field of the ELF header against
/// EM_PPC64.  If the ELF header cannot be read, the result is false.
///
7261 /// @return true iff the architecture of the current binary is ppc64.
7263 elf_architecture_is_ppc64() const
7266 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
// A NULL header (gelf_getehdr() failure) yields false.
7268 return (elf_header && elf_header->e_machine == EM_PPC64);
7271 /// Test if the endianness of the current binary is Big Endian.
///
/// The test looks at the EI_DATA byte of the ELF header.  See
7273 /// https://en.wikipedia.org/wiki/Endianness.
///
7275 /// @return true iff the current binary is Big Endian.
7277 elf_architecture_is_big_endian() const
// NOTE(review): the result of gelf_getehdr() is dereferenced below
// without a null check -- confirm callers guarantee a valid handle.
7280 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
7282 bool is_big_endian = (elf_header->e_ident[EI_DATA] == ELFDATA2MSB);
// Sanity check: a binary that is not big endian is expected to be
// little endian (presumably this only runs when is_big_endian is
// false -- confirm).
7285 ABG_ASSERT(elf_header->e_ident[EI_DATA] == ELFDATA2LSB);
7287 return is_big_endian;
7290 /// Test if the current elf file being read is an executable.
///
/// The test compares the e_type field of the ELF header against
/// ET_EXEC.  If the ELF header cannot be read, the file is not
/// considered to be an executable.
///
7292 /// @return true iff the current elf file being read is an
/// executable.
7295 current_elf_file_is_executable() const
7298 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
// gelf_getehdr() returns NULL on failure; guard the dereference the
// same way elf_architecture_is_ppc64() does.
7299 return (elf_header && elf_header->e_type == ET_EXEC);
7302 /// Test if the current elf file being read is a dynamic shared
/// object.
///
/// The test compares the e_type field of the ELF header against
/// ET_DYN.  If the ELF header cannot be read, the file is not
/// considered to be a dynamic shared object.
///
7305 /// @return true iff the current elf file being read is a
7306 /// dynamic shared object.
7308 current_elf_file_is_dso() const
7311 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
// gelf_getehdr() returns NULL on failure; guard the dereference the
// same way elf_architecture_is_ppc64() does.
7312 return (elf_header && elf_header->e_type == ET_DYN);
7315 /// Getter for the map of global variables symbol address -> global
7316 /// variable symbol.
///
/// This const overload simply forwards to the non-const getter.
///
7318 /// @return the map.  Note that this initializes the map once, when
/// it is first needed.
7320 const addr_elf_symbol_sptr_map_type&
7321 var_addr_sym_map() const
7322 {return const_cast<read_context*>(this)->var_addr_sym_map();}
7324 /// Getter for the map of global variables symbol address -> global
7325 /// variable symbol.
///
7327 /// @return the map.  Note that this initializes the map once, when
/// it is first needed.
7329 addr_elf_symbol_sptr_map_type&
// Load the symbol maps lazily, only if this map is not yet allocated.
7332 if (!var_addr_sym_map_)
7333 maybe_load_symbol_maps();
7334 return *var_addr_sym_map_;
7337 /// Load the maps address -> function symbol, address -> variable
7338 /// symbol and the maps of function and variable undefined symbols.
///
/// The symbols are read from the section returned by
/// find_symbol_table_section().  Public function (resp. variable)
/// symbols are also recorded in the name -> symbols maps fun_syms_
/// (resp. var_syms_).  Symbols that share an address with an
/// already-registered symbol are added as aliases of that symbol's
/// main symbol.
///
7340 /// @param load_fun_map whether to load the address to function map.
///
7342 /// @param load_var_map whether to load the address to variable map.
///
7344 /// @param load_undefined_fun_map whether to load the undefined
/// function map.
///
7347 /// @param load_undefined_var_map whether to load the undefined
/// variable map.
///
7350 /// @return true iff the maps have been loaded.
7352 load_symbol_maps_from_symtab_section(bool load_fun_map,
7354 bool load_undefined_fun_map,
7355 bool load_undefined_var_map)
7357 Elf_Scn* symtab_section = find_symbol_table_section();
7358 if (!symtab_section)
7361 GElf_Shdr header_mem;
7362 GElf_Shdr* symtab_sheader = gelf_getshdr(symtab_section,
// Guard against a malformed section header whose sh_entsize is zero;
// dividing by it would otherwise trap.  In that case no symbol is
// walked.
7364 size_t nb_syms = (symtab_sheader->sh_entsize
? symtab_sheader->sh_size / symtab_sheader->sh_entsize
: 0);
7366 Elf_Data* symtab = elf_getdata(symtab_section, 0);
7369 GElf_Ehdr elf_header;
7370 ABG_ASSERT(gelf_getehdr(elf_handle(), &elf_header));
7372 bool is_ppc64 = elf_architecture_is_ppc64();
7374 for (size_t i = 0; i < nb_syms; ++i)
7376 GElf_Sym* sym, sym_mem;
7377 sym = gelf_getsym(symtab, i, &sym_mem);
// First case: function symbols (STT_FUNC or STT_GNU_IFUNC).
7380 if ((load_fun_map || load_undefined_fun_map)
7381 && (GELF_ST_TYPE(sym->st_info) == STT_FUNC
7382 || GELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC))
7384 elf_symbol_sptr symbol = lookup_elf_symbol_from_index(i);
7386 ABG_ASSERT(symbol->is_function());
7389 if (load_fun_map && symbol->is_public())
// Record the symbol in the name -> symbols map, creating the
// entry for that name if it does not exist yet.
7392 string_elf_symbols_map_type::iterator it =
7393 fun_syms_->find(symbol->get_name());
7394 if (it == fun_syms_->end())
7396 (*fun_syms_)[symbol->get_name()] = elf_symbols();
7397 it = fun_syms_->find(symbol->get_name());
7400 it->second.push_back(symbol);
// Record the symbol in the address -> symbol map.  If the
// address is already taken, the symbol becomes an alias of the
// main symbol registered at that address.
7404 GElf_Addr symbol_value =
7405 maybe_adjust_et_rel_sym_addr_to_abs_addr(sym);
7407 addr_elf_symbol_sptr_map_type::const_iterator it =
7408 fun_addr_sym_map_->find(symbol_value);
7409 if (it == fun_addr_sym_map_->end())
7410 (*fun_addr_sym_map_)[symbol_value] = symbol;
7411 else //if (sym->st_value != 0)
7412 it->second->get_main_symbol()->add_alias(symbol);
7416 // For ppc64 ELFv1 binaries, we need to build a
7417 // function entry point address -> function
7418 // symbol map. This is in addition to the
7419 // function pointer -> symbol map. This is
7420 // because on ppc64 ELFv1, a function pointer is
7421 // different from a function entry point
// address.
//
7424 // On ppc64 ELFv1, the DWARF DIE of a function
7425 // references the address of the entry point of
7426 // the function symbol; whereas the value of the
7427 // function symbol is the function pointer. As
7428 // these addresses are different, if we want
7429 // to get to the symbol of a function from its
7430 // entry point address (as referenced by DWARF
7431 // function DIEs) we must have the two maps
7432 // mentioned right above.
//
7434 // In other words, we need a map that associates
7435 // a function entry point address with the symbol
7436 // of that function, to be able to get the
7437 // function symbol that corresponds to a given
7438 // function DIE, on ppc64.
//
7440 // The value of the function pointer (the value
7441 // of the symbol) usually refers to the offset
7442 // of a table in the .opd section. But
7443 // sometimes, for a symbol named "foo", there is a
7444 // corresponding symbol named ".foo" (note the
7445 // dot before foo) whose value is the entry
7446 // point address of the function; that entry
7447 // point address refers to a region in the .text
// section.
//
7450 // So we are only interested in values of the
7451 // symbol that are in the .opd section.
7452 GElf_Addr fn_desc_addr = sym->st_value;
7453 GElf_Addr fn_entry_point_addr =
7454 lookup_ppc64_elf_fn_entry_point_address(fn_desc_addr);
7455 addr_elf_symbol_sptr_map_type::const_iterator it2 =
7456 fun_entry_addr_sym_map().find(fn_entry_point_addr);
7458 if (it2 == fun_entry_addr_sym_map().end())
7459 fun_entry_addr_sym_map()[fn_entry_point_addr] = symbol;
7460 else if (address_is_in_opd_section(fn_desc_addr))
// Either:
//
7464 // 'symbol' must have been registered as an
7465 // alias for it2->second->get_main_symbol(),
7466 // right before the "if (ppc64)" statement.
//
// Or:
//
7470 // if the name of 'symbol' is foo, then the
7471 // name of it2->second is ".foo". That is,
7472 // foo is the name of the symbol when it
7473 // refers to the function descriptor in the
7474 // .opd section and ".foo" is an internal
7475 // name for the address of the entry point
// of the function.
//
7478 // In the latter case, we just want to keep
7479 // a reference to "foo" as .foo is an
// internal name.
7482 bool two_symbols_alias =
7483 it2->second->get_main_symbol()->does_alias(*symbol);
7484 bool symbol_is_foo_and_prev_symbol_is_dot_foo =
7485 (it2->second->get_name()
7486 == string(".") + symbol->get_name());
7488 ABG_ASSERT(two_symbols_alias
7489 || symbol_is_foo_and_prev_symbol_is_dot_foo);
7491 if (symbol_is_foo_and_prev_symbol_is_dot_foo)
7492 // Let's just keep a reference of the
7493 // symbol that the user sees in the source
7494 // code (the one named foo). The symbol
7495 // which name is prefixed with a "dot" is
7496 // an artificial one.
7497 fun_entry_addr_sym_map()[fn_entry_point_addr] = symbol;
// Undefined function symbols go to their own name -> symbols map.
7502 else if (load_undefined_fun_map && !symbol->is_defined())
7504 string_elf_symbols_map_type::iterator it =
7505 undefined_fun_syms_->find(symbol->get_name());
7506 if (it == undefined_fun_syms_->end())
7508 (*undefined_fun_syms_)[symbol->get_name()] = elf_symbols();
7509 it = undefined_fun_syms_->find(symbol->get_name());
7511 it->second.push_back(symbol);
// Second case: variable symbols (STT_OBJECT or STT_TLS).
7514 else if ((load_var_map || load_undefined_var_map)
7515 && (GELF_ST_TYPE(sym->st_info) == STT_OBJECT
7516 || GELF_ST_TYPE(sym->st_info) == STT_TLS)
7517 // If the symbol is for an OBJECT, the index of the
7518 // section it refers to cannot be absolute.
7519 // Otherwise that OBJECT is not a variable.
7520 && (sym->st_shndx != SHN_ABS
7521 || GELF_ST_TYPE(sym->st_info) != STT_OBJECT ))
7523 elf_symbol_sptr symbol = lookup_elf_symbol_from_index(i);
7525 ABG_ASSERT(symbol->is_variable());
7527 if (load_var_map && symbol->is_public())
// Record the symbol in the name -> symbols map, creating the
// entry for that name if it does not exist yet.
7530 string_elf_symbols_map_type::iterator it =
7531 var_syms_->find(symbol->get_name());
7532 if (it == var_syms_->end())
7534 (*var_syms_)[symbol->get_name()] = elf_symbols();
7535 it = var_syms_->find(symbol->get_name());
7538 it->second.push_back(symbol);
// Common symbols: when several instances carry the same name, the
// first one registered acts as the main instance and the others
// are attached to it.
7541 if (symbol->is_common_symbol())
7543 string_elf_symbols_map_type::iterator it =
7544 var_syms_->find(symbol->get_name());
7545 ABG_ASSERT(it != var_syms_->end());
7546 const elf_symbols& common_sym_instances = it->second;
7547 ABG_ASSERT(!common_sym_instances.empty());
7548 if (common_sym_instances.size() > 1)
7550 elf_symbol_sptr main_common_sym =
7551 common_sym_instances[0];
7552 ABG_ASSERT(main_common_sym->get_name()
7553 == symbol->get_name());
7554 ABG_ASSERT(main_common_sym->is_common_symbol());
7555 ABG_ASSERT(symbol.get() != main_common_sym.get());
7556 main_common_sym->add_common_instance(symbol);
// Record the symbol in the address -> symbol map, or register it
// as an alias if the address is already taken.
7561 GElf_Addr symbol_value =
7562 maybe_adjust_et_rel_sym_addr_to_abs_addr(sym);
7563 addr_elf_symbol_sptr_map_type::const_iterator it =
7564 var_addr_sym_map_->find(symbol_value);
7565 if (it == var_addr_sym_map_->end())
7566 (*var_addr_sym_map_)[symbol_value] = symbol;
7568 it->second->get_main_symbol()->add_alias(symbol);
// Undefined variable symbols go to their own name -> symbols map.
7571 else if (load_undefined_var_map && !symbol->is_defined())
7573 string_elf_symbols_map_type::iterator it =
7574 undefined_var_syms_->find(symbol->get_name());
7575 if (it == undefined_var_syms_->end())
7577 (*undefined_var_syms_)[symbol->get_name()] = elf_symbols();
7578 it = undefined_var_syms_->find(symbol->get_name());
7580 it->second.push_back(symbol);
7587 /// Try reading the first __ksymtab section entry as if it is in the
7588 /// pre-v4_19 format and lookup a symbol from the .symbol section to
7589 /// see if that succeeds.  If it does, then we can assume the
7590 /// __ksymtab section is in the pre-v4_19 format.
///
/// In this format, the value read is taken to be the symbol address
/// itself; its size is the word size of the architecture.
///
7592 /// @return the symbol resulting from the lookup of the symbol
7593 /// address we got from reading the first entry of the ksymtab
7594 /// section assuming the pre-v4.19 format.  If nil, it means the
7595 /// __ksymtab section is not in the pre-v4.19 format.
7597 try_reading_first_ksymtab_entry_using_pre_v4_19_format() const
// NOTE(review): neither 'section' nor the result of elf_rawdata() is
// checked for NULL before use here -- confirm that callers only
// invoke this when a ksymtab section exists.
7599 Elf_Scn *section = find_any_ksymtab_section();
7600 Elf_Data *elf_data = elf_rawdata(section, 0);
7601 uint8_t *bytes = reinterpret_cast<uint8_t*>(elf_data->d_buf);
7602 bool is_big_endian = elf_architecture_is_big_endian();
7603 elf_symbol_sptr symbol;
7604 unsigned char symbol_value_size = architecture_word_size();
7606 GElf_Addr symbol_address = 0, adjusted_symbol_address = 0;
7607 ABG_ASSERT(read_int_from_array_of_bytes(bytes,
7611 adjusted_symbol_address = maybe_adjust_fn_sym_address(symbol_address);
7612 symbol = lookup_elf_symbol_from_address(adjusted_symbol_address);
7616 /// Try reading the first __ksymtab section entry as if it is in the
7617 /// v4_19 format and lookup a symbol from the .symbol section to see
7618 /// if that succeeds.  If it does, then we can assume the __ksymtab
7619 /// section is in the v4_19 format.
///
/// In this format, the value read is an offset that is added to the
/// address of the section (its sh_addr) to get the symbol address.
///
7621 /// @return the symbol resulting from the lookup of the symbol
7622 /// address we got from reading the first entry of the ksymtab
7623 /// section assuming the v4.19 format.  If nil, it means the
7624 /// __ksymtab section is not in the v4.19 format.
7626 try_reading_first_ksymtab_entry_using_v4_19_format() const
// NOTE(review): neither 'section' nor the result of elf_rawdata() is
// checked for NULL before use here -- confirm that callers only
// invoke this when a ksymtab section exists.
7628 Elf_Scn *section = find_any_ksymtab_section();
7629 Elf_Data *elf_data = elf_rawdata(section, 0);
7630 uint8_t *bytes = reinterpret_cast<uint8_t*>(elf_data->d_buf);
7631 bool is_big_endian = elf_architecture_is_big_endian();
7632 elf_symbol_sptr symbol;
// The size of the value to read is the size of the 'offset' variable.
7635 const unsigned char symbol_value_size = sizeof(offset);
7636 GElf_Addr symbol_address = 0, adjusted_symbol_address = 0;
7637 ABG_ASSERT(read_int_from_array_of_bytes(bytes,
// The offset is relative to the address of the ksymtab section.
7642 GElf_Shdr *section_header = gelf_getshdr(section, &mem);
7643 symbol_address = offset + section_header->sh_addr;
7645 adjusted_symbol_address = maybe_adjust_fn_sym_address(symbol_address);
7646 symbol = lookup_elf_symbol_from_address(adjusted_symbol_address);
7650 /// Try to determine the format of the __ksymtab and __ksymtab_gpl
7651 /// sections of Linux kernel modules.
///
7653 /// This is important because we need to know the format of these
7654 /// sections to be able to read from them.
///
/// The guess is based on the relocation section associated with the
/// ksymtab: either it is empty, or the type of its first relocation
/// is inspected.
///
7656 /// @return the format of the __ksymtab[_gpl] sections, or
/// UNDEFINED_KSYMTAB_FORMAT if the relocation type is not one of the
/// recognized ones.
7658 get_ksymtab_format_module() const
7660 Elf_Scn *section = find_any_ksymtab_reloc_section();
7662 ABG_ASSERT(section);
7664 // Libdwfl has a weird quirk where, in the process of obtaining an Elf
7665 // descriptor via dwfl_module_getelf(), it will apply all relocations it
7666 // knows how to and it will zero the relocation info after applying it. If
7667 // the .rela__ksymtab* section contained only simple (absolute) relocations,
7668 // they will have been all applied and sh_size will be 0. For arches that
7669 // support relative ksymtabs, simple relocations only appear in pre-4.19
// kernel modules.
7671 GElf_Shdr section_mem;
7672 GElf_Shdr *section_shdr = gelf_getshdr(section, &section_mem);
7673 if (section_shdr->sh_size == 0)
7674 return PRE_V4_19_KSYMTAB_FORMAT;
7676 bool is_relasec = (section_shdr->sh_type == SHT_RELA);
7678 // If we still have a normal non-zeroed relocation section, we can guess
7679 // what format the ksymtab is in depending on what types of relocs it
// contains.
7683 Elf_Data *section_data = elf_getdata(section, 0);
// Read the type of the first relocation, from either a RELA or a REL
// entry depending on the type of the relocation section.
7687 gelf_getrela(section_data, 0, &rela);
7688 type = GELF_R_TYPE(rela.r_info);
7693 gelf_getrel(section_data, 0, &rel);
7694 type = GELF_R_TYPE(rel.r_info);
7697 // Sigh, I dislike the arch-dependent code here, but this seems to be a
7698 // reliable heuristic for kernel modules for now. Relative ksymtabs only
7699 // supported on x86 and arm64 as of v4.19.
7700 ksymtab_format format;
// Absolute relocations map to the pre-v4.19 format.
7703 case R_X86_64_64: // Same as R_386_32, fallthrough
7704 #ifdef HAVE_R_AARCH64_ABS64_MACRO
7705 case R_AARCH64_ABS64:
7707 format = PRE_V4_19_KSYMTAB_FORMAT;
// PC-relative relocations map to the v4.19 format.
7709 case R_X86_64_PC32: // Same as R_386_PC32, fallthrough
7710 #ifdef HAVE_R_AARCH64_PREL32_MACRO
7711 case R_AARCH64_PREL32:
7713 format = V4_19_KSYMTAB_FORMAT;
7716 // Fall back to other methods of determining the ksymtab format.
7717 format = UNDEFINED_KSYMTAB_FORMAT;
7723 /// Determine the format of the __ksymtab and __ksymtab_gpl
/// sections.
///
7726 /// This is important because we need to know the format of these
7727 /// sections to be able to read from them.
///
/// The result is stored in the ksymtab_format_ data member, so the
/// detection heuristics only run while that member is still
/// UNDEFINED_KSYMTAB_FORMAT.
///
7729 /// @return the format of the __ksymtab[_gpl] sections.
7731 get_ksymtab_format() const
// Without any ksymtab section, the format stays undefined.
7733 if (!find_any_ksymtab_section())
7734 ksymtab_format_ = UNDEFINED_KSYMTAB_FORMAT;
7737 if (ksymtab_format_ == UNDEFINED_KSYMTAB_FORMAT)
7739 // Since Linux kernel modules are relocatable, we can first try
7740 // using a heuristic based on relocations to guess the ksymtab format.
7741 if (is_linux_kernel_module())
7743 ksymtab_format_ = get_ksymtab_format_module();
7744 if (ksymtab_format_ != UNDEFINED_KSYMTAB_FORMAT)
7745 return ksymtab_format_;
7748 // If it's not a kernel module or we couldn't determine its format
7749 // with relocations, fall back to the heuristics below.
7751 // OK this is a dirty little heuristic to determine the
7752 // format of the ksymtab section.
//
7754 // We try to read the first ksymtab entry assuming a
7755 // pre-v4.19 format. If that succeeds then we are in the
7756 // pre-v4.19 format. Otherwise, try reading it assuming a
7757 // v4.19 format. For now, we just support
7758 // PRE_V4_19_KSYMTAB_FORMAT and V4_19_KSYMTAB_FORMAT.
7759 if (try_reading_first_ksymtab_entry_using_pre_v4_19_format())
7760 ksymtab_format_ = PRE_V4_19_KSYMTAB_FORMAT;
7761 else if (try_reading_first_ksymtab_entry_using_v4_19_format())
7762 ksymtab_format_ = V4_19_KSYMTAB_FORMAT;
7764 // If a new format emerges, then we need to add its
// support above.
7766 ABG_ASSERT_NOT_REACHED;
7769 return ksymtab_format_;
7772 /// Getter of the size of the symbol value part of an entry of the
7773 /// ksymtab section.
///
/// The size depends on the format returned by get_ksymtab_format():
/// for the pre-v4.19 format it is the word size of the architecture.
///
7775 /// @return the size of the symbol value part of the entry of the
7776 /// ksymtab section.
7778 get_ksymtab_symbol_value_size() const
7780 unsigned char result = 0;
7781 ksymtab_format format = get_ksymtab_format();
7782 if (format == UNDEFINED_KSYMTAB_FORMAT)
7784 else if (format == PRE_V4_19_KSYMTAB_FORMAT)
7785 result = architecture_word_size();
7786 else if (format == V4_19_KSYMTAB_FORMAT)
// Any other format value is unexpected.
7789 ABG_ASSERT_NOT_REACHED;
7794 /// Getter of the size of one entry of the ksymtab section.
///
/// The value is computed the first time it is needed and cached in
/// the ksymtab_entry_size_ data member; a cached value of zero means
/// "not computed yet".
///
7796 /// @return the size of one entry of the ksymtab section.
7798 get_ksymtab_entry_size() const
7800 if (ksymtab_entry_size_ == 0)
7801 // The entry size is 2 * symbol_value_size.
7802 ksymtab_entry_size_ = 2 * get_ksymtab_symbol_value_size();
7804 return ksymtab_entry_size_;
7807 /// Getter of the number of entries that are present in the ksymtab
/// section.
///
/// The number is the size of the section divided by the size of one
/// entry.  It is cached in the nb_ksymtab_entries_ data member; a
/// cached value of zero means "not computed yet".
///
7810 /// @return the number of entries that are present in the ksymtab
/// section.
7813 get_nb_ksymtab_entries() const
7815 if (nb_ksymtab_entries_ == 0)
7817 Elf_Scn *section = find_ksymtab_section();
7820 GElf_Shdr header_mem;
7821 GElf_Shdr *section_header = gelf_getshdr(section, &header_mem);
7822 size_t entry_size = get_ksymtab_entry_size();
// The entry size is used as a divisor below, so it must not be zero.
7823 ABG_ASSERT(entry_size);
7824 nb_ksymtab_entries_ = section_header->sh_size / entry_size;
7827 return nb_ksymtab_entries_;
7830 /// Getter of the number of entries that are present in the
7831 /// ksymtab_gpl section.
///
/// The number is the size of the section divided by the size of one
/// entry.  It is cached in the nb_ksymtab_gpl_entries_ data member; a
/// cached value of zero means "not computed yet".
///
7833 /// @return the number of entries that are present in the
7834 /// ksymtab_gpl section.
7836 get_nb_ksymtab_gpl_entries()
7838 if (nb_ksymtab_gpl_entries_ == 0)
7840 Elf_Scn *section = find_ksymtab_gpl_section();
7843 GElf_Shdr header_mem;
7844 GElf_Shdr *section_header = gelf_getshdr(section, &header_mem);
7845 size_t entry_size = get_ksymtab_entry_size();
// The entry size is used as a divisor below, so it must not be zero.
7846 ABG_ASSERT(entry_size);
7847 nb_ksymtab_gpl_entries_ = section_header->sh_size / entry_size;
7850 return nb_ksymtab_gpl_entries_;
7853 /// Populate the symbol map by reading exported symbols from the
7854 /// ksymtab directly.
///
/// For each entry, the symbol address is read, adjusted, and looked
/// up in the ELF symbol table; the adjusted address is then inserted
/// into the set that matches the kind of the symbol (function or
/// variable).
///
7856 /// @param section the ksymtab section to read from
///
7858 /// @param exported_fns_set the set of exported functions
///
7860 /// @param exported_vars_set the set of exported variables
///
7862 /// @param nb_entries the number of ksymtab entries to read
///
7864 /// @return true upon successful completion, false otherwise.
7866 populate_symbol_map_from_ksymtab(Elf_Scn *section,
7867 address_set_sptr exported_fns_set,
7868 address_set_sptr exported_vars_set,
7871 // The data of the section.
7872 Elf_Data *elf_data = elf_rawdata(section, 0);
7874 // An array-of-bytes view of the elf data above. Something we can
7875 // actually program with. Phew.
7876 uint8_t *bytes = reinterpret_cast<uint8_t*>(elf_data->d_buf);
7878 // This is where to store an address of a symbol that we read from
// the section.
7880 GElf_Addr symbol_address = 0, adjusted_symbol_address = 0;
7882 // So the section is an array of entries. Each entry describes a
7883 // symbol. Each entry is made of two words.
//
7885 // The first word is the address of a symbol. The second one is
7886 // the address of a static global variable symbol which value is
7887 // the string representing the symbol name. That string is in the
7888 // __ksymtab_strings section. Here, we are only interested in the
// first word of each entry.
//
7891 // Lets thus walk the array of entries, and let's read just the
7892 // symbol address part of each entry.
7893 bool is_big_endian = elf_architecture_is_big_endian();
7894 elf_symbol_sptr symbol;
7895 unsigned char symbol_value_size = get_ksymtab_symbol_value_size();
// entry_offset is the byte offset of the i-th entry in the section.
7897 for (size_t i = 0, entry_offset = 0;
7899 ++i, entry_offset = get_ksymtab_entry_size() * i)
7902 ABG_ASSERT(read_int_from_array_of_bytes(&bytes[entry_offset],
7907 // Starting from linux kernel v4.19, it can happen that the
7908 // address value read from the ksymtab[_gpl] section might
7909 // need some decoding to get the real symbol address that has
7910 // a meaning in the .symbol section.
7912 maybe_adjust_sym_address_from_v4_19_ksymtab(symbol_address,
7913 entry_offset, section);
7915 // We might also want to adjust the symbol address, depending
7916 // on if we are looking at an ET_REL, an executable or a
7917 // shared object binary.
7918 adjusted_symbol_address = maybe_adjust_fn_sym_address(symbol_address);
7920 if (adjusted_symbol_address == 0)
7921 // The resulting symbol address is zero, not sure this is
7922 // valid; ignore it.
7925 // OK now the symbol address should be in a suitable form to
7926 // be used to look the symbol up in the usual .symbol section
7927 // (aka ELF symbol table).
7928 symbol = lookup_elf_symbol_from_address(adjusted_symbol_address);
// Second attempt: adjust the address as a variable symbol address
// and look the symbol up again.
7931 adjusted_symbol_address =
7932 maybe_adjust_var_sym_address(symbol_address);
7933 symbol = lookup_elf_symbol_from_address(adjusted_symbol_address);
7935 // This must be a symbol that is of type neither FUNC
7936 // (function) nor OBJECT (variable). There are for instance,
7937 // symbols of type 'NOTYPE' in the ksymtab symbol table. I
7938 // am not sure what those are.
// Pick the destination set from the kind of the symbol.
7942 address_set_sptr set;
7943 if (symbol->is_function())
7945 ABG_ASSERT(lookup_elf_fn_symbol_from_address
7946 (adjusted_symbol_address));
7947 set = exported_fns_set;
7949 else if (symbol->is_variable())
7951 ABG_ASSERT(lookup_elf_var_symbol_from_address
7952 (adjusted_symbol_address));
7953 set = exported_vars_set;
7956 ABG_ASSERT_NOT_REACHED;
7957 set->insert(adjusted_symbol_address);
7962 /// Populate the symbol map by extracting the exported symbols from a
7963 /// ksymtab rela section.
///
/// Each relocation of the section references a symbol by its index
/// in the symbol table.  Symbols that are linux string constants, or
/// that are neither functions nor variables, are ignored.  The
/// address of each remaining symbol is inserted into the set that
/// matches its kind.
///
7965 /// @param reloc_section the relocation section of the ksymtab
/// section to read from
///
7967 /// @param exported_fns_set the set of exported functions
///
7969 /// @param exported_vars_set the set of exported variables
///
7971 /// @return true upon successful completion, false otherwise.
7973 populate_symbol_map_from_ksymtab_reloc(Elf_Scn *reloc_section,
7974 address_set_sptr exported_fns_set,
7975 address_set_sptr exported_vars_set)
7977 GElf_Shdr reloc_section_mem;
7978 GElf_Shdr *reloc_section_shdr = gelf_getshdr(reloc_section,
7979 &reloc_section_mem);
// Guard against a malformed section header whose sh_entsize is zero;
// dividing by it would otherwise trap.  In that case no relocation
// is walked.
7980 size_t reloc_count =
7981 (reloc_section_shdr->sh_entsize
? reloc_section_shdr->sh_size / reloc_section_shdr->sh_entsize
: 0);
7983 Elf_Data *reloc_section_data = elf_getdata(reloc_section, 0);
7985 bool is_relasec = (reloc_section_shdr->sh_type == SHT_RELA);
7986 elf_symbol_sptr symbol;
7987 GElf_Sym native_symbol;
7988 for (unsigned int i = 0; i < reloc_count; i++)
// Get the symbol referenced by the i-th relocation, reading either a
// RELA or a REL entry depending on the type of the section.
7993 gelf_getrela(reloc_section_data, i, &rela);
7994 symbol = lookup_elf_symbol_from_index(GELF_R_SYM(rela.r_info),
8000 gelf_getrel(reloc_section_data, i, &rel);
8001 symbol = lookup_elf_symbol_from_index(GELF_R_SYM(rel.r_info),
8007 // If the symbol is a linux string constant then ignore it.
8008 if (symbol->get_is_linux_string_cst())
// Symbols that are neither functions nor variables are skipped.
8011 if (!symbol->is_function() && !symbol->is_variable())
8015 if (symbol->get_type() == elf_symbol::NOTYPE_TYPE)
8016 cerr << "skipping NOTYPE symbol "
8017 << symbol->get_name()
8019 << symbol->get_index()
8023 else if (symbol->get_type() == elf_symbol::SECTION_TYPE)
8024 cerr << "skipping SECTION symbol "
8026 << symbol->get_index()
8034 // If we are looking at an ET_REL (relocatable) binary, then
8035 // the symbol value of native_symbol is relative to the
8036 // section that symbol is defined in. We need to translate it
8037 // into an absolute (okay, binary-relative, rather) address.
8038 GElf_Addr symbol_address =
8039 maybe_adjust_et_rel_sym_addr_to_abs_addr (&native_symbol);
// Pick the destination set from the kind of the symbol.
8041 address_set_sptr set;
8042 if (symbol->is_function())
8044 ABG_ASSERT(lookup_elf_fn_symbol_from_address(symbol_address));
8045 set = exported_fns_set;
8047 else if (symbol->is_variable())
8049 ABG_ASSERT(lookup_elf_var_symbol_from_address(symbol_address));
8050 set = exported_vars_set;
8053 ABG_ASSERT_NOT_REACHED;
8054 set->insert(symbol_address);
8059 /// Load a given kernel symbol table.
///
8061 /// One can thus retrieve the resulting symbols by using the
8062 /// accessors read_context::linux_exported_fn_syms(),
8063 /// read_context::linux_exported_var_syms(),
8064 /// read_context::linux_exported_gpl_fn_syms(), or
8065 /// read_context::linux_exported_gpl_var_syms().
///
/// Depending on the presence of a relocation section and on the
/// ksymtab format, the exported symbols are read either from the
/// ksymtab section itself or from its relocation section.
///
8067 /// @param kind the kind of kernel symbol table to load.
///
8069 /// @return true upon successful completion, false otherwise.
8071 load_kernel_symbol_table(kernel_symbol_table_kind kind)
8073 size_t nb_entries = 0;
8074 Elf_Scn *section = 0, *reloc_section = 0;
8075 address_set_sptr linux_exported_fns_set, linux_exported_vars_set;
// Select the sections, the number of entries and the destination
// sets according to the kind of symbol table requested.
8079 case KERNEL_SYMBOL_TABLE_KIND_UNDEFINED:
8081 case KERNEL_SYMBOL_TABLE_KIND_KSYMTAB:
8082 section = find_ksymtab_section();
8083 reloc_section = find_ksymtab_reloc_section();
8084 nb_entries = get_nb_ksymtab_entries();
8085 linux_exported_fns_set = create_or_get_linux_exported_fn_syms();
8086 linux_exported_vars_set = create_or_get_linux_exported_var_syms();
8088 case KERNEL_SYMBOL_TABLE_KIND_KSYMTAB_GPL:
8089 section = find_ksymtab_gpl_section();
8090 reloc_section = find_ksymtab_gpl_reloc_section();
8091 nb_entries = get_nb_ksymtab_gpl_entries();
8092 linux_exported_fns_set = create_or_get_linux_exported_gpl_fn_syms();
8093 linux_exported_vars_set = create_or_get_linux_exported_gpl_var_syms();
// Nothing can be loaded without destination sets.
8097 if (!linux_exported_vars_set
8098 || !linux_exported_fns_set
8103 ksymtab_format format = get_ksymtab_format();
8105 // Although pre-v4.19 kernel modules can have a relocation section for the
8106 // __ksymtab section, libdwfl zeroes the rela section after applying
8107 // "simple" absolute relocations via dwfl_module_getelf(). For v4.19 and
8108 // above, we get PC-relative relocations so dwfl_module_getelf() doesn't
8109 // apply those relocations and we're safe to read the relocation section to
8110 // determine which exported symbols are in the ksymtab.
8111 if (!reloc_section || format == PRE_V4_19_KSYMTAB_FORMAT)
8112 return populate_symbol_map_from_ksymtab(section, linux_exported_fns_set,
8113 linux_exported_vars_set,
8116 return populate_symbol_map_from_ksymtab_reloc(reloc_section,
8117 linux_exported_fns_set,
8118 linux_exported_vars_set);
8121 /// Load the special __ksymtab section.  This is for linux kernel
/// (module) binaries.
///
/// This forwards to load_kernel_symbol_table() with the
/// KERNEL_SYMBOL_TABLE_KIND_KSYMTAB kind.
///
8124 /// @return true upon successful completion, false otherwise.
8126 load_ksymtab_symbols()
8128 return load_kernel_symbol_table(KERNEL_SYMBOL_TABLE_KIND_KSYMTAB);
8131 /// Load the special __ksymtab_gpl section.  This is for linux kernel
/// (module) binaries.
///
/// This forwards to load_kernel_symbol_table() with the
/// KERNEL_SYMBOL_TABLE_KIND_KSYMTAB_GPL kind.
///
8134 /// @return true upon successful completion, false otherwise.
8136 load_ksymtab_gpl_symbols()
8138 return load_kernel_symbol_table(KERNEL_SYMBOL_TABLE_KIND_KSYMTAB_GPL);
8141 /// Load linux kernel (module) specific exported symbol sections.
///
/// Each of the two tables (__ksymtab and __ksymtab_gpl) is loaded
/// only if at least one of its two address sets is not allocated yet.
///
8143 /// @return true upon successful completion, false otherwise.
8145 load_linux_specific_exported_symbol_maps()
8147 bool loaded = false;
8148 if (!linux_exported_fn_syms_
8149 || !linux_exported_var_syms_)
8150 loaded |= load_ksymtab_symbols();
8152 if (!linux_exported_gpl_fn_syms_
8153 || !linux_exported_gpl_var_syms_)
8154 loaded |= load_ksymtab_gpl_symbols();
8159 /// Load the maps of function symbol address -> function symbol,
8160 /// global variable symbol address -> variable symbol and also the
8161 /// maps of function and variable undefined symbols.
///
8163 /// All these maps are loaded only if they are not loaded already.
///
/// The maps are read from the symbol table section unless the
/// ignore_symbol_table option is set.
///
8165 /// @return true iff everything went fine.
// Record which maps are missing *before* allocating them below, so
// that only those get populated.
8169 bool load_fun_map = !fun_addr_sym_map_ ;
8170 bool load_var_map = !var_addr_sym_map_;
8171 bool load_undefined_fun_map = !undefined_fun_syms_;
8172 bool load_undefined_var_map = !undefined_var_syms_;
8175 fun_syms_.reset(new string_elf_symbols_map_type);
8177 if (!fun_addr_sym_map_)
8178 fun_addr_sym_map_.reset(new addr_elf_symbol_sptr_map_type);
// The entry point address map is only needed for ppc64 binaries.
8180 if (!fun_entry_addr_sym_map_ && elf_architecture_is_ppc64())
8181 fun_entry_addr_sym_map_.reset(new addr_elf_symbol_sptr_map_type);
8184 var_syms_.reset(new string_elf_symbols_map_type);
8186 if (!var_addr_sym_map_)
8187 var_addr_sym_map_.reset(new addr_elf_symbol_sptr_map_type);
8189 if (!undefined_fun_syms_)
8190 undefined_fun_syms_.reset(new string_elf_symbols_map_type);
8192 if (!undefined_var_syms_)
8193 undefined_var_syms_.reset(new string_elf_symbols_map_type);
8195 if (!options_.ignore_symbol_table)
8197 if (load_symbol_maps_from_symtab_section(load_fun_map,
8199 load_undefined_fun_map,
8200 load_undefined_var_map))
// For linux kernel binaries loaded in linux kernel mode, the result
// is that of loading the kernel-specific exported symbol maps.
8202 if (load_in_linux_kernel_mode() && is_linux_kernel_binary())
8203 return load_linux_specific_exported_symbol_maps();
8211 /// Return true if an address is in the ".opd" section that is
8212 /// present on the ppc64 platform.
8214 /// @param addr the address to consider.
8216 /// @return true iff @p addr designates a word that is in the
/// ".opd" section, as returned by find_opd_section().
8219 address_is_in_opd_section(Dwarf_Addr addr)
8221 Elf_Scn * opd_section = find_opd_section();
8224 if (address_is_in_section(addr, opd_section))
8229 /// Load the symbol maps if necessary.
///
/// The maps are considered missing as soon as one of the tested
/// map pointers is not set.
///
8231 /// @return true iff the symbol maps have been loaded by this
/// function.
8234 maybe_load_symbol_maps() const
8236 if (!fun_addr_sym_map_
8237 || !var_addr_sym_map_
8240 || !undefined_fun_syms_
8241 || !undefined_var_syms_)
// This member function is const, whereas the loading is done through
// a non-const pointer to the read context; hence the const_cast.
8242 return const_cast<read_context*>(this)->load_symbol_maps();
8246 /// Load the DT_NEEDED and DT_SONAME elf TAGS.
///
/// The DT_NEEDED entries are stored into dt_needed_. The first
/// DT_SONAME entry found, if any, is stored into dt_soname_.
8249 load_dt_soname_and_needed()
8251 lookup_data_tag_from_dynamic_segment(elf_handle(), DT_NEEDED, dt_needed_);
8253 vector<string> dt_tag_data;
8254 lookup_data_tag_from_dynamic_segment(elf_handle(), DT_SONAME, dt_tag_data);
// dt_soname_ is left untouched when no DT_SONAME entry was found.
8255 if (!dt_tag_data.empty())
8256 dt_soname_ = dt_tag_data[0];
8259 /// Read the string representing the architecture of the current ELF
/// file and store it into elf_architecture_.
///
/// The string is computed by e_machine_to_string() from the
/// e_machine field of the ELF header.
8262 load_elf_architecture()
// NOTE(review): elf_header is dereferenced below without a visible
// null check -- confirm that gelf_getehdr() cannot fail here.
8268 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
8270 elf_architecture_ = e_machine_to_string(elf_header->e_machine);
8273 /// Load various ELF data.
8275 /// This function loads ELF data that are not symbol maps or debug
8276 /// info. That is, things like various tags, elf architecture and
/// so on.
///
/// Concretely, it loads the DT_SONAME and DT_NEEDED tags and then
/// the ELF architecture string.
8279 load_elf_properties()
8281 load_dt_soname_and_needed();
8282 load_elf_architecture();
8285 /// Convert the value of the symbol address part of a post V4.19
8286 /// ksymtab entry (that contains place-relative addresses) into its
8287 /// corresponding symbol value in the .symtab section. The value of
8288 /// the symbol in .symtab equals addr_offset + address-of-ksymtab
/// + addr.
///
/// If the ksymtab format is not V4_19_KSYMTAB_FORMAT, @p addr is not
/// adjusted.
///
8291 /// @param addr the address read from the ksymtab section.
8293 /// @param addr_offset the offset at which @p addr was read.
8295 /// @param ksymtab_section the ksymtab section @p addr was read
/// from.
8298 maybe_adjust_sym_address_from_v4_19_ksymtab(GElf_Addr addr,
8300 Elf_Scn *ksymtab_section) const
8302 GElf_Addr result = addr;
8304 if (get_ksymtab_format() == V4_19_KSYMTAB_FORMAT)
// Reinterpret the value read as a signed 32-bit offset.
8306 int32_t offset = addr;
// NOTE(review): section_header is dereferenced below without a
// visible null check -- confirm that gelf_getshdr() cannot fail here.
8308 GElf_Shdr *section_header = gelf_getshdr(ksymtab_section, &mem);
8309 result = offset + section_header->sh_addr + addr_offset;
8315 /// This is a sub-routine of maybe_adjust_fn_sym_address and
8316 /// maybe_adjust_var_sym_address.
8318 /// Given an address that we got by looking at some debug
8319 /// information (e.g, a symbol's address referred to by a DWARF
8320 /// TAG), if the ELF file we are interested in is a shared library
8321 /// or an executable, then adjust the address to be coherent with
8322 /// where the executable (or shared library) is loaded. That way,
8323 /// the address can be used to look for symbols in the executable or
/// shared library.
///
/// @param addr the address to possibly adjust.
///
8326 /// @return the adjusted address, or the same address as @p addr if
8327 /// it didn't need any adjustment.
8329 maybe_adjust_address_for_exec_or_dyn(Dwarf_Addr addr) const
8335 GElf_Ehdr *elf_header = gelf_getehdr(elf_handle(), &eh_mem);
// Only executables (ET_EXEC) and shared libraries (ET_DYN) are
// concerned by the adjustment.
8337 if (elf_header->e_type == ET_DYN || elf_header->e_type == ET_EXEC)
8339 Dwarf_Addr dwarf_elf_load_address = 0, elf_load_address = 0;
8340 ABG_ASSERT(get_binary_load_address(dwarf_elf_handle(),
8341 dwarf_elf_load_address));
8342 ABG_ASSERT(get_binary_load_address(elf_handle(),
8344 if (dwarf_is_splitted()
8345 && (dwarf_elf_load_address != elf_load_address))
8346 // This means that in theory the DWARF and the executable are
8347 // not loaded at the same address. And addr is meaningful
8348 // only in the context of the DWARF.
8350 // So let's transform addr into an offset relative to where
8351 // the DWARF is loaded, and let's add that relative offset
8352 // to the load address of the executable. That way, addr
8353 // becomes meaningful in the context of the executable and
8354 // can thus be used to compare against the address of
8355 // symbols of the executable, for instance.
8356 addr = addr - dwarf_elf_load_address + elf_load_address;
8362 /// For a relocatable (*.o) elf file, this function expects an
8363 /// absolute address, representing a function symbol. As explained
8364 /// by the comment in the body of the function, nothing is done in
8365 /// that case because the ELF symbol addresses stored in the lookup
8366 /// tables are translated from section-relative to absolute as well.
8368 /// For executable or shared library, this function expects an
8369 /// address of a function symbol that was retrieved by looking at a
8370 /// DWARF "file". The function thus adjusts the address to make it
8371 /// be meaningful in the context of the ELF file.
8373 /// In both cases, the address can then be compared against the
8374 /// address of a function symbol from the ELF file.
8376 /// @param addr an address for a function symbol that was retrieved
8377 /// from a DWARF file.
8379 /// @return the (possibly) adjusted address, or just @p addr if no
8380 /// adjustment took place.
8382 maybe_adjust_fn_sym_address(Dwarf_Addr addr) const
8387 Elf* elf = elf_handle();
8389 GElf_Ehdr* elf_header = gelf_getehdr(elf, &eh_mem);
8391 if (elf_header->e_type == ET_REL)
8392 // We are looking at a relocatable file. In this case, we don't
8393 // do anything because:
8395 // 1/ the addresses from DWARF are absolute (relative to the
8396 // beginning of the relocatable file)
8398 // 2/ The ELF symbol addresses that we store in our lookup
8399 // tables are translated from section-related to absolute as
8400 // well. So we don't have anything to do at this point for
8404 addr = maybe_adjust_address_for_exec_or_dyn(addr);
8409 /// Translate a section-relative symbol address (i.e, symbol value)
8410 /// into an absolute symbol address by adding the address of the
8411 /// section the symbol belongs to, to the address value.
8413 /// This is useful when looking at symbol values coming from
8414 /// relocatable files (of ET_REL kind). If the binary is not
8415 /// ET_REL, then the function does nothing and returns the input
8416 /// address unchanged.
8418 /// @param addr the symbol address to possibly translate.
8420 /// @param section the section the symbol which value is @p addr
/// belongs to.
8423 /// @return the section-relative address, translated into an
8424 /// absolute address, if the binary is of ET_REL kind. Otherwise,
8425 /// return @p addr, unchanged.
8427 maybe_adjust_et_rel_sym_addr_to_abs_addr(GElf_Addr addr, Elf_Scn *section)
8432 Elf* elf = elf_handle();
8433 GElf_Ehdr elf_header;
// The ELF header is needed to know the kind (e_type) of the binary.
8435 if (!gelf_getehdr(elf, &elf_header))
8438 if (elf_header.e_type != ET_REL)
8441 GElf_Shdr section_header;
// gelf_getshdr() fills section_header with the header of @p section.
8442 if (!gelf_getshdr(section, &section_header))
// Section-relative value + address of the section = absolute address.
8445 return addr + section_header.sh_addr;
8448 /// Translate a section-relative symbol address (i.e, symbol value)
8449 /// into an absolute symbol address by adding the address of the
8450 /// section the symbol belongs to, to the address value.
8452 /// This is useful when looking at symbol values coming from
8453 /// relocatable files (of ET_REL kind). If the binary is not
8454 /// ET_REL, then the function does nothing and returns the input
8455 /// address unchanged.
///
/// This overload looks up the section of @p sym from its st_shndx
/// field and forwards to the overload that takes an address and a
/// section.
///
8457 /// @param sym the symbol whose address possibly needs to be
/// translated.
8460 /// @return the section-relative address, translated into an
8461 /// absolute address, if @p sym is from an ET_REL binary.
8462 /// Otherwise, return the address of @p sym, unchanged.
8464 maybe_adjust_et_rel_sym_addr_to_abs_addr(GElf_Sym *sym)
8466 Elf_Scn *symbol_section = elf_getscn(elf_handle(), sym->st_shndx);
// The section of the symbol is expected to exist.
8467 ABG_ASSERT(symbol_section);
8468 GElf_Addr result = sym->st_value;
8469 result = maybe_adjust_et_rel_sym_addr_to_abs_addr(result, symbol_section);
8473 /// Test if a given address is in a given section.
///
/// The address is considered to be in the section if it lies in the
/// range [sh_addr, sh_addr + sh_size] of the section header, both
/// bounds included.
///
8475 /// @param addr the address to consider.
8477 /// @param section the section to consider.
8479 address_is_in_section(Dwarf_Addr addr, Elf_Scn* section) const
8484 GElf_Shdr sheader_mem;
8485 GElf_Shdr* sheader = gelf_getshdr(section, &sheader_mem);
// NOTE(review): the upper bound is inclusive, so an address equal to
// sh_addr + sh_size (one past the last byte of the section) is
// accepted -- confirm that this is intended.
8487 if (sheader->sh_addr <= addr && addr <= sheader->sh_addr + sheader->sh_size)
8493 /// Get the section which a global variable address comes from.
///
/// The candidate sections are tried in this order: .bss, .data,
/// .data1 and .rodata.
///
8495 /// @param var_addr the address for the variable.
8497 /// @return the ELF section the @p var_addr comes from, or nil if no
8498 /// section was found for that variable address.
8500 get_data_section_for_variable_address(Dwarf_Addr var_addr) const
8502 // There are several potential "data sections" from which a
8503 // variable address can come: .bss, .data, .data1 and .rodata.
8504 // Let's try them all in sequence.
8506 Elf_Scn* data_scn = bss_section();
8507 if (!address_is_in_section(var_addr, data_scn))
8509 data_scn = data_section();
8510 if (!address_is_in_section(var_addr, data_scn))
8512 data_scn = data1_section();
8513 if (!address_is_in_section(var_addr, data_scn))
8515 data_scn = rodata_section();
8516 if (!address_is_in_section(var_addr, data_scn))
8524 /// For a relocatable (*.o) elf file, this function expects an
8525 /// absolute address, representing a global variable symbol. As
8526 /// explained by the comment in the body of the function, nothing is
8527 /// done in that case because the ELF symbol addresses stored in the
8528 /// lookup tables are translated from section-relative to absolute.
8530 /// For executable or shared library, this function expects an
8531 /// address of a variable symbol that was retrieved by looking at a
8532 /// DWARF "file". The function thus adjusts the address to make it
8533 /// be meaningful in the context of the ELF file.
8535 /// In both cases, the address can then be compared against the
8536 /// address of a variable symbol from the ELF file.
8538 /// @param addr an address for a global variable symbol that was
8539 /// retrieved from a DWARF file.
8541 /// @return the (possibly) adjusted address, or just @p addr if no
8542 /// adjustment took place.
8544 maybe_adjust_var_sym_address(Dwarf_Addr addr) const
8546 Elf* elf = elf_handle();
8548 GElf_Ehdr* elf_header = gelf_getehdr(elf, &eh_mem);
8550 if (elf_header->e_type == ET_REL)
8551 // We are looking at a relocatable file. In this case, we don't
8552 // do anything because:
8554 // 1/ the addresses from DWARF are absolute (relative to the
8555 // beginning of the relocatable file)
8557 // 2/ The ELF symbol addresses that we store in our lookup
8558 // tables are translated from section-related to absolute as
8559 // well. So we don't have anything to do at this point for
8563 addr = maybe_adjust_address_for_exec_or_dyn(addr);
8569 /// Get the address of the function.
8571 /// The address of the function is considered to be the value of the
8572 /// DW_AT_low_pc attribute, possibly adjusted by
8573 /// maybe_adjust_fn_sym_address() so that it can be compared
8574 /// against the addresses of the function symbols of the ELF file.
8576 /// @param function_die the die of the function to consider.
8578 /// @param address the resulting address iff the function returns
/// true.
8581 /// @return true if the function address was found.
8583 get_function_address(Dwarf_Die* function_die,
8584 Dwarf_Addr& address) const
8586 Dwarf_Addr low_pc = 0;
// Without a DW_AT_low_pc attribute, there is no address to report.
8587 if (!die_address_attribute(function_die, DW_AT_low_pc, low_pc))
8590 low_pc = maybe_adjust_fn_sym_address(low_pc);
8595 /// Get the address of the global variable.
8597 /// The address of the global variable is considered to be the value
8598 /// of the DW_AT_location attribute, possibly adjusted by
8599 /// maybe_adjust_var_sym_address() so that it can be compared
8600 /// against the addresses of the variable symbols of the ELF file.
8601 /// Addresses of thread-local (TLS) variables are not adjusted.
8603 /// @param variable_die the die of the variable to consider.
8605 /// @param address the resulting address iff this function returns
/// true.
8608 /// @return true if the variable address was found.
8610 get_variable_address(Dwarf_Die* variable_die,
8611 Dwarf_Addr& address) const
8613 bool is_tls_address = false;
8614 if (!die_location_address(variable_die, address, is_tls_address))
// Only non-TLS addresses go through the adjustment.
8616 if (!is_tls_address)
8617 address = maybe_adjust_var_sym_address(address);
8621 /// Test if a suppression specification can match ABI artifacts
8622 /// coming from the binary being analyzed.
8624 /// This tests if the suppression matches both the soname and the
8625 /// binary name (the ELF path) of the ELF binary being analyzed.
8627 /// @param s the suppression specification to consider.
8629 suppression_can_match(const suppr::suppression_base& s) const
8631 if (s.priv_->matches_soname(dt_soname())
8632 && s.priv_->matches_binary_name(elf_path()))
8637 /// Test whether a given function suppression matches a function
8638 /// designated by a regular expression that describes its linkage
8639 /// name (symbol name).
///
/// This overload takes a smart pointer to the suppression and
/// forwards to the overload that takes a reference.
///
8641 /// @param s the suppression specification to evaluate to see if it
8642 /// matches a given function linkage name
8644 /// @param fn_linkage_name the linkage name of the function of interest.
8646 /// @return true iff the suppression specification @p s matches the
8647 /// function whose linkage name is @p fn_linkage_name.
8649 suppression_matches_function_sym_name(const suppr::function_suppression_sptr& s,
8650 const string& fn_linkage_name) const
8654 return suppression_matches_function_sym_name(*s,fn_linkage_name);
8657 /// Test whether a given function suppression matches a function
8658 /// designated by a regular expression that describes its linkage
8659 /// name (symbol name).
///
/// The suppression is first checked with suppression_can_match()
/// against the binary being analyzed.
///
8661 /// @param s the suppression specification to evaluate to see if it
8662 /// matches a given function linkage name
8664 /// @param fn_linkage_name the linkage name of the function of interest.
8666 /// @return true iff the suppression specification @p s matches the
8667 /// function whose linkage name is @p fn_linkage_name.
8669 suppression_matches_function_sym_name(const suppr::function_suppression& s,
8670 const string& fn_linkage_name) const
8672 if (!suppression_can_match(s))
8675 return suppr::suppression_matches_function_sym_name(s, fn_linkage_name);
8678 /// Test whether a given function suppression matches a function
8679 /// designated by a regular expression that describes its name.
///
/// This overload takes a smart pointer to the suppression and
/// forwards to the overload that takes a reference.
///
8681 /// @param s the suppression specification to evaluate to see if it
8682 /// matches a given function name.
8684 /// @param fn_name the name of the function of interest. Note that
8685 /// this name must be *non* qualified.
8687 /// @return true iff the suppression specification @p s matches the
8688 /// function whose name is @p fn_name.
8690 suppression_matches_function_name(const suppr::function_suppression_sptr& s,
8691 const string& fn_name) const
8695 return suppression_matches_function_name(*s, fn_name);
8698 /// Test whether a given function suppression matches a function
8699 /// designated by a regular expression that describes its name.
///
/// The suppression is first checked with suppression_can_match()
/// against the binary being analyzed.
///
8701 /// @param s the suppression specification to evaluate to see if it
8702 /// matches a given function name.
8704 /// @param fn_name the name of the function of interest. Note that
8705 /// this name must be *non* qualified.
8707 /// @return true iff the suppression specification @p s matches the
8708 /// function whose name is @p fn_name.
8710 suppression_matches_function_name(const suppr::function_suppression& s,
8711 const string& fn_name) const
8713 if (!suppression_can_match(s))
8716 return suppr::suppression_matches_function_name(s, fn_name);
8719 /// Test whether a given variable suppression specification
8720 /// matches a variable denoted by its name.
///
/// The suppression is first checked with suppression_can_match()
/// against the binary being analyzed.
///
8722 /// @param s the variable suppression specification to consider.
8724 /// @param var_name the name of the variable to consider.
8726 /// @return true iff the suppression specification @p s matches the
8727 /// variable whose name is @p var_name.
8729 suppression_matches_variable_name(const suppr::variable_suppression& s,
8730 const string& var_name) const
8732 if (!suppression_can_match(s))
8735 return suppr::suppression_matches_variable_name(s, var_name);
8738 /// Test whether a given variable suppression specification
8739 /// matches a variable denoted by its linkage name.
///
/// The suppression is first checked with suppression_can_match()
/// against the binary being analyzed.
///
8741 /// @param s the variable suppression specification to consider.
8743 /// @param var_linkage_name the linkage name of the variable to consider.
8745 /// @return true iff variable suppression specification @p s matches
8746 /// the variable denoted by linkage name @p var_linkage_name.
8748 suppression_matches_variable_sym_name(const suppr::variable_suppression& s,
8749 const string& var_linkage_name) const
8751 if (!suppression_can_match(s))
8754 return suppr::suppression_matches_variable_sym_name(s, var_linkage_name);
8757 /// Test if a given type suppression specification matches a type
8758 /// designated by its name and location.
///
/// The suppression is first checked with suppression_can_match()
/// against the binary being analyzed.
///
8760 /// @param s the suppression specification to consider.
8762 /// @param type_name the fully qualified type name to consider.
8764 /// @param type_location the type location to consider.
8766 /// @return true iff the type suppression specification matches a
8767 /// type of a given name and location.
8769 suppression_matches_type_name_or_location(const suppr::type_suppression& s,
8770 const string& type_name,
8771 const location& type_location) const
8773 if (!suppression_can_match(s))
8776 return suppr::suppression_matches_type_name_or_location(s, type_name,
8780 /// Test if a type suppression specification matches the name of a
8781 /// type within a given scope.
///
/// The suppression is first checked with suppression_can_match()
/// against the binary being analyzed.
///
8783 /// @param s the type suppression specification to consider.
8785 /// @param type_scope the type scope to consider.
8787 /// @param type the type to consider.
8789 /// @return true iff the type suppression specification matches
8790 /// the name of type @p type.
8792 suppression_matches_type_name(const suppr::type_suppression& s,
8793 const scope_decl* type_scope,
8794 const type_base_sptr& type) const
8796 if (!suppression_can_match(s))
8798 return suppr::suppression_matches_type_name(s, type_scope, type);
8801 /// Getter of the exported decls builder object.
8803 /// @return the exported decls builder, as a raw pointer that this
/// read context stores as is.
8804 corpus::exported_decls_builder*
8805 exported_decls_builder()
8806 {return exported_decls_builder_;}
8808 /// Setter of the exported decls builder object.
8810 /// Note that this @ref read_context is not responsible for the
8811 /// lifetime of the exported_decls_builder object. The corpus is.
8813 /// @param b the new builder.
8815 exported_decls_builder(corpus::exported_decls_builder* b)
8816 {exported_decls_builder_ = b;}
8818 /// Getter of the "load_all_types" flag. This flag tells if all the
8819 /// types (including those not reachable by public declarations) are
8820 /// to be read and represented in the final ABI corpus.
8822 /// @return the load_all_types flag, as stored in the options of
/// this read context.
8824 load_all_types() const
8825 {return options_.load_all_types;}
8827 /// Setter of the "load_all_types" flag. This flag tells if all the
8828 /// types (including those not reachable by public declarations) are
8829 /// to be read and represented in the final ABI corpus.
8831 /// @param f the new load_all_types flag, stored in the options of
/// this read context.
8833 load_all_types(bool f)
8834 {options_.load_all_types = f;}
/// Getter of the "load_in_linux_kernel_mode" flag.
///
/// @return the value of the flag, as stored in the options of this
/// read context.
8837 load_in_linux_kernel_mode() const
8838 {return options_.load_in_linux_kernel_mode;}
/// Setter of the "load_in_linux_kernel_mode" flag.
///
/// @param f the new value of the flag, stored in the options of this
/// read context.
8841 load_in_linux_kernel_mode(bool f)
8842 {options_.load_in_linux_kernel_mode = f;}
8844 /// Guess if the current binary is a Linux Kernel or a Linux Kernel module.
8846 /// To guess that, the function looks for the presence of the
8847 /// special "__ksymtab_strings" section in the binary. A binary for
/// which is_linux_kernel_module() returns true is considered a
/// Linux Kernel binary as well.
8850 is_linux_kernel_binary() const
8852 return find_section(elf_handle(), "__ksymtab_strings", SHT_PROGBITS)
8853 || is_linux_kernel_module();
8856 /// Guess if the current binary is a Linux Kernel module.
8858 /// To guess that, the function looks for the presence of the special
8859 /// ".modinfo" and ".gnu.linkonce.this_module" sections in the binary.
/// Both sections must be found for the binary to be considered a
/// Linux Kernel module.
8862 is_linux_kernel_module() const
8864 return find_section(elf_handle(), ".modinfo", SHT_PROGBITS)
8865 && find_section(elf_handle(), ".gnu.linkonce.this_module", SHT_PROGBITS);
8868 /// Getter of the "show_stats" flag.
8870 /// This flag tells if we should emit statistics about various
/// internal stuff.
8873 /// @return the value of the flag, as stored in the options of this
/// read context.
8876 {return options_.show_stats;}
8878 /// Setter of the "show_stats" flag.
8880 /// This flag tells if we should emit statistics about various
/// internal stuff.
8883 /// @param f the new value of the flag, stored in the options of
/// this read context.
8886 {options_.show_stats = f;}
8888 /// Getter of the "do_log" flag.
8890 /// This flag tells if we should log about various internal
/// details.
8893 /// @return the "do_log" flag, as stored in the options of this
/// read context.
8896 {return options_.do_log;}
8898 /// Setter of the "do_log" flag.
8900 /// This flag tells if we should log about various internal details.
8902 /// @param f the new value of the flag, stored in the options of
/// this read context.
8905 {options_.do_log = f;}
8907 /// If a given function decl is suitable for the set of exported
8908 /// functions of the current corpus, this function adds it to that
/// set.
///
/// The decision is delegated to the exported decls builder; nothing
/// is done if no builder is set.
///
8911 /// @param fn the function to consider for inclusion into the set of
8912 /// exported functions of the current corpus.
8914 maybe_add_fn_to_exported_decls(function_decl* fn)
8917 if (corpus::exported_decls_builder* b = exported_decls_builder())
8918 b->maybe_add_fn_to_exported_fns(fn);
8921 /// If a given variable decl is suitable for the set of exported
8922 /// variables of the current corpus, this function adds it to that
/// set.
///
/// The decision is delegated to the exported decls builder; nothing
/// is done if no builder is set.
///
8925 /// @param var the variable to consider for inclusion into the set of
8926 /// exported variables of the current corpus.
8928 maybe_add_var_to_exported_decls(var_decl* var)
8931 if (corpus::exported_decls_builder* b = exported_decls_builder())
8932 b->maybe_add_var_to_exported_vars(var);
8935 /// Walk the DIEs under a given die and for each child, populate the
8936 /// die -> parent map to record the child -> parent relationship
8938 /// that exists between the child and the given die.
8940 /// The function also builds the vector of places where units are
/// imported.
8943 /// This is done recursively as for each child DIE, this function
8944 /// walks its children as well.
8946 /// @param die the DIE whose children to walk recursively.
8948 /// @param source where the DIE @p die comes from.
8950 /// @param imported_units a vector containing all the offsets of the
8951 /// points where units have been imported, under @p die.
8953 build_die_parent_relations_under(Dwarf_Die* die,
8955 imported_unit_points_type & imported_units)
// The map to populate is the one associated to the source of the DIE.
8960 offset_offset_map_type& parent_of = die_parent_map(source);
8963 if (dwarf_child(die, &child) != 0)
// Record that the parent of the child is @p die; both are designated
// by their DIE offsets.
8968 parent_of[dwarf_dieoffset(&child)] = dwarf_dieoffset(die);
8969 if (dwarf_tag(&child) == DW_TAG_imported_unit)
8971 Dwarf_Die imported_unit;
8972 if (die_die_attribute(&child, DW_AT_import, imported_unit))
8974 die_source imported_unit_die_source = NO_DEBUG_INFO_DIE_SOURCE;
8975 ABG_ASSERT(get_die_source(imported_unit, imported_unit_die_source));
// Remember the point (the offset of the DW_TAG_imported_unit DIE)
// where the unit is imported, along with the source of that unit.
8976 imported_units.push_back
8977 (imported_unit_point(dwarf_dieoffset(&child),
8979 imported_unit_die_source));
// Recurse into the children of the child.
8982 build_die_parent_relations_under(&child, source, imported_units);
// Move on to the next sibling, if any.
8984 while (dwarf_siblingof(&child, &child) == 0);
8988 /// Determine if we do have to build a DIE -> parent map, depending
8989 /// on a given language.
8991 /// Some languages like C++, Ada etc, do have the concept of
8992 /// namespace and yet, the DIE data structure doesn't provide us
8993 /// with a way to get the parent namespace of a given DIE. So for
8994 /// those languages, we need to build a DIE -> parent map so that we
8995 /// can get the namespace DIE (or more generally the scope DIE) of a given
8996 /// DIE as we need it.
8998 /// But then some more basic languages like C or assembly don't have
/// that need.
9001 /// This function, depending on the language, tells us if we need to
9002 /// build the DIE -> parent map or not.
9004 /// @param lang the language to consider.
9006 /// @return true iff we need to build the DIE -> parent map for this
/// language.
9009 do_we_build_die_parent_maps(translation_unit::language lang)
// The C languages are handled first, before the other languages
// that are listed below.
9011 if (is_c_language(lang))
9016 case translation_unit::LANG_UNKNOWN:
9017 #ifdef HAVE_DW_LANG_Mips_Assembler_enumerator
9018 case translation_unit::LANG_Mips_Assembler:
9027 /// Walk all the DIEs accessible in the debug info (and in the
9028 /// alternate debug info as well) and build maps representing the
9029 /// relationship DIE -> parent. That is, make it so that we can get
9030 /// the parent for a given DIE.
9032 /// Note that the goal of this map is to be able to get the parent
9033 /// of a given DIE. This is mainly to handle namespaces. For instance,
9034 /// when we get a DIE of a type, and we want to build an internal
9035 /// representation for it, we need to get its fully qualified name.
9036 /// For that, we need to know what is the parent DIE of that type
9037 /// DIE, so that we can know what the namespace of that type is.
9039 /// Note that as the C language doesn't have namespaces (all types
9040 /// are defined in the same global namespace), this function doesn't
9041 /// build the DIE -> parent maps when none of the translation units
9042 /// needs them. This saves time on big C ELF files with a lot of
/// DIEs.
9045 build_die_parent_maps()
9047 bool we_do_have_to_build_die_parent_map = false;
9048 uint8_t address_size = 0;
9049 size_t header_size = 0;
9050 // Get the DIE of each translation unit, look at it to get
9051 // its language. If that language is in C, then all types are in
9052 // the global namespace so we don't need to build the DIE ->
9053 // parent map. So we don't build it in that case.
9054 for (Dwarf_Off offset = 0, next_offset = 0;
9055 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
9056 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
9057 offset = next_offset)
// The DIE of the unit comes right after the header of the unit.
9059 Dwarf_Off die_offset = offset + header_size;
9061 if (!dwarf_offdie(dwarf(), die_offset, &cu))
9065 die_unsigned_constant_attribute(&cu, DW_AT_language, l);
9066 translation_unit::language lang = dwarf_language_to_tu_language(l);
// One unit that needs the maps is enough to have them built.
9067 if (do_we_build_die_parent_maps(lang))
9068 we_do_have_to_build_die_parent_map = true;
9071 if (!we_do_have_to_build_die_parent_map)
9074 // Build the DIE -> parent relation for DIEs coming from the
9075 // .debug_info section in the alternate debug info file.
9076 die_source source = ALT_DEBUG_INFO_DIE_SOURCE;
9077 for (Dwarf_Off offset = 0, next_offset = 0;
9078 (dwarf_next_unit(alt_dwarf(), offset, &next_offset, &header_size,
9079 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
9080 offset = next_offset)
9082 Dwarf_Off die_offset = offset + header_size;
9084 if (!dwarf_offdie(alt_dwarf(), die_offset, &cu))
// Start from an empty vector of imported unit points for this unit;
// it gets filled while walking the DIEs of the unit.
9088 imported_unit_points_type& imported_units =
9089 tu_die_imported_unit_points_map(source)[die_offset] =
9090 imported_unit_points_type();
9091 build_die_parent_relations_under(&cu, source, imported_units);
9094 // Build the DIE -> parent relation for DIEs coming from the
9095 // .debug_info section of the main debug info file.
9096 source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
9099 for (Dwarf_Off offset = 0, next_offset = 0;
9100 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
9101 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
9102 offset = next_offset)
9104 Dwarf_Off die_offset = offset + header_size;
9106 if (!dwarf_offdie(dwarf(), die_offset, &cu))
9109 imported_unit_points_type& imported_units =
9110 tu_die_imported_unit_points_map(source)[die_offset] =
9111 imported_unit_points_type();
9112 build_die_parent_relations_under(&cu, source, imported_units);
9115 // Build the DIE -> parent relation for DIEs coming from the
9116 // .debug_types section.
9117 source = TYPE_UNIT_DIE_SOURCE;
9120 uint64_t type_signature = 0;
9121 Dwarf_Off type_offset;
// Unlike the loops above, this one passes non-null type signature and
// type offset pointers to dwarf_next_unit().
9122 for (Dwarf_Off offset = 0, next_offset = 0;
9123 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
9124 NULL, NULL, &address_size, NULL,
9125 &type_signature, &type_offset) == 0);
9126 offset = next_offset)
9128 Dwarf_Off die_offset = offset + header_size;
// DIEs of type units are looked up with dwarf_offdie_types().
9131 if (!dwarf_offdie_types(dwarf(), die_offset, &cu))
9134 imported_unit_points_type& imported_units =
9135 tu_die_imported_unit_points_map(source)[die_offset] =
9136 imported_unit_points_type();
9137 build_die_parent_relations_under(&cu, source, imported_units);
9140 };// end class read_context.
// Forward declarations of helper functions that operate on a
// read_context. Note that some of them carry default arguments.

// Builders of IR nodes from DIEs.
9142 static type_or_decl_base_sptr
9143 build_ir_node_from_die(read_context& ctxt,
9146 bool called_from_public_decl,
9147 size_t where_offset,
9148 bool is_required_decl_spec = false);
9150 static type_or_decl_base_sptr
9151 build_ir_node_from_die(read_context& ctxt,
9153 bool called_from_public_decl,
9154 size_t where_offset);
// Builders (or updaters) of class and union types.
9156 static class_decl_sptr
9157 add_or_update_class_type(read_context& ctxt,
9161 class_decl_sptr klass,
9162 bool called_from_public_decl,
9163 size_t where_offset);
9165 static union_decl_sptr
9166 add_or_update_union_type(read_context& ctxt,
9169 union_decl_sptr union_type,
9170 bool called_from_public_decl,
9171 size_t where_offset);
9173 static decl_base_sptr
9174 build_ir_node_for_void_type(read_context& ctxt);
// Function related helpers.
9176 static function_decl_sptr
9177 build_function_decl(read_context& ctxt,
9179 size_t where_offset,
9180 function_decl_sptr fn);
9183 function_is_suppressed(const read_context& ctxt,
9184 const scope_decl* scope,
9185 Dwarf_Die *function_die);
9187 static function_decl_sptr
9188 build_or_get_fn_decl_if_not_suppressed(read_context& ctxt,
9191 size_t where_offset,
9192 function_decl_sptr f = function_decl_sptr());
// Variable related helpers.
9194 static var_decl_sptr
9195 build_var_decl(read_context& ctxt,
9197 size_t where_offset,
9198 var_decl_sptr result = var_decl_sptr());
9200 static var_decl_sptr
9201 build_or_get_var_decl_if_not_suppressed(read_context& ctxt,
9204 size_t where_offset,
9205 var_decl_sptr res = var_decl_sptr(),
9206 bool is_required_decl_spec = false);
9208 variable_is_suppressed(const read_context& ctxt,
9209 const scope_decl* scope,
9210 Dwarf_Die *variable_die,
9211 bool is_required_decl_spec = false);
9214 finish_member_function_reading(Dwarf_Die* die,
9215 const function_decl_sptr& f,
9216 const class_or_union_sptr& klass,
9217 read_context& ctxt);
9219 /// Setter of the debug info root path for a dwarf reader context.
///
/// The path is stored in the offline callbacks of @p ctxt.
///
9221 /// @param ctxt the dwarf reader context to consider.
9223 /// @param path the new debug info root path. This must be a pointer to a
9224 /// character string whose lifetime should be greater than the
9225 /// lifetime of the read context.
9227 set_debug_info_root_path(read_context& ctxt, char** path)
9228 {ctxt.offline_callbacks()->debuginfo_path = path;}
9230 /// Getter of the debug info root path for a dwarf reader context.
9232 /// @param ctxt the dwarf reader context to consider.
9234 /// @return a pointer to the debug info root path. This is the
9236 /// pointer that is stored in the offline callbacks of @p ctxt.
9238 get_debug_info_root_path(read_context& ctxt)
9239 {return ctxt.offline_callbacks()->debuginfo_path;}
9241 /// Getter of the "show_stats" flag.
9243 /// This flag tells if we should emit statistics about various
/// internal stuff.
9246 /// @param ctxt the read context to consider for this flag.
9248 /// @return the value of the flag.
9250 get_show_stats(read_context& ctxt)
9251 {return ctxt.show_stats();}
9253 /// Setter of the "show_stats" flag.
9255 /// This flag tells if we should emit statistics about various
9258 /// @param ctxt the read context to consider for this flag.
9260 /// @param f the value of the flag.
9262 set_show_stats(read_context& ctxt, bool f)
9263 {ctxt.show_stats(f);}
9265 /// Setter of the "do_log" flag.
9267 /// This flag tells if we should emit verbose logs for various
9268 /// internal things related to DWARF reading.
9270 /// @param ctxt the DWARF reading context to consider.
9272 /// @param f the new value of the flag.
9274 set_do_log(read_context& ctxt, bool f)
9277 /// Setter of the "ignore_symbol_table" flag.
9279 /// This flag tells if we should load information about ELF symbol
9280 /// tables. Not loading the symbol tables is a speed optimization
9281 /// that is done when the set of symbols we care about is provided
9282 /// off-hand. This is the case when we are supposed to analyze a
9283 /// Linux kernel binary. In that case, because we have the white list
9284 /// of functions/variable symbols we care about, we don't need to
9285 /// analyze the symbol table; things are thus faster in that case.
9287 /// By default, the symbol table is analyzed so this boolean is set to
9290 /// @param ctxt the read context to consider.
9292 /// @param f the new value of the flag.
9294 set_ignore_symbol_table(read_context &ctxt, bool f)
9295 {ctxt.options_.ignore_symbol_table = f;}
9297 /// Getter of the "ignore_symbol_table" flag.
9299 /// This flag tells if we should load information about ELF symbol
9300 /// tables. Not loading the symbol tables is a speed optimization
9301 /// that is done when the set of symbols we care about is provided
9302 /// off-hand. This is the case when we are supposed to analyze a
9303 /// Linux kernel binary. In that case, because we have the white list
9304 /// of functions/variable symbols we care about, we don't need to
9305 /// analyze the symbol table; things are thus faster in that case.
9307 /// By default, the symbol table is analyzed so this boolean is set to
9310 /// @param ctxt the read context to consider.
9312 /// @return the value of the flag.
9314 get_ignore_symbol_table(const read_context& ctxt)
9315 {return ctxt.options_.ignore_symbol_table;}
9317 /// Test if a given DIE is anonymous
9319 /// @param die the DIE to consider.
9321 /// @return true iff @p die is anonymous.
9323 die_is_anonymous(const Dwarf_Die* die)
9325 Dwarf_Attribute attr;
9326 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), DW_AT_name, &attr))
9331 /// Get the value of an attribute that is supposed to be a string, or
9332 /// an empty string if the attribute could not be found.
9334 /// @param die the DIE to get the attribute value from.
9336 /// @param attr_name the attribute name. Must come from dwarf.h and
9337 /// be an enumerator representing an attribute like, e.g, DW_AT_name.
9339 /// @return the string representing the value of the attribute, or an
9340 /// empty string if no string attribute could be found.
9342 die_string_attribute(const Dwarf_Die* die, unsigned attr_name)
9347 Dwarf_Attribute attr;
9348 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
9351 const char* str = dwarf_formstring(&attr);
9352 return str ? str : "";
9355 /// Get the value of an attribute that is supposed to be an unsigned
9358 /// @param die the DIE to read the information from.
9360 /// @param attr_name the DW_AT_* name of the attribute. Must come
9361 /// from dwarf.h and be an enumerator representing an attribute like,
9362 /// e.g, DW_AT_decl_line.
9364 ///@param cst the output parameter that is set to the value of the
9365 /// attribute @p attr_name. This parameter is set iff the function
9368 /// @return true if there was an attribute of the name @p attr_name
9369 /// and with a value that is a constant, false otherwise.
9371 die_unsigned_constant_attribute(const Dwarf_Die* die,
9378 Dwarf_Attribute attr;
9379 Dwarf_Word result = 0;
9380 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
9381 || dwarf_formudata(&attr, &result))
9388 /// Read a signed constant value from a given attribute.
9390 /// The signed constant expected must be of form DW_FORM_sdata.
9392 /// @param die the DIE to get the attribute from.
9394 /// @param attr_name the attribute name.
9396 /// @param cst the resulting signed constant read.
9398 /// @return true iff a signed constant attribute of the name @p
9399 /// attr_name was found on the DIE @p die.
9401 die_signed_constant_attribute(const Dwarf_Die *die,
9408 Dwarf_Attribute attr;
9409 Dwarf_Sword result = 0;
9410 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
9411 || dwarf_formsdata(&attr, &result))
9418 /// Read the value of a constant attribute that is either signed or
9419 /// unsigned into a array_type_def::subrange_type::bound_value value.
9421 /// The bound_value instance will capture the actual signedness of the
9424 /// @param die the DIE from which to read the value of the attribute.
9426 /// @param attr_name the attribute name to consider.
9428 /// @param value the resulting value read from attribute @p attr_name
9431 /// @return true iff DIE @p die has an attribute named @p attr_name
9432 /// with a constant value.
9434 die_constant_attribute(const Dwarf_Die *die,
9436 array_type_def::subrange_type::bound_value &value)
9438 if (die_attribute_is_unsigned(die, attr_name)
9439 || die_attribute_has_no_signedness(die, attr_name))
9442 if (!die_unsigned_constant_attribute(die, attr_name, l))
9444 value.set_unsigned(l);
9449 if (!die_signed_constant_attribute(die, attr_name, l))
9451 value.set_signed(l);
9456 /// Test if a given attribute on a DIE has a particular form.
9458 /// @param die the DIE to consider.
9460 /// @param attr_name the attribute name to consider on DIE @p die.
9462 /// @param attr_form the attribute form that we expect attribute @p
9463 /// attr_name has on DIE @p die.
9465 /// @return true iff the attribute named @p attr_name on DIE @p die
9466 /// has the form @p attr_form.
9468 die_attribute_has_form(const Dwarf_Die *die,
9470 unsigned int attr_form)
9472 Dwarf_Attribute attr;
9473 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
9476 return dwarf_hasform(&attr, attr_form);
9479 /// Test if a given DWARF form is DW_FORM_strx{1,4}.
9481 /// Unfortunately, the DW_FORM_strx{1,4} are enumerators of an untagged
9482 /// enum in dwarf.h so we have to use an unsigned int for the form,
9485 /// @param form the form to consider.
9487 /// @return true iff @p form is DW_FORM_strx{1,4}.
9489 form_is_DW_FORM_strx(unsigned form)
9493 #if defined HAVE_DW_FORM_strx1 \
9494 && defined HAVE_DW_FORM_strx2 \
9495 && defined HAVE_DW_FORM_strx3 \
9496 && defined HAVE_DW_FORM_strx4
9497 if (form == DW_FORM_strx1
9498 || form == DW_FORM_strx2
9499 || form == DW_FORM_strx3
9500 ||form == DW_FORM_strx4)
9507 /// Test if a given DIE attribute is signed.
9509 /// @param die the DIE to consider.
9511 /// @param attr_name the attribute name to consider.
9513 /// @return true iff the attribute named @p attr_name on DIE @p die is
9516 die_attribute_is_signed(const Dwarf_Die* die, unsigned attr_name)
9518 if (die_attribute_has_form(die, attr_name, DW_FORM_sdata))
9523 /// Test if a given DIE attribute is unsigned.
9525 /// @param die the DIE to consider.
9527 /// @param attr_name the attribute name to consider.
9529 /// @return true iff the attribute named @p attr_name on DIE @p die is
9532 die_attribute_is_unsigned(const Dwarf_Die* die, unsigned attr_name)
9534 if (die_attribute_has_form(die, attr_name, DW_FORM_udata))
9539 /// Test if a given DIE attribute is neither explicitly signed nor
9540 /// unsigned. Usually this is the case for attribute of the form
9543 /// @param die the DIE to consider.
9545 /// @param attr_name the name of the attribute to consider.
9547 /// @return true iff the attribute named @p attr_name of DIE @p die is
9548 /// neither specifically signed nor unsigned.
9550 die_attribute_has_no_signedness(const Dwarf_Die *die, unsigned attr_name)
9552 return (!die_attribute_is_unsigned(die, attr_name)
9553 && !die_attribute_is_signed(die, attr_name));
9556 /// Get the value of a DIE attribute; that value is meant to be a
9559 /// @param die the DIE to get the attribute from.
9561 /// @param attr_name the DW_AT_* name of the attribute. Must come
9562 /// from dwarf.h and be an enumerator representing an attribute like,
9563 /// e.g, DW_AT_external.
9565 /// @param flag the output parameter to store the flag value into.
9566 /// This is set iff the function returns true.
9568 /// @return true if the DIE has a flag attribute named @p attr_name,
9569 /// false otherwise.
9571 die_flag_attribute(Dwarf_Die* die, unsigned attr_name, bool& flag)
9573 Dwarf_Attribute attr;
9575 if (!dwarf_attr_integrate(die, attr_name, &attr)
9576 || dwarf_formflag(&attr, &f))
9583 /// Get the mangled name from a given DIE.
9585 /// @param die the DIE to read the mangled name from.
9587 /// @return the mangled name if it's present in the DIE, or just an
9588 /// empty string if it's not.
9590 die_linkage_name(const Dwarf_Die* die)
9595 string linkage_name = die_string_attribute(die, DW_AT_linkage_name);
9596 if (linkage_name.empty())
9597 linkage_name = die_string_attribute(die, DW_AT_MIPS_linkage_name);
9598 return linkage_name;
9601 /// Get the file path that is the value of the DW_AT_decl_file
9602 /// attribute on a given DIE, if the DIE is a decl DIE having that
9605 /// @param die the DIE to consider.
9607 /// @return a string containing the file path that is the logical
9608 /// value of the DW_AT_decl_file attribute. If the DIE @p die
9609 /// doesn't have a DW_AT_decl_file attribute, then the return value is
9610 /// just an empty string.
9612 die_decl_file_attribute(const Dwarf_Die* die)
9617 const char* str = dwarf_decl_file(const_cast<Dwarf_Die*>(die));
9619 return str ? str : "";
9622 /// Get the value of an attribute which value is supposed to be a
9623 /// reference to a DIE.
9625 /// @param die the DIE to read the value from.
9627 /// @param die_is_in_alt_di true if @p die comes from alternate debug
9630 /// @param attr_name the DW_AT_* attribute name to read.
9632 /// @param result the DIE resulting from reading the attribute value.
9633 /// This is set iff the function returns true.
9635 /// @param look_thru_abstract_origin if yes, the function looks
9636 /// through the possible DW_AT_abstract_origin attribute all the way
9637 /// down to the initial DIE that is cloned and look on that DIE to see
9638 /// if it has the @p attr_name attribute.
9640 /// @return true if the DIE @p die contains an attribute named @p
9641 /// attr_name that is a DIE reference, false otherwise.
9643 die_die_attribute(const Dwarf_Die* die,
9646 bool look_thru_abstract_origin)
9648 Dwarf_Attribute attr;
9649 if (look_thru_abstract_origin)
9651 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
9656 if (!dwarf_attr(const_cast<Dwarf_Die*>(die), attr_name, &attr))
9659 bool r = dwarf_formref_die(&attr, &result);
9663 /// Read and return a DW_FORM_addr attribute from a given DIE.
9665 /// @param die the DIE to consider.
9667 /// @param attr_name the name of the DW_FORM_addr attribute to read
9670 /// @param result the resulting address.
9672 /// @return true iff the attribute could be read, was of the expected
9673 /// DW_FORM_addr and could thus be translated into the @p result.
9675 die_address_attribute(Dwarf_Die* die, unsigned attr_name, Dwarf_Addr& result)
9677 Dwarf_Attribute attr;
9678 if (!dwarf_attr_integrate(die, attr_name, &attr))
9680 return dwarf_formaddr(&attr, &result) == 0;
9683 /// Returns the source location associated with a decl DIE.
9685 /// @param ctxt the @ref read_context to use.
9687 /// @param die the DIE to read the source location from.
9689 /// @return the location associated with @p die.
9691 die_location(const read_context& ctxt, const Dwarf_Die* die)
9696 string file = die_decl_file_attribute(die);
9698 die_unsigned_constant_attribute(die, DW_AT_decl_line, line);
9700 if (!file.empty() && line != 0)
9702 translation_unit_sptr tu = ctxt.cur_transl_unit();
9703 location l = tu->get_loc_mgr().create_new_location(file, line, 1);
9709 /// Return a copy of the name of a DIE.
9711 /// @param die the DIE to consider.
9713 /// @return a copy of the name of the DIE.
9715 die_name(const Dwarf_Die* die)
9717 string name = die_string_attribute(die, DW_AT_name);
9721 /// Return the location, the name and the mangled name of a given DIE.
9723 /// @param ctxt the read context to use.
9725 /// @param die the DIE to read location and names from.
9727 /// @param loc the location output parameter to set.
9729 /// @param name the name output parameter to set.
9731 /// @param linkage_name the linkage_name output parameter to set.
9733 die_loc_and_name(const read_context& ctxt,
9737 string& linkage_name)
9739 loc = die_location(ctxt, die);
9740 name = die_name(die);
9741 linkage_name = die_linkage_name(die);
9744 /// Get the size of a (type) DIE as the value for the parameter
9745 /// DW_AT_byte_size or DW_AT_bit_size.
9747 /// @param die the DIE to read the information from.
9749 /// @param size the resulting size in bits. This is set iff the
9750 /// function return true.
9752 /// @return true if the size attribute was found.
9754 die_size_in_bits(const Dwarf_Die* die, uint64_t& size)
9759 uint64_t byte_size = 0, bit_size = 0;
9761 if (!die_unsigned_constant_attribute(die, DW_AT_byte_size, byte_size))
9763 if (!die_unsigned_constant_attribute(die, DW_AT_bit_size, bit_size))
9767 bit_size = byte_size * 8;
9774 /// Get the access specifier (from the DW_AT_accessibility attribute
9775 /// value) of a given DIE.
9777 /// @param die the DIE to consider.
9779 /// @param access the resulting access. This is set iff the function
9782 /// @return true if the DIE contains the DW_AT_accessibility attribute.
9784 die_access_specifier(Dwarf_Die * die, access_specifier& access)
9790 if (!die_unsigned_constant_attribute(die, DW_AT_accessibility, a))
9793 access_specifier result = private_access;
9797 case private_access:
9798 result = private_access;
9801 case protected_access:
9802 result = protected_access;
9806 result = public_access;
9817 /// Test whether a given DIE represents a decl that is public. That
9818 /// is, one with the DW_AT_external attribute set.
9820 /// @param die the DIE to consider for testing.
9822 /// @return true if a DW_AT_external attribute is present and its
9823 /// value is set to the true; return false otherwise.
9825 die_is_public_decl(Dwarf_Die* die)
9827 bool is_public = false;
9828 die_flag_attribute(die, DW_AT_external, is_public);
9832 /// Test whether a given DIE represents a declaration-only DIE.
9834 /// That is, if the DIE has the DW_AT_declaration flag set.
9836 /// @param die the DIE to consider.
9838 /// @return true if a DW_AT_declaration is present, false otherwise.
9840 die_is_declaration_only(Dwarf_Die* die)
9842 bool is_declaration_only = false;
9843 die_flag_attribute(die, DW_AT_declaration, is_declaration_only);
9844 return is_declaration_only;
9847 /// Tests whether a given DIE is artificial.
9849 /// @param die the test to test for.
9851 /// @return true if the DIE is artificial, false otherwise.
9853 die_is_artificial(Dwarf_Die* die)
9856 return die_flag_attribute(die, DW_AT_artificial, is_artificial);
9859 ///@return true if a tag represents a type, false otherwise.
9861 ///@param tag the tag to consider.
9863 is_type_tag(unsigned tag)
9865 bool result = false;
9869 case DW_TAG_array_type:
9870 case DW_TAG_class_type:
9871 case DW_TAG_enumeration_type:
9872 case DW_TAG_pointer_type:
9873 case DW_TAG_reference_type:
9874 case DW_TAG_string_type:
9875 case DW_TAG_structure_type:
9876 case DW_TAG_subroutine_type:
9877 case DW_TAG_typedef:
9878 case DW_TAG_union_type:
9879 case DW_TAG_ptr_to_member_type:
9880 case DW_TAG_set_type:
9881 case DW_TAG_subrange_type:
9882 case DW_TAG_base_type:
9883 case DW_TAG_const_type:
9884 case DW_TAG_file_type:
9885 case DW_TAG_packed_type:
9886 case DW_TAG_thrown_type:
9887 case DW_TAG_volatile_type:
9888 case DW_TAG_restrict_type:
9889 case DW_TAG_interface_type:
9890 case DW_TAG_unspecified_type:
9891 case DW_TAG_shared_type:
9892 case DW_TAG_rvalue_reference_type:
9904 /// Test if a given DIE is a type to be canonicalized. note that a
9905 /// function DIE (DW_TAG_subprogram) is considered to be a
9906 /// canonicalize-able type too because we can consider that DIE as
9907 /// being the type of the function, as well as the function decl
9910 /// @param tag the tag of the DIE to consider.
9912 /// @return true iff the DIE of tag @p tag is a canonicalize-able DIE.
9914 is_canonicalizeable_type_tag(unsigned tag)
9916 bool result = false;
9920 case DW_TAG_array_type:
9921 case DW_TAG_class_type:
9922 case DW_TAG_enumeration_type:
9923 case DW_TAG_pointer_type:
9924 case DW_TAG_reference_type:
9925 case DW_TAG_structure_type:
9926 case DW_TAG_subroutine_type:
9927 case DW_TAG_subprogram:
9928 case DW_TAG_typedef:
9929 case DW_TAG_union_type:
9930 case DW_TAG_base_type:
9931 case DW_TAG_const_type:
9932 case DW_TAG_volatile_type:
9933 case DW_TAG_restrict_type:
9934 case DW_TAG_rvalue_reference_type:
9946 /// Test if a DIE tag represents a declaration.
9948 /// @param tag the DWARF tag to consider.
9950 /// @return true iff @p tag is for a declaration.
9952 is_decl_tag(unsigned tag)
9956 case DW_TAG_formal_parameter:
9957 case DW_TAG_imported_declaration:
9959 case DW_TAG_unspecified_parameters:
9960 case DW_TAG_subprogram:
9961 case DW_TAG_variable:
9962 case DW_TAG_namespace:
9963 case DW_TAG_GNU_template_template_param:
9964 case DW_TAG_GNU_template_parameter_pack:
9965 case DW_TAG_GNU_formal_parameter_pack:
9971 /// Test if a DIE represents a type DIE.
9973 /// @param die the DIE to consider.
9975 /// @return true if @p die represents a type, false otherwise.
9977 die_is_type(const Dwarf_Die* die)
9981 return is_type_tag(dwarf_tag(const_cast<Dwarf_Die*>(die)));
9984 /// Test if a DIE represents a declaration.
9986 /// @param die the DIE to consider.
9988 /// @return true if @p die represents a decl, false otherwise.
9990 die_is_decl(const Dwarf_Die* die)
9994 return is_decl_tag(dwarf_tag(const_cast<Dwarf_Die*>(die)));
9997 /// Test if a DIE represents a namespace.
9999 /// @param die the DIE to consider.
10001 /// @return true if @p die represents a namespace, false otherwise.
10003 die_is_namespace(const Dwarf_Die* die)
10007 return (dwarf_tag(const_cast<Dwarf_Die*>(die)) == DW_TAG_namespace);
10010 /// Test if a DIE has tag DW_TAG_unspecified_type.
10012 /// @param die the DIE to consider.
10014 /// @return true if @p die has tag DW_TAG_unspecified_type.
10016 die_is_unspecified(Dwarf_Die* die)
10020 return (dwarf_tag(die) == DW_TAG_unspecified_type);
10023 /// Test if a DIE represents a void type.
10025 /// @param die the DIE to consider.
10027 /// @return true if @p die represents a void type, false otherwise.
10029 die_is_void_type(Dwarf_Die* die)
10031 if (!die || dwarf_tag(die) != DW_TAG_base_type)
10034 string name = die_name(die);
10035 if (name == "void")
10041 /// Test if a DIE represents a pointer type.
10043 /// @param die the die to consider.
10045 /// @return true iff @p die represents a pointer type.
10047 die_is_pointer_type(const Dwarf_Die* die)
10052 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10053 if (tag == DW_TAG_pointer_type)
10059 /// Test if a DIE is for a pointer, reference or qualified type to
10060 /// anonymous class or struct.
10062 /// @param die the DIE to consider.
10064 /// @return true iff @p die is for a pointer, reference or qualified type
10065 /// to anonymous class or struct.
10067 pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die* die)
10069 if (!die_is_pointer_or_reference_type(die)
10070 && !die_is_qualified_type(die))
10073 Dwarf_Die underlying_type_die;
10074 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
10077 if (!die_is_class_type(&underlying_type_die))
10080 string name = die_name(&underlying_type_die);
10082 return name.empty();
10085 /// Test if a DIE represents a reference type.
10087 /// @param die the die to consider.
10089 /// @return true iff @p die represents a reference type.
10091 die_is_reference_type(const Dwarf_Die* die)
10096 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10097 if (tag == DW_TAG_reference_type || tag == DW_TAG_rvalue_reference_type)
10103 /// Test if a DIE represents an array type.
10105 /// @param die the die to consider.
10107 /// @return true iff @p die represents an array type.
10109 die_is_array_type(const Dwarf_Die* die)
10114 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10115 if (tag == DW_TAG_array_type)
10121 /// Test if a DIE represents a pointer, reference or array type.
10123 /// @param die the die to consider.
10125 /// @return true iff @p die represents a pointer or reference type.
10127 die_is_pointer_or_reference_type(const Dwarf_Die* die)
10128 {return (die_is_pointer_type(die)
10129 || die_is_reference_type(die)
10130 || die_is_array_type(die));}
10132 /// Test if a DIE represents a pointer, a reference or a typedef type.
10134 /// @param die the die to consider.
10136 /// @return true iff @p die represents a pointer, a reference or a
10139 die_is_pointer_reference_or_typedef_type(const Dwarf_Die* die)
10140 {return (die_is_pointer_or_reference_type(die)
10141 || dwarf_tag(const_cast<Dwarf_Die*>(die)) == DW_TAG_typedef);}
10143 /// Test if a DIE represents a class type.
10145 /// @param die the die to consider.
10147 /// @return true iff @p die represents a class type.
10149 die_is_class_type(const Dwarf_Die* die)
10151 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10153 if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
10159 /// Test if a DIE is for a qualified type.
10161 /// @param die the DIE to consider.
10163 /// @return true iff @p die is for a qualified type.
10165 die_is_qualified_type(const Dwarf_Die* die)
10167 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10168 if (tag == DW_TAG_const_type
10169 || tag == DW_TAG_volatile_type
10170 || tag == DW_TAG_restrict_type)
10176 /// Test if a DIE is for a function type.
10178 /// @param die the DIE to consider.
10180 /// @return true iff @p die is for a function type.
10182 die_is_function_type(const Dwarf_Die *die)
10184 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10185 if (tag == DW_TAG_subprogram || tag == DW_TAG_subroutine_type)
10191 /// Test if a DIE for a function pointer or member function has an
10192 /// DW_AT_object_pointer attribute.
10194 /// @param die the DIE to consider.
10196 /// @param object_pointer out parameter. It's set to the DIE for the
10197 /// object pointer iff the function returns true.
10199 /// @return true iff the DIE @p die has an object pointer. In that
10200 /// case, the parameter @p object_pointer is set to the DIE of that
10201 /// object pointer.
10203 die_has_object_pointer(const Dwarf_Die* die, Dwarf_Die& object_pointer)
10208 if (die_die_attribute(die, DW_AT_object_pointer, object_pointer))
10214 /// When given the object pointer DIE of a function type or member
10215 /// function DIE, this function returns the "this" pointer that points
10216 /// to the associated class.
10218 /// @param die the DIE of the object pointer of the function or member
10219 /// function to consider.
10221 /// @param this_pointer_die out parameter. This is set to the DIE of
10222 /// the "this" pointer iff the function returns true.
10224 /// @return true iff the function found the "this" pointer from the
10225 /// object pointer DIE @p die. In that case, the parameter @p
10226 /// this_pointer_die is set to the DIE of that "this" pointer.
10228 die_this_pointer_from_object_pointer(Dwarf_Die* die,
10229 Dwarf_Die& this_pointer_die)
10232 ABG_ASSERT(dwarf_tag(die) == DW_TAG_formal_parameter);
10234 if (die_die_attribute(die, DW_AT_type, this_pointer_die))
10240 /// Test if a given "this" pointer that points to a particular class
10241 /// type is for a const class or not. If it's for a const class, then
10242 /// it means the function type or the member function associated to
10243 /// that "this" pointer is const.
10245 /// @param die the DIE of the "this" pointer to consider.
10247 /// @return true iff @p die points to a const class type.
10249 die_this_pointer_is_const(Dwarf_Die* die)
10253 if (dwarf_tag(die) == DW_TAG_pointer_type)
10255 Dwarf_Die pointed_to_type_die;
10256 if (die_die_attribute(die, DW_AT_type, pointed_to_type_die))
10257 if (dwarf_tag(&pointed_to_type_die) == DW_TAG_const_type)
10264 /// Test if an object pointer (referred-to via a DW_AT_object_pointer
10265 /// attribute) points to a const implicit class and so is for a const
10266 /// method or a const member function type.
10268 /// @param die the DIE of the object pointer to consider.
10270 /// @return true iff the object pointer represented by @p die is for a
10271 /// const method or const member function type.
10273 die_object_pointer_is_for_const_method(Dwarf_Die* die)
10276 ABG_ASSERT(dwarf_tag(die) == DW_TAG_formal_parameter);
10278 Dwarf_Die this_pointer_die;
10279 if (die_this_pointer_from_object_pointer(die, this_pointer_die))
10280 if (die_this_pointer_is_const(&this_pointer_die))
10286 /// Test if a DIE represents an entity that is at class scope.
10288 /// @param ctxt the read context to use.
10290 /// @param die the DIE to consider.
10292 /// @param where_offset where we are logically at in the DIE stream.
10294 /// @param class_scope_die out parameter. Set to the DIE of the
10295 /// containing class iff @p die happens to be at class scope; that is,
10296 /// iff the function returns true.
10298 /// @return true iff @p die is at class scope. In that case, @p
10299 /// class_scope_die is set to the DIE of the class that contains @p
10302 die_is_at_class_scope(const read_context& ctxt,
10303 const Dwarf_Die* die,
10304 size_t where_offset,
10305 Dwarf_Die& class_scope_die)
10307 if (!get_scope_die(ctxt, die, where_offset, class_scope_die))
10310 int tag = dwarf_tag(&class_scope_die);
10312 return (tag == DW_TAG_structure_type
10313 || tag == DW_TAG_class_type
10314 || tag == DW_TAG_union_type);
10317 /// Return the leaf object under a pointer, reference or qualified
10320 /// @param die the DIE of the type to consider.
10322 /// @param peeled_die out parameter. Set to the DIE of the leaf
10323 /// object iff the function actually peeled anything.
10325 /// @return true upon successful completion.
10327 die_peel_qual_ptr(Dwarf_Die *die, Dwarf_Die& peeled_die)
10332 int tag = dwarf_tag(die);
10334 if (tag == DW_TAG_const_type
10335 || tag == DW_TAG_volatile_type
10336 || tag == DW_TAG_restrict_type
10337 || tag == DW_TAG_pointer_type
10338 || tag == DW_TAG_reference_type
10339 || tag == DW_TAG_rvalue_reference_type)
10341 if (!die_die_attribute(die, DW_AT_type, peeled_die))
10347 while (tag == DW_TAG_const_type
10348 || tag == DW_TAG_volatile_type
10349 || tag == DW_TAG_restrict_type
10350 || tag == DW_TAG_pointer_type
10351 || tag == DW_TAG_reference_type
10352 || tag == DW_TAG_rvalue_reference_type)
10354 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
10356 tag = dwarf_tag(&peeled_die);
10362 /// Return the leaf object under a typedef type DIE.
10364 /// @param die the DIE of the type to consider.
10366 /// @param peeled_die out parameter. Set to the DIE of the leaf
10367 /// object iff the function actually peeled anything.
10369 /// @return true upon successful completion.
10371 die_peel_typedef(Dwarf_Die *die, Dwarf_Die& peeled_die)
10376 int tag = dwarf_tag(die);
10378 if (tag == DW_TAG_typedef)
10380 if (!die_die_attribute(die, DW_AT_type, peeled_die))
10386 while (tag == DW_TAG_typedef)
10388 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
10390 tag = dwarf_tag(&peeled_die);
10397 /// Return the leaf DIE under a pointer, a reference or a typedef DIE.
10399 /// @param die the DIE to consider.
10401 /// @param peeled_die the resulting peeled (or leaf) DIE. This is set
10402 /// iff the function returned true.
10404 /// @return true iff the function could peel @p die.
10406 die_peel_pointer_and_typedef(const Dwarf_Die *die, Dwarf_Die& peeled_die)
10411 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10413 if (tag == DW_TAG_pointer_type
10414 || tag == DW_TAG_reference_type
10415 || tag == DW_TAG_rvalue_reference_type
10416 || tag == DW_TAG_typedef)
10418 if (!die_die_attribute(die, DW_AT_type, peeled_die))
10424 while (tag == DW_TAG_pointer_type
10425 || tag == DW_TAG_reference_type
10426 || tag == DW_TAG_rvalue_reference_type
10427 || tag == DW_TAG_typedef)
10429 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
10431 tag = dwarf_tag(&peeled_die);
10436 /// Test if a DIE for a function type represents a method type.
10438 /// @param ctxt the read context.
10440 /// @param die the DIE to consider.
10442 /// @param where_offset where we logically are in the stream of DIEs.
10444 /// @param object_pointer_die out parameter. This is set by the
10445 /// function to the DIE that refers to the formal function parameter
10446 /// which holds the implicit "this" pointer of the method. That die
10447 /// is called the object pointer DIE. This is set iff the function
10449 /// @param class_die out parameter. This is set by the function to
10450 /// the DIE that represents the class of the method type. This is set
10451 /// iff the function returns true.
10453 /// @param is_static out parameter. This is set to true by the
10454 /// function if @p die is a static method. This is set iff the
10455 /// function returns true.
10457 /// @return true iff @p die is a DIE for a method type.
10459 die_function_type_is_method_type(const read_context& ctxt,
10460 const Dwarf_Die *die,
10461 size_t where_offset,
10462 Dwarf_Die& object_pointer_die,
10463 Dwarf_Die& class_die,
10469 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10470 ABG_ASSERT(tag == DW_TAG_subroutine_type || tag == DW_TAG_subprogram);
10472 bool has_object_pointer = false;
10474 if (tag == DW_TAG_subprogram)
10476 Dwarf_Die spec_or_origin_die;
10477 if (die_die_attribute(die, DW_AT_specification,
10478 spec_or_origin_die)
10479 || die_die_attribute(die, DW_AT_abstract_origin,
10480 spec_or_origin_die))
10482 if (die_has_object_pointer(&spec_or_origin_die,
10483 object_pointer_die))
10484 has_object_pointer = true;
10487 if (die_is_at_class_scope(ctxt, &spec_or_origin_die,
10488 where_offset, class_die))
10496 if (die_has_object_pointer(die, object_pointer_die))
10497 has_object_pointer = true;
10500 if (die_is_at_class_scope(ctxt, die, where_offset, class_die))
10509 if (die_has_object_pointer(die, object_pointer_die))
10510 has_object_pointer = true;
10517 ABG_ASSERT(has_object_pointer);
10518 // The object pointer die points to a DW_TAG_formal_parameter which
10519 // is the "this" parameter. The type of the "this" parameter is a
10520 // pointer. Let's get that pointer type.
10521 Dwarf_Die this_type_die;
10522 if (!die_die_attribute(&object_pointer_die, DW_AT_type, this_type_die))
10525 // So the class type is the type pointed to by the type of the "this"
10527 if (!die_peel_qual_ptr(&this_type_die, class_die))
10530 // And make sure we return a class type, rather than a typedef to a
10532 die_peel_typedef(&class_die, class_die);
10540 VIRTUALITY_NOT_VIRTUAL,
10541 VIRTUALITY_VIRTUAL,
10542 VIRTUALITY_PURE_VIRTUAL
10545 /// Get the virtual-ness of a given DIE, that is, the value of the
10546 /// DW_AT_virtuality attribute, expressed as one of the VIRTUALITY_*
/// enumerators.
10548 /// @param die the DIE to read from.
10550 /// @param virt the resulting virtuality attribute. This is set iff
10551 /// the function returns true.
10553 /// @return true if the virtual-ness could be determined.
10555 die_virtuality(const Dwarf_Die* die, virtuality& virt)
// Read the raw DW_AT_virtuality constant into v.  The result of the
// call is not tested on this line.
10561 die_unsigned_constant_attribute(die, DW_AT_virtuality, v);
// Map the raw DWARF constant onto the matching enumerator.
10563 if (v == DW_VIRTUALITY_virtual)
10564 virt = VIRTUALITY_VIRTUAL;
10565 else if (v == DW_VIRTUALITY_pure_virtual)
10566 virt = VIRTUALITY_PURE_VIRTUAL;
// Presumably the fall-back for every other value (the introducing
// 'else' is not visible here) -- TODO confirm.
10568 virt = VIRTUALITY_NOT_VIRTUAL;
10573 /// Test whether the DIE represents either a virtual base or function.
10575 /// @param die the DIE to consider.
10577 /// @return true if the DIE represents a virtual base or function,
10578 /// false otherwise.
10580 die_is_virtual(const Dwarf_Die* die)
10583 if (!die_virtuality(die, v))
// Both "virtual" and "pure virtual" count as virtual here.
10586 return v == VIRTUALITY_PURE_VIRTUAL || v == VIRTUALITY_VIRTUAL;
10589 /// Test if the DIE represents an entity that was declared inlined.
10591 /// @param die the DIE to test for.
10593 /// @return true if the DIE represents an entity that was declared
/// inlined, that is, if the value read from its DW_AT_inline attribute
/// equals DW_INL_declared_inlined.
10596 die_is_declared_inline(Dwarf_Die* die)
10598 uint64_t inline_value = 0;
// Try to read DW_AT_inline as an unsigned constant into inline_value.
10599 if (!die_unsigned_constant_attribute(die, DW_AT_inline, inline_value))
10601 return inline_value == DW_INL_declared_inlined;
10604 /// This function is a fast routine (optimization) to compare the values of
10605 /// two string attributes of two DIEs.
10607 /// @param l the first DIE to consider.
10609 /// @param r the second DIE to consider.
10611 /// @param attr_name the name of the attribute to compare on both DIEs.
10614 /// @param result out parameter. This is set to the result of the
10615 /// comparison. If the value of attribute @p attr_name on DIE @p l
10616 /// equals the value of attribute @p attr_name on DIE @p r, then the
10617 /// argument of this parameter is set to true. Otherwise, it's
10618 /// set to false. Note that the argument of this parameter is set iff
10619 /// the function returned true.
10621 /// @return true iff the comparison could be performed. There are
10622 /// cases in which the comparison cannot be performed. For instance,
10623 /// if one of the DIEs does not have the attribute @p attr_name. In
10624 /// any case, if this function returns true, then the parameter @p
10625 /// result is set to the result of the comparison.
10627 compare_dies_string_attribute_value(const Dwarf_Die *l, const Dwarf_Die *r,
10628 unsigned attr_name,
10631 Dwarf_Attribute l_attr, r_attr;
// Fetch the attribute from both DIEs; dwarf_attr_integrate() takes a
// non-const DIE, hence the const_cast.
10632 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(l), attr_name, &l_attr)
10633 || !dwarf_attr_integrate(const_cast<Dwarf_Die*>(r), attr_name, &r_attr))
// Both attributes must use one of the string forms handled below.
10636 ABG_ASSERT(l_attr.form == DW_FORM_strp
10637 || l_attr.form == DW_FORM_string
10638 || l_attr.form == DW_FORM_GNU_strp_alt
10639 || form_is_DW_FORM_strx(l_attr.form));
10641 ABG_ASSERT(r_attr.form == DW_FORM_strp
10642 || r_attr.form == DW_FORM_string
10643 || r_attr.form == DW_FORM_GNU_strp_alt
10644 || form_is_DW_FORM_strx(r_attr.form));
// The fast path only applies when both sides use the same kind of
// indirect (string table) form.
10646 if ((l_attr.form == DW_FORM_strp
10647 && r_attr.form == DW_FORM_strp)
10648 || (l_attr.form == DW_FORM_GNU_strp_alt
10649 && r_attr.form == DW_FORM_GNU_strp_alt)
10650 || (form_is_DW_FORM_strx(l_attr.form)
10651 && form_is_DW_FORM_strx(r_attr.form)))
10653 // So these string attributes are actually pointers into a
10654 // string table. The string table is most likely de-duplicated
10655 // so comparing the *values* of the pointers should be enough.
10657 // This is the fast path.
10658 if (l_attr.valp == r_attr.valp)
// NOTE(review): the dereferences below compare the objects that valp
// points at, not the pointers themselves; confirm that this is enough
// to decide equality of the two strings.
10660 else if (l_attr.valp && r_attr.valp)
10661 result = *l_attr.valp == *r_attr.valp;
10667 // If we reached this point it means we couldn't use the fast path
10668 // because the string attributes are strings that are "inline" in
10669 // the debug info section. Let's just compare them the slow way.
10671 string l_str = die_string_attribute(l, attr_name),
10672 r_str = die_string_attribute(r, attr_name);
10673 result = l_str == r_str;
10678 /// Compare the file path of the compilation units (aka CUs)
10679 /// associated to two DIEs.
10681 /// If the DIEs are for pointers or typedefs, this function also
10682 /// compares the file paths of the CUs of the leaf DIEs (underlying
10683 /// DIEs of the pointer or the typedef).
10685 /// @param l the first type DIE to consider.
10687 /// @param r the second type DIE to consider.
///
/// @param result out parameter; presumably receives the outcome of the
/// comparison (its uses are on lines not visible here) -- TODO confirm.
10689 /// @return true iff the file paths of the DIEs of the two types are
/// equal -- NOTE(review): given the @p result out parameter, confirm
/// whether the return value rather says that the comparison could be
/// performed.
10692 compare_dies_cu_decl_file(const Dwarf_Die* l, const Dwarf_Die *r, bool &result)
10694 Dwarf_Die l_cu, r_cu;
// Look up the CU DIE of each of the two DIEs.
10695 if (!dwarf_diecu(const_cast<Dwarf_Die*>(l), &l_cu, 0, 0)
10696 ||!dwarf_diecu(const_cast<Dwarf_Die*>(r), &r_cu, 0, 0))
// Compare a string attribute of the two CU DIEs.
10700 compare_dies_string_attribute_value(&l_cu, &r_cu,
10705 Dwarf_Die peeled_l, peeled_r;
// When both DIEs are pointers, references or typedefs, peel them and
// redo the CU lookup and comparison on the peeled DIEs.
10706 if (die_is_pointer_reference_or_typedef_type(l)
10707 && die_is_pointer_reference_or_typedef_type(r)
10708 && die_peel_pointer_and_typedef(l, peeled_l)
10709 && die_peel_pointer_and_typedef(r, peeled_r))
10711 if (!dwarf_diecu(&peeled_l, &l_cu, 0, 0)
10712 ||!dwarf_diecu(&peeled_r, &r_cu, 0, 0))
10715 compare_dies_string_attribute_value(&l_cu, &r_cu,
10724 // -----------------------------------
10725 // <location expression evaluation>
10726 // -----------------------------------
10728 /// Get the value of a given DIE attribute, knowing that it must be a
10729 /// location expression.
10731 /// @param die the DIE to read the attribute from.
10733 /// @param attr_name the name of the attribute to read the value for.
10735 /// @param expr the pointer to allocate and fill with the resulting
10736 /// array of operators + operands forming a dwarf expression. This is
10737 /// set iff the function returns true.
10739 /// @param expr_len the length of the resulting dwarf expression.
10740 /// This is set iff the function returns true.
10742 /// @return true if the attribute exists and has a dwarf expression as
10743 /// value. In that case the expr and expr_len arguments are set to
10744 /// the resulting dwarf expression.
10746 die_location_expr(const Dwarf_Die* die,
10747 unsigned attr_name,
10749 uint64_t* expr_len)
10754 Dwarf_Attribute attr;
// Look the attribute up; dwarf_attr_integrate() takes a non-const DIE,
// hence the const_cast.
10755 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
// A zero return from dwarf_getlocation() is treated as success.
10759 bool result = (dwarf_getlocation(&attr, expr, &len) == 0);
10767 /// If the current operation in the dwarf expression represents a push
10768 /// of a constant value onto the dwarf expr virtual machine (aka
10769 /// DEVM), perform the operation and update the DEVM.
10771 /// If the result of the operation is a constant, update the DEVM
10772 /// accumulator with its value. Otherwise, the DEVM accumulator is
10773 /// left with its previous value.
10775 /// @param ops the array of the dwarf expression operations to consider.
10777 /// @param ops_len the length of the @p ops array above.
10779 /// @param index the index of the operation to interpret, in @p ops.
10781 /// @param next_index the index of the operation to interpret at the
10782 /// next step, after this function completed and returned. This is
10783 /// an output parameter that is set iff the function returns true.
10785 /// @param ctxt the DEVM evaluation context.
10787 /// @return true if the current operation actually pushes a constant
10788 /// value onto the DEVM stack, false otherwise.
10790 op_pushes_constant_value(Dwarf_Op* ops,
10793 uint64_t& next_index,
10794 dwarf_expr_eval_context& ctxt)
10796 ABG_ASSERT(index < ops_len);
10798 Dwarf_Op& op = ops[index];
10804 value = ops[index].number;
// Fixed-size constant operators; the value assigned after these labels
// comes from the operation's 'number' field.
10807 case DW_OP_const1u:
10808 case DW_OP_const1s:
10809 case DW_OP_const2u:
10810 case DW_OP_const2s:
10811 case DW_OP_const4u:
10812 case DW_OP_const4s:
10813 case DW_OP_const8u:
10814 case DW_OP_const8s:
10817 value = ops[index].number;
// Wrap the computed value into an expr_result.
10921 expr_result r(value);
// This operation advances the evaluation by exactly one operation.
10924 next_index = index + 1;
10929 /// If the current operation in the dwarf expression represents a push
10930 /// of a non-constant value onto the dwarf expr virtual machine (aka
10931 /// DEVM), perform the operation and update the DEVM. A non-constant
10932 /// is namely a quantity for which we need inferior (a running program
10933 /// image) state to know the exact value.
10935 /// Upon successful completion, as the result of the operation is a
10936 /// non-constant the DEVM accumulator value is left to its state as of
10937 /// before the invocation of this function.
10939 /// @param ops the array of the dwarf expression operations to consider.
10941 /// @param ops_len the length of the @p ops array above.
10943 /// @param index the index of the operation to interpret, in @p ops.
10945 /// @param next_index the index of the operation to interpret at the
10946 /// next step, after this function completed and returned. This is
10947 /// an output parameter that is set iff the function returns true.
10949 /// @param ctxt the DEVM evaluation context.
10951 /// @return true if the current operation actually pushes a
10952 /// non-constant value onto the DEVM stack, false otherwise.
10954 op_pushes_non_constant_value(Dwarf_Op* ops,
10957 uint64_t& next_index,
10958 dwarf_expr_eval_context& ctxt)
10960 ABG_ASSERT(index < ops_len);
10961 Dwarf_Op& op = ops[index];
10997 next_index = index + 1;
11032 next_index = index + 1;
// NOTE(review): this is the only visible branch that advances by two
// operations instead of one; the opcode it belongs to is not visible
// here.
11036 next_index = index + 2;
11040 next_index = index + 1;
11044 next_index = index + 1;
11051 expr_result r(false);
11057 /// If the current operation in the dwarf expression represents a
11058 /// manipulation of the stack of the DWARF Expression Virtual Machine
11059 /// (aka DEVM), this function performs the operation and updates the
11060 /// state of the DEVM. If the result of the operation represents a
11061 /// constant value, then the accumulator of the DEVM is set to that
11062 /// result's value. Otherwise, the DEVM accumulator is left with its
11063 /// previous value.
11065 /// @param expr the array of the dwarf expression operations to consider.
11067 /// @param expr_len the length of the @p expr array above.
11069 /// @param index the index of the operation to interpret, in @p expr.
11071 /// @param next_index the index of the operation to interpret at the
11072 /// next step, after this function completed and returned. This is
11073 /// an output parameter that is set iff the function returns true.
11075 /// @param ctxt the DEVM evaluation context.
11077 /// @return true if the current operation actually manipulates the
11078 /// DEVM stack, false otherwise.
11080 op_manipulates_stack(Dwarf_Op* expr,
11083 uint64_t& next_index,
11084 dwarf_expr_eval_context& ctxt)
// NOTE(review): unlike the sibling op_* handlers, no
// 'index < expr_len' assertion is visible before this access.
11086 Dwarf_Op& op = expr[index];
11092 v = ctxt.stack.front();
11097 v = ctxt.stack.front();
11102 ABG_ASSERT(ctxt.stack.size() > 1);
11108 ABG_ASSERT(index + 1 < expr_len);
11114 ABG_ASSERT(ctxt.stack.size() > 1);
11116 ctxt.stack.erase(ctxt.stack.begin() + 1);
11121 ABG_ASSERT(ctxt.stack.size() > 2);
11123 ctxt.stack.erase(ctxt.stack.begin() + 2);
11128 case DW_OP_deref_size:
11129 ABG_ASSERT(ctxt.stack.size() > 0);
11136 case DW_OP_xderef_size:
11137 ABG_ASSERT(ctxt.stack.size() > 1);
11144 case DW_OP_push_object_address:
11149 case DW_OP_form_tls_address:
11150 case DW_OP_GNU_push_tls_address:
11151 ABG_ASSERT(ctxt.stack.size() > 0);
11153 if (op.atom == DW_OP_form_tls_address)
11158 case DW_OP_call_frame_cfa:
// Record in the context whether this operation is one of the two TLS
// address operators.
11170 if (op.atom == DW_OP_form_tls_address
11171 || op.atom == DW_OP_GNU_push_tls_address)
11172 ctxt.set_tls_address(true);
11174 ctxt.set_tls_address(false);
11176 next_index = index + 1;
11181 /// If the current operation in the dwarf expression represents a push
11182 /// of an arithmetic or logic operation onto the dwarf expr virtual
11183 /// machine (aka DEVM), perform the operation and update the DEVM.
11185 /// If the result of the operation is a constant, update the DEVM
11186 /// accumulator with its value. Otherwise, the DEVM accumulator is
11187 /// left with its previous value.
11189 /// @param expr the array of the dwarf expression operations to consider.
11191 /// @param expr_len the length of the @p expr array above.
11193 /// @param index the index of the operation to interpret, in @p expr.
11195 /// @param next_index the index of the operation to interpret at the
11196 /// next step, after this function completed and returned. This is
11197 /// an output parameter that is set iff the function returns true.
11199 /// @param ctxt the DEVM evaluation context.
11201 /// @return true if the current operation actually represents an
11202 /// arithmetic or logic operation.
11204 op_is_arith_logic(Dwarf_Op* expr,
11207 uint64_t& next_index,
11208 dwarf_expr_eval_context& ctxt)
11210 ABG_ASSERT(index < expr_len);
11212 Dwarf_Op& op = expr[index];
// Operands of the binary operations below.  For the non-commutative
// ones val2 is the left operand and val1 the right one.
11213 expr_result val1, val2;
11224 ABG_ASSERT(ctxt.stack.size() > 1);
11227 ctxt.push(val1 & val2);
// NOTE(review): the statement controlled by this test is not visible
// here; it comes right before the division.
11233 if (!val1.is_const())
11235 ctxt.push(val2 / val1);
11241 ctxt.push(val2 - val1);
11247 ctxt.push(val2 % val1);
11253 ctxt.push(val2 * val1);
11269 ctxt.push(val1 | val2);
11275 ctxt.push(val2 + val1);
11278 case DW_OP_plus_uconst:
11287 ctxt.push(val2 << val1);
11294 ctxt.push(val2 >> val1);
11300 ctxt.push(val2 ^ val1);
// Copy the front stack entry into the accumulator, but only when it is
// a constant.
11307 if (ctxt.stack.front().is_const())
11308 ctxt.accum = ctxt.stack.front();
11310 next_index = index + 1;
11314 /// If the current operation in the dwarf expression represents a push
11315 /// of a control flow operation onto the dwarf expr virtual machine
11316 /// (aka DEVM), perform the operation and update the DEVM.
11318 /// If the result of the operation is a constant, update the DEVM
11319 /// accumulator with its value. Otherwise, the DEVM accumulator is
11320 /// left with its previous value.
11322 /// @param expr the array of the dwarf expression operations to consider.
11324 /// @param expr_len the length of the @p expr array above.
11326 /// @param index the index of the operation to interpret, in @p expr.
11328 /// @param next_index the index of the operation to interpret at the
11329 /// next step, after this function completed and returned. This is
11330 /// an output parameter that is set iff the function returns true.
11332 /// @param ctxt the DEVM evaluation context.
11334 /// @return true if the current operation actually represents a
11335 /// control flow operation, false otherwise.
11337 op_is_control_flow(Dwarf_Op* expr,
11340 uint64_t& next_index,
11341 dwarf_expr_eval_context& ctxt)
11343 ABG_ASSERT(index < expr_len);
11345 Dwarf_Op& op = expr[index];
11346 expr_result val1, val2;
// Comparison operators: compare val2 (left) against val1 (right)
// according to the opcode.
11360 if (op.atom == DW_OP_eq)
11361 value = val2 == val1;
11362 else if (op.atom == DW_OP_ge)
11363 value = val2 >= val1;
11364 else if (op.atom == DW_OP_gt)
11365 value = val2 > val1;
11366 else if (op.atom == DW_OP_le)
11367 value = val2 <= val1;
11368 else if (op.atom == DW_OP_lt)
11369 value = val2 < val1;
11370 else if (op.atom == DW_OP_ne)
11371 value = val2 != val1;
// Turn the boolean outcome into a 0/1 value.
11373 val1 = value ? 1 : 0;
// The '- 1' in the two adjustments below presumably compensates for the
// 'next_index = index + 1' at the end of this function -- TODO confirm.
11380 index += op.number - 1;
11386 index += val1.const_value() - 1;
11391 case DW_OP_call_ref:
// Copy the front stack entry into the accumulator, but only when it is
// a constant.
11399 if (ctxt.stack.front().is_const())
11400 ctxt.accum = ctxt.stack.front();
11402 next_index = index + 1;
11406 /// This function quickly evaluates a DWARF expression that is a
/// single DW_OP_plus_uconst operation.
11409 /// This is a "fast path" function that quickly evaluates a DWARF
11410 /// expression that is only made of a DW_OP_plus_uconst operator.
11412 /// This is a sub-routine of die_member_offset.
11414 /// @param expr the DWARF expression to evaluate.
11416 /// @param expr_len the length of the expression @p expr.
11418 /// @param value out parameter. This is set to the result of the
11419 /// evaluation of @p expr, iff this function returns true.
11421 /// @return true iff the evaluation of @p expr went OK.
11423 eval_quickly(Dwarf_Op* expr,
// Only a one-operation expression made of DW_OP_plus_uconst is handled;
// its result is the operand of that operation.
11427 if (expr_len == 1 && (expr[0].atom == DW_OP_plus_uconst))
11429 value = expr[0].number;
11435 /// Evaluate the value of the last sub-expression that is a constant,
11436 /// inside a given DWARF expression.
11438 /// @param expr the DWARF expression to consider.
11440 /// @param expr_len the length of the expression to consider.
11442 /// @param value the resulting value of the last constant
11443 /// sub-expression of the DWARF expression. This is set iff the
11444 /// function returns true.
11446 /// @param is_tls_address out parameter. This is set to true iff
11447 /// the resulting value of the evaluation is a TLS (thread local
11448 /// storage) address.
11450 /// @param eval_ctxt the evaluation context to (re)use. Note that
11451 /// this function initializes this context before using it.
11453 /// @return true if the function could find a constant sub-expression
11454 /// to evaluate, false otherwise.
11456 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
11459 bool& is_tls_address,
11460 dwarf_expr_eval_context &eval_ctxt)
11462 // Reset the evaluation context before evaluating the constant sub
11463 // expression contained in the DWARF expression 'expr'.
11466 uint64_t index = 0, next_index = 0;
// Offer the current operation to each category handler in turn; the
// '||' chain stops at the first handler that returns true.
// NOTE(review): the loop condition is tested after the body (see the
// '} while' below), so with expr_len == 0 the handlers'
// 'index < expr_len' assertions would be reached -- confirm that
// callers never pass an empty expression.
11469 if (op_is_arith_logic(expr, expr_len, index,
11470 next_index, eval_ctxt)
11471 || op_pushes_constant_value(expr, expr_len, index,
11472 next_index, eval_ctxt)
11473 || op_manipulates_stack(expr, expr_len, index,
11474 next_index, eval_ctxt)
11475 || op_pushes_non_constant_value(expr, expr_len, index,
11476 next_index, eval_ctxt)
11477 || op_is_control_flow(expr, expr_len, index,
11478 next_index, eval_ctxt))
// Presumably the fall-back when no handler recognized the operation
// (the introducing 'else' is not visible here) -- TODO confirm.
11481 next_index = index + 1;
// Every iteration must move forward in the expression.
11483 ABG_ASSERT(next_index > index);
11484 index = next_index;
11485 } while (index < expr_len);
// The no-argument set_tls_address() is used here to read the flag back
// from the context.
11487 is_tls_address = eval_ctxt.set_tls_address();
11488 if (eval_ctxt.accum.is_const())
11490 value = eval_ctxt.accum;
11496 /// Evaluate the value of the last sub-expression that is a constant,
11497 /// inside a given DWARF expression.
///
/// This overload evaluates the expression with a local evaluation
/// context rather than one supplied by the caller.
11499 /// @param expr the DWARF expression to consider.
11501 /// @param expr_len the length of the expression to consider.
11503 /// @param value the resulting value of the last constant
11504 /// sub-expression of the DWARF expression. This is set iff the
11505 /// function returns true.
///
/// @param is_tls_address out parameter. This is set to true iff
/// the resulting value of the evaluation is a TLS (thread local
/// storage) address.
11507 /// @return true if the function could find a constant sub-expression
11508 /// to evaluate, false otherwise.
11510 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
11513 bool& is_tls_address)
11515 dwarf_expr_eval_context eval_ctxt;
11516 return eval_last_constant_dwarf_sub_expr(expr, expr_len, value,
11517 is_tls_address, eval_ctxt);
11520 // -----------------------------------
11521 // </location expression evaluation>
11522 // -----------------------------------
11524 /// Get the offset of a struct/class member as represented by the
11525 /// value of the DW_AT_data_member_location attribute.
11527 /// There is a huge gotcha in here. The value of the
11528 /// DW_AT_data_member_location is not necessarily a constant that one
11529 /// would just read and be done with it. Rather, it can be a DWARF
11530 /// expression that one has to interpret. In general, the offset can
11531 /// be given by the DW_AT_bit_offset attribute. In that case the
11532 /// offset is a constant. But it can also be given by the
11533 /// DW_AT_data_member_location attribute. In that case it's a DWARF
11534 /// location expression.
11536 /// When it's the DW_AT_data_member_location that is present,
11537 /// there are three cases to possibly take into account:
11539 /// 1/ The offset in the vtable where the offset of a virtual base
11540 /// can be found, aka vptr offset. Given the address of a
11541 /// given object O, the vptr offset for a virtual base B is given
11542 /// by the (DWARF) expression:
11544 /// address(O) + *(*address(O) - VIRTUAL_OFFSET)
11546 /// where VIRTUAL_OFFSET is a constant value; In this case,
11547 /// this function returns the constant VIRTUAL_OFFSET, as this
11548 /// is enough to detect changes in a given virtual base
11549 /// relative to the other virtual bases.
11551 /// 2/ The offset of a regular data member. Given the address of
11552 /// a struct object named O, the memory location for a
11553 /// particular data member is given by the (DWARF) expression:
11555 /// address(O) + OFFSET
11557 /// where OFFSET is a constant. In this case, this function
11558 /// returns the OFFSET constant.
11560 /// 3/ The offset of a virtual member function in the virtual
11561 /// pointer. The DWARF expression is a constant that designates
11562 /// the offset of the function in the vtable. In this case this
11563 /// function returns that constant.
11565 ///@param ctxt the read context to consider.
11567 ///@param die the DIE to read the information from.
11569 ///@param offset the resulting constant offset, in bits. This
11570 ///argument is set iff the function returns true.
///
///@return presumably true iff an offset could be determined (the
///return statements are not visible here) -- TODO confirm.
11572 die_member_offset(const read_context& ctxt,
11573 const Dwarf_Die* die,
11576 Dwarf_Op* expr = NULL;
11577 uint64_t expr_len = 0;
11580 if (die_unsigned_constant_attribute(die, DW_AT_bit_offset, off))
11582 // The DW_AT_bit_offset is present. If it contains a non-zero
11583 // value, let's read that one.
11591 if (!die_location_expr(die, DW_AT_data_member_location, &expr, &expr_len))
11594 // Otherwise, the DW_AT_data_member_location attribute is present.
11595 // In that case, let's evaluate it and get its constant
11596 // sub-expression and return that one.
// Try the single-operation fast path first and only fall back to the
// full evaluator, with the read context's evaluation context, when it
// fails.
11598 if (!eval_quickly(expr, expr_len, offset))
11600 bool is_tls_address = false;
11601 if (!eval_last_constant_dwarf_sub_expr(expr, expr_len,
11602 offset, is_tls_address,
11603 ctxt.dwarf_expr_eval_ctxt()))
11611 /// Read the value of the DW_AT_location attribute from a DIE,
11612 /// evaluate the resulting DWARF expression and, if it's a constant
11613 /// expression, return it.
11615 /// @param die the DIE to consider.
11617 /// @param address the resulting constant address. This is set iff
11618 /// the function returns true.
///
/// @param is_tls_address out parameter. It is reset to false on entry
/// and then filled in by eval_last_constant_dwarf_sub_expr.
11620 /// @return true iff the whole sequence of action described above
11621 /// could be completed normally.
11623 die_location_address(Dwarf_Die* die,
11624 Dwarf_Addr& address,
11625 bool& is_tls_address)
11627 Dwarf_Op* expr = NULL;
11628 uint64_t expr_len = 0;
11630 is_tls_address = false;
// Fetch the DW_AT_location expression of the DIE.
11631 if (!die_location_expr(die, DW_AT_location, &expr, &expr_len))
// Evaluate the expression down to its last constant sub-expression.
11635 if (!eval_last_constant_dwarf_sub_expr(expr, expr_len, addr, is_tls_address))
11643 /// Return the index of a function in its virtual table. That is,
11644 /// return the value of the DW_AT_vtable_elem_location attribute.
11646 /// @param die the DIE of the function to consider.
11648 /// @param vindex the resulting index. This is set iff the function
/// returns true.
11651 /// @return true if the DIE has a DW_AT_vtable_elem_location
/// attribute.
11654 die_virtual_function_index(Dwarf_Die* die,
11660 Dwarf_Op* expr = NULL;
11661 uint64_t expr_len = 0;
// The attribute is read as a location expression ...
11662 if (!die_location_expr(die, DW_AT_vtable_elem_location,
11667 bool is_tls_addr = false;
// ... whose last constant sub-expression is then evaluated into i.
11668 if (!eval_last_constant_dwarf_sub_expr(expr, expr_len, i, is_tls_addr))
11675 /// Test if a given DIE represents an anonymous type.
11677 /// Anonymous types we are interested in are classes, structs, unions
/// and enumerations.
11680 /// @param die the DIE to consider.
11682 /// @return true iff @p die represents an anonymous type.
11684 is_anonymous_type_die(Dwarf_Die *die)
11686 int tag = dwarf_tag(die);
// Only class, struct, union and enum DIEs are tested for anonymity.
11688 if (tag == DW_TAG_class_type
11689 || tag == DW_TAG_structure_type
11690 || tag == DW_TAG_union_type
11691 || tag == DW_TAG_enumeration_type)
11692 return die_is_anonymous(die);
11697 /// Return the base of the internal name to represent an anonymous
/// type.
11700 /// Typically, anonymous enums would be named
11701 /// __anonymous_enum__<number>, anonymous struct or classes would be
11702 /// named __anonymous_struct__<number> and anonymous unions would be
11703 /// named __anonymous_union__<number>. The first part of these
11704 /// anonymous names (i.e, __anonymous_{enum,struct,union}__) is called
11705 /// the base name. This function returns that base name, depending on
11706 /// the kind of type DIE we are looking at.
11708 /// @param die the type DIE to look at. This function expects a type
11709 /// DIE with an empty DW_AT_name property value (anonymous).
11711 /// @return a string representing the base of the internal anonymous
/// name.
11714 get_internal_anonymous_die_prefix_name(const Dwarf_Die *die)
// The DIE must be a type DIE with an empty name.
11716 ABG_ASSERT(die_is_type(die));
11717 ABG_ASSERT(die_string_attribute(die, DW_AT_name) == "");
11719 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// Classes and structs share the "struct" prefix; unions and enums each
// have their own.
11721 if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
11722 type_name = tools_utils::get_anonymous_struct_internal_name_prefix();
11723 else if (tag == DW_TAG_union_type)
11724 type_name = tools_utils::get_anonymous_union_internal_name_prefix();
11725 else if (tag == DW_TAG_enumeration_type)
11726 type_name = tools_utils::get_anonymous_enum_internal_name_prefix();
11731 /// Build a full internal anonymous type name.
11733 /// @param base_name this is the base name as returned by the function
11734 /// @ref get_internal_anonymous_die_prefix_name.
11736 /// @param anonymous_type_index this is the index of the anonymous
11737 /// type in its scope. That is, if there is more than one anonymous
11738 /// type of a given kind in a scope, this index is what tells them
11739 /// apart, starting from 0.
11741 /// @return the built string, which is a concatenation of @p base_name
11742 /// and @p anonymous_type_index.
11744 build_internal_anonymous_die_name(const string &base_name,
11745 size_t anonymous_type_index)
11747 string name = base_name;
// Only a non-zero index combined with a non-empty base name enters the
// branch that appends the index to the base name.
11748 if (anonymous_type_index && !base_name.empty())
11750 std::ostringstream o;
11751 o << base_name << anonymous_type_index;
11757 /// Build a full internal anonymous type name.
11759 /// @param die the DIE representing the anonymous type to consider.
11761 /// @param anonymous_type_index the index of the anonymous type
11762 /// represented by @p die, in its scope. That is, if there are
11763 /// several different anonymous types of the same kind as @p die, this
11764 /// index is what tells them apart.
11766 /// @return the internal name of the anonymous type represented by @p
/// die.
11769 get_internal_anonymous_die_name(Dwarf_Die *die,
11770 size_t anonymous_type_index)
// Get the kind-specific prefix, then combine it with the index.
11772 string name = get_internal_anonymous_die_prefix_name(die);
11773 name = build_internal_anonymous_die_name(name, anonymous_type_index);
11777 // ------------------------------------
11778 // <DIE pretty printer>
11779 // ------------------------------------
11781 /// Compute the qualified name of a DIE that represents a type.
11783 /// For instance, if the DIE tag is DW_TAG_subprogram then this
11784 /// function computes the name of the function *type*.
11786 /// @param ctxt the read context.
11788 /// @param die the DIE to consider.
11790 /// @param where_offset where we are logically at in the DIE stream.
11793 /// @return a copy of the qualified name of the type.
11795 die_qualified_type_name(const read_context& ctxt,
11796 const Dwarf_Die* die,
11797 size_t where_offset)
11802 int tag = dwarf_tag (const_cast<Dwarf_Die*>(die));
11803 if (tag == DW_TAG_compile_unit
11804 || tag == DW_TAG_partial_unit
11805 || tag == DW_TAG_type_unit)
11808 string name = die_name(die);
11810 Dwarf_Die scope_die;
11811 if (!get_scope_die(ctxt, die, where_offset, scope_die))
11814 string parent_name = die_qualified_name(ctxt, &scope_die, where_offset);
// Types and namespaces are joined to their scope with "::", anything
// else with ".".
11815 bool colon_colon = die_is_type(die) || die_is_namespace(die);
11816 string separator = colon_colon ? "::" : ".";
11822 case DW_TAG_unspecified_type:
11825 case DW_TAG_base_type:
11827 abigail::ir::integral_type int_type;
11828 if (parse_integral_type(name, int_type))
11835 case DW_TAG_typedef:
11836 case DW_TAG_enumeration_type:
11837 case DW_TAG_structure_type:
11838 case DW_TAG_class_type:
11839 case DW_TAG_union_type:
11841 if (tag == DW_TAG_typedef)
11843 // If the underlying type of the typedef is unspecified,
11844 // bail out as we don't support that yet.
11845 Dwarf_Die underlying_type_die;
11846 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
11848 string n = die_qualified_type_name(ctxt, &underlying_type_die,
11850 if (die_is_unspecified(&underlying_type_die)
11857 // TODO: handle cases where there are more than one
11858 // anonymous type of the same kind in the same scope. In
11859 // that case, their name must be built with the function
11860 // get_internal_anonymous_die_name or something of the same
// kind.
11862 name = get_internal_anonymous_die_prefix_name(die);
11864 ABG_ASSERT(!name.empty());
// Prefix the name with the qualified name of the parent scope, if any.
11865 repr = parent_name.empty() ? name : parent_name + separator + name;
11869 case DW_TAG_const_type:
11870 case DW_TAG_volatile_type:
11871 case DW_TAG_restrict_type:
11873 Dwarf_Die underlying_type_die;
11874 bool has_underlying_type_die =
11875 die_die_attribute(die, DW_AT_type, underlying_type_die);
11877 if (has_underlying_type_die && die_is_unspecified(&underlying_type_die))
11880 if (tag == DW_TAG_const_type)
11882 if (has_underlying_type_die
11883 && die_is_reference_type(&underlying_type_die))
11884 // A reference is always const. So, to lower false
11885 // positive reports in diff computations, we consider a
11886 // const reference just as a reference. But we need to
11887 // keep the qualified-ness of the type. So we introduce
11888 // a 'no-op' qualifier here. Please remember that this
11889 // has to be kept in sync with what is done in
11890 // get_name_of_qualified_type. So if you change this
11891 // here, you have to change that code there too.
11893 else if (!has_underlying_type_die
11894 || die_is_void_type(&underlying_type_die))
11902 else if (tag == DW_TAG_volatile_type)
11904 else if (tag == DW_TAG_restrict_type)
11907 ABG_ASSERT_NOT_REACHED;
11909 string underlying_type_repr;
11910 if (has_underlying_type_die)
11911 underlying_type_repr =
11912 die_qualified_type_name(ctxt, &underlying_type_die, where_offset);
// Presumably the branch taken when there is no underlying type DIE
// (the introducing 'else' is not visible here) -- TODO confirm.
11914 underlying_type_repr = "void";
11916 if (underlying_type_repr.empty())
// The qualifier goes after a pointer or reference underlying type, and
// before any other underlying type.
11920 if (has_underlying_type_die
11921 && die_is_pointer_or_reference_type(&underlying_type_die))
11922 repr = underlying_type_repr + " " + repr;
11924 repr += " " + underlying_type_repr;
11929 case DW_TAG_pointer_type:
11930 case DW_TAG_reference_type:
11931 case DW_TAG_rvalue_reference_type:
11933 Dwarf_Die pointed_to_type_die;
11934 if (!die_die_attribute(die, DW_AT_type, pointed_to_type_die))
11936 if (tag == DW_TAG_pointer_type)
11941 if (die_is_unspecified(&pointed_to_type_die))
11944 string pointed_type_repr =
11945 die_qualified_type_name(ctxt, &pointed_to_type_die, where_offset);
11947 repr = pointed_type_repr;
11951 if (tag == DW_TAG_pointer_type)
11953 else if (tag == DW_TAG_reference_type)
11955 else if (tag == DW_TAG_rvalue_reference_type)
11958 ABG_ASSERT_NOT_REACHED;
11962 case DW_TAG_subrange_type:
11964 // In Ada, this one can be generated on its own, that is, not
11965 // as a sub-type of an array. So we need to support it on its
11966 // own. Note that when it's emitted as the sub-type of an
11967 // array like in C and C++, this is handled differently, for
11968 // now. But we try to make this usable by other languages
11969 // that are not Ada, even if we modelled it after Ada.
11971 // So we build a subrange type for the sole purpose of using
11972 // the ::as_string() method of that type. So we don't add
11973 // that type to the current type tree being built.
11974 array_type_def::subrange_sptr s =
11975 build_subrange_type(const_cast<read_context&>(ctxt),
11977 /*associate_die_to_type=*/false);
11978 repr += s->as_string();
11982 case DW_TAG_array_type:
11984 Dwarf_Die element_type_die;
11985 if (!die_die_attribute(die, DW_AT_type, element_type_die))
11987 string element_type_name =
11988 die_qualified_type_name(ctxt, &element_type_die, where_offset);
11989 if (element_type_name.empty())
11992 array_type_def::subranges_type subranges;
11993 build_subranges_from_array_type_die(const_cast<read_context&>(ctxt),
11994 die, subranges, where_offset,
11995 /*associate_type_to_die=*/false);
// An array is rendered as its element type name followed by the string
// form of its subranges.
11997 repr = element_type_name;
11998 repr += array_type_def::subrange_type::vector_as_string(subranges);
12002 case DW_TAG_subroutine_type:
12003 case DW_TAG_subprogram:
12005 string return_type_name;
12007 vector<string> parm_names;
12008 bool is_const = false;
12009 bool is_static = false;
12011 die_return_and_parm_names_from_fn_type_die(ctxt, die, where_offset,
12012 /*pretty_print=*/true,
12013 return_type_name, class_name,
12014 parm_names, is_const,
// An empty return type name is rendered as "void".
12016 if (return_type_name.empty())
12017 return_type_name = "void";
12019 repr = return_type_name;
12021 if (!class_name.empty())
12023 // This is a method, so print the class name.
12024 repr += " (" + class_name + "::*)";
12029 for (vector<string>::const_iterator i = parm_names.begin();
12030 i != parm_names.end();
12033 if (i != parm_names.begin())
12042 case DW_TAG_string_type:
12043 case DW_TAG_ptr_to_member_type:
12044 case DW_TAG_set_type:
12045 case DW_TAG_file_type:
12046 case DW_TAG_packed_type:
12047 case DW_TAG_thrown_type:
12048 case DW_TAG_interface_type:
12049 case DW_TAG_shared_type:
12056 /// Compute the qualified name of a decl represented by a given DIE.
///
/// For DIEs of tag DW_TAG_namespace, DW_TAG_member and
/// DW_TAG_variable, the result is the name of the DIE prefixed by the
/// qualified name of its scope (when that one is not empty), the two
/// being separated by "::".
///
12058 /// For instance, for a DIE of tag DW_TAG_subprogram this function
12059 /// computes the signature of the function *declaration*.
12061 /// @param ctxt the read context.
12063 /// @param die the DIE to consider.
12065 /// @param where_offset where we are logically at in the DIE stream.
12067 /// @return a copy of the computed name.
12069 die_qualified_decl_name(const read_context& ctxt,
12070 const Dwarf_Die* die,
12071 size_t where_offset)
// Only a non-null DIE that die_is_decl() recognizes is handled.
12073 if (!die || !die_is_decl(die))
12076 string name = die_name(die);
// Get the DIE of the scope of @p die; its qualified name serves as
// the prefix of the name computed below.
12078 Dwarf_Die scope_die;
12079 if (!get_scope_die(ctxt, die, where_offset, scope_die))
12082 string scope_name = die_qualified_name(ctxt, &scope_die, where_offset);
12083 string separator = "::";
12087 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// Namespaces, data members and variables: scope-qualify the name,
// unless the scope name is empty.
12090 case DW_TAG_namespace:
12091 case DW_TAG_member:
12092 case DW_TAG_variable:
12093 repr = scope_name.empty() ? name : scope_name + separator + name;
// Functions are represented by their signature.
12095 case DW_TAG_subprogram:
12096 repr = die_function_signature(ctxt, die, where_offset);
12099 case DW_TAG_unspecified_parameters:
12103 case DW_TAG_formal_parameter:
12104 case DW_TAG_imported_declaration:
12105 case DW_TAG_GNU_template_template_param:
12106 case DW_TAG_GNU_template_parameter_pack:
12107 case DW_TAG_GNU_formal_parameter_pack:
12113 /// Compute the qualified name of the artifact represented by a given
/// DIE.
///
12116 /// If the DIE represents a type, then the function computes the name
12117 /// of the type. Otherwise, if the DIE represents a decl then the
12118 /// function computes the name of the decl. Note that a DIE of tag
12119 /// DW_TAG_subprogram is going to be considered as a "type" -- just
12120 /// like if it was a DW_TAG_subroutine_type.
12122 /// @param ctxt the read context.
12124 /// @param die the DIE to consider.
12126 /// @param where where we are logically at in the DIE stream.
12128 /// @return a copy of the computed name.
12130 die_qualified_name(const read_context& ctxt, const Dwarf_Die* die, size_t where)
// The type test comes first, so a DIE that is both a type and a decl
// gets the name of the type.
12132 if (die_is_type(die))
12133 return die_qualified_type_name(ctxt, die, where);
12134 else if (die_is_decl(die))
12135 return die_qualified_decl_name(ctxt, die, where);
12139 /// Test if the qualified name of a given type should be empty.
12141 /// The reason why the name of a DIE with a given tag would be empty
12142 /// is that libabigail's internal representation doesn't yet support
12143 /// that tag; or if the DIE's qualified name is built from names of
12144 /// sub-types DIEs whose tags are not yet supported.
12146 /// @param ctxt the reading context.
12148 /// @param die the DIE to consider.
12150 /// @param where where we are logically at, in the DIE stream.
12152 /// @param qualified_name the qualified name of the DIE. This is set
12153 /// if and only if the function returns false.
12155 /// @return true if the qualified name of the DIE is empty.
12157 die_qualified_type_name_empty(const read_context& ctxt,
12158 const Dwarf_Die* die,
12159 size_t where, string &qualified_name)
12164 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// These kinds of types refer to another type through their
// DW_AT_type attribute; the qualified name of that underlying type
// is looked at.
12167 if (tag == DW_TAG_typedef
12168 || tag == DW_TAG_pointer_type
12169 || tag == DW_TAG_reference_type
12170 || tag == DW_TAG_rvalue_reference_type
12171 || tag == DW_TAG_array_type
12172 || tag == DW_TAG_const_type
12173 || tag == DW_TAG_volatile_type
12174 || tag == DW_TAG_restrict_type)
12176 Dwarf_Die underlying_type_die;
12177 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
12180 die_qualified_type_name(ctxt, &underlying_type_die, where);
// NOTE(review): the qualified type name of @p die is computed here
// and appears to be computed again right below -- confirm whether
// the second computation is redundant.
12187 string name = die_qualified_type_name(ctxt, die, where);
12192 qname = die_qualified_type_name(ctxt, die, where);
// Hand the computed name back to the caller.
12196 qualified_name = qname;
12200 /// Given the DIE that represents a function type, compute the names
12201 /// of the following properties of the function's type:
///
/// - return type
12204 /// - enclosing class (if the function is a member function)
12205 /// - function parameter types
12207 /// When the function we are looking at is a member function, it also
12208 /// tells if it's const.
12210 /// @param ctxt the reading context.
12212 /// @param die the DIE of the function or function type we are looking
/// at.
12215 /// @param where_offset where we are logically at in the DIE stream.
12217 /// @param pretty_print if set to true, the type names are going to be
12218 /// pretty-printed names; otherwise, they are just qualified type
/// names.
12221 /// @param return_type_name out parameter. This contains the name of
12222 /// the return type of the function. It is "void" when the function
/// has no return type, and it is cleared for constructors and
/// destructors.
12224 /// @param class_name out parameter. If the function is a member
12225 /// function, this contains the name of the enclosing class.
12227 /// @param parm_names out parameter. This vector is set to the names
12228 /// of the types of the parameters of the function.
12230 /// @param is_const out parameter. If the function is a member
12231 /// function, this is set to true iff the member function is const.
12233 /// @param is_static out parameter. If the function is a static
12234 /// member function, then this is set to true.
12236 die_return_and_parm_names_from_fn_type_die(const read_context& ctxt,
12237 const Dwarf_Die* die,
12238 size_t where_offset,
12240 string &return_type_name,
12241 string &class_name,
12242 vector<string>& parm_names,
// A function DIE without a DW_AT_type attribute returns void.
12247 Dwarf_Die ret_type_die;
12248 if (!die_die_attribute(die, DW_AT_type, ret_type_die))
12249 return_type_name = "void";
12253 ? ctxt.get_die_pretty_representation(&ret_type_die, where_offset)
12254 : ctxt.get_die_qualified_type_name(&ret_type_die, where_offset);
// A return type whose name could not be computed is also reported
// as "void".
12256 if (return_type_name.empty())
12257 return_type_name = "void";
// Figure out if we are looking at a method. This also yields the
// DIE of the class and the DIE of the object pointer parameter, and
// is what sets @p is_static.
12259 Dwarf_Die object_pointer_die, class_die;
12260 bool is_method_type =
12261 die_function_type_is_method_type(ctxt, die, where_offset,
12262 object_pointer_die,
12263 class_die, is_static);
12266 if (is_method_type)
12268 class_name = ctxt.get_die_qualified_type_name(&class_die, where_offset);
// Look at the type that the type of the object pointer points to: a
// DW_TAG_const_type there is what denotes a const method.
12270 Dwarf_Die this_pointer_die;
12271 Dwarf_Die pointed_to_die;
12273 && die_die_attribute(&object_pointer_die, DW_AT_type,
12275 if (die_die_attribute(&this_pointer_die, DW_AT_type, pointed_to_die))
12276 if (dwarf_tag(&pointed_to_die) == DW_TAG_const_type)
// Constructors (named like the non-qualified class) and destructors
// (whose name starts with '~') have no return type name.
12279 string fn_name = die_name(die);
12280 string non_qualified_class_name = die_name(&class_die);
12281 bool is_ctor = fn_name == non_qualified_class_name;
12282 bool is_dtor = !fn_name.empty() && fn_name[0] == '~';
12284 if (is_ctor || is_dtor)
12285 return_type_name.clear();
// Walk the children DIEs of the function to collect the names of
// the types of its parameters.
12288 if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
12291 int child_tag = dwarf_tag(&child);
12292 if (child_tag == DW_TAG_formal_parameter)
12294 Dwarf_Die parm_type_die;
12295 if (!die_die_attribute(&child, DW_AT_type, parm_type_die))
12297 string qualified_name =
12299 ? ctxt.get_die_pretty_representation(&parm_type_die, where_offset)
12300 : ctxt.get_die_qualified_type_name(&parm_type_die, where_offset);
12302 if (qualified_name.empty())
12304 parm_names.push_back(qualified_name);
12306 else if (child_tag == DW_TAG_unspecified_parameters)
12308 // This is a variadic function parameter.
12309 parm_names.push_back("variadic parameter type");
12310 // After a DW_TAG_unspecified_parameters tag, we shouldn't
12311 // keep reading for parameters. The
12312 // unspecified_parameters TAG should be the last parameter
12313 // that we record. For instance, if there are multiple
12314 // DW_TAG_unspecified_parameters DIEs then we should care
12315 // only for the first one.
12319 while (dwarf_siblingof(&child, &child) == 0);
// No class name was found so far: look at the parent DIE of the
// function and, if it is a class type, consider its qualified name.
12321 if (class_name.empty())
12323 Dwarf_Die parent_die;
12324 if (get_parent_die(ctxt, die, parent_die, where_offset))
12326 if (die_is_class_type(&parent_die))
12328 ctxt.get_die_qualified_type_name(&parent_die, where_offset);
12333 /// This computes the signature of a function declaration
12334 /// represented by a DIE.
12336 /// @param ctxt the reading context.
12338 /// @param fn_die the DIE of the function to consider.
12340 /// @param where_offset where we are logically at in the stream of
/// DIEs.
12343 /// @return a copy of the computed function signature string.
12345 die_function_signature(const read_context& ctxt,
12346 const Dwarf_Die *fn_die,
12347 size_t where_offset)
12350 translation_unit::language lang;
12351 bool has_lang = false;
12352 if ((has_lang = ctxt.get_die_language(fn_die, lang)))
12354 // In a binary originating from the C language, it's OK to use
12355 // the linkage name of the function as a key for the map which
12356 // is meant to reduce the number of DIE comparisons involved
12357 // during DIE canonicalization computation.
12358 if (is_c_language(lang))
// Prefer the linkage name; use the plain name when there is none.
12360 string fn_name = die_linkage_name(fn_die);
12361 if (fn_name.empty())
12362 fn_name = die_name(fn_die);
12367 // TODO: When we can structurally compare DIEs originating from C++
12368 // as well, we can use the linkage name of functions in C++ too, to
12369 // reduce the number of comparisons involved during DIE
12370 // canonicalization.
// Name of the return type; "void" when it's absent or when its name
// is empty.
12372 string return_type_name;
12373 Dwarf_Die ret_type_die;
12374 if (die_die_attribute(fn_die, DW_AT_type, ret_type_die))
12375 return_type_name = ctxt.get_die_qualified_type_name(&ret_type_die,
12378 if (return_type_name.empty())
12379 return_type_name = "void";
// Qualify the name of the function with the name of its scope, if
// any.
12381 Dwarf_Die scope_die;
12383 if (get_scope_die(ctxt, fn_die, where_offset, scope_die))
12384 scope_name = ctxt.get_die_qualified_name(&scope_die, where_offset);
12385 string fn_name = die_name(fn_die);
12386 if (!scope_name.empty())
12387 fn_name = scope_name + "::" + fn_name;
// Note that return_type_name is passed as an out parameter here, so
// the value computed above can be overwritten by this call.
12390 vector<string> parm_names;
12391 bool is_const = false;
12392 bool is_static = false;
12394 die_return_and_parm_names_from_fn_type_die(ctxt, fn_die, where_offset,
12395 /*pretty_print=*/false,
12396 return_type_name, class_name,
12397 parm_names, is_const, is_static);
12399 bool is_virtual = die_is_virtual(fn_die);
// The representation starts with "method" when the function has an
// enclosing class, and with "function" otherwise.
12401 string repr = class_name.empty() ? "function" : "method";
12403 repr += " virtual";
// The return type name can be empty here, e.g. for constructors and
// destructors; in that case it's not emitted.
12405 if (!return_type_name.empty())
12406 repr += " " + return_type_name;
12408 repr += " " + fn_name;
12412 bool some_parm_emitted = false;
12413 for (vector<string>::const_iterator i = parm_names.begin();
12414 i != parm_names.end();
12417 if (i != parm_names.begin())
12419 if (some_parm_emitted)
12423 if (!is_static && !class_name.empty())
12424 // We are printing a non-static method name, skip the implicit "this"
12428 some_parm_emitted = true;
12434 ABG_ASSERT(!class_name.empty());
12441 /// Return a pretty string representation of a type, for internal purposes.
12443 /// By internal purpose, we mean things like key-ing types for lookup
12444 /// purposes and so on.
12446 /// Note that this function is also used to pretty print functions.
12447 /// For functions, it prints the *type* of the function.
12449 /// @param ctxt the context to use.
12451 /// @param die the DIE of the type to pretty print.
12453 /// @param where_offset where we logically are placed when calling
12454 /// this. It's useful to handle inclusion of DW_TAG_compile_unit
/// entries.
12457 /// @return the resulting pretty representation.
12459 die_pretty_print_type(read_context& ctxt,
12460 const Dwarf_Die* die,
12461 size_t where_offset)
// Besides type DIEs, a DW_TAG_subprogram DIE is accepted as well.
12464 || (!die_is_type(die)
12465 && dwarf_tag(const_cast<Dwarf_Die*>(die)) != DW_TAG_subprogram))
12470 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
12473 case DW_TAG_string_type:
12474 // For now, we won't try to go get the actual representation of
12475 // the string because this would make things more complicated;
12476 // for that we'd need to interpret some location expressions to
12477 // get the length of the string. And for dynamically allocated
12478 // strings, the result of the location expression evaluation
12479 // might not even be a constant. So at the moment I consider
12480 // this to be a lot of hassle for no great return. Until proven
12481 // otherwise, of course.
12482 repr = "string type";
12484 case DW_TAG_unspecified_type:
12485 case DW_TAG_ptr_to_member_type:
12488 case DW_TAG_namespace:
12489 repr = "namespace " + ctxt.get_die_qualified_type_name(die, where_offset);
12492 case DW_TAG_base_type:
12493 repr = ctxt.get_die_qualified_type_name(die, where_offset);
12496 case DW_TAG_typedef:
12498 string qualified_name;
12499 if (!die_qualified_type_name_empty(ctxt, die,
12502 repr = "typedef " + qualified_name;
// Qualified, pointer and reference types are represented by their
// qualified type name, without any prefix.
12506 case DW_TAG_const_type:
12507 case DW_TAG_volatile_type:
12508 case DW_TAG_restrict_type:
12509 case DW_TAG_pointer_type:
12510 case DW_TAG_reference_type:
12511 case DW_TAG_rvalue_reference_type:
12512 repr = ctxt.get_die_qualified_type_name(die, where_offset);
12515 case DW_TAG_enumeration_type:
12517 string qualified_name =
12518 ctxt.get_die_qualified_type_name(die, where_offset);
12519 repr = "enum " + qualified_name;
// Both structures and classes are prefixed with "class".
12523 case DW_TAG_structure_type:
12524 case DW_TAG_class_type:
12526 string qualified_name =
12527 ctxt.get_die_qualified_type_name(die, where_offset);
12528 repr = "class " + qualified_name;
12532 case DW_TAG_union_type:
12534 string qualified_name =
12535 ctxt.get_die_qualified_type_name(die, where_offset);
12536 repr = "union " + qualified_name;
// An array is represented by the name of its element type followed
// by the string representation of its subranges.
12540 case DW_TAG_array_type:
12542 Dwarf_Die element_type_die;
12543 if (!die_die_attribute(die, DW_AT_type, element_type_die))
12545 string element_type_name =
12546 ctxt.get_die_qualified_type_name(&element_type_die, where_offset);
12547 if (element_type_name.empty())
12550 array_type_def::subranges_type subranges;
12551 build_subranges_from_array_type_die(ctxt, die, subranges, where_offset,
12552 /*associate_type_to_die=*/false);
12554 repr = element_type_name;
12555 repr += array_type_def::subrange_type::vector_as_string(subranges);
12559 case DW_TAG_subrange_type:
12561 // So this can be generated by Ada, on its own; that is, not
12562 // as a subtype of an array. In that case we need to handle
12565 // For now, we consider that the pretty printed name of the
12566 // subrange type is its name. We might need something more
12567 // advanced, should the needs of the users get more
12569 repr += die_qualified_type_name(ctxt, die, where_offset);
12573 case DW_TAG_subroutine_type:
12574 case DW_TAG_subprogram:
12576 string return_type_name;
12578 vector<string> parm_names;
12579 bool is_const = false;
12580 bool is_static = false;
12582 die_return_and_parm_names_from_fn_type_die(ctxt, die, where_offset,
12583 /*pretty_print=*/true,
12584 return_type_name, class_name,
12585 parm_names, is_const,
// Without an enclosing class this is a function type; otherwise
// it's a method type.
12587 if (class_name.empty())
12588 repr = "function type";
12590 repr = "method type";
12591 repr += " " + ctxt.get_die_qualified_type_name(die, where_offset);
12595 case DW_TAG_set_type:
12596 case DW_TAG_file_type:
12597 case DW_TAG_packed_type:
12598 case DW_TAG_thrown_type:
12599 case DW_TAG_interface_type:
12600 case DW_TAG_shared_type:
12601 ABG_ASSERT_NOT_REACHED;
12607 /// Return a pretty string representation of a declaration, for
12608 /// internal purposes.
12610 /// By internal purpose, we mean things like key-ing declarations for
12611 /// lookup purposes and so on.
12613 /// Note that this function is also used to pretty print functions.
12614 /// For functions, it prints the signature of the function.
12616 /// @param ctxt the context to use.
12618 /// @param die the DIE of the declaration to pretty print.
12620 /// @param where_offset where we logically are placed when calling
12621 /// this. It's useful to handle inclusion of DW_TAG_compile_unit
/// entries.
12624 /// @return the resulting pretty representation.
12626 die_pretty_print_decl(read_context& ctxt,
12627 const Dwarf_Die* die,
12628 size_t where_offset)
// Only a non-null DIE that die_is_decl() recognizes is handled.
12630 if (!die || !die_is_decl(die))
12635 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
12638 case DW_TAG_namespace:
12639 repr = "namespace " + die_qualified_name(ctxt, die, where_offset);
// Data members and variables are represented by the name of their
// type followed by their qualified name. The type name defaults to
// "void" when the DIE has no DW_AT_type attribute.
12642 case DW_TAG_member:
12643 case DW_TAG_variable:
12645 string type_repr = "void";
12646 Dwarf_Die type_die;
12647 if (die_die_attribute(die, DW_AT_type, type_die))
12648 type_repr = die_qualified_type_name(ctxt, &type_die, where_offset);
12649 repr = die_qualified_name(ctxt, die, where_offset);
12651 repr = type_repr + " " + repr;
// Functions are represented by their signature.
12655 case DW_TAG_subprogram:
12656 repr = die_function_signature(ctxt, die, where_offset);
12665 /// Compute the pretty printed representation of an artifact
12666 /// represented by a DIE.
12668 /// If the DIE is a type, compute its pretty representation as a
12669 /// type; otherwise, if it's a declaration, compute its pretty
12670 /// representation as a declaration. Note, for instance, that a
12671 /// DW_TAG_subprogram DIE is going to be represented as a function
/// *type*.
12674 /// @param ctxt the reading context.
12676 /// @param die the DIE to consider.
12678 /// @param where_offset where in the DIE stream we are logically at.
12680 /// @return a copy of the pretty printed artifact.
12682 die_pretty_print(read_context& ctxt, const Dwarf_Die* die, size_t where_offset)
// The type test comes first, so a DIE that is both a type and a decl
// is pretty printed as a type.
12684 if (die_is_type(die))
12685 return die_pretty_print_type(ctxt, die, where_offset);
12686 else if (die_is_decl(die))
12687 return die_pretty_print_decl(ctxt, die, where_offset);
12691 // -----------------------------------
12692 // </die pretty printer>
12693 // -----------------------------------
12696 // ----------------------------------
12697 // <die comparison engine>
12698 // ---------------------------------
12700 /// Compares two decl DIEs, by comparing their tags and then the
/// values of their name-related string attributes.
12702 /// This works only for DIEs emitted by the C language.
12704 /// This implementation doesn't yet support namespaces.
12706 /// This is a subroutine of compare_dies.
///
/// @param l the left operand of the comparison operator. Must be
/// non-null.
///
/// @param r the right operand of the comparison operator. Must be
/// non-null.
///
12708 /// @return true iff @p l equals @p r.
12710 compare_as_decl_dies(const Dwarf_Die *l, const Dwarf_Die *r)
12712 ABG_ASSERT(l && r);
// DIEs of different kinds are never considered equal decls.
12714 int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l));
12715 int r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
12716 if (l_tag != r_tag)
12719 bool result = false;
12721 if (l_tag == DW_TAG_subprogram || l_tag == DW_TAG_variable)
12723 // Fast path for functions and global variables.
// The linkage name can be carried by either DW_AT_linkage_name or
// DW_AT_MIPS_linkage_name, so both attributes are looked at.
12724 if (compare_dies_string_attribute_value(l, r, DW_AT_linkage_name,
12726 || compare_dies_string_attribute_value(l, r, DW_AT_MIPS_linkage_name,
12733 if (compare_dies_string_attribute_value(l, r, DW_AT_name,
12742 // Fast path for types.
12743 if (compare_dies_string_attribute_value(l, r, DW_AT_name,
12749 /// Compares two type DIEs
///
/// Apart from the special case of two distinct DW_TAG_string_type
/// DIEs, the comparison boils down to comparing the sizes (in bits)
/// of the two types.
///
12751 /// This is a subroutine of compare_dies.
12753 /// @param l the left operand of the comparison operator.
12755 /// @param r the right operand of the comparison operator.
12757 /// @return true iff @p l equals @p r.
12759 compare_as_type_dies(const Dwarf_Die *l, const Dwarf_Die *r)
12761 ABG_ASSERT(l && r);
12762 ABG_ASSERT(die_is_type(l));
12763 ABG_ASSERT(die_is_type(r));
// Two DW_TAG_string_type DIEs located at different offsets get a
// special treatment, explained in the comment below.
12765 if (dwarf_tag(const_cast<Dwarf_Die*>(l)) == DW_TAG_string_type
12766 && dwarf_tag(const_cast<Dwarf_Die*>(r)) == DW_TAG_string_type
12767 && (dwarf_dieoffset(const_cast<Dwarf_Die*>(l))
12768 != dwarf_dieoffset(const_cast<Dwarf_Die*>(r))))
12769 // For now, we cannot compare DW_TAG_string_type because of its
12770 // string_length attribute that is a location descriptor that is
12771 // not necessarily a constant. So it's super hard to evaluate it
12772 // in a libabigail context. So for now, we just say that all
12773 // DW_TAG_string_type DIEs are different, by default.
// The return values of die_size_in_bits are not checked here;
// presumably a size stays at 0 when it can't be read -- confirm
// against die_size_in_bits.
12776 uint64_t l_size = 0, r_size = 0;
12777 die_size_in_bits(l, l_size);
12778 die_size_in_bits(r, r_size);
12780 return l_size == r_size;
12783 /// Test if two DIEs representing function declarations have the same
12784 /// linkage name, and thus are considered equal if they are C or C++,
12785 /// because the two DIEs represent functions in the same binary.
12787 /// If the DIEs don't have a linkage name, the function compares their
12788 /// name. But in that case, the caller of the function must know that
12789 /// in C++ for instance, that doesn't imply that the two functions are
/// equal.
12792 /// @param ctxt the @ref read_context to consider.
12794 /// @param l the first function DIE to consider. Its tag must be
/// DW_TAG_subprogram.
12796 /// @param r the second function DIE to consider. Its tag must be
/// DW_TAG_subprogram.
12798 /// @return true iff the function represented by @p l has the same
12799 /// linkage name as the function represented by @p r.
12801 fn_die_equal_by_linkage_name(const read_context &ctxt,
12802 const Dwarf_Die *l,
12803 const Dwarf_Die *r)
// Both DIEs must represent functions.
12811 int tag = dwarf_tag(const_cast<Dwarf_Die*>(l));
12812 ABG_ASSERT(tag == DW_TAG_subprogram);
12813 tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
12814 ABG_ASSERT(tag == DW_TAG_subprogram);
12816 string lname = die_name(l), rname = die_name(r);
12817 string llinkage_name = die_linkage_name(l),
12818 rlinkage_name = die_linkage_name(r);
12820 if (ctxt.die_is_in_c_or_cplusplus(l)
12821 && ctxt.die_is_in_c_or_cplusplus(r))
// Both functions have a linkage name: compare those.
12823 if (!llinkage_name.empty() && !rlinkage_name.empty())
12824 return llinkage_name == rlinkage_name;
// This condition holds when exactly one of the two functions has a
// linkage name.
12825 else if (!!llinkage_name.empty() != !!rlinkage_name.empty())
// Neither function has a linkage name: compare the plain names.
12828 return lname == rname;
// Here, equality requires both linkage names to be non-empty and
// identical.
12831 return (!llinkage_name.empty()
12832 && !rlinkage_name.empty()
12833 && llinkage_name == rlinkage_name);
12836 /// Compare two DIEs emitted by a C compiler.
12838 /// @param ctxt the read context used to load the DWARF information.
12840 /// @param l the left-hand-side argument of this comparison operator.
12842 /// @param r the right-hand-side argument of this comparison operator.
12844 /// @param aggregates_being_compared this holds the names of the set
12845 /// of aggregates being compared. It's used by the comparison
12846 /// function to avoid recursing infinitely when faced with types
12847 /// referencing themselves through pointers or references. By
12848 /// default, just pass an empty instance of @ref istring_set_type to
12851 /// @param update_canonical_dies_on_the_fly if true, when two
12852 /// sub-types compare equal (during the comparison of @p l and @p r)
12853 /// update their canonical type. That way, two types of the same name
12854 /// are structurally compared to each other only once. So the
12855 /// non-linear structural comparison of two types of the same name
12856 /// only happen once.
12858 /// @return true iff @p l equals @p r.
12860 compare_dies(const read_context& ctxt,
12861 const Dwarf_Die *l, const Dwarf_Die *r,
12862 istring_set_type& aggregates_being_compared,
12863 bool update_canonical_dies_on_the_fly)
12868 int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l)),
12869 r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
12871 if (l_tag != r_tag)
12874 Dwarf_Off l_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(l)),
12875 r_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(r));
12876 Dwarf_Off l_canonical_die_offset = 0, r_canonical_die_offset = 0;
12877 die_source l_die_source, r_die_source;
12878 ABG_ASSERT(ctxt.get_die_source(l, l_die_source));
12879 ABG_ASSERT(ctxt.get_die_source(r, r_die_source));
12881 // If 'l' and 'r' already have canonical DIEs, then just compare the
12882 // offsets of their canonical DIEs.
12883 bool l_has_canonical_die_offset =
12884 (l_canonical_die_offset =
12885 ctxt.get_canonical_die_offset(l_offset, l_die_source,
12886 /*die_as_type=*/true));
12888 bool r_has_canonical_die_offset =
12889 (r_canonical_die_offset =
12890 ctxt.get_canonical_die_offset(r_offset, r_die_source,
12891 /*die_as_type=*/true));
12893 if (l_has_canonical_die_offset && r_has_canonical_die_offset)
12894 return l_canonical_die_offset == r_canonical_die_offset;
12896 bool result = true;
12900 case DW_TAG_base_type:
12901 case DW_TAG_string_type:
12902 if (!compare_as_type_dies(l, r)
12903 || !compare_as_decl_dies(l, r))
12907 case DW_TAG_typedef:
12908 case DW_TAG_pointer_type:
12909 case DW_TAG_reference_type:
12910 case DW_TAG_rvalue_reference_type:
12911 case DW_TAG_const_type:
12912 case DW_TAG_volatile_type:
12913 case DW_TAG_restrict_type:
12915 if (!compare_as_type_dies(l, r))
12921 bool from_the_same_tu = false;
12922 if (!pointer_or_qual_die_of_anonymous_class_type(l)
12923 && compare_dies_cu_decl_file(l, r, from_the_same_tu)
12924 && from_the_same_tu)
12926 // These two typedefs, pointer, reference, or qualified
12927 // types have the same name and are defined in the same TU.
12928 // They thus ought to be the same.
12930 // Note that pointers, reference or qualified types to
12931 // anonymous types are not taken into account here because
12932 // those always need to be structurally compared.
12939 // No fancy optimization in this case. We need to
12940 // structurally compare the two DIEs.
12941 Dwarf_Die lu_type_die, ru_type_die;
12942 bool lu_is_void, ru_is_void;
12944 lu_is_void = !die_die_attribute(l, DW_AT_type, lu_type_die);
12945 ru_is_void = !die_die_attribute(r, DW_AT_type, ru_type_die);
12947 if (lu_is_void && ru_is_void)
12949 else if (lu_is_void != ru_is_void)
12952 result = compare_dies(ctxt, &lu_type_die, &ru_type_die,
12953 aggregates_being_compared,
12954 update_canonical_dies_on_the_fly);
12958 case DW_TAG_enumeration_type:
12959 if (!compare_as_type_dies(l, r)
12960 || !compare_as_decl_dies(l, r))
12964 // Walk the enumerators.
12965 Dwarf_Die l_enumtor, r_enumtor;
12966 bool found_l_enumtor, found_r_enumtor;
12968 for (found_l_enumtor = dwarf_child(const_cast<Dwarf_Die*>(l),
12970 found_r_enumtor = dwarf_child(const_cast<Dwarf_Die*>(r),
12972 found_l_enumtor && found_r_enumtor;
12973 found_l_enumtor = dwarf_siblingof(&l_enumtor, &l_enumtor) == 0,
12974 found_r_enumtor = dwarf_siblingof(&r_enumtor, &r_enumtor) == 0)
12976 int l_tag = dwarf_tag(&l_enumtor), r_tag = dwarf_tag(&r_enumtor);
12977 if ( l_tag != r_tag)
12983 if (l_tag != DW_TAG_enumerator)
12986 uint64_t l_val = 0, r_val = 0;
12987 die_unsigned_constant_attribute(&l_enumtor,
12990 die_unsigned_constant_attribute(&r_enumtor,
12993 if (l_val != r_val)
12999 if (found_l_enumtor != found_r_enumtor )
13005 case DW_TAG_structure_type:
13006 case DW_TAG_union_type:
13008 interned_string ln = ctxt.get_die_pretty_type_representation(l, 0);
13009 interned_string rn = ctxt.get_die_pretty_type_representation(r, 0);
13011 if ((aggregates_being_compared.find(ln)
13012 != aggregates_being_compared.end())
13013 || (aggregates_being_compared.find(rn)
13014 != aggregates_being_compared.end()))
13016 else if (!compare_as_decl_dies(l, r))
13018 else if (!compare_as_type_dies(l, r))
13022 aggregates_being_compared.insert(ln);
13023 aggregates_being_compared.insert(rn);
13025 Dwarf_Die l_member, r_member;
13026 bool found_l_member, found_r_member;
13027 for (found_l_member = dwarf_child(const_cast<Dwarf_Die*>(l),
13029 found_r_member = dwarf_child(const_cast<Dwarf_Die*>(r),
13031 found_l_member && found_r_member;
13032 found_l_member = dwarf_siblingof(&l_member, &l_member) == 0,
13033 found_r_member = dwarf_siblingof(&r_member, &r_member) == 0)
13035 int l_tag = dwarf_tag(&l_member), r_tag = dwarf_tag(&r_member);
13036 if (l_tag != r_tag)
13042 if (l_tag != DW_TAG_member && l_tag != DW_TAG_variable)
13045 if (!compare_dies(ctxt, &l_member, &r_member,
13046 aggregates_being_compared,
13047 update_canonical_dies_on_the_fly))
13053 if (found_l_member != found_r_member)
13056 aggregates_being_compared.erase(ln);
13057 aggregates_being_compared.erase(rn);
13062 case DW_TAG_array_type:
13064 Dwarf_Die l_child, r_child;
13065 bool found_l_child, found_r_child;
13066 for (found_l_child = dwarf_child(const_cast<Dwarf_Die*>(l),
13068 found_r_child = dwarf_child(const_cast<Dwarf_Die*>(r),
13070 found_l_child && found_r_child;
13071 found_l_child = dwarf_siblingof(&l_child, &l_child) == 0,
13072 found_r_child = dwarf_siblingof(&r_child, &r_child) == 0)
13074 int l_child_tag = dwarf_tag(&l_child),
13075 r_child_tag = dwarf_tag(&r_child);
13076 if (l_child_tag == DW_TAG_subrange_type
13077 || r_child_tag == DW_TAG_subrange_type)
13078 if (!compare_dies(ctxt, &l_child, &r_child,
13079 aggregates_being_compared,
13080 update_canonical_dies_on_the_fly))
13086 if (found_l_child != found_r_child)
13091 case DW_TAG_subrange_type:
13093 uint64_t l_lower_bound = 0, r_lower_bound = 0,
13094 l_upper_bound = 0, r_upper_bound = 0;
13095 die_unsigned_constant_attribute(l, DW_AT_lower_bound, l_lower_bound);
13096 die_unsigned_constant_attribute(r, DW_AT_lower_bound, r_lower_bound);
13097 if (!die_unsigned_constant_attribute(l, DW_AT_upper_bound,
13100 uint64_t l_count = 0;
13101 if (die_unsigned_constant_attribute(l, DW_AT_count, l_count))
13103 l_upper_bound = l_lower_bound + l_count;
13108 if (!die_unsigned_constant_attribute(r, DW_AT_upper_bound,
13111 uint64_t r_count = 0;
13112 if (die_unsigned_constant_attribute(l, DW_AT_count, r_count))
13114 r_upper_bound = r_lower_bound + r_count;
13120 if ((l_lower_bound != r_lower_bound)
13121 || (l_upper_bound != r_upper_bound))
13126 case DW_TAG_subroutine_type:
13127 case DW_TAG_subprogram:
13129 interned_string ln = ctxt.get_die_pretty_type_representation(l, 0);
13130 interned_string rn = ctxt.get_die_pretty_type_representation(r, 0);
13132 if ((aggregates_being_compared.find(ln)
13133 != aggregates_being_compared.end())
13134 || (aggregates_being_compared.find(rn)
13135 != aggregates_being_compared.end()))
13137 else if (l_tag == DW_TAG_subroutine_type)
13139 // The string reprs of l and r are already equal. Now let's
13140 // just check if they both come from the same TU.
13141 bool from_the_same_tu = false;
13142 if (compare_dies_cu_decl_file(l, r, from_the_same_tu)
13143 && from_the_same_tu)
13148 if (!fn_die_equal_by_linkage_name(ctxt, l, r))
13154 if (!ctxt.die_is_in_c(l) && !ctxt.die_is_in_c(r))
13156 // In C, we cannot have two different functions with the
13157 // same linkage name in a given binary. But here we are
13158 // looking at DIEs that don't originate from C. So we
13159 // need to compare return types and parameter types.
13160 Dwarf_Die l_return_type, r_return_type;
13161 bool l_return_type_is_void = !die_die_attribute(l, DW_AT_type,
13163 bool r_return_type_is_void = !die_die_attribute(r, DW_AT_type,
13165 if (l_return_type_is_void != r_return_type_is_void
13166 || (!l_return_type_is_void
13167 && !compare_dies(ctxt,
13168 &l_return_type, &r_return_type,
13169 aggregates_being_compared,
13170 update_canonical_dies_on_the_fly)))
13174 Dwarf_Die l_child, r_child;
13175 bool found_l_child, found_r_child;
13176 for (found_l_child = dwarf_child(const_cast<Dwarf_Die*>(l),
13178 found_r_child = dwarf_child(const_cast<Dwarf_Die*>(r),
13180 found_l_child && found_r_child;
13181 found_l_child = dwarf_siblingof(&l_child,
13183 found_r_child = dwarf_siblingof(&r_child,
13186 int l_child_tag = dwarf_tag(&l_child);
13187 int r_child_tag = dwarf_tag(&r_child);
13188 if (l_child_tag != r_child_tag
13189 || (l_child_tag == DW_TAG_formal_parameter
13190 && !compare_dies(ctxt, &l_child, &r_child,
13191 aggregates_being_compared,
13192 update_canonical_dies_on_the_fly)))
13198 if (found_l_child != found_r_child)
13203 aggregates_being_compared.erase(ln);
13204 aggregates_being_compared.erase(rn);
13209 case DW_TAG_formal_parameter:
13211 Dwarf_Die l_type, r_type;
13212 bool l_type_is_void = !die_die_attribute(l, DW_AT_type, l_type);
13213 bool r_type_is_void = !die_die_attribute(r, DW_AT_type, r_type);
13214 if ((l_type_is_void != r_type_is_void)
13215 || !compare_dies(ctxt, &l_type, &r_type,
13216 aggregates_being_compared,
13217 update_canonical_dies_on_the_fly))
13222 case DW_TAG_variable:
13223 case DW_TAG_member:
13224 if (compare_as_decl_dies(l, r))
13226 // Compare the offsets of the data members
13227 if (l_tag == DW_TAG_member)
13229 int64_t l_offset_in_bits = 0, r_offset_in_bits = 0;
13230 die_member_offset(ctxt, l, l_offset_in_bits);
13231 die_member_offset(ctxt, r, r_offset_in_bits);
13232 if (l_offset_in_bits != r_offset_in_bits)
13237 // Compare the types of the data members or variables.
13238 Dwarf_Die l_type, r_type;
13239 ABG_ASSERT(die_die_attribute(l, DW_AT_type, l_type));
13240 ABG_ASSERT(die_die_attribute(r, DW_AT_type, r_type));
13241 if (aggregates_being_compared.size () < 5)
13243 if (!compare_dies(ctxt, &l_type, &r_type,
13244 aggregates_being_compared,
13245 update_canonical_dies_on_the_fly))
13250 if (!compare_as_type_dies(&l_type, &r_type)
13251 ||!compare_as_decl_dies(&l_type, &r_type))
13260 case DW_TAG_class_type:
13261 case DW_TAG_enumerator:
13262 case DW_TAG_packed_type:
13263 case DW_TAG_set_type:
13264 case DW_TAG_file_type:
13265 case DW_TAG_ptr_to_member_type:
13266 case DW_TAG_thrown_type:
13267 case DW_TAG_interface_type:
13268 case DW_TAG_unspecified_type:
13269 case DW_TAG_shared_type:
13270 case DW_TAG_compile_unit:
13271 case DW_TAG_namespace:
13272 case DW_TAG_module:
13273 case DW_TAG_constant:
13274 case DW_TAG_partial_unit:
13275 case DW_TAG_imported_unit:
13276 case DW_TAG_dwarf_procedure:
13277 case DW_TAG_imported_declaration:
13278 case DW_TAG_entry_point:
13280 case DW_TAG_lexical_block:
13281 case DW_TAG_unspecified_parameters:
13282 case DW_TAG_variant:
13283 case DW_TAG_common_block:
13284 case DW_TAG_common_inclusion:
13285 case DW_TAG_inheritance:
13286 case DW_TAG_inlined_subroutine:
13287 case DW_TAG_with_stmt:
13288 case DW_TAG_access_declaration:
13289 case DW_TAG_catch_block:
13290 case DW_TAG_friend:
13291 case DW_TAG_namelist:
13292 case DW_TAG_namelist_item:
13293 case DW_TAG_template_type_parameter:
13294 case DW_TAG_template_value_parameter:
13295 case DW_TAG_try_block:
13296 case DW_TAG_variant_part:
13297 case DW_TAG_imported_module:
13298 case DW_TAG_condition:
13299 case DW_TAG_type_unit:
13300 case DW_TAG_template_alias:
13301 case DW_TAG_lo_user:
13302 case DW_TAG_MIPS_loop:
13303 case DW_TAG_format_label:
13304 case DW_TAG_function_template:
13305 case DW_TAG_class_template:
13306 case DW_TAG_GNU_BINCL:
13307 case DW_TAG_GNU_EINCL:
13308 case DW_TAG_GNU_template_template_param:
13309 case DW_TAG_GNU_template_parameter_pack:
13310 case DW_TAG_GNU_formal_parameter_pack:
13311 case DW_TAG_GNU_call_site:
13312 case DW_TAG_GNU_call_site_parameter:
13313 case DW_TAG_hi_user:
13314 ABG_ASSERT_NOT_REACHED;
13318 && update_canonical_dies_on_the_fly
13319 && is_canonicalizeable_type_tag(l_tag))
13321 // If 'l' has no canonical DIE and if 'r' has one, then propagate
13322 // the canonical DIE of 'r' to 'l'.
13324 // In case 'r' has no canonical DIE, then compute it, and then
13325 // propagate that canonical DIE to 'l'.
13326 die_source l_source = NO_DEBUG_INFO_DIE_SOURCE,
13327 r_source = NO_DEBUG_INFO_DIE_SOURCE;
13328 ABG_ASSERT(ctxt.get_die_source(l, l_source));
13329 ABG_ASSERT(ctxt.get_die_source(r, r_source));
13330 if (!l_has_canonical_die_offset
13331 // A DIE can be equivalent only to another DIE of the same
13333 && l_source == r_source)
13335 if (!r_has_canonical_die_offset)
13336 ctxt.compute_canonical_die_offset(r, r_canonical_die_offset,
13337 /*die_as_type=*/true);
13338 ABG_ASSERT(r_canonical_die_offset);
13339 ctxt.set_canonical_die_offset(l, r_canonical_die_offset,
13340 /*die_as_type=*/true);
13346 /// Compare two DIEs emitted by a C compiler.
///
/// This overload creates a fresh, empty set of aggregates being
/// compared and forwards to the compare_dies() overload that takes
/// that set as an additional argument.
///
13348 /// @param ctxt the read context used to load the DWARF information.
13350 /// @param l the left-hand-side argument of this comparison operator.
13352 /// @param r the right-hand-side argument of this comparison operator.
13354 /// @param update_canonical_dies_on_the_fly if true, then this function
13355 /// updates the canonical DIEs of sub-type DIEs of 'l' and 'r', while
13356 /// comparing l and r. This helps in making so that sub-type DIEs of
13357 /// 'l' and 'r' are compared structurally only once. This is how we
13358 /// turn this exponential comparison problem into a problem that is
13359 /// closer to a linear one.
13361 /// @return true iff @p l equals @p r.
13363 compare_dies(const read_context& ctxt,
13364 const Dwarf_Die *l,
13365 const Dwarf_Die *r,
13366 bool update_canonical_dies_on_the_fly)
// The set starts empty for each top-level comparison; it is handed
// to the overload that does the actual work.
13368 istring_set_type aggregates_being_compared;
13369 return compare_dies(ctxt, l, r, aggregates_being_compared,
13370 update_canonical_dies_on_the_fly);
13373 // ----------------------------------
13374 // </die comparison engine>
13375 // ---------------------------------
13377 /// Get the point where a DW_AT_import DIE is used to import a given
13378 /// (unit) DIE, between two DIEs.
///
/// The search looks at the import points recorded for the unit at
/// @p first_die_cu_offset and, when no direct match is found there,
/// calls itself on the units imported at those points.
///
13380 /// @param ctxt the dwarf reading context to consider.
13382 /// @param partial_unit_offset the imported unit for which we want to
13383 /// know the insertion point. This is usually a partial unit (with
13384 /// tag DW_TAG_partial_unit) but it does not necessarily have to be one.
13387 /// @param first_die_offset the offset of the DIE from which this
13388 /// function starts looking for the import point of
13389 /// @p partial_unit_offset. Note that this offset is excluded from the
13390 /// set of potential solutions.
13392 /// @param first_die_cu_offset the offset of the (compilation) unit
13393 /// that @p first_die_offset belongs to.
13395 /// @param source where the DIE of the @p first_die_cu_offset unit comes from.
13398 /// @param last_die_offset the offset of the last DIE up to
13399 /// which this function looks for the import point of @p
13400 /// partial_unit_offset. Note that this offset is excluded from the
13401 /// set of potential solutions. When it equals static_cast<size_t>(-1)
/// the lookup that depends on it is skipped.
13403 /// @param imported_point_offset the resulting
13404 /// import point offset. Note that if the imported DIE @p
13405 /// partial_unit_offset is not found between @p first_die_offset and
13406 /// @p last_die_offset, this parameter is left untouched by this function.
13409 /// @return true iff an imported unit is found between @p
13410 /// first_die_offset and @p last_die_offset.
13412 find_import_unit_point_between_dies(const read_context& ctxt,
13413 size_t partial_unit_offset,
13414 Dwarf_Off first_die_offset,
13415 Dwarf_Off first_die_cu_offset,
13417 size_t last_die_offset,
13418 size_t& imported_point_offset)
// Fetch the import points recorded, for the given source, under the
// unit at first_die_cu_offset. The entry is asserted to exist.
13420 const tu_die_imported_unit_points_map_type& tu_die_imported_unit_points_map =
13421 ctxt.tu_die_imported_unit_points_map(source);
13423 tu_die_imported_unit_points_map_type::const_iterator iter =
13424 tu_die_imported_unit_points_map.find(first_die_cu_offset);
13426 ABG_ASSERT(iter != tu_die_imported_unit_points_map.end());
13428 const imported_unit_points_type& imported_unit_points = iter->second;
13429 if (imported_unit_points.empty())
// b and e start out spanning every import point of the unit;
// presumably the lower-bound lookups below narrow them to the
// requested DIE range -- TODO confirm.
13432 imported_unit_points_type::const_iterator b = imported_unit_points.begin();
13433 imported_unit_points_type::const_iterator e = imported_unit_points.end();
13435 find_lower_bound_in_imported_unit_points(imported_unit_points,
// static_cast<size_t>(-1) acts as a "no upper limit" marker here.
13439 if (last_die_offset != static_cast<size_t>(-1))
13440 find_lower_bound_in_imported_unit_points(imported_unit_points,
// When e designates an actual element, the range is walked from e
// down to b: first for a direct import of partial_unit_offset, then
// by searching inside each imported unit.
// NOTE(review): with 'i >= b; --i', the last decrement moves i before
// b; if b is still begin() that steps before the start of the
// container -- confirm this cannot happen or rework the loop bound.
13444 if (e != imported_unit_points.end())
13446 for (imported_unit_points_type::const_iterator i = e; i >= b; --i)
13447 if (i->imported_unit_die_off == partial_unit_offset)
13449 imported_point_offset = i->offset_of_import ;
13453 for (imported_unit_points_type::const_iterator i = e; i >= b; --i)
13455 if (find_import_unit_point_between_dies(ctxt,
13456 partial_unit_offset,
13457 i->imported_unit_child_off,
13458 i->imported_unit_cu_off,
13459 i->imported_unit_die_source,
13461 imported_point_offset))
// Forward walk over [b, e): same two passes as above (direct match,
// then search inside each imported unit).
13467 for (imported_unit_points_type::const_iterator i = b; i != e; ++i)
13468 if (i->imported_unit_die_off == partial_unit_offset)
13470 imported_point_offset = i->offset_of_import ;
13474 for (imported_unit_points_type::const_iterator i = b; i != e; ++i)
13476 if (find_import_unit_point_between_dies(ctxt,
13477 partial_unit_offset,
13478 i->imported_unit_child_off,
13479 i->imported_unit_cu_off,
13480 i->imported_unit_die_source,
13482 imported_point_offset))
13490 /// In the current translation unit, get the last point where a
13491 /// DW_AT_import DIE is used to import a given (unit) DIE, before a
13492 /// given DIE is found. That given DIE is called the limit DIE.
13494 /// Said otherwise, this function returns the last import point of a
13495 /// unit, before a limit.
13497 /// @param ctxt the dwarf reading context to consider.
13499 /// @param partial_unit_offset the imported unit for which we want to
13500 /// know the insertion point. This is usually a partial unit (with
13501 /// tag DW_TAG_partial_unit) but it does not necessarily have to be one.
13504 /// @param where_offset the offset of the limit DIE.
13506 /// @param imported_point_offset the resulting import point offset.
13507 /// Note that if the imported DIE @p partial_unit_offset is not found
13508 /// before @p where_offset, this is still set whenever the search
13509 /// recorded a non-zero import point offset.
13511 /// @return true iff an imported unit is found before @p where_offset.
13512 /// Note that if an imported unit is found after @p where_offset then @p
13513 /// imported_point_offset is set and the function returns false.
13515 find_import_unit_point_before_die(const read_context& ctxt,
13516 size_t partial_unit_offset,
13517 size_t where_offset,
13518 size_t& imported_point_offset)
// Local result of the search; zero means "nothing recorded".
13520 size_t import_point_offset = 0;
13521 Dwarf_Die first_die_of_tu;
// Start from the first child of the current translation unit DIE.
13523 if (dwarf_child(const_cast<Dwarf_Die*>(ctxt.cur_tu_die()),
13524 &first_die_of_tu) != 0)
13527 Dwarf_Die cu_die_memory;
// Get the unit DIE that contains that first child.
13530 cu_die = dwarf_diecu(const_cast<Dwarf_Die*>(&first_die_of_tu),
13531 &cu_die_memory, 0, 0);
// The search always starts in the primary debug info source.
13533 if (find_import_unit_point_between_dies(ctxt, partial_unit_offset,
13534 dwarf_dieoffset(&first_die_of_tu),
13535 dwarf_dieoffset(cu_die),
13536 /*source=*/PRIMARY_DEBUG_INFO_DIE_SOURCE,
13538 import_point_offset))
13540 imported_point_offset = import_point_offset;
// The search did not succeed, but an offset may still have been
// recorded; report it to the caller in that case.
13544 if (import_point_offset)
13546 imported_point_offset = import_point_offset;
13553 /// Return the parent DIE for a given DIE.
13555 /// Note that the function build_die_parent_map() must have been
13556 /// called before this one can work. This function either succeeds or
13557 /// aborts the current process.
13559 /// @param ctxt the read context to consider.
13561 /// @param die the DIE for which we want the parent.
13563 /// @param parent_die the output parameter set to the parent die of
13564 /// @p die. Its memory must be allocated and handled by the caller.
13566 /// @param where_offset the offset of the DIE where we are "logically"
13567 /// positioned at, in the DIE tree. This is useful when @p die is
13568 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree. A value of 0 makes a partial-unit parent resolve
/// to the DIE of the current translation unit.
13571 /// @return true if the function could get a parent DIE, false otherwise.
13574 get_parent_die(const read_context& ctxt,
13575 const Dwarf_Die* die,
13576 Dwarf_Die& parent_die,
13577 size_t where_offset)
13579 ABG_ASSERT(ctxt.dwarf());
13581 die_source source = NO_DEBUG_INFO_DIE_SOURCE;
13582 ABG_ASSERT(ctxt.get_die_source(die, source));
// Look the DIE offset up in the parent map that matches its source.
13584 const offset_offset_map_type& m = ctxt.die_parent_map(source);
13585 offset_offset_map_type::const_iterator i =
13586 m.find(dwarf_dieoffset(const_cast<Dwarf_Die*>(die)));
// Load the parent DIE from the handle matching the source of die.
// NOTE(review): the dwarf_offdie*() calls live inside ABG_ASSERT, so
// this relies on ABG_ASSERT always evaluating its argument -- confirm.
13593 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
13594 ABG_ASSERT(dwarf_offdie(ctxt.dwarf(), i->second, &parent_die));
13596 case ALT_DEBUG_INFO_DIE_SOURCE:
13597 ABG_ASSERT(dwarf_offdie(ctxt.alt_dwarf(), i->second, &parent_die));
13599 case TYPE_UNIT_DIE_SOURCE:
13600 ABG_ASSERT(dwarf_offdie_types(ctxt.dwarf(), i->second, &parent_die));
13602 case NO_DEBUG_INFO_DIE_SOURCE:
13603 case NUMBER_OF_DIE_SOURCES:
13604 ABG_ASSERT_NOT_REACHED;
// A partial unit is not a logical parent by itself: the logical
// parent depends on where the unit was imported.
13607 if (dwarf_tag(&parent_die) == DW_TAG_partial_unit)
13609 if (where_offset == 0)
13611 parent_die = *ctxt.cur_tu_die();
// Otherwise look for the import point of the partial unit and use
// the parent of that import point instead.
13614 size_t import_point_offset = 0;
13616 find_import_unit_point_before_die(ctxt,
13617 dwarf_dieoffset(&parent_die),
13619 import_point_offset);
13621 // It looks like parent_die (which comes from the alternate
13622 // debug info file) hasn't been imported into this TU. So,
13623 // let's assume its logical parent is the DIE of the current TU.
13625 parent_die = *ctxt.cur_tu_die();
13628 ABG_ASSERT(import_point_offset);
13629 Dwarf_Die import_point_die;
13630 ABG_ASSERT(dwarf_offdie(ctxt.dwarf(),
13631 import_point_offset,
13632 &import_point_die));
13633 return get_parent_die(ctxt, &import_point_die,
13634 parent_die, where_offset);
13641 /// Get the DIE representing the scope of a given DIE.
13643 /// Please note that when the DIE we are looking at has a
13644 /// DW_AT_specification or DW_AT_abstract_origin attribute, the scope
13645 /// DIE is the parent DIE of the DIE referred to by that attribute.
13646 /// In addition, a parent that is a DW_TAG_subprogram,
13647 /// DW_TAG_subroutine_type or DW_TAG_array_type DIE is skipped: the
/// scope of that parent is used instead.
13649 /// Also note that if the current translation unit is from C, then
13650 /// @p scope_die is set to the unit DIE containing @p die.
13652 /// @param ctxt the reading context to use.
13654 /// @param die the DIE to consider.
13656 /// @param where_offset where we are logically at in the DIE stream.
13658 /// @param scope_die out parameter. This is set to the resulting
13659 /// scope DIE iff the function returns true.
///
/// @return true iff a scope DIE could be determined.
13661 get_scope_die(const read_context& ctxt,
13662 const Dwarf_Die* die,
13663 size_t where_offset,
13664 Dwarf_Die& scope_die)
// C translation units: the scope is the unit DIE returned by
// dwarf_diecu(). Member DIEs are asserted not to reach this point.
13666 if (is_c_language(ctxt.cur_transl_unit()->get_language()))
13668 ABG_ASSERT(dwarf_tag(const_cast<Dwarf_Die*>(die)) != DW_TAG_member);
13669 return dwarf_diecu(const_cast<Dwarf_Die*>(die), &scope_die, 0, 0);
// If die refers to another DIE through DW_AT_specification or
// DW_AT_abstract_origin, the scope is the one of the referred-to DIE.
13672 Dwarf_Die logical_parent_die;
13673 if (die_die_attribute(die, DW_AT_specification,
13674 logical_parent_die, false)
13675 || die_die_attribute(die, DW_AT_abstract_origin,
13676 logical_parent_die, false))
13677 return get_scope_die(ctxt, &logical_parent_die, where_offset, scope_die);
13679 if (!get_parent_die(ctxt, die, scope_die, where_offset))
// Functions, function types and arrays are not kept as scopes:
// recurse to get the scope of that parent instead.
13682 if (dwarf_tag(&scope_die) == DW_TAG_subprogram
13683 || dwarf_tag(&scope_die) == DW_TAG_subroutine_type
13684 || dwarf_tag(&scope_die) == DW_TAG_array_type)
13685 return get_scope_die(ctxt, &scope_die, where_offset, scope_die);
13690 /// Return the abigail IR node representing the scope of a given DIE.
13692 /// Note that it is the logical scope that is returned. That is, if
13693 /// the DIE has a DW_AT_specification or DW_AT_abstract_origin
13694 /// attribute, it's the scope of the referred-to DIE (via these
13695 /// attributes) that is returned.
13697 /// Also note that if the current translation unit is from C, then
13698 /// this returns the global scope.
13700 /// @param ctxt the dwarf reading context to use.
13702 /// @param die the DIE to get the scope for.
13704 /// @param called_for_public_decl is true if this function has been
13705 /// initially called within the context of a public decl.
13707 /// @param where_offset the offset of the DIE where we are "logically"
13708 /// positioned at, in the DIE tree. This is useful when @p die is
13709 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
///
/// @return the scope found for @p die, or the nil scope of @p ctxt
/// when no parent DIE or no scope could be determined.
13711 static scope_decl_sptr
13712 get_scope_for_die(read_context& ctxt,
13714 bool called_for_public_decl,
13715 size_t where_offset)
13717 die_source source_of_die;
13718 ABG_ASSERT(ctxt.get_die_source(die, source_of_die));
// For C, everything lives in the global scope of the context.
13720 if (is_c_language(ctxt.cur_transl_unit()->get_language()))
13722 ABG_ASSERT(dwarf_tag(die) != DW_TAG_member);
13723 return ctxt.global_scope();
// Follow DW_AT_specification / DW_AT_abstract_origin: the logical
// scope is the one of the DIE being referred to.
13726 Dwarf_Die cloned_die;
13727 if (die_die_attribute(die, DW_AT_specification, cloned_die, false)
13728 || die_die_attribute(die, DW_AT_abstract_origin, cloned_die, false))
13729 return get_scope_for_die(ctxt, &cloned_die,
13730 called_for_public_decl,
13733 Dwarf_Die parent_die;
13735 if (!get_parent_die(ctxt, die, parent_die, where_offset))
13736 return ctxt.nil_scope();
// The parent is a unit DIE: the scope is a global scope.
13738 if (dwarf_tag(&parent_die) == DW_TAG_compile_unit
13739 || dwarf_tag(&parent_die) == DW_TAG_partial_unit
13740 || dwarf_tag(&parent_die) == DW_TAG_type_unit)
13742 if (dwarf_tag(&parent_die) == DW_TAG_partial_unit
13743 || dwarf_tag(&parent_die) == DW_TAG_type_unit)
13745 ABG_ASSERT(source_of_die == ALT_DEBUG_INFO_DIE_SOURCE
13746 || source_of_die == TYPE_UNIT_DIE_SOURCE);
13747 return ctxt.cur_transl_unit()->get_global_scope();
13750 // For top level DIEs like DW_TAG_compile_unit, we just want to
13751 // return the global scope for the corresponding translation
13752 // unit. This must have been set by
13753 // build_translation_unit_and_add_to_ir if we already started to
13754 // build the translation unit of parent_die. Otherwise, just
13755 // return the global scope of the current translation unit.
13756 die_tu_map_type::const_iterator i =
13757 ctxt.die_tu_map().find(dwarf_dieoffset(&parent_die));
13758 if (i != ctxt.die_tu_map().end())
13759 return i->second->get_global_scope();
13760 return ctxt.cur_transl_unit()->get_global_scope();
13764 type_or_decl_base_sptr d;
13765 if (dwarf_tag(&parent_die) == DW_TAG_subprogram
13766 || dwarf_tag(&parent_die) == DW_TAG_array_type)
13767 // this is an entity defined in a scope that is a function.
13768 // Normally, I would say that this should be dropped. But I have
13769 // seen a case where a typedef DIE needed by a function parameter
13770 // was defined right before the parameter, under the scope of the
13771 // function. Yeah, weird. So if I drop the typedef DIE, I'd drop
13772 // the function parm too. So for that case, let's say that the
13773 // scope is the scope of the function itself. Note that this is
13774 // an error of the DWARF emitter. We should never see this DIE in
// this context.
13777 scope_decl_sptr s = get_scope_for_die(ctxt, &parent_die,
13778 called_for_public_decl,
13780 if (is_anonymous_type_die(die))
13781 // For anonymous type that have nothing to do in a function or
13782 // array type context, let's put it in the containing
13783 // namespace. That is, do not let it be in a containing class
13784 // or union where it has nothing to do.
13785 while (is_class_or_union_type(s))
13787 if (!get_parent_die(ctxt, &parent_die, parent_die, where_offset))
13788 return ctxt.nil_scope();
13789 s = get_scope_for_die(ctxt, &parent_die,
13790 called_for_public_decl,
// General case: build the IR node of the parent DIE and use it as the
// scope if it is a scope_decl.
13796 d = build_ir_node_from_die(ctxt, &parent_die,
13797 called_for_public_decl,
13799 s = dynamic_pointer_cast<scope_decl>(d);
13801 // this is an entity defined in something that is not a scope.
13803 return ctxt.nil_scope();
// If the scope is a declaration-only class, look at the definition
// recorded for that declaration.
13805 class_decl_sptr cl = dynamic_pointer_cast<class_decl>(d);
13806 if (cl && cl->get_is_declaration_only())
13808 scope_decl_sptr scop (cl->get_definition_of_declaration());
13817 /// Convert a DWARF constant representing the value of the
13818 /// DW_AT_language property into the translation_unit::language
/// enumerator.
///
/// Each DW_LANG_* constant handled here maps to the
/// translation_unit::LANG_* enumerator of the same name; the
/// function ends by returning translation_unit::LANG_UNKNOWN.
///
13821 /// @param l the DWARF constant to convert.
13823 /// @return the resulting translation_unit::language enumerator.
13824 static translation_unit::language
13825 dwarf_language_to_tu_language(size_t l)
13830 return translation_unit::LANG_C89;
13832 return translation_unit::LANG_C;
13833 case DW_LANG_Ada83:
13834 return translation_unit::LANG_Ada83;
13835 case DW_LANG_C_plus_plus:
13836 return translation_unit::LANG_C_plus_plus;
13837 case DW_LANG_Cobol74:
13838 return translation_unit::LANG_Cobol74;
13839 case DW_LANG_Cobol85:
13840 return translation_unit::LANG_Cobol85;
13841 case DW_LANG_Fortran77:
13842 return translation_unit::LANG_Fortran77;
13843 case DW_LANG_Fortran90:
13844 return translation_unit::LANG_Fortran90;
13845 case DW_LANG_Pascal83:
13846 return translation_unit::LANG_Pascal83;
13847 case DW_LANG_Modula2:
13848 return translation_unit::LANG_Modula2;
13850 return translation_unit::LANG_Java;
13852 return translation_unit::LANG_C99;
13853 case DW_LANG_Ada95:
13854 return translation_unit::LANG_Ada95;
13855 case DW_LANG_Fortran95:
13856 return translation_unit::LANG_Fortran95;
13858 return translation_unit::LANG_PL1;
13860 return translation_unit::LANG_ObjC;
13861 case DW_LANG_ObjC_plus_plus:
13862 return translation_unit::LANG_ObjC_plus_plus;
// The languages below are only handled when the matching
// HAVE_DW_LANG_*_enumerator macro is defined (presumably set by the
// build configuration when the DWARF headers provide the constant).
13864 #ifdef HAVE_DW_LANG_Rust_enumerator
13866 return translation_unit::LANG_Rust;
13869 #ifdef HAVE_DW_LANG_UPC_enumerator
13871 return translation_unit::LANG_UPC;
13874 #ifdef HAVE_DW_LANG_D_enumerator
13876 return translation_unit::LANG_D;
13879 #ifdef HAVE_DW_LANG_Python_enumerator
13880 case DW_LANG_Python:
13881 return translation_unit::LANG_Python;
13884 #ifdef HAVE_DW_LANG_Go_enumerator
13886 return translation_unit::LANG_Go;
13889 #ifdef HAVE_DW_LANG_C11_enumerator
13891 return translation_unit::LANG_C11;
13894 #ifdef HAVE_DW_LANG_C_plus_plus_03_enumerator
13895 case DW_LANG_C_plus_plus_03:
13896 return translation_unit::LANG_C_plus_plus_03;
13899 #ifdef HAVE_DW_LANG_C_plus_plus_11_enumerator
13900 case DW_LANG_C_plus_plus_11:
13901 return translation_unit::LANG_C_plus_plus_11;
13904 #ifdef HAVE_DW_LANG_C_plus_plus_14_enumerator
13905 case DW_LANG_C_plus_plus_14:
13906 return translation_unit::LANG_C_plus_plus_14;
13909 #ifdef HAVE_DW_LANG_Mips_Assembler_enumerator
13910 case DW_LANG_Mips_Assembler:
13911 return translation_unit::LANG_Mips_Assembler;
// Fallback result.
13915 return translation_unit::LANG_UNKNOWN;
13919 /// Get the default array lower bound value as defined by the DWARF
13920 /// specification, version 4, depending on the language of the
13921 /// translation unit.
///
/// NOTE(review): DWARF 4 (section 5.11) gives a default lower bound
/// of 0 to the C family, D, Java, Objective-C(++), Python and UPC,
/// and of 1 to Ada, COBOL, Fortran, Modula-2, Pascal and PL/I --
/// confirm that the value returned for each group below agrees.
///
13923 /// @param l the language of the translation unit.
13925 /// @return the default array lower bound value.
13927 get_default_array_lower_bound(translation_unit::language l)
13932 case translation_unit::LANG_UNKNOWN:
// COBOL dialects.
13935 case translation_unit::LANG_Cobol74:
13936 case translation_unit::LANG_Cobol85:
// C, C++, Objective-C(++) and Rust share one result.
13939 case translation_unit::LANG_C89:
13940 case translation_unit::LANG_C99:
13941 case translation_unit::LANG_C11:
13942 case translation_unit::LANG_C:
13943 case translation_unit::LANG_C_plus_plus_03:
13944 case translation_unit::LANG_C_plus_plus_11:
13945 case translation_unit::LANG_C_plus_plus_14:
13946 case translation_unit::LANG_C_plus_plus:
13947 case translation_unit::LANG_ObjC:
13948 case translation_unit::LANG_ObjC_plus_plus:
13949 case translation_unit::LANG_Rust:
// Fortran, Ada, Pascal and Modula-2 share one result.
13952 case translation_unit::LANG_Fortran77:
13953 case translation_unit::LANG_Fortran90:
13954 case translation_unit::LANG_Fortran95:
13955 case translation_unit::LANG_Ada83:
13956 case translation_unit::LANG_Ada95:
13957 case translation_unit::LANG_Pascal83:
13958 case translation_unit::LANG_Modula2:
13961 case translation_unit::LANG_Java:
13964 case translation_unit::LANG_PL1:
// UPC, D, Python, Go and MIPS assembler share one result.
13967 case translation_unit::LANG_UPC:
13968 case translation_unit::LANG_D:
13969 case translation_unit::LANG_Python:
13970 case translation_unit::LANG_Go:
13971 case translation_unit::LANG_Mips_Assembler:
13979 /// For a given offset, find the lower bound of a sorted vector of
13980 /// imported unit point offsets.
13982 /// The lower bound is computed with std::lower_bound, i.e. it is the
13983 /// first point of the vector that does not compare less than a point
/// built from the given offset. (Presumably points are ordered by
/// their import offset -- confirm against imported_unit_point's
/// comparison operator.)
13985 /// @param p the sorted vector of imported unit points to search.
13988 /// @param val the offset to consider when looking for the lower bound.
13991 /// @param r an iterator to the lower bound found. This parameter is
13992 /// set iff the function returns true.
13994 /// @return true iff the lower bound has been found.
13996 find_lower_bound_in_imported_unit_points(const imported_unit_points_type& p,
13998 imported_unit_points_type::const_iterator& r)
// Wrap the offset into a point so that it can be compared against
// the elements of the vector.
14000 imported_unit_point v(val);
14001 imported_unit_points_type::const_iterator result =
14002 std::lower_bound(p.begin(), p.end(), v);
// Reaching the end means no element qualifies as a lower bound.
14004 bool is_ok = result != p.end();
14012 /// Given a DW_TAG_compile_unit, build and return the corresponding
14013 /// abigail::translation_unit ir node. Note that this function
14014 /// recursively reads the children dies of the current DIE and
14015 /// populates the resulting translation unit.
///
/// The per-translation-unit data of @p ctxt is cleared first, and
/// the resulting translation unit becomes the current one of @p ctxt.
///
14017 /// @param ctxt the read_context to use.
14019 /// @param die the DW_TAG_compile_unit DIE to consider.
14021 /// @param address_size the size of the addresses expressed in this
14022 /// translation unit in general.
14024 /// @return a pointer to the resulting translation_unit.
14025 static translation_unit_sptr
14026 build_translation_unit_and_add_to_ir(read_context& ctxt,
14030 translation_unit_sptr result;
14034 ABG_ASSERT(dwarf_tag(die) == DW_TAG_compile_unit);
14036 // Clear the part of the context that is dependent on the translation
14037 // unit we are reading.
14038 ctxt.clear_per_translation_unit_data();
14040 ctxt.cur_tu_die(die);
14042 string path = die_string_attribute(die, DW_AT_name);
14043 string compilation_dir = die_string_attribute(die, DW_AT_comp_dir);
14045 // See if the same translation unit exists already in the current
14046 // corpus. Sometimes, the same translation unit can be present
14047 // several times in the same debug info. The content of the
14048 // different instances of the translation unit are different. So to
14049 // represent that, we are going to re-use the same translation
14050 // unit. That is, it's going to be the union of all the translation
14051 // units of the same path.
// NOTE(review): the lookup key is a plain concatenation, so an
// absolute DW_AT_name or an empty DW_AT_comp_dir gives an unusual
// path -- confirm it matches how translation units are registered.
14053 string abs_path = compilation_dir + "/" + path;
14054 result = ctxt.current_corpus()->find_translation_unit(abs_path);
// Create a translation unit, add it to the corpus and set its
// language from DW_AT_language (presumably only when the lookup
// above found nothing -- TODO confirm).
14059 result.reset(new translation_unit(ctxt.env(),
14062 result->set_compilation_dir_path(compilation_dir);
14063 ctxt.current_corpus()->add(result);
14065 die_unsigned_constant_attribute(die, DW_AT_language, l);
14066 result->set_language(dwarf_language_to_tu_language(l));
// Make it the current translation unit and remember which
// translation unit the offset of this DIE maps to.
14069 ctxt.cur_transl_unit(result);
14070 ctxt.die_tu_map()[dwarf_dieoffset(die)] = result;
14073 if (dwarf_child(die, &child) != 0)
// Flag the unit as not constructed while IR nodes are built for each
// child DIE of the unit.
14076 result->set_is_constructed(false);
14079 build_ir_node_from_die(ctxt, &child,
14080 die_is_public_decl(&child),
14081 dwarf_dieoffset(&child));
14082 while (dwarf_siblingof(&child, &child) == 0);
// Process the variable declarations queued for re-adding: the class
// a variable belongs to is derived from its demangled linkage name.
14084 if (!ctxt.var_decls_to_re_add_to_tree().empty())
14085 for (list<var_decl_sptr>::const_iterator v =
14086 ctxt.var_decls_to_re_add_to_tree().begin();
14087 v != ctxt.var_decls_to_re_add_to_tree().end();
14090 if (is_member_decl(*v))
14093 ABG_ASSERT((*v)->get_scope());
14094 string demangled_name =
14095 demangle_cplus_mangled_name((*v)->get_linkage_name());
14096 if (!demangled_name.empty())
// The last component is the member name; what remains, if anything,
// is used as the name of the class to look up.
14098 std::list<string> fqn_comps;
14099 fqn_to_components(demangled_name, fqn_comps);
14100 string mem_name = fqn_comps.back();
14101 fqn_comps.pop_back();
14102 class_decl_sptr class_type;
14104 if (!fqn_comps.empty())
14106 ty_name = components_to_type_name(fqn_comps);
14108 lookup_class_type(ty_name, *ctxt.cur_transl_unit());
14112 // So we are seeing a member variable for which there
14113 // is a global variable definition DIE not having a
14114 // reference attribute pointing back to the member
14115 // variable declaration DIE. Thus remove the global
14116 // variable definition from its current non-class
// scope.
14119 if ((d = lookup_var_decl_in_scope(mem_name, class_type)))
14120 // This is the data member with the same name in cl.
14121 // We just need to flag it as static.
14125 // In this case there is no data member with the
14126 // same name in cl already. Let's add it there then.
14128 remove_decl_from_scope(*v);
14129 d = add_decl_to_scope(*v, class_type);
14132 ABG_ASSERT(dynamic_pointer_cast<var_decl>(d));
14133 // Let's flag the data member as static.
14134 set_member_is_static(d, true);
// The queue has been processed; empty it and mark the unit as built.
14138 ctxt.var_decls_to_re_add_to_tree().clear();
14140 result->set_is_constructed(true);
14145 /// Build an abigail::namespace_decl out of a DW_TAG_namespace or
14146 /// DW_TAG_module (for fortran) DIE.
14148 /// Note that this function connects the DW_TAG_namespace to the IR
14149 /// being currently created, reads the children of the DIE and
14150 /// connects them to the IR as well.
14152 /// @param ctxt the read context to use.
14154 /// @param die the DIE to read from. Must be either DW_TAG_namespace
14155 /// or DW_TAG_module.
14157 /// @param where_offset the offset of the DIE where we are "logically"
14158 /// positioned at, in the DIE tree. This is useful when @p die is
14159 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
14162 /// @return the resulting @ref abigail::namespace_decl or NULL if it
14163 /// couldn't be created.
14164 static namespace_decl_sptr
14165 build_namespace_decl_and_add_to_ir(read_context& ctxt,
14167 size_t where_offset)
14169 namespace_decl_sptr result;
14175 ABG_ASSERT(ctxt.get_die_source(die, source));
// Only namespace and module DIEs are handled here.
14177 unsigned tag = dwarf_tag(die);
14178 if (tag != DW_TAG_namespace && tag != DW_TAG_module)
14181 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
14182 /*called_for_public_decl=*/false,
14185 string name, linkage_name;
14187 die_loc_and_name(ctxt, die, loc, name, linkage_name);
// Create the namespace, add it to its scope and record that this DIE
// maps to it.
14189 result.reset(new namespace_decl(ctxt.env(), name, loc));
14190 add_decl_to_scope(result, scope.get());
14191 ctxt.associate_die_to_decl(die, result, where_offset);
14194 if (dwarf_child(die, &child) != 0)
// The namespace stays on the scope stack while IR nodes are built for
// its children, and is popped afterwards.
14197 ctxt.scope_stack().push(result.get());
14199 build_ir_node_from_die(ctxt, &child,
14200 /*called_from_public_decl=*/false,
14202 while (dwarf_siblingof(&child, &child) == 0);
14203 ctxt.scope_stack().pop();
14208 /// Build a @ref type_decl out of a DW_TAG_base_type DIE.
///
/// An existing type is looked up by name in the corpus group or in
/// the current corpus before a new @ref type_decl is created.
///
14210 /// @param ctxt the read context to use.
14212 /// @param die the DW_TAG_base_type to consider.
14214 /// @param where_offset where we are logically at in the DIE stream.
14216 /// @return the resulting type_decl_sptr.
14217 static type_decl_sptr
14218 build_type_decl(read_context& ctxt, Dwarf_Die* die, size_t where_offset)
14220 type_decl_sptr result;
14224 ABG_ASSERT(dwarf_tag(die) == DW_TAG_base_type);
// The size comes from DW_AT_byte_size, with DW_AT_bit_size tried only
// when the byte size is absent.
14226 uint64_t byte_size = 0, bit_size = 0;
14227 if (!die_unsigned_constant_attribute(die, DW_AT_byte_size, byte_size))
14228 if (!die_unsigned_constant_attribute(die, DW_AT_bit_size, bit_size))
14231 if (bit_size == 0 && byte_size != 0)
14232 // Update the bit size.
14233 bit_size = byte_size * 8;
14235 string type_name, linkage_name;
14237 die_loc_and_name(ctxt, die, loc, type_name, linkage_name);
// NOTE(review): byte_size is only ever set from DW_AT_byte_size, so a
// type that carries just DW_AT_bit_size also enters this branch --
// confirm that this is intended.
14239 if (byte_size == 0)
14241 // The size of the type is zero, that must mean that we are
14242 // looking at the definition of the void type.
14243 if (type_name == "void")
14244 result = is_type_decl(build_ir_node_for_void_type(ctxt));
14246 // A type of size zero that is not void? Hmmh, I am not sure
14247 // what that means. Return nil for now.
// When types are reused from the corpus group, the lookup uses the
// normalized spelling of integral type names.
14251 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
14253 string normalized_type_name = type_name;
14254 integral_type int_type;
14255 if (parse_integral_type(type_name, int_type))
14256 normalized_type_name = int_type.to_string();
14257 result = lookup_basic_type(normalized_type_name, *corp);
// Look the type up in the current corpus.
14261 if (corpus_sptr corp = ctxt.current_corpus())
14262 result = lookup_basic_type(type_name, *corp);
// Create a new type_decl (alignment 0) and associate the DIE to the
// resulting type.
14264 result.reset(new type_decl(ctxt.env(), type_name, bit_size,
14265 /*alignment=*/0, loc, linkage_name));
14266 ctxt.associate_die_to_type(die, result, where_offset);
14270 /// Build an enum_type_decl from a DW_TAG_enumeration_type DIE.
14272 /// @param ctxt the read context to use.
14274 /// @param die the DIE to read from.
14276 /// @param scope the scope of the final enum. Note that this function
14277 /// does *NOT* add the built type to this scope. The scope is just so
14278 /// that the function knows how to name anonymous enums.
14280 /// @return the built enum_type_decl or NULL if it could not be built.
14281 static enum_type_decl_sptr
14282 build_enum_type(read_context& ctxt,
14285 size_t where_offset)
14287 enum_type_decl_sptr result;
14291 unsigned tag = dwarf_tag(die);
14292 if (tag != DW_TAG_enumeration_type)
14295 string name, linkage_name;
14297 die_loc_and_name(ctxt, die, loc, name, linkage_name);
14299 bool enum_is_anonymous = false;
14300 // If the enum is anonymous, let's give it a name.
14303 name = get_internal_anonymous_die_prefix_name(die);
14304 ABG_ASSERT(!name.empty());
14305 // But we remember that the type is anonymous.
14306 enum_is_anonymous = true;
14308 if (size_t s = scope->get_num_anonymous_member_enums())
14309 name = build_internal_anonymous_die_name(name, s);
14312 bool use_odr = ctxt.odr_is_relevant(die);
14313 // If the type has a location, then associate it to its
14314 // representation. This way, all occurrences of types with the same
14315 // representation (name) and location can be later detected as being
14316 // for the same type.
14318 if (!enum_is_anonymous)
14322 if (enum_type_decl_sptr pre_existing_enum =
14323 is_enum_type(ctxt.lookup_artifact_from_die(die)))
14324 result = pre_existing_enum;
14326 else if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
14329 result = lookup_enum_type_per_location(loc.expand(), *corp);
14333 if (enum_type_decl_sptr pre_existing_enum =
14334 is_enum_type(ctxt.lookup_artifact_from_die(die)))
14335 if (pre_existing_enum->get_location() == loc)
14336 result = pre_existing_enum;
14341 ctxt.associate_die_to_type(die, result, where_offset);
14345 // TODO: for anonymous enums, maybe have a map of loc -> enums so that
14346 // we can look them up?
14349 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
14352 // for now we consider that underlying types of enums are all anonymous
14353 bool enum_underlying_type_is_anonymous= true;
14354 string underlying_type_name;
14355 if (enum_underlying_type_is_anonymous)
14357 underlying_type_name = "unnamed-enum";
14358 enum_underlying_type_is_anonymous = true;
14361 underlying_type_name = string("enum-") + name;
14362 underlying_type_name += "-underlying-type";
14364 enum_type_decl::enumerators enms;
14366 if (dwarf_child(die, &child) == 0)
14370 if (dwarf_tag(&child) != DW_TAG_enumerator)
14375 die_loc_and_name(ctxt, &child, l, n, m);
14377 die_unsigned_constant_attribute(&child, DW_AT_const_value, val);
14378 enms.push_back(enum_type_decl::enumerator(ctxt.env(), n, val));
14380 while (dwarf_siblingof(&child, &child) == 0);
14383 // DWARF up to version 4 (at least) doesn't seem to carry the
14384 // underlying type, so let's create an artificial one here, which
14385 // sole purpose is to be passed to the constructor of the
14386 // enum_type_decl type.
14387 type_decl_sptr t(new type_decl(ctxt.env(), underlying_type_name,
14388 size, size, location()));
14389 t->set_is_anonymous(enum_underlying_type_is_anonymous);
14390 translation_unit_sptr tu = ctxt.cur_transl_unit();
14392 add_decl_to_scope(t, tu->get_global_scope().get());
14395 t = dynamic_pointer_cast<type_decl>(d);
14397 result.reset(new enum_type_decl(name, loc, t, enms, linkage_name));
14398 result->set_is_anonymous(enum_is_anonymous);
14399 ctxt.associate_die_to_type(die, result, where_offset);
14403 /// Once a function_decl has been built and added to a class as a
14404 /// member function, this function updates the information of the
14405 /// function_decl concerning the properties of its relationship with
14406 /// the member class. That is, it updates properties like
14407 /// virtualness, access, constness, cdtorness, etc ...
///
/// A virtual member function that has a linkage name but no symbol
/// is additionally recorded in
/// read_context::die_function_decl_with_no_symbol_map(), keyed by the
/// offset of @p die.
14409 /// @param die the DIE of the function_decl that has been just built.
14411 /// @param f the function_decl that has just been built from @p die.
14413 /// @param klass the @ref class_or_union that @p f belongs to.
14415 /// @param ctxt the context used to read the ELF/DWARF information.
14417 finish_member_function_reading(Dwarf_Die* die,
14418 const function_decl_sptr& f,
14419 const class_or_union_sptr& klass,
14420 read_context& ctxt)
14424 method_decl_sptr m = is_method_decl(f);
// The type of the method is asserted to be a method type.
14427 method_type_sptr method_t = is_method_type(m->get_type());
14428 ABG_ASSERT(method_t);
// A constructor is recognized by having the same name as the class,
// a destructor by a non-empty name starting with '~'.
14430 bool is_ctor = (f->get_name() == klass->get_name());
14431 bool is_dtor = (!f->get_name().empty()
14432 && static_cast<string>(f->get_name())[0] == '~');
14433 bool is_virtual = die_is_virtual(die);
// -1 is the vtable index used unless die_virtual_function_index()
// provides another one.
14434 int64_t vindex = -1;
14436 die_virtual_function_index(die, vindex);
// Default access is private, or public when klass is a class_decl
// for which is_struct() is true; die_access_specifier() is then
// called on the DIE with that default.
14437 access_specifier access = private_access;
14438 if (class_decl_sptr c = is_class_type(klass))
14439 if (c->is_struct())
14440 access = public_access;
14441 die_access_specifier(die, access);
14443 bool is_static = false;
14445 // Let's see if the first parameter is a pointer to an instance of
14446 // the same class type as the current class and has a
14447 // DW_AT_artificial attribute flag set. We are not looking at
14448 // DW_AT_object_pointer (for DWARF 3) because it wasn't being
14449 // emitted in GCC 4_4, which was already DWARF 3.
14450 function_decl::parameter_sptr first_parm;
14451 if (!f->get_parameters().empty())
14452 first_parm = f->get_parameters()[0];
14454 bool is_artificial =
14455 first_parm && first_parm->get_artificial();;
14456 pointer_type_def_sptr this_ptr_type;
14457 type_base_sptr other_klass;
14460 this_ptr_type = is_pointer_type(first_parm->get_type());
14462 other_klass = this_ptr_type->get_pointed_to_type();
14463 // Sometimes, other_klass can be qualified; e.g, volatile. In
14464 // that case, let's get the unqualified version of other_klass.
14465 if (qualified_type_def_sptr q = is_qualified_type(other_klass))
14466 other_klass = q->get_underlying_type();
// The pointed-to type is matched against klass by comparing its type
// name with the qualified name of klass.
14469 && get_type_name(other_klass) == klass->get_qualified_name())
// Record the collected properties on the method.
14474 set_member_access_specifier(m, access);
14476 set_member_function_vtable_offset(m, vindex);
14477 set_member_function_is_virtual(m, is_virtual);
14478 set_member_is_static(m, is_static);
14479 set_member_function_is_ctor(m, is_ctor);
14480 set_member_function_is_dtor(m, is_dtor);
14481 set_member_function_is_const(m, method_t->get_is_const());
14483 ABG_ASSERT(is_member_function(m));
14485 if (is_virtual && !f->get_linkage_name().empty() && !f->get_symbol())
14487 // This is a virtual member function which has a linkage name
14488 // but has no underlying symbol set.
14490 // The underlying elf symbol to set to this function can show up
14491 // later in the DWARF input or it can be that, because of some
14492 // compiler optimization, the relation between this function and
14493 // its underlying elf symbol is simply not emitted in the DWARF.
14495 // Let's thus schedule this function for a later fixup pass
14497 // read_context::fixup_functions_with_no_symbols()) that will
14498 // set its underlying symbol.
14500 // Note that if the underlying symbol is encountered later in the
14501 // DWARF input, then the part of build_function_decl() that
14502 // updates the function to set its underlying symbol will
14503 // de-schedule this function wrt fixup pass.
14504 Dwarf_Off die_offset = dwarf_dieoffset(die);
14505 die_function_decl_map_type &fns_with_no_symbol =
14506 ctxt.die_function_decl_with_no_symbol_map();
// Only insert when no entry exists yet for this DIE offset, so an
// existing entry is never overwritten.
14507 die_function_decl_map_type::const_iterator i =
14508 fns_with_no_symbol.find(die_offset);
14509 if (i == fns_with_no_symbol.end())
14510 fns_with_no_symbol[die_offset] = f;
14515 /// If a function DIE has attributes which have not yet been read and
14516 /// added to the internal representation that represents that function
14517 /// then read those extra attributes and update the internal
14518 /// representation.
14520 /// @param ctxt the read context to use.
14522 /// @param die the function DIE to consider.
14524 /// @param where_offset where we logically are, currently, in the stream
14525 /// of DIEs. If you don't know what this is, you can just set it to zero.
14527 /// @param existing_fn the representation of the function to update.
14529 /// @return the updated function representation.
14530 static function_decl_sptr
14531 maybe_finish_function_decl_reading(read_context& ctxt,
14533 size_t where_offset,
14534 const function_decl_sptr& existing_fn)
// The actual work is delegated to build_function_decl().
14536 function_decl_sptr result = build_function_decl(ctxt, die,
14543 /// Lookup a class or a typedef with a given qualified name in the
14544 /// corpus that a given scope belongs to.
14546 /// @param scope the scope to consider.
14548 /// @param type_name the qualified name of the type to look for.
14550 /// @return the typedef or class type found.
14551 static type_base_sptr
14552 lookup_class_or_typedef_from_corpus(scope_decl* scope, const string& type_name)
// The name used for the lookup is built from the scope and type_name
// by build_qualified_name().
14554 string qname = build_qualified_name(scope, type_name);
// NOTE(review): corp is dereferenced below without a null check; this
// assumes scope->get_corpus() never returns null here -- confirm.
14555 corpus* corp = scope->get_corpus();
14556 type_base_sptr result = lookup_class_or_typedef_type(qname, *corp);
14560 /// Lookup a class or typedef type from the current corpus being
14563 /// The type being looked for has the same name as a given DIE.
14565 /// @param ctxt the reading context to use.
14567 /// @param die the DIE which has the same name as the type we are
14570 /// @param called_for_public_decl whether this function is being
14571 /// called from a publicly defined declaration.
14573 /// @param where_offset where we are logically at in the DIE stream.
14575 /// @return the type found.
14576 static type_base_sptr
14577 lookup_class_or_typedef_from_corpus(read_context& ctxt,
14579 bool called_for_public_decl,
14580 size_t where_offset)
14583 return class_decl_sptr();
// The name to look for is the DW_AT_name of the DIE; an empty name
// yields an empty result.
14585 string class_name = die_string_attribute(die, DW_AT_name);
14586 if (class_name.empty())
14587 return class_decl_sptr();
// Determine the scope of the DIE, then delegate to the overload that
// takes a scope and a name.
14589 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
14590 called_for_public_decl,
14593 return lookup_class_or_typedef_from_corpus(scope.get(), class_name);
14595 return type_base_sptr();
14598 /// Lookup a class, typedef or enum type with a given qualified name
14599 /// in the corpus that a given scope belongs to.
14601 /// @param scope the scope to consider.
14603 /// @param type_name the qualified name of the type to look for.
14605 /// @return the typedef, enum or class type found.
14606 static type_base_sptr
14607 lookup_class_typedef_or_enum_type_from_corpus(scope_decl* scope,
14608 const string& type_name)
// The name used for the lookup is built from the scope and type_name
// by build_qualified_name().
14610 string qname = build_qualified_name(scope, type_name);
// NOTE(review): corp is dereferenced below without a null check; this
// assumes scope->get_corpus() never returns null here -- confirm.
14611 corpus* corp = scope->get_corpus();
14612 type_base_sptr result = lookup_class_typedef_or_enum_type(qname, *corp);
14616 /// Lookup a class, typedef or enum type in a given scope, in the
14617 /// corpus that scope belongs to.
14619 /// @param die the DIE of the class, typedef or enum to lookup.
14621 /// @param anonymous_member_type_idx if @p DIE represents an anonymous
14622 /// type, this is the index of that anonymous type in its scope, in
14623 /// case there are several anonymous types of the same kind in that
14626 /// @param scope the scope in which to look for the type.
14628 /// @return the typedef, enum or class type found.
14629 static type_base_sptr
14630 lookup_class_typedef_or_enum_type_from_corpus(Dwarf_Die* die,
14631 size_t anonymous_member_type_idx,
14635 return class_decl_sptr();
// Start from the DW_AT_name of the DIE; anonymous type DIEs go
// through get_internal_anonymous_die_name() with the given index.
14637 string type_name = die_string_attribute(die, DW_AT_name);
14638 if (is_anonymous_type_die(die))
14640 get_internal_anonymous_die_name(die, anonymous_member_type_idx);
// Without a name there is nothing to look up.
14642 if (type_name.empty())
14643 return class_decl_sptr();
// Delegate to the overload that takes a scope and a name.
14645 return lookup_class_typedef_or_enum_type_from_corpus(scope, type_name);
14648 /// Test if a DIE represents a function that is a member of a given
14651 /// @param ctxt the reading context.
14653 /// @param function_die the DIE of the function to consider.
14655 /// @param class_type the class type to consider.
14657 /// @param where_offset where we are logically at in the DIE stream.
/// NOTE(review): no where_offset parameter appears in the signature
/// below; the @param entry above looks stale -- confirm.
14659 /// @return the method declaration corresponding to the member
14660 /// function of @p class_type, iff @p function_die is for a member
14661 /// function of @p class_type.
14662 static method_decl_sptr
14663 is_function_for_die_a_member_of_class(read_context& ctxt,
14664 Dwarf_Die* function_die,
14665 const class_or_union_sptr& class_type)
// Start from whatever artifact the context looks up from the DIE.
14667 type_or_decl_base_sptr artifact = ctxt.lookup_artifact_from_die(function_die);
14670 return method_decl_sptr();
// The artifact can be a method decl, in which case its type is used,
// or it is expected to be a method type itself.
14672 method_decl_sptr method = is_method_decl(artifact);
14673 method_type_sptr method_type;
14676 method_type = method->get_type();
14678 method_type = is_method_type(artifact);
14679 ABG_ASSERT(method_type);
14681 class_or_union_sptr method_class = method_type->get_class_type();
14682 ABG_ASSERT(method_class);
// The class of the method and class_type are compared by qualified
// name, not by pointer identity (see the disabled assertion below).
14684 string method_class_name = method_class->get_qualified_name(),
14685 class_type_name = class_type->get_qualified_name();
14687 if (method_class_name == class_type_name)
14689 //ABG_ASSERT(class_type.get() == method_class.get());
14693 return method_decl_sptr();
14696 /// If a given function DIE represents an existing member function of
14697 /// a given class, then update that member function with new
14698 /// properties present in the DIE. Otherwise, if the DIE represents a
14699 /// new member function that is not already present in the class then
14700 /// add that new member function to the class.
14702 /// @param ctxt the reading context.
14704 /// @param function_die the DIE of the potential member function to
14707 /// @param class_type the class type to consider.
14709 /// @param called_from_public_decl is true iff this function was
14710 /// called from a publicly defined and exported declaration.
14712 /// @param where_offset where we are logically at in the DIE stream.
14714 /// @return the method decl representing the member function.
14715 static method_decl_sptr
14716 add_or_update_member_function(read_context& ctxt,
14717 Dwarf_Die* function_die,
14718 const class_or_union_sptr& class_type,
14719 bool called_from_public_decl,
14720 size_t where_offset)
// First check whether the DIE already maps to a method of class_type.
14722 method_decl_sptr method =
14723 is_function_for_die_a_member_of_class(ctxt, function_die, class_type);
// The method can also be obtained by building the IR node for the DIE.
14726 method = is_method_decl(build_ir_node_from_die(ctxt, function_die,
14728 called_from_public_decl,
14731 return method_decl_sptr();
// Update the member-function properties (access, virtualness, ...)
// of the method from the DIE.
14733 finish_member_function_reading(function_die,
14734 is_function_decl(method),
14739 /// Build an IR node for class type from a DW_TAG_structure_type or
14740 /// DW_TAG_class_type DIE and add that node to the ABI corpus being
14741 /// currently built.
14743 /// If the DIE represents a class type that already exists, then update the
14744 /// existing class type with the new properties found in the DIE.
14746 /// It means that this function can also update an existing
14747 /// class_decl node with data members, member functions and other
14748 /// properties coming from the DIE.
14750 /// @param ctxt the read context to consider.
14752 /// @param die the DIE to read information from. Must be either a
14753 /// DW_TAG_structure_type or a DW_TAG_class_type.
14755 /// @param scope a pointer to the scope_decl* under which this class
14756 /// is to be added to.
14758 /// @param is_struct whether the class was declared as a struct.
14760 /// @param klass if non-null, this is a klass to append the members
14761 /// to. Otherwise, this function just builds the class from scratch.
14763 /// @param called_from_public_decl set to true if this class is being
14764 /// called from a "Public declaration like vars or public symbols".
14766 /// @param where_offset the offset of the DIE where we are "logically"
14767 /// positioned at, in the DIE tree. This is useful when @p die is
14768 /// e.g, DW_TAG_partial_unit that can be included in several places in
14771 /// @return the resulting class_type.
14772 static class_decl_sptr
14773 add_or_update_class_type(read_context& ctxt,
14777 class_decl_sptr klass,
14778 bool called_from_public_decl,
14779 size_t where_offset)
14781 class_decl_sptr result;
// NOTE(review): get_die_source() is invoked inside ABG_ASSERT(); this
// relies on the macro always evaluating its argument -- confirm.
14786 ABG_ASSERT(ctxt.get_die_source(die, source));
14788 unsigned tag = dwarf_tag(die);
14790 if (tag != DW_TAG_class_type && tag != DW_TAG_structure_type)
// Check whether this DIE already has an entry in the map of
// work-in-progress classes, which is keyed by DIE offset.
14794 die_class_or_union_map_type::const_iterator i =
14795 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
14796 if (i != ctxt.die_wip_classes_map(source).end())
14798 class_decl_sptr class_type = is_class_type(i->second);
14799 ABG_ASSERT(class_type);
14804 if (!ctxt.die_is_in_cplus_plus(die))
14805 // In c++, a given class might be put together "piecewise". That
14806 // is, in a translation unit, some data members of that class
14807 // might be defined; then in another later, some member types
14808 // might be defined. So we can't just re-use a class "verbatim"
14809 // just because we've seen previously. So in c++, re-using the
14810 // class is a much more clever process. In the other languages however
14811 // (like in C) we can re-use a class definition verbatim.
14812 if (class_decl_sptr class_type =
14813 is_class_type(ctxt.lookup_type_from_die(die)))
14814 if (!class_type->get_is_declaration_only())
14817 string name, linkage_name;
14819 die_loc_and_name(ctxt, die, loc, name, linkage_name);
14820 bool is_declaration_only = die_is_declaration_only(die);
14822 bool is_anonymous = false;
14825 // So we are looking at an anonymous struct. Let's
14827 name = get_internal_anonymous_die_prefix_name(die);
14828 ABG_ASSERT(!name.empty());
14829 // But we remember that the type is anonymous.
14830 is_anonymous = true;
// When the scope already has anonymous member classes, their count is
// used to build the internal name of this one.
14832 if (size_t s = scope->get_num_anonymous_member_classes())
14833 name = build_internal_anonymous_die_name(name, s);
// Try to re-use a class from the corpus returned by
// should_reuse_type_from_corpus_group(), looked up by location or by
// name.
14838 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
14841 // TODO: if there is only one class defined in the corpus
14842 // for this location, then re-use it. But if there are
14843 // more than one, then do not re-use it, for now.
14844 result = lookup_class_type_per_location(loc.expand(), *corp);
14846 // TODO: if there is just one class for that name defined,
14847 // then re-use it. Otherwise, don't.
14848 result = lookup_class_type(name, *corp);
14850 // If we are seeing a declaration of a definition we
14851 // already had, or if we are seeing a type with the same
14852 // declaration-only-ness that we had before, then keep
14853 // the one we already had.
14854 && (result->get_is_declaration_only() == is_declaration_only
14855 || (!result->get_is_declaration_only()
14856 && is_declaration_only)))
14858 ctxt.associate_die_to_type(die, result, where_offset);
14862 // We might be seeing the definition of a declaration we
14863 // already had. In that case, keep the definition and
14864 // drop the declaration.
14869 // If we've already seen the same class as 'die', then let's re-use
14870 // that one, unless it's an anonymous class. We can't really safely
14871 // re-use anonymous classes as they have no name, by construction.
14872 // What we can do, rather, is to reuse the typedef that names them,
14873 // when they do have a naming typedef.
14875 if (class_decl_sptr pre_existing_class =
14876 is_class_type(ctxt.lookup_type_artifact_from_die(die)))
14877 klass = pre_existing_class;
14880 die_size_in_bits(die, size);
14883 bool has_child = (dwarf_child(die, &child) == 0);
14885 decl_base_sptr res;
// Update path: the existing klass becomes the result and gets the
// location read from the DIE.
14888 res = result = klass;
14890 result->set_location(loc);
// Creation path: a new class_decl is built and added to the scope;
// the result is whatever add_decl_to_scope() hands back.
14894 result.reset(new class_decl(ctxt.env(), name, size,
14895 /*alignment=*/0, is_struct, loc,
14896 decl_base::VISIBILITY_DEFAULT));
14897 result->set_is_anonymous(is_anonymous);
14899 if (is_declaration_only)
14900 result->set_is_declaration_only(true);
14902 res = add_decl_to_scope(result, scope);
14903 result = dynamic_pointer_cast<class_decl>(res);
14904 ABG_ASSERT(result);
14908 result->set_size_in_bits(size);
14910 ctxt.associate_die_to_type(die, result, where_offset);
14912 ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
14915 // TODO: set the access specifier for the declaration-only class
// Record the class in the work-in-progress map, keyed by the offset
// of the DIE.
14919 ctxt.die_wip_classes_map(source)[dwarf_dieoffset(die)] = result;
// The class is pushed on the scope stack here; the matching pop() is
// further down.
14921 scope_decl_sptr scop =
14922 dynamic_pointer_cast<scope_decl>(res);
14924 ctxt.scope_stack().push(scop.get());
// Per-kind counters for anonymous member types. They start at -1 and
// are pre-incremented, so the first anonymous member of each kind
// gets index 0.
14928 int anonymous_member_class_index = -1;
14929 int anonymous_member_union_index = -1;
14930 int anonymous_member_enum_index = -1;
14934 tag = dwarf_tag(&child);
14936 // Handle base classes.
14937 if (tag == DW_TAG_inheritance)
14939 result->set_is_declaration_only(false);
14941 Dwarf_Die type_die;
14942 if (!die_die_attribute(&child, DW_AT_type, type_die))
// The base type comes either from a corpus lookup or from building
// the IR node of the type DIE.
14945 type_base_sptr base_type;
14947 lookup_class_or_typedef_from_corpus(ctxt, &type_die,
14948 called_from_public_decl,
14952 is_type(build_ir_node_from_die(ctxt, &type_die,
14953 called_from_public_decl,
14956 // Sometimes base_type can be a typedef. Let's make
14957 // sure that typedef is compatible with a class type.
14958 class_decl_sptr b = is_compatible_with_class_type(base_type);
14962 access_specifier access =
14967 die_access_specifier(&child, access);
14969 bool is_virt= die_is_virtual(&child);
14970 int64_t offset = 0;
14971 bool is_offset_present =
14972 die_member_offset(ctxt, &child, offset);
// An offset of -1 is passed when die_member_offset() reported no
// offset for the base.
14974 class_decl::base_spec_sptr base(new class_decl::base_spec
14976 is_offset_present ? offset : -1,
// A declaration-only base class is expected to have been scheduled
// for resolution already.
14978 if (b->get_is_declaration_only())
14979 ABG_ASSERT(ctxt.is_decl_only_class_scheduled_for_resolution(b));
// Check whether a base class with the same qualified name is already
// known to the class.
14980 if (result->find_base_class(b->get_qualified_name()))
14982 result->add_base_specifier(base);
14984 // Handle data members.
14985 else if (tag == DW_TAG_member
14986 || tag == DW_TAG_variable)
14988 Dwarf_Die type_die;
14989 if (!die_die_attribute(&child, DW_AT_type, type_die))
14994 die_loc_and_name(ctxt, &child, loc, n, m);
14995 /// For now, we skip the hidden vtable pointer.
14996 /// Currently, we're looking for a member starting with
14997 /// "_vptr[^0-9a-zA-Z_]", which is what Clang and GCC
14998 /// use as a name for the hidden vtable pointer.
// NOTE(review): when n is exactly "_vptr" (5 characters) the substr()
// test succeeds and n.at(5) throws std::out_of_range -- confirm
// whether such a member name can occur. Also, std::isalnum() is given
// a plain char here; a negative char value would be undefined
// behavior -- confirm.
14999 if (n.substr(0, 5) == "_vptr"
15000 && !std::isalnum(n.at(5))
15004 // If the variable is already a member of this class,
15006 if (lookup_var_decl_in_scope(n, result))
15009 int64_t offset_in_bits = 0;
15010 bool is_laid_out = die_member_offset(ctxt, &child,
15012 // For now, is_static == !is_laid_out. When we have
15013 // templates, we'll try to be more specific. For now,
15014 // this approximation should do OK.
15015 bool is_static = !is_laid_out;
15017 if (is_static && variable_is_suppressed(ctxt,
15022 decl_base_sptr ty = is_decl(
15023 build_ir_node_from_die(ctxt, &type_die,
15024 called_from_public_decl,
15026 type_base_sptr t = is_type(ty);
15030 // The call to build_ir_node_from_die above could have
15031 // triggered the adding of a data member named 'n' into
15032 // result. So let's check again if the variable is
15033 // already a member of this class.
15034 if (lookup_var_decl_in_scope(n, result))
15038 // We have a non-static data member. So this class
15039 // cannot be a declaration-only class anymore, even if
15040 // some DWARF emitters might consider it otherwise.
15041 result->set_is_declaration_only(false);
15042 access_specifier access =
15047 die_access_specifier(&child, access);
// Create the data member, add it to the class and associate it with
// its DIE.
15049 var_decl_sptr dm(new var_decl(n, t, loc, m));
15050 result->add_data_member(dm, access, is_laid_out,
15051 is_static, offset_in_bits);
15052 ABG_ASSERT(has_scope(dm));
15053 ctxt.associate_die_to_decl(&child, dm, where_offset,
15054 /*associate_by_repr=*/false);
15056 // Handle member functions;
15057 else if (tag == DW_TAG_subprogram)
15060 add_or_update_member_function(ctxt, &child, result,
15061 called_from_public_decl,
15063 if (function_decl_sptr f = is_function_decl(r))
15064 ctxt.associate_die_to_decl(&child, f, where_offset,
15065 /*associate_by_repr=*/true);
15067 // Handle member types
15068 else if (die_is_type(&child))
15070 // Track the anonymous type index in the current
15071 // scope. Look for what this means by reading the
15072 // comment of the function
15073 // build_internal_anonymous_die_name.
15074 int anonymous_member_type_index = 0;
15075 if (is_anonymous_type_die(&child))
15077 // Update the anonymous type index.
15078 if (die_is_class_type(&child))
15079 anonymous_member_type_index =
15080 ++anonymous_member_class_index;
15081 else if (dwarf_tag(&child) == DW_TAG_union_type)
15082 anonymous_member_type_index =
15083 ++anonymous_member_union_index;
15084 else if (dwarf_tag(&child) == DW_TAG_enumeration_type)
15085 anonymous_member_type_index =
15086 ++anonymous_member_enum_index;
15088 // if the type is not already a member of this class,
15089 // then add it to the class.
15090 if (!lookup_class_typedef_or_enum_type_from_corpus
15091 (&child, anonymous_member_type_index, result.get()))
15092 build_ir_node_from_die(ctxt, &child, result.get(),
15093 called_from_public_decl,
15096 } while (dwarf_siblingof(&child, &child) == 0);
15099 ctxt.scope_stack().pop();
// The class is no longer work-in-progress: drop its entry from the
// map, first carrying over the access specifier when the entry is a
// member type.
15102 die_class_or_union_map_type::const_iterator i =
15103 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
15104 if (i != ctxt.die_wip_classes_map(source).end())
15106 if (is_member_type(i->second))
15107 set_member_access_specifier(res,
15108 get_member_access_specifier(i->second));
15109 ctxt.die_wip_classes_map(source).erase(i);
15113 ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
15117 /// Build an @ref union_decl from a DW_TAG_union_type DIE.
15119 /// @param ctxt the read context to use.
15121 /// @param die the DIE to read from.
15123 /// @param scope the scope the resulting @ref union_decl belongs to.
15125 /// @param union_type if this parameter is non-nil, then this function
15126 /// updates the @ref union_decl that it points to, rather than
15127 /// creating a new @ref union_decl.
15129 /// @param called_from_public_decl is true if this function has been
15130 /// initially called within the context of a public decl.
15132 /// @param where_offset the offset of the DIE where we are "logically"
15133 /// positioned at, in the DIE tree. This is useful when @p die is
15134 /// e.g, DW_TAG_partial_unit that can be included in several places in
///
/// @return presumably the built or updated @ref union_decl -- TODO
/// confirm.
15136 static union_decl_sptr
15137 add_or_update_union_type(read_context& ctxt,
15140 union_decl_sptr union_type,
15141 bool called_from_public_decl,
15142 size_t where_offset)
15144 union_decl_sptr result;
// Only DIEs tagged DW_TAG_union_type are handled here.
15148 unsigned tag = dwarf_tag(die);
15150 if (tag != DW_TAG_union_type)
// NOTE(review): get_die_source() is invoked inside ABG_ASSERT(); this
// relies on the macro always evaluating its argument -- confirm.
15154 ABG_ASSERT(ctxt.get_die_source(die, source));
// Check whether this DIE already has an entry in the map of
// work-in-progress classes/unions, which is keyed by DIE offset.
15156 die_class_or_union_map_type::const_iterator i =
15157 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
15158 if (i != ctxt.die_wip_classes_map(source).end())
15160 union_decl_sptr u = is_union_type(i->second);
15166 string name, linkage_name;
15168 die_loc_and_name(ctxt, die, loc, name, linkage_name);
15169 bool is_declaration_only = die_is_declaration_only(die);
15171 bool is_anonymous = false;
15174 // So we are looking at an anonymous union. Let's give it a
15176 name = get_internal_anonymous_die_prefix_name(die);
15177 ABG_ASSERT(!name.empty());
15178 // But we remember that the type is anonymous.
15179 is_anonymous = true;
// When the scope already has anonymous member unions, their count is
// used to build the internal name of this one.
15181 if (size_t s = scope->get_num_anonymous_member_unions())
15182 name = build_internal_anonymous_die_name(name, s);
15185 // If the type has location, then associate it to its
15186 // representation. This way, all occurrences of types with the same
15187 // representation (name) and location can be later detected as being
15188 // for the same type.
// Try to re-use a union from the corpus returned by
// should_reuse_type_from_corpus_group(), looked up by location or by
// name.
15192 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
15195 result = lookup_union_type_per_location(loc.expand(), *corp);
15197 result = lookup_union_type(name, *corp);
15201 ctxt.associate_die_to_type(die, result, where_offset);
15207 // if we've already seen the same union as 'die' then
15208 // let's re-use that one. We can't really safely re-use anonymous
15209 // classes as they have no name, by construction. What we can do,
15210 // rather, is to reuse the typedef that names them, when they do have
15211 // a naming typedef.
15213 if (union_decl_sptr pre_existing_union =
15214 is_union_type(ctxt.lookup_artifact_from_die(die)))
15215 union_type = pre_existing_union;
15218 die_size_in_bits(die, size);
// Update path: the existing union becomes the result and gets the
// location read from the DIE.
15222 result = union_type;
15223 result->set_location(loc);
// Creation path: a new union_decl is built and added to the scope;
// the result is whatever add_decl_to_scope() hands back.
15227 result.reset(new union_decl(ctxt.env(), name, size,
15228 loc, decl_base::VISIBILITY_DEFAULT));
15229 result->set_is_anonymous(is_anonymous);
15230 if (is_declaration_only)
15231 result->set_is_declaration_only(true);
15232 result = is_union_type(add_decl_to_scope(result, scope));
15233 ABG_ASSERT(result);
15238 result->set_size_in_bits(size);
15239 result->set_is_declaration_only(false);
15242 ctxt.associate_die_to_type(die, result, where_offset);
15244 // TODO: maybe schedule declaration-only union for result like we do
15246 // ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
15249 bool has_child = (dwarf_child(die, &child) == 0);
// Record the union in the work-in-progress map, keyed by the offset
// of the DIE.
15253 ctxt.die_wip_classes_map(source)[dwarf_dieoffset(die)] = result;
// The union is pushed on the scope stack here; the matching pop() is
// further down.
15255 scope_decl_sptr scop =
15256 dynamic_pointer_cast<scope_decl>(result);
15258 ctxt.scope_stack().push(scop.get());
15264 tag = dwarf_tag(&child);
15265 // Handle data members.
15266 if (tag == DW_TAG_member || tag == DW_TAG_variable)
15268 Dwarf_Die type_die;
15269 if (!die_die_attribute(&child, DW_AT_type, type_die))
15274 die_loc_and_name(ctxt, &child, loc, n, m);
// Check whether a variable named n is already in the union.
15276 if (lookup_var_decl_in_scope(n, result))
15279 ssize_t offset_in_bits = 0;
15280 decl_base_sptr ty =
15281 is_decl(build_ir_node_from_die(ctxt, &type_die,
15282 called_from_public_decl,
15284 type_base_sptr t = is_type(ty);
15288 // We have a non-static data member. So this class
15289 // cannot be a declaration-only class anymore, even if
15290 // some DWARF emitters might consider it otherwise.
15291 result->set_is_declaration_only(false);
// Access starts out as private; die_access_specifier() is then
// called on the member DIE with that value.
15292 access_specifier access = private_access;
15294 die_access_specifier(&child, access);
// Data members of the union are always added as laid out and
// non-static.
15296 var_decl_sptr dm(new var_decl(n, t, loc, m));
15297 result->add_data_member(dm, access, /*is_laid_out=*/true,
15298 /*is_static=*/false,
15300 ABG_ASSERT(has_scope(dm));
15301 ctxt.associate_die_to_decl(&child, dm, where_offset,
15302 /*associate_by_repr=*/false);
15304 // Handle member functions;
15305 else if (tag == DW_TAG_subprogram)
15308 is_decl(build_ir_node_from_die(ctxt, &child,
15310 called_from_public_decl,
15315 function_decl_sptr f = dynamic_pointer_cast<function_decl>(r);
// Update the member-function properties of f, then associate it with
// its DIE.
15318 finish_member_function_reading(&child, f, result, ctxt);
15320 ctxt.associate_die_to_decl(&child, f, where_offset,
15321 /*associate_by_repr=*/false);
15323 // Handle member types
15324 else if (die_is_type(&child))
15325 decl_base_sptr td =
15326 is_decl(build_ir_node_from_die(ctxt, &child, result.get(),
15327 called_from_public_decl,
15329 } while (dwarf_siblingof(&child, &child) == 0);
15332 ctxt.scope_stack().pop();
// The union is no longer work-in-progress: drop its entry from the
// map, first carrying over the access specifier when the entry is a
// member type.
15335 die_class_or_union_map_type::const_iterator i =
15336 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
15337 if (i != ctxt.die_wip_classes_map(source).end())
15339 if (is_member_type(i->second))
15340 set_member_access_specifier(result,
15341 get_member_access_specifier(i->second));
15342 ctxt.die_wip_classes_map(source).erase(i);
15349 /// Build a qualified type from a DW_TAG_const_type,
15350 /// DW_TAG_volatile_type or DW_TAG_restrict_type DIE.
15352 /// @param ctxt the read context to consider.
15354 /// @param die the input DIE to read from.
15356 /// @param called_from_public_decl true if this function was called
15357 /// from a context where either a public function or a public variable
15358 /// is being built.
15360 /// @param where_offset the offset of the DIE where we are "logically"
15361 /// positioned at, in the DIE tree. This is useful when @p die is
15362 /// e.g, DW_TAG_partial_unit that can be included in several places in
15365 /// @return the resulting qualified_type_def.
15366 static type_base_sptr
15367 build_qualified_type(read_context& ctxt,
15369 bool called_from_public_decl,
15370 size_t where_offset)
15372 type_base_sptr result;
// NOTE(review): get_die_source() is invoked inside ABG_ASSERT(); this
// relies on the macro always evaluating its argument -- confirm.
15377 ABG_ASSERT(ctxt.get_die_source(die, source));
// Only const, volatile and restrict qualifier DIEs are handled here.
15379 unsigned tag = dwarf_tag(die);
15381 if (tag != DW_TAG_const_type
15382 && tag != DW_TAG_volatile_type
15383 && tag != DW_TAG_restrict_type)
// The underlying type is designated by the DW_AT_type attribute of
// the DIE.
15386 Dwarf_Die underlying_type_die;
15387 decl_base_sptr utype_decl;
15388 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
15389 // So, if no DW_AT_type is present, then this means (if we are
15390 // looking at a debug info emitted by GCC) that we are looking
15391 // at a qualified void type.
15392 utype_decl = build_ir_node_for_void_type(ctxt);
15395 utype_decl = is_decl(build_ir_node_from_die(ctxt, &underlying_type_die,
15396 called_from_public_decl,
15401 // The call to build_ir_node_from_die() could have triggered the
15402 // creation of the type for this DIE. In that case, just return it.
15403 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
15406 ctxt.associate_die_to_type(die, result, where_offset);
15410 type_base_sptr utype = is_type(utype_decl);
// Map the DWARF tag onto the matching CV qualifier; any other tag is
// not expected at this point.
15413 qualified_type_def::CV qual = qualified_type_def::CV_NONE;
15414 if (tag == DW_TAG_const_type)
15415 qual |= qualified_type_def::CV_CONST;
15416 else if (tag == DW_TAG_volatile_type)
15417 qual |= qualified_type_def::CV_VOLATILE;
15418 else if (tag == DW_TAG_restrict_type)
15419 qual |= qualified_type_def::CV_RESTRICT;
15421 ABG_ASSERT_NOT_REACHED;
// The qualified type is created with an empty location and
// associated with the DIE.
15424 result.reset(new qualified_type_def(utype, qual, location()));
15426 ctxt.associate_die_to_type(die, result, where_offset);
15431 /// Strip qualification from a qualified type, when it makes sense.
15433 /// DWARF constructs "const reference". This is redundant because a
15434 /// reference is always const. The issue is these redundant types then
15435 /// leak into the IR and make for bad diagnostics.
15437 /// This function thus strips the const qualifier from the type in
15438 /// that case. It also handles the following cases:
///
///   - a "const void" is turned into the plain "void" type;
///
///   - the qualifiers of a qualified array are applied to the element
///     type of the array, and the array itself is returned.
15441 /// @param t the type to strip const qualification from.
15443 /// @param ctxt the @ref read_context to use. It is used to schedule
/// the late canonicalization of the qualified element type created
/// for arrays of non-qualified elements.
15445 /// @return the stripped type or just return @p t.
15446 static decl_base_sptr
15447 maybe_strip_qualification(const qualified_type_def_sptr t,
15448 read_context &ctxt)
// By default, the result is the input type, unchanged.
15453 decl_base_sptr result = t;
15454 type_base_sptr u = t->get_underlying_type();
15455 environment* env = t->get_environment();
15457 if (t->get_cv_quals() & qualified_type_def::CV_CONST
15458 && (is_reference_type(u)))
15460 // Let's strip only the const qualifier. To do that, a new
15461 // qualified type carrying the remaining qualifiers is created.
15462 result.reset(new qualified_type_def
15463 (u, t->get_cv_quals() & ~qualified_type_def::CV_CONST,
15464 t->get_location()));
15466 else if (t->get_cv_quals() & qualified_type_def::CV_CONST
15467 && env->is_void_type(u))
15469 // So this type is a "const void". Let's strip the "const"
15470 // qualifier out and make this just be "void", so that a "const
15471 // void" type and a "void" type compare equal after going through this function.
15473 result = is_decl(u);
15475 else if (is_array_of_qualified_element(u))
15477 // In C and C++, the cv qualifiers of a qualified array apply to
15478 // the array element type. So the qualifiers of the array can
15479 // be dropped and applied to the element type.
15481 // Here, the element type is qualified already. So apply the
15482 // qualifiers of the array itself to the already qualified
15483 // element type and drop the array qualifiers.
15484 array_type_def_sptr array = is_array_type(u);
15485 qualified_type_def_sptr element_type =
15486 is_qualified_type(array->get_element_type());
15487 qualified_type_def::CV quals = element_type->get_cv_quals();
15488 quals |= t->get_cv_quals();
// Note that the existing element type is edited in place here.
15489 element_type->set_cv_quals(quals);
15490 result = is_decl(u);
15491 if (u->get_canonical_type()
15492 || element_type->get_canonical_type())
15493 // We shouldn't be editing types that were already
15494 // canonicalized. For those, canonicalization should be
15495 // delayed until after all editing is done.
15496 ABG_ASSERT_NOT_REACHED;
15498 else if (is_array_type(u) && !is_array_of_qualified_element(is_array_type(u)))
15500 // In C and C++, the cv qualifiers of a qualified array apply to
15501 // the array element type. So the qualifiers of the array can
15502 // be dropped and applied to the element type.
15504 // Here, the element type is not qualified. So apply the
15505 // qualifiers of the array itself to the element type and drop
15506 // the array qualifiers.
15507 array_type_def_sptr array = is_array_type(u);
15508 type_base_sptr element_type = array->get_element_type();
// NOTE(review): the qualifier argument of this constructor call is on
// a line that is not shown here (the embedded numbering skips 15511).
15509 qualified_type_def_sptr qual_type
15510 (new qualified_type_def(element_type,
15512 t->get_location()));
// The new qualified type is added to the scope of the element type it
// wraps, and becomes the new element type of the array.
15513 add_decl_to_scope(qual_type, is_decl(element_type)->get_scope());
15514 array->set_element_type(qual_type);
15515 ctxt.schedule_type_for_late_canonicalization(is_type(qual_type));
15516 result = is_decl(u);
15517 if (u->get_canonical_type())
15518 // We shouldn't be editing types that were already
15519 // canonicalized. For those, canonicalization should be
15520 // delayed until after all editing is done.
15521 ABG_ASSERT_NOT_REACHED;
15527 /// Build a pointer type from a DW_TAG_pointer_type DIE.
15529 /// @param ctxt the read context to consider.
15531 /// @param die the DIE to read information from.
15533 /// @param called_from_public_decl true if this function was called
15534 /// from a context where either a public function or a public variable
15535 /// is being built.
15537 /// @param where_offset the offset of the DIE where we are "logically"
15538 /// positioned at, in the DIE tree. This is useful when @p die is
15539 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
15542 /// @return the resulting pointer to pointer_type_def.
15543 static pointer_type_def_sptr
15544 build_pointer_type_def(read_context& ctxt,
15546 bool called_from_public_decl,
15547 size_t where_offset)
15549 pointer_type_def_sptr result;
15555 ABG_ASSERT(ctxt.get_die_source(die, source));
// This function only deals with DW_TAG_pointer_type DIEs.
15557 unsigned tag = dwarf_tag(die);
15558 if (tag != DW_TAG_pointer_type)
15561 type_or_decl_base_sptr utype_decl;
15562 Dwarf_Die underlying_type_die;
15563 bool has_underlying_type_die = false;
15564 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
15565 // If the DW_AT_type attribute is missing, that means we are
15566 // looking at a pointer to "void".
15567 utype_decl = build_ir_node_for_void_type(ctxt);
15569 has_underlying_type_die = true;
// Build the pointed-to type from its DIE, unless it was already set
// to the void type above.
15571 if (!utype_decl && has_underlying_type_die)
15572 utype_decl = build_ir_node_from_die(ctxt, &underlying_type_die,
15573 called_from_public_decl,
15578 // The call to build_ir_node_from_die() could have triggered the
15579 // creation of the type for this DIE. In that case, just return it.
15580 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
15582 result = is_pointer_type(t);
15583 ABG_ASSERT(result);
15587 type_base_sptr utype = is_type(utype_decl);
15590 // If the DIE for the pointer type doesn't have a byte_size
15591 // attribute then we assume the size of the pointer is the address
15592 // size of the current translation unit.
15593 uint64_t size = ctxt.cur_transl_unit()->get_address_size();
15594 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
15595 // The size as expressed by DW_AT_byte_size is in bytes, so let's
15596 // convert it to bits.
// NOTE(review): the statement performing that conversion is not shown
// here (the embedded numbering skips 15597-15598).
15599 // And the size of the pointer must be the same as the address size
15600 // of the current translation unit.
15601 ABG_ASSERT((size_t) ctxt.cur_transl_unit()->get_address_size() == size);
// Create the pointer type with an alignment of 0 and a
// default-constructed location, then associate it to the DIE.
15603 result.reset(new pointer_type_def(utype, size, /*alignment=*/0, location()));
15604 ABG_ASSERT(result->get_pointed_to_type());
15606 ctxt.associate_die_to_type(die, result, where_offset);
15610 /// Build a reference type from either a DW_TAG_reference_type or
15611 /// DW_TAG_rvalue_reference_type DIE.
15613 /// @param ctxt the read context to consider.
15615 /// @param die the DIE to read from.
15617 /// @param called_from_public_decl true if this function was called
15618 /// from a context where either a public function or a public variable
15619 /// is being built.
15621 /// @param where_offset the offset of the DIE where we are "logically"
15622 /// positioned at, in the DIE tree. This is useful when @p die is
15623 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
15626 /// @return a pointer to the resulting reference_type_def.
15627 static reference_type_def_sptr
15628 build_reference_type(read_context& ctxt,
15630 bool called_from_public_decl,
15631 size_t where_offset)
15633 reference_type_def_sptr result;
15639 ABG_ASSERT(ctxt.get_die_source(die, source));
// This function only deals with lvalue and rvalue reference DIEs.
15641 unsigned tag = dwarf_tag(die);
15642 if (tag != DW_TAG_reference_type
15643 && tag != DW_TAG_rvalue_reference_type)
// The DW_AT_type attribute of the DIE designates the referred-to
// type.
15646 Dwarf_Die underlying_type_die;
15647 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
15650 type_or_decl_base_sptr utype_decl =
15651 build_ir_node_from_die(ctxt, &underlying_type_die,
15652 called_from_public_decl,
15657 // The call to build_ir_node_from_die() could have triggered the
15658 // creation of the type for this DIE. In that case, just return it.
15659 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
15661 result = is_reference_type(t);
15662 ABG_ASSERT(result);
15666 type_base_sptr utype = is_type(utype_decl);
15669 // If the DIE for the reference type doesn't have a byte_size
15670 // attribute then we assume the size of the reference is the address
15671 // size of the current translation unit.
15672 uint64_t size = ctxt.cur_transl_unit()->get_address_size();
15673 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
15676 // And the size of the reference must be the same as the address size
15677 // of the current translation unit.
15678 ABG_ASSERT((size_t) ctxt.cur_transl_unit()->get_address_size() == size);
// NOTE(review): the "? true : false" below is redundant; the
// comparison already yields a bool.
15680 bool is_lvalue = (tag == DW_TAG_reference_type) ? true : false;
15682 result.reset(new reference_type_def(utype, is_lvalue, size,
// If there is a current corpus, look for an equivalent reference type
// in it. NOTE(review): what is done with the type that is found is
// not shown here (the embedded numbering skips 15687).
15685 if (corpus_sptr corp = ctxt.current_corpus())
15686 if (reference_type_def_sptr t = lookup_reference_type(*result, *corp))
15688 ctxt.associate_die_to_type(die, result, where_offset);
15692 /// Build a function type from a DW_TAG_subroutine_type or DW_TAG_subprogram DIE.
15694 /// @param ctxt the read context to consider.
15696 /// @param die the DIE to read from.
15698 /// @param is_method points to a class or union declaration iff we're
15699 /// building the type for a method. This is the enclosing class or
15700 /// union of the method.
15702 /// @param where_offset the offset of the DIE where we are "logically"
15703 /// positioned at, in the DIE tree. This is useful when @p die is
15704 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
15707 /// @return a pointer to the resulting function_type_sptr.
15708 static function_type_sptr
15709 build_function_type(read_context& ctxt,
15711 class_or_union_sptr is_method,
15712 size_t where_offset)
15714 function_type_sptr result;
15719 ABG_ASSERT(dwarf_tag(die) == DW_TAG_subroutine_type
15720 || dwarf_tag(die) == DW_TAG_subprogram);
15723 ABG_ASSERT(ctxt.get_die_source(die, source));
// NOTE(review): type_decl is not referenced on any of the lines of
// this function that are shown here; it looks unused.
15725 decl_base_sptr type_decl;
15727 translation_unit_sptr tu = ctxt.cur_transl_unit();
15730 /// If, inside the current translation unit, we've already seen a
15731 /// function type with the same text representation, then reuse that one.
15733 if (type_base_sptr t = ctxt.lookup_fn_type_from_die_repr_per_tu(die))
15735 result = is_function_type(t);
15736 ABG_ASSERT(result);
15737 ctxt.associate_die_to_type(die, result, where_offset);
15741 bool odr_is_relevant = ctxt.odr_is_relevant(die);
15742 if (odr_is_relevant)
15744 // So we can rely on the One Definition Rule to say that if
15745 // several different function types have the same name (or
15746 // rather, representation) across the entire binary, then they
15747 // ought to designate the same function type. So let's ensure
15748 // that if we've already seen a function type with the same
15749 // representation as the function type 'die', then it's the same
15750 // type as the one denoted by 'die'.
15751 if (function_type_sptr fn_type =
15752 is_function_type(ctxt.lookup_type_artifact_from_die(die)))
15754 ctxt.associate_die_to_type(die, fn_type, where_offset);
15759 // Let's look at the DIE to detect if it's the DIE for a method
15760 // (type). If it is, we can deduce the name of its enclosing class
15761 // and if it's a static or const.
15762 bool is_const = false;
15763 bool is_static = false;
15764 Dwarf_Die object_pointer_die;
15765 Dwarf_Die class_type_die;
15766 bool has_this_parm_die =
15767 die_function_type_is_method_type(ctxt, die, where_offset,
15768 object_pointer_die,
15771 if (has_this_parm_die)
15773 // The function (type) has a "this" parameter DIE. It means it's
15774 // a member function DIE.
15776 if (die_object_pointer_is_for_const_method(&object_pointer_die))
15781 // We were initially called as if the function represented
15782 // by DIE was *NOT* a member function. But now we know it's
15783 // a member function. Let's take that into account.
15784 class_or_union_sptr klass_type =
15785 is_class_or_union_type(build_ir_node_from_die(ctxt, &class_type_die,
15786 /*called_from_pub_decl=*/true,
15788 ABG_ASSERT(klass_type);
15789 is_method = klass_type;
15793 // Let's create the type early and record it as being for the DIE
15794 // 'die'. This way, when building the sub-type triggers the
15795 // creation of a type matching the same 'die', then we'll reuse this
// one.
//
// A method_type is created when is_method is set, a plain
// function_type otherwise.
15798 result.reset(is_method
15799 ? new method_type(is_method, is_const,
15800 tu->get_address_size(),
15802 : new function_type(ctxt.env(), tu->get_address_size(),
15804 ctxt.associate_die_to_type(die, result, where_offset);
15805 ctxt.die_wip_function_types_map(source)[dwarf_dieoffset(die)] = result;
15806 ctxt.associate_die_repr_to_fn_type_per_tu(die, result);
// The return type comes from the DW_AT_type attribute of the DIE.
15808 type_base_sptr return_type;
15809 Dwarf_Die ret_type_die;
15810 if (die_die_attribute(die, DW_AT_type, ret_type_die))
15812 is_type(build_ir_node_from_die(ctxt, &ret_type_die,
15813 /*called_from_public_decl=*/true,
15816 return_type = is_type(build_ir_node_for_void_type(ctxt));
15817 result->set_return_type(return_type);
// Walk the children of the DIE to collect the parameters.
15820 function_decl::parameters function_parms;
15822 if (dwarf_child(die, &child) == 0)
15825 int child_tag = dwarf_tag(&child);
15826 if (child_tag == DW_TAG_formal_parameter)
15828 // This is a "normal" function parameter.
15829 string name, linkage_name;
15831 die_loc_and_name(ctxt, &child, loc, name, linkage_name);
15832 if (!tools_utils::string_is_ascii_identifier(name))
15833 // Sometimes, bogus compilers emit names that are
15834 // non-ascii garbage. Let's just ditch that for now.
15836 bool is_artificial = die_is_artificial(&child);
15837 type_base_sptr parm_type;
15838 Dwarf_Die parm_type_die;
15839 if (die_die_attribute(&child, DW_AT_type, parm_type_die))
15841 is_type(build_ir_node_from_die(ctxt, &parm_type_die,
15842 /*called_from_public_decl=*/true,
15846 function_decl::parameter_sptr p
15847 (new function_decl::parameter(parm_type, name, loc,
15848 /*variadic_marker=*/false,
15850 function_parms.push_back(p);
15852 else if (child_tag == DW_TAG_unspecified_parameters)
15854 // This is a variadic function parameter.
15855 bool is_artificial = die_is_artificial(&child);
15856 ir::environment* env = ctxt.env();
15858 type_base_sptr parm_type = env->get_variadic_parameter_type();
15859 function_decl::parameter_sptr p
15860 (new function_decl::parameter(parm_type,
15863 /*variadic_marker=*/true,
15865 function_parms.push_back(p);
15866 // After a DW_TAG_unspecified_parameters tag, we shouldn't
15867 // keep reading for parameters. The
15868 // unspecified_parameters TAG should be the last parameter
15869 // that we record. For instance, if there are multiple
15870 // DW_TAG_unspecified_parameters DIEs then we should care
15871 // only for the first one.
15875 while (dwarf_siblingof(&child, &child) == 0);
15877 result->set_parameters(function_parms);
15879 tu->bind_function_type_life_time(result);
// Remove the entry that was added to die_wip_function_types_map right
// after the type was created.
15882 die_function_type_map_type::const_iterator i =
15883 ctxt.die_wip_function_types_map(source).
15884 find(dwarf_dieoffset(die));
15885 if (i != ctxt.die_wip_function_types_map(source).end())
15886 ctxt.die_wip_function_types_map(source).erase(i);
15889 maybe_canonicalize_type(result, ctxt);
15893 /// Build a subrange type from a DW_TAG_subrange_type.
15895 /// @param ctxt the read context to consider.
15897 /// @param die the DIE to read from.
15899 /// @param where_offset the offset of the DIE where we are "logically"
15900 /// positioned at in the DIE tree. This is useful when @p die is
15901 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
15904 /// @param associate_type_to_die if this is true then the resulting
15905 /// type is associated to the @p die, so that next time when the
15906 /// system looks up the type associated to it, the current resulting
15907 /// type is returned. If false, then no association is done and the
15908 /// resulting type can be destroyed right after. This can be useful
15909 /// when the sole purpose of building the @ref
15910 /// array_type_def::subrange_type is to use some of its methods like,
15911 /// e.g., its name pretty printing methods.
15913 /// @return the newly built instance of @ref
15914 /// array_type_def::subrange_type, or nil if no type could be built.
15915 static array_type_def::subrange_sptr
15916 build_subrange_type(read_context& ctxt,
15917 const Dwarf_Die* die,
15918 size_t where_offset,
15919 bool associate_type_to_die)
15921 array_type_def::subrange_sptr result;
15927 ABG_ASSERT(ctxt.get_die_source(die, source));
// This function only deals with DW_TAG_subrange_type DIEs.
15929 unsigned tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
15930 if (tag != DW_TAG_subrange_type)
15933 string name = die_name(die);
// The lower bound defaults to the one of the language of the current
// translation unit; it is overridden below if the DIE carries a
// DW_AT_lower_bound attribute.
15935 translation_unit::language language = ctxt.cur_transl_unit()->get_language();
15936 array_type_def::subrange_type::bound_value lower_bound =
15937 get_default_array_lower_bound(language);
15938 array_type_def::subrange_type::bound_value upper_bound;
15939 uint64_t count = 0;
15940 bool is_infinite = false;
15942 // The DWARF 4 specification says, in [5.11 Subrange Type Entries]:
15945 // The subrange entry may have the attributes
15946 // DW_AT_lower_bound and DW_AT_upper_bound to
15947 // specify, respectively, the lower and upper bound
15948 // values of the subrange.
15950 // So let's look for DW_AT_lower_bound first.
15951 die_constant_attribute(die, DW_AT_lower_bound, lower_bound);
15953 // Then, DW_AT_upper_bound.
15954 if (!die_constant_attribute(die, DW_AT_upper_bound, upper_bound))
15956 // The DWARF 4 spec says, in [5.11 Subrange Type Entries]:
15959 // The DW_AT_upper_bound attribute may be replaced
15960 // by a DW_AT_count attribute, whose value
15961 // describes the number of elements in the
15962 // subrange rather than the value of the last element.
15965 // So, as DW_AT_upper_bound is not present in this
15966 // case, let's see if there is a DW_AT_count.
15967 die_unsigned_constant_attribute(die, DW_AT_count, count);
15969 // We can deduce the upper_bound from the
15970 // lower_bound and the number of elements of the array.
//
// Note that upper_bound is only assigned when lower_bound + count is
// not zero.
15972 if (int64_t u = lower_bound.get_signed_value() + count)
15973 upper_bound = u - 1;
15975 if (upper_bound.get_unsigned_value() == 0 && count == 0)
15976 // No upper_bound nor count was present on the DIE, this means
15977 // the array is considered to have an infinite (or rather not known) size.
15979 is_infinite = true;
15982 if (UINT64_MAX == upper_bound.get_unsigned_value())
15984 // If the upper_bound size is the max of the integer value, then
15985 // it most certainly means infinite size.
15986 is_infinite = true;
15987 upper_bound.set_unsigned(0);
15991 (new array_type_def::subrange_type(ctxt.env(),
15996 result->is_infinite(is_infinite);
15998 // Load the underlying type.
15999 Dwarf_Die underlying_type_die;
16000 type_base_sptr underlying_type;
16001 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
16003 is_type(build_ir_node_from_die(ctxt,
16004 &underlying_type_die,
16005 /*called_from_public_decl=*/true,
16008 if (underlying_type)
16009 result->set_underlying_type(underlying_type);
16011 if (associate_type_to_die)
16012 ctxt.associate_die_to_type(die, result, where_offset);
16017 /// Build the sub-ranges of an array type.
16019 /// This is a sub-routine of build_array_type().
16021 /// @param ctxt the context to read from.
16023 /// @param die the DIE of tag DW_TAG_array_type which contains
16024 /// children DIEs that represent the sub-ranges.
16026 /// @param subranges out parameter. This is set to the sub-ranges
16027 /// that are built from @p die.
16029 /// @param where_offset the offset of the DIE where we are "logically"
16030 /// positioned at, in the DIE tree. This is useful when @p die is
16031 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
///
/// @param associate_type_to_die if true, each sub-range is built with
/// build_ir_node_from_die(). Otherwise, each sub-range is built with
/// build_subrange_type(), asking it not to associate the resulting
/// type to its DIE.
16034 build_subranges_from_array_type_die(read_context& ctxt,
16035 const Dwarf_Die* die,
16036 array_type_def::subranges_type& subranges,
16037 size_t where_offset,
16038 bool associate_type_to_die)
// Walk the children of the array DIE; only those with the
// DW_TAG_subrange_type tag are considered.
16042 if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
16046 int child_tag = dwarf_tag(&child);
16047 if (child_tag == DW_TAG_subrange_type)
16049 array_type_def::subrange_sptr s;
16050 if (associate_type_to_die)
16052 // We are being called to create the type, add it to
16053 // the current type graph and associate it to the
16054 // DIE it's been created from.
16055 type_or_decl_base_sptr t =
16056 build_ir_node_from_die(ctxt, &child,
16057 /*called_from_public_decl=*/true,
16059 s = is_subrange_type(t);
16062 // We are being called to create the type but *NOT*
16063 // add it to the current type tree, *NOR* associate
16064 // it to the DIE it's been created from.
16065 s = build_subrange_type(ctxt, &child,
16067 /*associate_type_to_die=*/false);
16069 subranges.push_back(s);
16072 while (dwarf_siblingof(&child, &child) == 0);
16076 /// Build an array type from a DW_TAG_array_type DIE.
16078 /// @param ctxt the read context to consider.
16080 /// @param die the DIE to read from.
16082 /// @param called_from_public_decl true if this function was called
16083 /// from a context where either a public function or a public variable
16084 /// is being built.
16086 /// @param where_offset the offset of the DIE where we are "logically"
16087 /// positioned at, in the DIE tree. This is useful when @p die is
16088 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
16091 /// @return a pointer to the resulting array_type_def.
16092 static array_type_def_sptr
16093 build_array_type(read_context& ctxt,
16095 bool called_from_public_decl,
16096 size_t where_offset)
16098 array_type_def_sptr result;
16104 ABG_ASSERT(ctxt.get_die_source(die, source));
// This function only deals with DW_TAG_array_type DIEs.
16106 unsigned tag = dwarf_tag(die);
16107 if (tag != DW_TAG_array_type)
// The DW_AT_type attribute of the DIE designates the element type.
16110 decl_base_sptr type_decl;
16111 Dwarf_Die type_die;
16113 if (die_die_attribute(die, DW_AT_type, type_die))
16114 type_decl = is_decl(build_ir_node_from_die(ctxt, &type_die,
16115 called_from_public_decl,
16120 // The call to build_ir_node_from_die() could have triggered the
16121 // creation of the type for this DIE. In that case, just return it.
16122 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
16124 result = is_array_type(t);
16125 ABG_ASSERT(result);
16129 type_base_sptr type = is_type(type_decl);
// Build the sub-ranges from the children of the DIE. Note that this
// call does not pass the last parameter (associate_type_to_die) of
// build_subranges_from_array_type_die(); presumably a default value
// is provided by a declaration that is not shown here.
16132 array_type_def::subranges_type subranges;
16134 build_subranges_from_array_type_die(ctxt, die, subranges, where_offset);
16136 result.reset(new array_type_def(type, subranges, location()));
16141 /// Create a typedef_decl from a DW_TAG_typedef DIE.
16143 /// @param ctxt the read context to consider.
16145 /// @param die the DIE to read from.
16147 /// @param called_from_public_decl true if this function was called
16148 /// from a context where either a public function or a public variable
16149 /// is being built.
16151 /// @param where_offset the offset of the DIE where we are "logically"
16152 /// positioned at, in the DIE tree. This is useful when @p die is
16153 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
16156 /// @return the newly created typedef_decl.
16157 static typedef_decl_sptr
16158 build_typedef_type(read_context& ctxt,
16160 bool called_from_public_decl,
16161 size_t where_offset)
16163 typedef_decl_sptr result;
16169 ABG_ASSERT(ctxt.get_die_source(die, source));
// This function only deals with DW_TAG_typedef DIEs.
16171 unsigned tag = dwarf_tag(die);
16172 if (tag != DW_TAG_typedef)
16175 string name, linkage_name;
16177 die_loc_and_name(ctxt, die, loc, name, linkage_name);
// When types should be reused from the corpus group, look for a
// typedef defined at the same source location in it.
16179 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
16181 result = lookup_typedef_type_per_location(loc.expand(), *corp);
// NOTE(review): the lookup below is performed when the ODR is *not*
// relevant for the DIE -- confirm that this is intended.
16183 if (!ctxt.odr_is_relevant(die))
16184 if (typedef_decl_sptr t = is_typedef(ctxt.lookup_artifact_from_die(die)))
16189 type_base_sptr utype;
16190 Dwarf_Die underlying_type_die;
16191 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
16192 // A typedef DIE with no underlying type means a typedef to void.
16194 utype = ctxt.env()->get_void_type();
16198 is_type(build_ir_node_from_die(ctxt,
16199 &underlying_type_die,
16200 called_from_public_decl,
16205 // The call to build_ir_node_from_die() could have triggered the
16206 // creation of the type for this DIE. In that case, just return it.
16208 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
16210 result = is_typedef(t);
16211 ABG_ASSERT(result);
16216 result.reset(new typedef_decl(name, utype, loc, linkage_name));
// If the typedef names an anonymous class, record the typedef as the
// naming typedef of that class.
16218 if (class_decl_sptr klass = is_class_type(utype))
16219 if (is_anonymous_type(klass))
16220 klass->set_naming_typedef(result);
16223 ctxt.associate_die_to_type(die, result, where_offset);
16228 /// Build a @ref var_decl out of a DW_TAG_variable DIE if the variable
16229 /// denoted by the DIE is not suppressed by a suppression
16230 /// specification associated to the current read context.
16232 /// Note that if a member variable declaration with the same name as
16233 /// the name of the DIE we are looking at exists, this function returns
16234 /// that existing variable declaration.
16236 /// @param ctxt the read context to use.
///
/// @param scope the scope of the variable. If it is a class type, the
/// data members of that class are searched for a variable with the
/// same name as @p die.
16238 /// @param die the DIE representing the variable we are looking at.
16240 /// @param where_offset the offset of the DIE where we are "logically"
16241 /// positioned at, in the DIE tree. This is useful when @p die is
16242 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
16245 /// @param result if this is set to an existing var_decl, this means
16246 /// that the function will append the new properties it sees on @p die
16247 /// to that existing var_decl. Otherwise, if this parameter is NULL, a
16248 /// new var_decl is going to be allocated and returned.
16250 /// @param is_required_decl_spec this is true iff the variable to
16251 /// build is referred to as being the specification of another variable.
16254 /// @return a pointer to the newly created var_decl. If the var_decl
16255 /// could not be built, this function returns NULL.
16256 static var_decl_sptr
16257 build_or_get_var_decl_if_not_suppressed(read_context& ctxt,
16260 size_t where_offset,
16261 var_decl_sptr result,
16262 bool is_required_decl_spec)
// NOTE(review): `scope`, `die` and `var` are used below but are not
// declared on any line shown here; the embedded numbering skips
// 16258-16259 and 16263-16264.
16265 if (variable_is_suppressed(ctxt, scope, die, is_required_decl_spec))
// For a class scope, look for an existing data member named like the
// DIE before building a new variable.
16268 if (class_decl* class_type = is_class_type(scope))
16270 string var_name = die_name(die);
16271 if (!var_name.empty())
16272 if ((var = class_type->find_data_member(var_name)))
16275 var = build_var_decl(ctxt, die, where_offset, result);
16279 /// Create a variable symbol with a given name.
///
/// The symbol is created with index 0, size 0, the OBJECT type, the
/// GLOBAL binding and the default visibility. It is flagged as being
/// defined and as not being a common symbol.
16281 /// @param sym_name the name of the variable symbol.
16283 /// @param env the environment to create the default symbol in.
16285 /// @return the newly created symbol.
16286 static elf_symbol_sptr
16287 create_default_var_sym(const string& sym_name, const environment *env)
// The version of the symbol is a default-constructed one.
16289 elf_symbol::version ver;
16290 elf_symbol::visibility vis = elf_symbol::DEFAULT_VISIBILITY;
16291 elf_symbol_sptr result =
16292 elf_symbol::create(env,
16293 /*symbol index=*/ 0,
16294 /*symbol size=*/ 0,
16296 /*symbol type=*/ elf_symbol::OBJECT_TYPE,
16297 /*symbol binding=*/ elf_symbol::GLOBAL_BINDING,
16298 /*symbol is defined=*/ true,
16299 /*symbol is common=*/ false,
16300 /*symbol version=*/ ver,
16301 /*symbol_visibility=*/vis,
16302 /*is_linux_string_cst=*/false);
16306 /// Build a @ref var_decl out of a DW_TAG_variable DIE.
///
/// Note that DW_TAG_member DIEs are accepted as well.
16308 /// @param ctxt the read context to use.
16310 /// @param die the DIE representing the variable we are looking at.
16312 /// @param where_offset the offset of the DIE where we are "logically"
16313 /// positioned at, in the DIE tree. This is useful when @p die is
16314 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
16317 /// @param result if this is set to an existing var_decl, this means
16318 /// that the function will append the new properties it sees on @p die
16319 /// to that existing var_decl. Otherwise, if this parameter is NULL, a
16320 /// new var_decl is going to be allocated and returned.
16322 /// @return a pointer to the newly created var_decl. If the var_decl
16323 /// could not be built, this function returns NULL.
16324 static var_decl_sptr
16325 build_var_decl(read_context& ctxt,
16327 size_t where_offset,
16328 var_decl_sptr result)
16333 int tag = dwarf_tag(die);
16334 ABG_ASSERT(tag == DW_TAG_variable || tag == DW_TAG_member);
// DIEs that are not public declarations get a special treatment; the
// guarded statement is not shown here (presumably an early return).
16336 if (!die_is_public_decl(die))
16340 ABG_ASSERT(ctxt.get_die_source(die, source));
// The type of the variable comes from the DW_AT_type attribute.
16342 type_base_sptr type;
16343 Dwarf_Die type_die;
16344 if (die_die_attribute(die, DW_AT_type, type_die))
16346 decl_base_sptr ty =
16347 is_decl(build_ir_node_from_die(ctxt, &type_die,
16348 /*called_from_public_decl=*/true,
16352 type = is_type(ty);
16359 string name, linkage_name;
16361 die_loc_and_name(ctxt, die, loc, name, linkage_name);
16364 result.reset(new var_decl(name, type, loc, linkage_name));
16367 // We were called to append properties that might have been
16368 // missing from the first version of the variable. And usually
16369 // that missing property is the mangled name.
16370 if (!linkage_name.empty())
16371 result->set_linkage_name(linkage_name);
16374 // Check if a variable symbol with this name is exported by the elf
16375 // binary. If it is, then set the symbol of the variable, if it's
16376 // not set already.
16377 if (!result->get_symbol())
16379 elf_symbol_sptr var_sym;
// When the symbol table is ignored, a default symbol is made up. It
// is named after the linkage name of the variable, or after its name
// if the linkage name is empty.
16380 if (get_ignore_symbol_table(ctxt))
16383 result->get_linkage_name().empty()
16384 ? result->get_name()
16385 : result->get_linkage_name();
16387 var_sym = create_default_var_sym(var_name, ctxt.env());
16388 ABG_ASSERT(var_sym);
16389 add_symbol_to_map(var_sym, ctxt.var_syms());
// Otherwise, the symbol is looked up from the address of the
// variable.
16393 Dwarf_Addr var_addr;
16394 if (ctxt.get_variable_address(die, var_addr))
// NOTE(review): the doubled "var_sym = var_sym =" below is redundant;
// a single assignment would do.
16395 var_sym = var_sym = ctxt.variable_symbol_is_exported(var_addr);
16400 result->set_symbol(var_sym);
16401 // If the linkage name is not set or is wrong, set it to
16402 // the name of the underlying symbol.
16403 string linkage_name = result->get_linkage_name();
16404 if (linkage_name.empty()
16405 || !var_sym->get_alias_from_name(linkage_name))
16406 result->set_linkage_name(var_sym->get_name());
16407 result->set_is_in_public_symbol_table(true);
16414 /// Test if a given function denoted by its DIE and its scope is
16415 /// suppressed by any of the suppression specifications associated to
16416 /// a given context of ELF/DWARF reading.
16418 /// Note that a non-member function whose symbol is not exported is
16419 /// also suppressed.
16421 /// @param ctxt the ELF/DWARF reading context of interest.
16423 /// @param scope the scope of the function.
16425 /// @param function_die the DIE representing the function.
16427 /// @return true iff @p function_die is suppressed by at least one
16428 /// suppression specification attached to the @p ctxt.
16430 function_is_suppressed(const read_context& ctxt,
16431 const scope_decl* scope,
16432 Dwarf_Die *function_die)
// Only non-null DW_TAG_subprogram DIEs are considered.
16434 if (function_die == 0
16435 || dwarf_tag(function_die) != DW_TAG_subprogram)
16438 string fname = die_string_attribute(function_die, DW_AT_name);
16439 string flinkage_name = die_linkage_name(function_die);
16440 string qualified_name = build_qualified_name(scope, fname);
16442 // A non-member function whose symbol is not exported is suppressed.
//
// Note that this check is skipped for functions whose scope is a
// class type and for declaration-only DIEs.
16443 if (!is_class_type(scope) && !die_is_declaration_only(function_die))
16445 Dwarf_Addr fn_addr;
16446 elf_symbol_sptr fn_sym;
16447 if (!ctxt.get_function_address(function_die, fn_addr))
16449 if (!get_ignore_symbol_table(ctxt))
16451 // We were not instructed to ignore (avoid loading) the
16452 // symbol table, so we can rely on its presence to see if
16453 // the address corresponds to the address of an exported
16454 // function symbol.
16455 if (!ctxt.function_symbol_is_exported(fn_addr))
// Otherwise, defer to the suppression specifications of the context.
16460 return suppr::function_is_suppressed(ctxt, qualified_name,
16462 /*require_drop_property=*/true);
16465 /// Build a @ref function_decl out of a DW_TAG_subprogram DIE if the
16466 /// function denoted by the DIE is not suppressed by a suppression
16467 /// specification associated to the current read context.
16469 /// Note that if a member function declaration with the same signature
16470 /// (pretty representation) as one of the DIE we are looking at
16471 /// exists, this function returns that existing function declaration.
16473 /// @param ctxt the read context to use.
16475 /// @param scope the scope of the function we are looking at.
16477 /// @param fn_die the DIE representing the function we are looking at.
16479 /// @param where_offset the offset of the DIE where we are "logically"
16480 /// positioned at, in the DIE tree. This is useful when @p fn_die is
16481 /// e.g., DW_TAG_partial_unit that can be included in several places in the DIE tree.
16484 /// @param result if this is set to an existing function_decl, this
16485 /// means that the function will append the new properties it sees on
16486 /// @p fn_die to that existing function_decl. Otherwise, if this
16487 /// parameter is NULL, a new function_decl is going to be allocated
/// and returned.
16490 /// @return a pointer to the resulting function_decl. If the
16491 /// function_decl could not be built, this function returns NULL.
16492 static function_decl_sptr
16493 build_or_get_fn_decl_if_not_suppressed(read_context& ctxt,
16496 size_t where_offset,
16497 function_decl_sptr result)
16499 function_decl_sptr fn;
16500 if (function_is_suppressed(ctxt, scope, fn_die))
// If a function_decl is already known for this DIE, finish reading it
// and (re-)associate the DIE to that declaration and to its type.
16504 if ((fn = is_function_decl(ctxt.lookup_artifact_from_die(fn_die))))
16506 fn = maybe_finish_function_decl_reading(ctxt, fn_die, where_offset, fn);
16507 ctxt.associate_die_to_decl(fn_die, fn, /*do_associate_by_repr=*/true);
16508 ctxt.associate_die_to_type(fn_die, fn->get_type(), where_offset);
// Otherwise, build the function_decl from the DIE.
16512 fn = build_function_decl(ctxt, fn_die, where_offset, result);
16517 /// Test if a given variable, denoted by its DIE and its scope, is
16518 /// suppressed by any of the suppression specifications associated to
16519 /// a given context of ELF/DWARF reading.
16521 /// @param ctxt the ELF/DWARF reading context of interest.
16523 /// @param scope the scope of the variable.
16525 /// @param variable_die the DIE representing the variable.
16527 /// @param is_required_decl_spec if true, means that the @p
16528 /// variable_die being considered is for a variable decl that is a
16529 /// specification for a concrete variable being built.
16531 /// @return true iff @p variable_die is suppressed by at least one
16532 /// suppression specification attached to @p ctxt.
16534 variable_is_suppressed(const read_context& ctxt,
16535 const scope_decl* scope,
16536 Dwarf_Die *variable_die,
16537 bool is_required_decl_spec)
16539 if (variable_die == 0
16540 || (dwarf_tag(variable_die) != DW_TAG_variable
16541 && dwarf_tag(variable_die) != DW_TAG_member))
16544 string name = die_string_attribute(variable_die, DW_AT_name);
16545 string linkage_name = die_linkage_name(variable_die);
16546 string qualified_name = build_qualified_name(scope, name);
16548 // If a non-member variable that is a declaration (has no exported
16549 // symbol) is not the specification of another concrete variable,
16550 // then it's suppressed. This is a size optimization; it removes
16551 // useless declaration-only variables from the IR.
16553 // Otherwise, if a non-member variable is the specification of
16554 // another concrete variable, then this function looks at the
16555 // suppression specifications to know if it is suppressed.
16557 if (!is_class_type(scope) && !is_required_decl_spec)
16559 Dwarf_Addr var_addr = 0;
16560 elf_symbol_sptr var_sym;
16561 if (!ctxt.get_variable_address(variable_die, var_addr))
16563 if (!get_ignore_symbol_table(ctxt))
16565 // We were not instructed to ignore (avoid loading) the
16566 // symbol table, so we can rely on its presence to see if
16567 // the address corresponds to the address of an exported
16568 // variable symbol.
16569 if (!ctxt.variable_symbol_is_exported(var_addr))
// Query the suppression specifications using the qualified name of
// the variable.
16574 return suppr::variable_is_suppressed(ctxt, qualified_name,
16576 /*require_drop_property=*/true);
16579 /// Test if a type (designated by a given DIE) in a given scope is
16580 /// suppressed by the suppression specifications that are associated
16581 /// to a given read context.
16583 /// @param ctxt the read context to consider.
16585 /// @param scope the scope of the type DIE to consider.
16587 /// @param type_die the DIE that designates the type to consider.
16589 /// @param type_is_private out parameter. If this function returns
16590 /// true (the type @p type_die is suppressed) and if the type was
16591 /// suppressed because it's private then this parameter is set to true.
16594 /// @return true iff the type designated by the DIE @p type_die, in
16595 /// the scope @p scope, is suppressed by the suppression
16596 /// specifications associated to the current read context.
16598 type_is_suppressed(const read_context& ctxt,
16599 const scope_decl* scope,
16600 Dwarf_Die *type_die,
16601 bool &type_is_private)
16604 || (dwarf_tag(type_die) != DW_TAG_enumeration_type
16605 && dwarf_tag(type_die) != DW_TAG_class_type
16606 && dwarf_tag(type_die) != DW_TAG_structure_type
16607 && dwarf_tag(type_die) != DW_TAG_union_type))
16610 string type_name, linkage_name;
16611 location type_location;
16612 die_loc_and_name(ctxt, type_die, type_location, type_name, linkage_name);
16613 string qualified_name = build_qualified_name(scope, type_name);
// Query the suppression specifications using the qualified name of
// the type.
16615 return suppr::type_is_suppressed(ctxt, qualified_name,
16618 /*require_drop_property=*/true);
16621 /// Test if a type (designated by a given DIE) in a given scope is
16622 /// suppressed by the suppression specifications that are associated
16623 /// to a given read context.
///
/// This overload forwards to the four-argument overload and discards
/// the "type is private" information it reports.
16625 /// @param ctxt the read context to consider.
16627 /// @param scope the scope of the type DIE to consider.
16629 /// @param type_die the DIE that designates the type to consider.
16631 /// @return true iff the type designated by the DIE @p type_die, in
16632 /// the scope @p scope, is suppressed by the suppression
16633 /// specifications associated to the current read context.
16635 type_is_suppressed(const read_context& ctxt,
16636 const scope_decl* scope,
16637 Dwarf_Die *type_die)
16639 bool type_is_private = false;
16640 return type_is_suppressed(ctxt, scope, type_die, type_is_private);
16643 /// Get the opaque version of a type that was suppressed because it's
16644 /// a private type.
16646 /// The opaque version of the type is just a declared-only
16647 /// version of the type (class or union type) denoted by @p type_die.
16649 /// @param ctxt the read context in use.
16651 /// @param scope the scope of the type die we are looking at.
16653 /// @param type_die the type DIE we are looking at.
16655 /// @param where_offset the offset of the DIE where we are "logically"
16656 /// positioned at, in the DIE tree. This is useful when @p type_die is
16657 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
16660 /// @return the opaque version of the type denoted by @p type_die or
16661 /// nil if no opaque version was found.
16662 static class_or_union_sptr
16663 get_opaque_version_of_type(read_context &ctxt,
16665 Dwarf_Die *type_die,
16666 size_t where_offset)
16668 class_or_union_sptr result;
16673 unsigned tag = dwarf_tag(type_die);
16674 if (tag != DW_TAG_class_type
16675 && tag != DW_TAG_structure_type
16676 && tag != DW_TAG_union_type)
16679 string type_name, linkage_name;
16680 location type_location;
16681 die_loc_and_name(ctxt, type_die, type_location, type_name, linkage_name);
16682 if (!type_location)
16685 string qualified_name = build_qualified_name(scope, type_name);
16687 // TODO: also handle declaration-only unions. To do that, we mostly
16688 // need to adapt add_or_update_union_type to make it schedule
16689 // declaration-only unions for resolution too.
//
// Look for declaration-only classes already recorded under the same
// qualified name; if there are some, the last one is used.
16690 string_classes_map::const_iterator i =
16691 ctxt.declaration_only_classes().find(qualified_name);
16692 if (i != ctxt.declaration_only_classes().end())
16693 result = i->second.back();
16697 if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
16699 // So we didn't find any pre-existing forward-declared-only
16700 // class for the class definition that we could return as an
16701 // opaque type. So let's build one.
16703 // TODO: we need to be able to do this for unions too!
16704 class_decl_sptr klass(new class_decl(ctxt.env(), type_name,
16705 /*alignment=*/0, /*size=*/0,
16706 tag == DW_TAG_structure_type,
16708 decl_base::VISIBILITY_DEFAULT));
// The new class is flagged declaration-only, added to the scope,
// associated to the DIE and handed to the read context for possible
// later resolution.
16709 klass->set_is_declaration_only(true);
16710 add_decl_to_scope(klass, scope);
16711 ctxt.associate_die_to_type(type_die, klass, where_offset);
16712 ctxt.maybe_schedule_declaration_only_class_for_resolution(klass);
16720 /// Create a function symbol with a given name.
///
/// The symbol is created with index 0 and size 0, as a defined,
/// non-common symbol of function type, with global binding, default
/// visibility and a default-constructed version.
16722 /// @param sym_name the name of the symbol to create.
16724 /// @param env the environment to create the symbol in.
16726 /// @return the newly created symbol.
16728 create_default_fn_sym(const string& sym_name, const environment *env)
16730 elf_symbol::version ver;
16731 elf_symbol_sptr result =
16732 elf_symbol::create(env,
16733 /*symbol index=*/ 0,
16734 /*symbol size=*/ 0,
16736 /*symbol type=*/ elf_symbol::FUNC_TYPE,
16737 /*symbol binding=*/ elf_symbol::GLOBAL_BINDING,
16738 /*symbol is defined=*/ true,
16739 /*symbol is common=*/ false,
16740 /*symbol version=*/ ver,
16741 /*symbol visibility=*/elf_symbol::DEFAULT_VISIBILITY,
16742 /*symbol is linux string cst=*/false);
16746 /// Build a @ref function_decl out of a DW_TAG_subprogram DIE.
16748 /// @param ctxt the read context to use.
16750 /// @param die the DW_TAG_subprogram DIE to read from.
16752 /// @param where_offset the offset of the DIE where we are "logically"
16753 /// positioned at, in the DIE tree. This is useful when @p die is
16754 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
16757 /// @param fn an existing function_decl to update with the properties
16758 /// read from @p die, if any; it is the initial value of the result.
///
/// @return the resulting @ref function_decl.
16759 static function_decl_sptr
16760 build_function_decl(read_context& ctxt,
16762 size_t where_offset,
16763 function_decl_sptr fn)
16765 function_decl_sptr result = fn;
16768 ABG_ASSERT(dwarf_tag(die) == DW_TAG_subprogram);
16771 ABG_ASSERT(ctxt.get_die_source(die, source));
16773 if (!die_is_public_decl(die))
16776 translation_unit_sptr tu = ctxt.cur_transl_unit();
16779 string fname, flinkage_name;
16781 die_loc_and_name(ctxt, die, floc, fname, flinkage_name);
16783 size_t is_inline = die_is_declared_inline(die);
16784 class_or_union_sptr is_method =
16785 is_class_or_union_type(get_scope_for_die(ctxt, die, true, where_offset));
16789 // Add the properties that might have been missing from the
16790 // first declaration of the function. For now, it usually is
16791 // the mangled name that goes missing in the first declarations.
16793 // Also note that if 'fn' has just been cloned, the current
16794 // linkage name (of the current DIE) might be different from the
16795 // linkage name of 'fn'. In that case, update the linkage name
// accordingly.
16797 if (!flinkage_name.empty()
16798 && result->get_linkage_name() != flinkage_name)
16799 result->set_linkage_name(flinkage_name);
16801 if (!result->get_location())
16802 result->set_location(floc);
16806 function_type_sptr fn_type(build_function_type(ctxt, die, is_method,
// A method_decl is created when the scope of the DIE is a class or
// union; a plain function_decl is created otherwise.
16811 result.reset(is_method
16812 ? new method_decl(fname, fn_type,
16815 : new function_decl(fname, fn_type,
16820 // Set the symbol of the function. If the linkage name is not set
16821 // or is wrong, set it to the name of the underlying symbol.
16822 if (!result->get_symbol())
16824 elf_symbol_sptr fn_sym;
16825 if (get_ignore_symbol_table(ctxt))
// The symbol table is ignored: make up a default symbol named after
// the linkage name of the function, or after its name if the linkage
// name is empty.
16828 result->get_linkage_name().empty()
16829 ? result->get_name()
16830 : result->get_linkage_name();
16832 fn_sym = create_default_fn_sym(fn_name, ctxt.env());
16833 ABG_ASSERT(fn_sym);
16834 add_symbol_to_map(fn_sym, ctxt.fun_syms());
16838 Dwarf_Addr fn_addr;
16839 if (ctxt.get_function_address(die, fn_addr))
16840 fn_sym = ctxt.function_symbol_is_exported(fn_addr);
16845 result->set_symbol(fn_sym);
16846 string linkage_name = result->get_linkage_name();
16847 if (linkage_name.empty()
16848 || !fn_sym->get_alias_from_name(linkage_name))
16849 result->set_linkage_name(fn_sym->get_name());
16850 result->set_is_in_public_symbol_table(true);
16854 ctxt.associate_die_to_type(die, result->get_type(), where_offset);
16856 size_t die_offset = dwarf_dieoffset(die);
16859 && is_member_function(fn)
16860 && get_member_function_is_virtual(fn)
16861 && !result->get_linkage_name().empty())
16862 // This function is a virtual member function which has its
16863 // linkage name *and* has its underlying symbol correctly set.
16864 // It thus doesn't need any fixup related to elf symbol. So
16865 // remove it from the set of virtual member functions with linkage
16866 // names and no elf symbol that need to be fixed up.
16867 ctxt.die_function_decl_with_no_symbol_map().erase(die_offset);
16871 /// Add a set of addresses (representing function symbols) to a
16872 /// function symbol name -> symbol map.
16874 /// For a given symbol address, the function retrieves the name of the
16875 /// symbol as well as the symbol itself and inserts an entry {symbol
16876 /// name, symbol} into a symbol name -> symbol map.
16878 /// @param syms the set of symbol addresses to consider.
16880 /// @param map the map to populate.
16882 /// @param ctxt the context in which we are loading a given ELF file.
16884 add_fn_symbols_to_map(address_set_type& syms,
16885 string_elf_symbols_map_type& map,
16886 read_context& ctxt)
16888 for (address_set_type::iterator i = syms.begin(); i != syms.end(); ++i)
16890 elf_symbol_sptr sym = ctxt.lookup_elf_fn_symbol_from_address(*i);
// The symbol is expected to be already present, under its name, in
// the function symbols map of the read context.
16892 string_elf_symbols_map_type::iterator it =
16893 ctxt.fun_syms().find(sym->get_name());
16894 ABG_ASSERT(it != ctxt.fun_syms().end());
16899 /// Add a symbol to a symbol map.
///
/// The map is keyed by symbol name; each entry holds a sequence of
/// symbols carrying that name.
16901 /// @param sym the symbol to add.
16903 /// @param map the symbol map to add the symbol into.
16905 add_symbol_to_map(const elf_symbol_sptr& sym,
16906 string_elf_symbols_map_type& map)
16911 string_elf_symbols_map_type::iterator it = map.find(sym->get_name());
// No entry exists yet for this name: store a new sequence containing
// only this symbol.
16912 if (it == map.end())
16915 syms.push_back(sym);
16916 map[sym->get_name()] = syms;
// An entry already exists for this name: append the symbol to it.
16919 it->second.push_back(sym);
16922 /// Add a set of addresses (representing variable symbols) to a
16923 /// variable symbol name -> symbol map.
16925 /// For a given symbol address, the function retrieves the name of the
16926 /// symbol as well as the symbol itself and inserts an entry {symbol
16927 /// name, symbol} into a symbol name -> symbol map.
16929 /// @param syms the set of symbol addresses to consider.
16931 /// @param map the map to populate.
16933 /// @param ctxt the context in which we are loading a given ELF file.
16935 add_var_symbols_to_map(address_set_type& syms,
16936 string_elf_symbols_map_type& map,
16937 read_context& ctxt)
16939 for (address_set_type::iterator i = syms.begin(); i != syms.end(); ++i)
16941 elf_symbol_sptr sym = ctxt.lookup_elf_var_symbol_from_address(*i);
// The symbol is expected to be already present, under its name, in
// the variable symbols map of the read context.
16943 string_elf_symbols_map_type::iterator it =
16944 ctxt.var_syms().find(sym->get_name());
16945 ABG_ASSERT(it != ctxt.var_syms().end());
16950 /// Read all @ref abigail::translation_unit possible from the debug info
16951 /// accessible through a DWARF Front End Library handle, and stuff
16952 /// them into a libabigail ABI Corpus.
16954 /// @param ctxt the read context.
16956 /// @return a pointer to the resulting corpus, or NULL if the corpus
16957 /// could not be constructed.
16959 read_debug_info_into_corpus(read_context& ctxt)
16961 ctxt.clear_per_corpus_data();
// Create the corpus if the read context doesn't have a current one
// yet.
16963 if (!ctxt.current_corpus())
16965 corpus_sptr corp (new corpus(ctxt.env(), ctxt.elf_path()));
16966 ctxt.current_corpus(corp);
16968 ctxt.env(corp->get_environment());
16971 // First set some mundane properties of the corpus gathered from
// the read context (path, origin, soname, needed and architecture).
16973 ctxt.current_corpus()->set_path(ctxt.elf_path());
16974 if (ctxt.is_linux_kernel_binary())
16975 ctxt.current_corpus()->set_origin(corpus::LINUX_KERNEL_BINARY_ORIGIN);
16977 ctxt.current_corpus()->set_origin(corpus::DWARF_ORIGIN);
16978 ctxt.current_corpus()->set_soname(ctxt.dt_soname());
16979 ctxt.current_corpus()->set_needed(ctxt.dt_needed());
16980 ctxt.current_corpus()->set_architecture_name(ctxt.elf_architecture());
16981 if (corpus_group_sptr group = ctxt.current_corpus_group())
16982 group->add_corpus(ctxt.current_corpus());
16984 // Set symbols information to the corpus.
16985 if (!get_ignore_symbol_table(ctxt))
16987 if (ctxt.load_in_linux_kernel_mode() && ctxt.is_linux_kernel_binary())
// For a Linux kernel binary loaded in Linux kernel mode, the symbol
// maps of the corpus are built from the linux_exported_* and
// linux_exported_gpl_* sets of symbol addresses.
16989 string_elf_symbols_map_sptr exported_fn_symbols_map
16990 (new string_elf_symbols_map_type);
16991 add_fn_symbols_to_map(*ctxt.linux_exported_fn_syms(),
16992 *exported_fn_symbols_map,
16994 add_fn_symbols_to_map(*ctxt.linux_exported_gpl_fn_syms(),
16995 *exported_fn_symbols_map,
16997 ctxt.current_corpus()->set_fun_symbol_map(exported_fn_symbols_map);
16999 string_elf_symbols_map_sptr exported_var_symbols_map
17000 (new string_elf_symbols_map_type);
17001 add_var_symbols_to_map(*ctxt.linux_exported_var_syms(),
17002 *exported_var_symbols_map,
17004 add_var_symbols_to_map(*ctxt.linux_exported_gpl_var_syms(),
17005 *exported_var_symbols_map,
17007 ctxt.current_corpus()->set_var_symbol_map(exported_var_symbols_map);
17011 ctxt.current_corpus()->set_fun_symbol_map(ctxt.fun_syms_sptr());
17012 ctxt.current_corpus()->set_var_symbol_map(ctxt.var_syms_sptr());
17015 ctxt.current_corpus()->set_undefined_fun_symbol_map
17016 (ctxt.undefined_fun_syms_sptr());
17017 ctxt.current_corpus()->set_undefined_var_symbol_map
17018 (ctxt.undefined_var_syms_sptr());
17022 ctxt.current_corpus()->set_fun_symbol_map(ctxt.fun_syms_sptr());
17023 ctxt.current_corpus()->set_var_symbol_map(ctxt.var_syms_sptr());
17026 // Get out now if no debug info is found.
17028 return ctxt.current_corpus();
17030 uint8_t address_size = 0;
17031 size_t header_size = 0;
17033 // Set the set of exported declaration that are defined.
17034 ctxt.exported_decls_builder
17035 (ctxt.current_corpus()->get_exported_decls_builder().get());
17037 // Walk all the DIEs of the debug info to build a DIE -> parent map
17038 // useful for get_die_parent() to work.
17040 tools_utils::timer t;
17043 cerr << "building die -> parent maps ...";
17047 ctxt.build_die_parent_maps();
17052 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17059 ctxt.env()->canonicalization_is_done(false);
17062 tools_utils::timer t;
17065 cerr << "building the libabigail internal representation ...";
17068 // And now walk all the DIEs again to build the libabigail IR.
17069 Dwarf_Half dwarf_version = 0;
17070 for (Dwarf_Off offset = 0, next_offset = 0;
17071 (dwarf_next_unit(ctxt.dwarf(), offset, &next_offset, &header_size,
17072 &dwarf_version, NULL, &address_size, NULL,
17074 offset = next_offset)
// The first DIE of a unit sits right after the unit header.
17076 Dwarf_Off die_offset = offset + header_size;
17078 if (!dwarf_offdie(ctxt.dwarf(), die_offset, &unit)
17079 || dwarf_tag(&unit) != DW_TAG_compile_unit)
17082 ctxt.dwarf_version(dwarf_version);
17086 // Build a translation_unit IR node from cu; note that cu must
17087 // be a DW_TAG_compile_unit die.
17088 translation_unit_sptr ir_node =
17089 build_translation_unit_and_add_to_ir(ctxt, &unit, address_size);
17090 ABG_ASSERT(ir_node);
17095 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17103 tools_utils::timer t;
17106 cerr << "resolving declaration only classes ...";
17109 ctxt.resolve_declaration_only_classes();
17113 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17121 tools_utils::timer t;
17124 cerr << "fixing up functions with linkage name but "
17125 << "no advertised underlying symbols ....";
17128 ctxt.fixup_functions_with_no_symbols();
17132 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17139 /// Now, look at the types that need to be canonicalized after the
17140 /// translation units have been constructed (which is just now) and
17141 /// canonicalize them.
17143 /// These types need to be canonicalized at the end of the translation
17144 /// unit reading phase because some types are modified by some DIEs
17145 /// even after the principal DIE describing the type has been read;
17146 /// this happens for clones of virtual destructors (for instance) or
17147 /// even for some static data members. We need to do that for types
17148 /// that are in the alternate debug info section and for types that
17149 /// are in the main debug info section.
17151 tools_utils::timer t;
17154 cerr << "perform late type canonicalizing ...\n";
17158 ctxt.perform_late_type_canonicalizing();
17162 cerr << "late type canonicalizing DONE@"
17163 << ctxt.current_corpus()->get_path()
17170 ctxt.env()->canonicalization_is_done(true);
17173 tools_utils::timer t;
17176 cerr << "sort functions and variables ...";
17179 ctxt.current_corpus()->sort_functions();
17180 ctxt.current_corpus()->sort_variables();
17184 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17191 return ctxt.current_corpus();
17194 /// Canonicalize a type if it's suitable for early canonicalizing, or,
17195 /// if it's not, schedule it for late canonicalization, after the
17196 /// debug info of the current translation unit has been fully read.
17198 /// A (composite) type is deemed suitable for early canonicalizing iff
17199 /// all of its sub-types are canonicalized themselves. Non composite
17200 /// types are always deemed suitable for early canonicalization.
17202 /// Note that this function doesn't work on *ANONYMOUS* classes,
17203 /// structs, unions or enums because it first does some
17204 /// canonicalization of the DWARF DIE @p die. That canonicalization
17205 /// is done by looking up @p die by name; and because these are
17206 /// anonymous types, they don't have names! and so that
17207 /// canonicalization fails. So the type artifact associated to @p
17208 /// die often ends being *NOT* canonicalized. This later leads to
17209 /// extreme slowness of operation, especially when comparisons are
17210 /// later performed on these anonymous types.
17212 /// So when you have classes, structs, unions, or enums that can be
17213 /// anonymous, please use this overload instead:
17216 /// maybe_canonicalize_type(const Dwarf_Die* die,
17217 /// const type_base_sptr& t,
17218 /// read_context& ctxt);
17220 /// It knows how to deal with anonymous types.
17222 /// Note that this function looks up the type artifact
17223 /// associated to @p die; as explained above, that lookup does not work
17224 /// well for anonymous types because they don't have names.
17226 /// @param die the type DIE to consider for canonicalization. Note
17227 /// that this DIE must have been associated with its type using the
17228 /// function read_context::associate_die_to_type() prior to calling
/// this function.
17231 /// @param ctxt the @ref read_context to use.
17233 maybe_canonicalize_type(const Dwarf_Die *die, read_context& ctxt)
17236 ABG_ASSERT(ctxt.get_die_source(die, source));
17238 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
17239 type_base_sptr t = ctxt.lookup_type_from_die(die);
17244 type_base_sptr peeled_type =
17245 peel_typedef_pointer_or_reference_type(t, /*peel_qual_types=*/false);
17246 if (is_class_type(peeled_type)
17247 || is_union_type(peeled_type)
17248 || is_function_type(peeled_type)
17249 || is_array_type(peeled_type)
17250 || is_qualified_type(peeled_type))
17251 // We delay canonicalization of classes/unions or typedef,
17252 // pointers, references and array to classes/unions. This is
17253 // because the (underlying) class might not be finished yet and we
17254 // might not be able to detect it here (thinking about
17255 // classes that are work-in-progress, or classes that might be
17256 // later amended by some DWARF construct). So we err on the safe
17257 // side. We also delay canonicalization for array and qualified
17258 // types because they can be edited (in particular by
17259 // maybe_strip_qualification) after they are initially built.
17260 ctxt.schedule_type_for_late_canonicalization(die);
17261 else if ((is_function_type(t)
17262 && ctxt.is_wip_function_type_die_offset(die_offset, source))
17263 || type_has_non_canonicalized_subtype(t))
17264 ctxt.schedule_type_for_late_canonicalization(die);
17269 /// Canonicalize a type if it's suitable for early canonicalizing, or,
17270 /// if it's not, schedule it for late canonicalization, after the
17271 /// debug info of the current translation unit has been fully read.
17273 /// A (composite) type is deemed suitable for early canonicalizing iff
17274 /// all of its sub-types are canonicalized themselves. Non composite
17275 /// types are always deemed suitable for early canonicalization.
17277 /// Note that this function knows how to deal with anonymous classes,
17278 /// structs and enums, unlike this overload:
17280 /// void maybe_canonicalize_type(const Dwarf_Die *die, read_context& ctxt)
17282 /// The problem, though, is that this function is much slower than that
17283 /// overload because of how the types that are meant for later
17284 /// canonicalization are stored. So the idea is that this function
17285 /// should be used only for the smallest possible subset of types that
17286 /// are anonymous and thus cannot be handled by that overload.
17288 /// @param t the type to consider for canonicalization.
17290 /// @param ctxt the @ref read_context to use.
17292 maybe_canonicalize_type(const type_base_sptr& t,
17293 read_context& ctxt)
17298 type_base_sptr peeled_type =
17299 peel_typedef_pointer_or_reference_type(t, /*peel_qual_types=*/false);
17300 if (is_class_type(peeled_type)
17301 || is_union_type(peeled_type)
17302 || is_function_type(peeled_type)
17303 || is_array_type(peeled_type)
17304 || is_qualified_type(peeled_type))
17305 // We delay canonicalization of classes/unions or typedef,
17306 // pointers, references and array to classes/unions. This is
17307 // because the (underlying) class might not be finished yet and we
17308 // might not be able to detect it here (thinking about
17309 // classes that are work-in-progress, or classes that might be
17310 // later amended by some DWARF construct). So we err on the safe
17311 // side. We also delay canonicalization for array and qualified
17312 // types because they can be edited (in particular by
17313 // maybe_strip_qualification) after they are initially built.
17314 ctxt.schedule_type_for_late_canonicalization(t);
17315 else if (type_has_non_canonicalized_subtype(t))
17316 ctxt.schedule_type_for_late_canonicalization(t);
17321 /// Canonicalize a type if it's suitable for early canonicalizing, or,
17322 /// if it's not, schedule it for late canonicalization, after the
17323 /// debug info of the current translation unit has been fully read.
17325 /// A (composite) type is deemed suitable for early canonicalizing iff
17326 /// all of its sub-types are canonicalized themselves. Non composite
17327 /// types are always deemed suitable for early canonicalization.
17329 /// Note that this function knows how to properly use either one of
17330 /// the following two overloads:
17333 ///   1/ void maybe_canonicalize_type(const Dwarf_Die *die,
17334 ///                                   read_context& ctxt);
17335 ///   2/ void maybe_canonicalize_type(const type_base_sptr& t,
17338 ///                                   read_context& ctxt);
17340 /// This function uses 2/ for function types and 1/ for all other
17341 /// types. Using 2/ is slower and bigger than using 1/, but then 1/
17342 /// deals poorly with anonymous types because of how poorly DIEs
17343 /// canonicalization works on anonymous types. That's why this
17344 /// function uses 2/ only for the types that really need it.
17346 /// @param die the DIE of the type denoted by @p t.
17348 /// @param t the type to consider. Its DIE is @p die.
17350 /// @param ctxt the read context in use.
17352 maybe_canonicalize_type(const Dwarf_Die *die,
17353 const type_base_sptr& t,
17354 read_context& ctxt)
// Function types are handed to the type-based overload.
17356 if (const function_type_sptr ft = is_function_type(t))
17358 maybe_canonicalize_type(ft, ctxt);
// Other types are handed to the DIE-based overload.
17362 maybe_canonicalize_type(die, ctxt);
17365 /// If a given decl is a member type declaration, set its access
17366 /// specifier from the DIE that represents it.
17368 /// @param member_type_declaration the member type declaration to update.
///
/// @param die the DIE that represents @p member_type_declaration.
17371 maybe_set_member_type_access_specifier(decl_base_sptr member_type_declaration,
17374 if (is_type(member_type_declaration)
17375 && is_member_decl(member_type_declaration))
17377 class_or_union* scope =
17378 is_class_or_union_type(member_type_declaration->get_scope());
// Start from private access, or public access when the enclosing
// scope is a class declared as a struct; die_access_specifier() is
// then called with that value (presumably updating it when the DIE
// carries an explicit access attribute -- to be confirmed) before it
// is set on the declaration.
17381 access_specifier access = private_access;
17382 if (class_decl* cl = is_class_type(scope))
17383 if (cl->is_struct())
17384 access = public_access;
17386 die_access_specifier(die, access);
17387 set_member_access_specifier(member_type_declaration, access);
17391 /// Build an IR node from a given DIE and add the node to the current
17392 /// IR being built and held in the read_context. Doing that is called
17393 /// "emitting an IR node for the DIE".
17395 /// @param ctxt the read context.
17397 /// @param die the DIE to consider.
17399 /// @param scope the scope under which the resulting IR node has to be added.
17402 /// @param called_from_public_decl set to true if this function is
17403 /// called from the functions used to build a public decl (functions
17404 /// and variables). In that case, this function accepts building IR
17405 /// nodes representing types. Otherwise, this function only creates
17406 /// IR nodes representing public decls (functions and variables).
17407 /// This is done to avoid emitting IR nodes for types that are not
17408 /// referenced by public functions or variables.
17410 /// @param where_offset the offset of the DIE where we are "logically"
17411 /// positioned at, in the DIE tree. This is useful when @p die is
17412 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
17415 /// @param is_required_decl_spec if true, it means the ir node to
17416 /// build is for a decl that is a specification for another decl that
17417 /// is concrete. If you don't know what this is, set it to false.
17419 /// @return the resulting IR node.
17420 static type_or_decl_base_sptr
17421 build_ir_node_from_die(read_context& ctxt,
17424 bool called_from_public_decl,
17425 size_t where_offset,
17426 bool is_required_decl_spec)
// Overview: the function dispatches on the DWARF tag of the DIE and
// builds (or looks up) the matching IR node in 'result'.
// NOTE(review): several short lines of this function (some
// parameters, braces, 'break'/'return' statements) are not visible in
// this view; the comments below only describe the visible statements.
17428 type_or_decl_base_sptr result;
// Guard against a missing DIE or a missing scope.
17430 if (!die || !scope)
17433 int tag = dwarf_tag(die);
// When not called on behalf of a public declaration, filter the DIEs
// that are considered at all.
17435 if (!called_from_public_decl)
17437 if (ctxt.load_all_types() && die_is_type(die))
17438 /* We were instructed to load debug info for all types,
17439 including those that are not reachable from a public
17440 declaration. So load the debug info for this type. */;
// Otherwise only function, variable, data member and namespace DIEs
// pass this filter.
17441 else if (tag != DW_TAG_subprogram
17442 && tag != DW_TAG_variable
17443 && tag != DW_TAG_member
17444 && tag != DW_TAG_namespace)
// The source of the DIE must be known to the read context.
17448 die_source source_of_die;
17449 ABG_ASSERT(ctxt.get_die_source(die, source_of_die));
// Look for a declaration already associated with this DIE offset.
17451 if ((result = ctxt.lookup_decl_from_die_offset(dwarf_dieoffset(die),
17457 // Type DIEs we support.
// Base types are added to the global scope of the current
// translation unit.
17458 case DW_TAG_base_type:
17459 if (type_decl_sptr t = build_type_decl(ctxt, die, where_offset))
17462 add_decl_to_scope(t, ctxt.cur_transl_unit()->get_global_scope());
// Typedefs are added to the scope passed to this function.
17467 case DW_TAG_typedef:
17469 typedef_decl_sptr t = build_typedef_type(ctxt, die,
17470 called_from_public_decl,
17472 result = add_decl_to_scope(t, scope);
17475 maybe_set_member_type_access_specifier(is_decl(result), die);
17476 maybe_canonicalize_type(die, ctxt);
// Pointer types are added to the global scope of the current
// translation unit.
17481 case DW_TAG_pointer_type:
17483 pointer_type_def_sptr p =
17484 build_pointer_type_def(ctxt, die,
17485 called_from_public_decl,
17490 add_decl_to_scope(p, ctxt.cur_transl_unit()->get_global_scope());
17491 ABG_ASSERT(result->get_translation_unit());
17492 maybe_canonicalize_type(die, ctxt);
// Lvalue and rvalue reference types share the same handling.
17497 case DW_TAG_reference_type:
17498 case DW_TAG_rvalue_reference_type:
17500 reference_type_def_sptr r =
17501 build_reference_type(ctxt, die,
17502 called_from_public_decl,
17507 add_decl_to_scope(r, ctxt.cur_transl_unit()->get_global_scope());
17509 ctxt.associate_die_to_type(die, r, where_offset);
17510 maybe_canonicalize_type(die, ctxt);
// const, volatile and restrict qualified types share the same
// handling.
17515 case DW_TAG_const_type:
17516 case DW_TAG_volatile_type:
17517 case DW_TAG_restrict_type:
17520 build_qualified_type(ctxt, die,
17521 called_from_public_decl,
17525 // Strip some potentially redundant type qualifiers from
17526 // the qualified type we just built.
17527 decl_base_sptr d = maybe_strip_qualification(is_qualified_type(q),
17530 d = get_type_declaration(q);
17532 type_base_sptr ty = is_type(d);
17533 // Associate the die to type ty again because 'ty' might be
17534 // different from 'q', because 'ty' is 'q' possibly
17535 // stripped from some redundant type qualifier.
17536 ctxt.associate_die_to_type(die, ty, where_offset);
17538 add_decl_to_scope(d, ctxt.cur_transl_unit()->get_global_scope());
17539 maybe_canonicalize_type(die, ctxt);
// Enum types are only built when they are not suppressed; they are
// added to the scope passed to this function.
17544 case DW_TAG_enumeration_type:
17546 if (!type_is_suppressed(ctxt, scope, die))
17548 enum_type_decl_sptr e = build_enum_type(ctxt, die, scope,
17550 result = add_decl_to_scope(e, scope);
17553 maybe_set_member_type_access_specifier(is_decl(result), die);
17554 maybe_canonicalize_type(die, ctxt);
// Classes and structs share the same handling; the tag is later used
// to tell add_or_update_class_type whether this is a struct.
17560 case DW_TAG_class_type:
17561 case DW_TAG_structure_type:
17563 bool type_is_private = false;
17564 bool type_suppressed=
17565 type_is_suppressed(ctxt, scope, die, type_is_private);
17567 if (type_suppressed && type_is_private)
17568 // The type is suppressed because it's private. If other
17569 // non-suppressed and declaration-only instances of this
17570 // type exist in the current corpus, then it means those
17571 // non-suppressed instances are opaque versions of the
17572 // suppressed private type. Lets return one of these opaque
17574 result = get_opaque_version_of_type(ctxt, scope, die, where_offset);
17575 else if (!type_suppressed)
17577 Dwarf_Die spec_die;
17578 scope_decl_sptr scop;
17579 class_decl_sptr klass;
// If the DIE has a DW_AT_specification, first build the IR node of
// the specification DIE, in the scope of that specification DIE.
17580 if (die_die_attribute(die, DW_AT_specification, spec_die))
17582 scope_decl_sptr skope =
17583 get_scope_for_die(ctxt, &spec_die,
17584 called_from_public_decl,
17587 decl_base_sptr cl =
17588 is_decl(build_ir_node_from_die(ctxt, &spec_die,
17590 called_from_public_decl,
17593 klass = dynamic_pointer_cast<class_decl>(cl);
17597 add_or_update_class_type(ctxt, die,
17599 tag == DW_TAG_structure_type,
17601 called_from_public_decl,
17606 add_or_update_class_type(ctxt, die, scope,
17607 tag == DW_TAG_structure_type,
17609 called_from_public_decl,
17614 maybe_set_member_type_access_specifier(klass, die);
17615 maybe_canonicalize_type(die, klass, ctxt);
// Union types are only built when they are not suppressed.
17620 case DW_TAG_union_type:
17621 if (!type_is_suppressed(ctxt, scope, die))
17623 union_decl_sptr union_type =
17624 add_or_update_union_type(ctxt, die, scope,
17626 called_from_public_decl,
17630 maybe_set_member_type_access_specifier(union_type, die);
17631 maybe_canonicalize_type(die, union_type, ctxt);
17633 result = union_type;
// No IR building code is visible for string types in this view.
17636 case DW_TAG_string_type:
17638 case DW_TAG_subroutine_type:
17640 function_type_sptr f = build_function_type(ctxt, die,
17646 maybe_canonicalize_type(die, ctxt);
// Array types are added to the global scope of the current
// translation unit.
17650 case DW_TAG_array_type:
17652 array_type_def_sptr a = build_array_type(ctxt,
17654 called_from_public_decl,
17659 add_decl_to_scope(a, ctxt.cur_transl_unit()->get_global_scope());
17660 ctxt.associate_die_to_type(die, a, where_offset);
17661 maybe_canonicalize_type(die, ctxt);
17665 case DW_TAG_subrange_type:
17667 // If we got here, this means the subrange type is a "free
17668 // form" defined in the global namespace of the current
17669 // translation unit, like what is found in Ada.
17670 array_type_def::subrange_sptr s =
17671 build_subrange_type(ctxt, die, where_offset);
17675 add_decl_to_scope(s, ctxt.cur_transl_unit()->get_global_scope());
17676 ctxt.associate_die_to_type(die, s, where_offset);
17677 maybe_canonicalize_type(die, ctxt);
// No IR building code is visible for the following type tags in this
// view.
17681 case DW_TAG_packed_type:
17683 case DW_TAG_set_type:
17685 case DW_TAG_file_type:
17687 case DW_TAG_ptr_to_member_type:
17689 case DW_TAG_thrown_type:
17691 case DW_TAG_interface_type:
17693 case DW_TAG_unspecified_type:
17695 case DW_TAG_shared_type:
17698 case DW_TAG_compile_unit:
17699 // We shouldn't reach this point b/c this should be handled by
17700 // build_translation_unit.
17701 ABG_ASSERT_NOT_REACHED;
// Namespaces and modules share the same handling.
17703 case DW_TAG_namespace:
17704 case DW_TAG_module:
17705 result = build_namespace_decl_and_add_to_ir(ctxt, die, where_offset);
// Variables and data members share the same handling.
17708 case DW_TAG_variable:
17709 case DW_TAG_member:
17711 Dwarf_Die spec_die;
17712 bool var_is_cloned = false;
// A DW_TAG_member DIE is not expected here for a C translation unit.
17714 if (tag == DW_TAG_member)
17715 ABG_ASSERT(!is_c_language(ctxt.cur_transl_unit()->get_language()));
// First case: the DIE refers to another DIE through
// DW_AT_specification or DW_AT_abstract_origin; var_is_cloned records
// that the latter attribute was the one found.  The IR node of the
// referred-to DIE is built first, in its own scope.
17717 if (die_die_attribute(die, DW_AT_specification, spec_die,false)
17718 || (var_is_cloned = die_die_attribute(die, DW_AT_abstract_origin,
17721 scope_decl_sptr spec_scope = get_scope_for_die(ctxt, &spec_die,
17722 called_from_public_decl,
17727 is_decl(build_ir_node_from_die(ctxt, &spec_die,
17729 called_from_public_decl,
17731 /*is_required_decl_spec=*/true));
17735 dynamic_pointer_cast<var_decl>(d);
// Build the variable for the current DIE, passing the var_decl 'm'
// obtained above to build_var_decl.
17738 m = build_var_decl(ctxt, die, where_offset, m);
17739 if (is_data_member(m))
17741 set_member_is_static(m, true);
17742 ctxt.associate_die_to_decl(die, m, where_offset,
17743 /*associate_by_repr=*/false);
17747 ABG_ASSERT(has_scope(m));
17748 ctxt.var_decls_to_re_add_to_tree().push_back(m);
17750 ABG_ASSERT(m->get_scope());
17751 ctxt.maybe_add_var_to_exported_decls(m.get());
// Second case: no such reference; build (or get) the variable unless
// it is suppressed, and add it to the scope passed to this function.
17756 else if (var_decl_sptr v =
17757 build_or_get_var_decl_if_not_suppressed(ctxt, scope, die,
17759 /*result=*/var_decl_sptr(),
17760 is_required_decl_spec))
17762 result = add_decl_to_scope(v, scope);
17763 ABG_ASSERT(is_decl(result)->get_scope());
17764 v = dynamic_pointer_cast<var_decl>(result);
17766 ABG_ASSERT(v->get_scope());
17767 ctxt.var_decls_to_re_add_to_tree().push_back(v);
17768 ctxt.maybe_add_var_to_exported_decls(v.get());
17773 case DW_TAG_subprogram:
17775 Dwarf_Die spec_die;
17776 Dwarf_Die abstract_origin_die;
17777 Dwarf_Die *interface_die = 0, *origin_die = 0;
17778 scope_decl_sptr interface_scope;
// Artificial subprogram DIEs get a dedicated treatment.
// NOTE(review): the statement controlled by this test is not visible
// in this view.
17779 if (die_is_artificial(die))
17782 function_decl_sptr fn;
// Find out whether the DIE refers to a specification DIE and/or to
// an abstract origin DIE.
17783 bool has_spec = die_die_attribute(die, DW_AT_specification,
17785 bool has_abstract_origin =
17786 die_die_attribute(die, DW_AT_abstract_origin,
17787 abstract_origin_die, true);
17788 if (has_spec || has_abstract_origin)
17793 : &abstract_origin_die;
17795 has_abstract_origin
17796 ? &abstract_origin_die
// Linkage names of the current DIE and of its interface DIE; they
// are compared below to detect clones.
17799 string linkage_name = die_linkage_name(die);
17800 string spec_linkage_name = die_linkage_name(interface_die);
17802 interface_scope = get_scope_for_die(ctxt, interface_die,
17803 called_from_public_decl,
17805 if (interface_scope)
17808 is_decl(build_ir_node_from_die(ctxt,
17810 interface_scope.get(),
17811 called_from_public_decl,
17815 fn = dynamic_pointer_cast<function_decl>(d);
17816 if (has_abstract_origin
17817 && (linkage_name != spec_linkage_name))
17818 // The current DIE has 'd' as abstract origin,
17819 // and has a linkage name that is different
17820 // from the linkage name of 'd'. That
17821 // means, the current DIE represents a clone
// The scope is pushed on the read context's scope stack while the
// function is being built; it is popped further down.
17827 ctxt.scope_stack().push(scope);
17829 scope_decl* logical_scope =
17831 ? interface_scope.get()
17834 result = build_or_get_fn_decl_if_not_suppressed(ctxt, logical_scope,
17835 die, where_offset, fn);
17838 result = add_decl_to_scope(is_decl(result), logical_scope);
17840 fn = is_function_decl(result);
17841 if (fn && is_member_function(fn))
// Wrap the raw scope pointer in a shared pointer constructed with a
// no-op deleter.
17843 class_decl_sptr klass(static_cast<class_decl*>(logical_scope),
17844 sptr_utils::noop_deleter());
17846 finish_member_function_reading(die, fn, klass, ctxt);
17851 ctxt.maybe_add_fn_to_exported_decls(fn.get());
17852 ctxt.associate_die_to_decl(die, fn, where_offset,
17853 /*associate_by_repr=*/false);
17854 maybe_canonicalize_type(die, ctxt);
17857 ctxt.scope_stack().pop();
17861 case DW_TAG_formal_parameter:
17862 // We should not read this case as it should have been dealt
17863 // with by build_function_decl above.
17864 ABG_ASSERT_NOT_REACHED;
// No IR building code is visible for constants and enumerators in
// this view.
17866 case DW_TAG_constant:
17868 case DW_TAG_enumerator:
17871 case DW_TAG_partial_unit:
17872 case DW_TAG_imported_unit:
17873 // For now, the DIEs under these are read lazily when they are
17874 // referenced by a public decl DIE that is under a
17875 // DW_TAG_compile_unit, so we shouldn't get here.
17876 ABG_ASSERT_NOT_REACHED;
17878 // Other declaration we don't really intend to support yet.
17879 case DW_TAG_dwarf_procedure:
17880 case DW_TAG_imported_declaration:
17881 case DW_TAG_entry_point:
17883 case DW_TAG_lexical_block:
17884 case DW_TAG_unspecified_parameters:
17885 case DW_TAG_variant:
17886 case DW_TAG_common_block:
17887 case DW_TAG_common_inclusion:
17888 case DW_TAG_inheritance:
17889 case DW_TAG_inlined_subroutine:
17890 case DW_TAG_with_stmt:
17891 case DW_TAG_access_declaration:
17892 case DW_TAG_catch_block:
17893 case DW_TAG_friend:
17894 case DW_TAG_namelist:
17895 case DW_TAG_namelist_item:
17896 case DW_TAG_template_type_parameter:
17897 case DW_TAG_template_value_parameter:
17898 case DW_TAG_try_block:
17899 case DW_TAG_variant_part:
17900 case DW_TAG_imported_module:
17901 case DW_TAG_condition:
17902 case DW_TAG_type_unit:
17903 case DW_TAG_template_alias:
17904 case DW_TAG_lo_user:
17905 case DW_TAG_MIPS_loop:
17906 case DW_TAG_format_label:
17907 case DW_TAG_function_template:
17908 case DW_TAG_class_template:
17909 case DW_TAG_GNU_BINCL:
17910 case DW_TAG_GNU_EINCL:
17911 case DW_TAG_GNU_template_template_param:
17912 case DW_TAG_GNU_template_parameter_pack:
17913 case DW_TAG_GNU_formal_parameter_pack:
17914 case DW_TAG_GNU_call_site:
17915 case DW_TAG_GNU_call_site_parameter:
17916 case DW_TAG_hi_user:
// Whatever was built is associated with the DIE as a declaration,
// except for function types (DW_TAG_subroutine_type).
17921 if (result && tag != DW_TAG_subroutine_type)
17922 ctxt.associate_die_to_decl(die, is_decl(result), where_offset,
17923 /*associate_by_repr=*/false);
17928 /// Build the IR node for a void type.
///
/// The void type is taken from the environment of the read context.
/// Its declaration is added to the global scope of the current
/// translation unit if it does not have a scope yet.
///
17930 /// @param ctxt the read context to use.
17932 /// @return the declaration of the void type.
17933 static decl_base_sptr
17934 build_ir_node_for_void_type(read_context& ctxt)
17936 ir::environment* env = ctxt.env();
17938 type_base_sptr t = env->get_void_type();
17939 decl_base_sptr type_declaration = get_type_declaration(t);
// Only attach the declaration to a scope when it has none.
17940 if (!has_scope(type_declaration))
17941 add_decl_to_scope(type_declaration,
17942 ctxt.cur_transl_unit()->get_global_scope());
17944 return type_declaration;
17947 /// Build an IR node from a given DIE and add the node to the current
17948 /// IR being built and held in the read_context. Doing that is called
17949 /// "emitting an IR node for the DIE".
///
/// This overload determines the scope of the DIE by itself and then
/// delegates to the overload that takes an explicit scope: for a C
/// translation unit the global scope of the read context is used,
/// otherwise the scope is computed with get_scope_for_die().
///
17951 /// @param ctxt the read context.
17953 /// @param die the DIE to consider.
17955 /// @param called_from_public_decl set to true if this function is
17956 /// called from the functions used to build a public decl (functions
17957 /// and variables). In that case, this function accepts building IR
17958 /// nodes representing types. Otherwise, this function only creates
17959 /// IR nodes representing public decls (functions and variables).
17960 /// This is done to avoid emitting IR nodes for types that are not
17961 /// referenced by public functions or variables.
17963 /// @param where_offset the offset of the DIE where we are "logically"
17964 /// positioned at, in the DIE tree. This is useful when @p die is
17965 /// e.g. DW_TAG_partial_unit that can be included in several places in
17968 /// @return the resulting IR node.
17969 static type_or_decl_base_sptr
17970 build_ir_node_from_die(read_context& ctxt,
17972 bool called_from_public_decl,
17973 size_t where_offset)
// Early exit with an empty declaration.
// NOTE(review): the condition guarding this return is not visible in
// this view.
17976 return decl_base_sptr();
// In C, the global scope of the read context is used as the scope of
// the DIE.
17978 if (is_c_language(ctxt.cur_transl_unit()->get_language()))
17980 const scope_decl_sptr& scop = ctxt.global_scope();
17981 return build_ir_node_from_die(ctxt, die, scop.get(),
17982 called_from_public_decl,
// For the other languages, compute the scope from the DIE.
17986 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
17987 called_from_public_decl,
17989 return build_ir_node_from_die(ctxt, die, scope.get(),
17990 called_from_public_decl,
// Bitwise OR of two status values: both operands are converted to
// unsigned, OR-ed, and the result is converted back to status.
17995 operator|(status l, status r)
17997 return static_cast<status>(static_cast<unsigned>(l)
17998 | static_cast<unsigned>(r));
// Bitwise AND of two status values: both operands are converted to
// unsigned, AND-ed, and the result is converted back to status.
18002 operator&(status l, status r)
18004 return static_cast<status>(static_cast<unsigned>(l)
18005 & static_cast<unsigned>(r));
// Compound OR-assignment on a status; the left operand is taken by
// non-const reference.
// NOTE(review): the body is not visible in this view; presumably it
// stores the OR of both operands into l -- confirm.
18009 operator|=(status& l, status r)
// Compound AND-assignment on a status; the left operand is taken by
// non-const reference.
// NOTE(review): the body is not visible in this view; presumably it
// stores the AND of both operands into l -- confirm.
18016 operator&=(status& l, status r)
18022 /// Build a diagnostic string made of English sentences that describe
18023 /// the problems encoded in a given abigail::dwarf_reader::status, if
18024 /// there is an error.
18026 /// @param s the status to diagnose
18028 /// @return a string containing sentences that describe the possible
18029 /// errors encoded in @p s. If there is no error to encode, then the
18030 /// empty string is returned.
18032 status_to_diagnostic_string(status s)
// Each error bit set in s appends one newline-terminated sentence;
// several bits can be set at once.
18036 if (s & STATUS_DEBUG_INFO_NOT_FOUND)
18037 str += "could not find debug info\n";
18039 if (s & STATUS_ALT_DEBUG_INFO_NOT_FOUND)
18040 str += "could not find alternate debug info\n";
18042 if (s & STATUS_NO_SYMBOLS_FOUND)
18043 str += "could not load ELF symbols\n";
18048 /// Create a dwarf_reader::read_context.
18050 /// @param elf_path the path to the elf file the context is to be used for.
18052 /// @param debug_info_root_paths a vector of pointers to the paths to the
18053 /// root directories under which the debug info is to be found for @p
18054 /// elf_path. Leave this to NULL if the debug info is not in a split
18057 /// @param environment the environment used by the current context.
18058 /// This environment contains resources needed by the reader and by
18059 /// the types and declarations that are to be created later. Note
18060 /// that ABI artifacts that are to be compared all need to be created
18061 /// within the same environment.
18063 /// Please also note that the life time of this environment object
18064 /// must be greater than the life time of the resulting @ref
18065 /// read_context because the context uses resources that are allocated in the
18068 /// @param load_all_types if set to false only the types that are
18069 /// reachable from publicly exported declarations (of functions and
18070 /// variables) are read. If set to true then all types found in the
18071 /// debug information are loaded.
18073 /// @param linux_kernel_mode if set to true, then consider the special
18074 /// linux kernel symbol tables when determining if a symbol is
18075 /// exported or not.
18077 /// @return a smart pointer to the resulting dwarf_reader::read_context.
18079 create_read_context(const std::string& elf_path,
18080 const vector<char**>& debug_info_root_paths,
18081 ir::environment* environment,
18082 bool load_all_types,
18083 bool linux_kernel_mode)
18085 // Allocate the read_context, forwarding all the arguments to its
18086 // constructor; the smart pointer owns the new object.
18087 read_context_sptr result(new read_context(elf_path, debug_info_root_paths,
18088 environment, load_all_types,
18089 linux_kernel_mode));
18093 /// Getter for the path to the binary this @ref read_context is for.
///
/// @param ctxt the read context to get the path from.
///
18095 /// @return the path to the binary the @ref read_context is for.
18097 read_context_get_path(const read_context& ctxt)
18098 {return ctxt.elf_path();}
18100 /// Re-initialize a read_context so that it can be re-used to read
18101 /// another binary.
18103 /// @param ctxt the context to re-initialize.
18105 /// @param elf_path the path to the elf file the context is to be used
18108 /// @param debug_info_root_path a vector of pointers to the paths to the
18109 /// root directories under which the debug info is to be found for @p
18110 /// elf_path. Leave this to NULL if the debug info is not in a split
18113 /// @param environment the environment used by the current context.
18114 /// This environment contains resources needed by the reader and by
18115 /// the types and declarations that are to be created later. Note
18116 /// that ABI artifacts that are to be compared all need to be created
18117 /// within the same environment.
18119 /// Please also note that the life time of this environment object
18120 /// must be greater than the life time of the resulting @ref
18121 /// read_context because the context uses resources that are allocated in the
18124 /// @param read_all_types if set to false only the types that are
18125 /// reachable from publicly exported declarations (of functions and
18126 /// variables) are read. If set to true then all types found in the
18127 /// debug information are loaded.
18129 /// @param linux_kernel_mode if set to true, then consider the special
18130 /// linux kernel symbol tables when determining if a symbol is
18131 /// exported or not.
18133 /// @return a smart pointer to the resulting dwarf_reader::read_context.
// NOTE(review): the visible body only re-initializes ctxt in place and
// shows no return statement; confirm that the "@return" entry above
// matches the actual return type of this function.
18135 reset_read_context(read_context_sptr &ctxt,
18136 const std::string& elf_path,
18137 const vector<char**>& debug_info_root_path,
18138 ir::environment* environment,
18139 bool read_all_types,
18140 bool linux_kernel_mode)
// All the arguments are forwarded to read_context::initialize.
18143 ctxt->initialize(elf_path, debug_info_root_path, environment,
18144 read_all_types, linux_kernel_mode);
18147 /// Add suppressions specifications to the set of suppressions to be
18148 /// used during the construction of the ABI internal representation
18149 /// (the ABI corpus) from ELF and DWARF.
18151 /// During the construction of the ABI corpus, ABI artifacts that
18152 /// match a given suppression specification are dropped on the
18153 /// floor; that is, they are discarded and won't be part of the final
18154 /// ABI corpus. This is a way to reduce the amount of data held by
18155 /// the final ABI corpus.
18157 /// Note that the suppression specifications provided to this function
18158 /// are only considered during the construction of the ABI corpus.
18159 /// For instance, they are not taken into account during e.g.
18160 /// comparisons of two ABI corpora that might happen later. If you
18161 /// want to apply suppression specifications to the comparison (or
18162 /// reporting) of ABI corpora please refer to the documentation of the
18163 /// @ref diff_context type to learn how to set suppressions that are
18164 /// to be used in that context.
18166 /// @param ctxt the context that is going to be used by functions that
18167 /// read ELF and DWARF information to construct an ABI corpus.
18169 /// @param supprs the suppression specifications to be applied during
18170 /// the construction of the ABI corpus.
18172 add_read_context_suppressions(read_context& ctxt,
18173 const suppr::suppressions_type& supprs)
18175 for (suppr::suppressions_type::const_iterator i = supprs.begin();
// Only the suppression specifications that drop artifacts from the IR
// are appended to the suppressions of the read context; the other
// ones are ignored here.
18178 if ((*i)->get_drops_artifact_from_ir())
18179 ctxt.get_suppressions().push_back(*i);
18182 /// Set the @ref corpus_group being created to the current read context.
18184 /// @param ctxt the read_context to consider.
18186 /// @param group the @ref corpus_group to set.
18188 set_read_context_corpus_group(read_context& ctxt,
18189 corpus_group_sptr& group)
// The group is stored directly into the cur_corpus_group_ data member
// of the read context.
18191 ctxt.cur_corpus_group_ = group;
18194 /// Read all @ref abigail::translation_unit possible from the debug info
18195 /// accessible from an elf file, stuff them into a libabigail ABI
18196 /// Corpus and return it.
18198 /// @param ctxt the context to use for reading the elf file.
18200 /// @param status output parameter. The status of the read; it is
/// reset to STATUS_UNKNOWN first, then the relevant status bits are
/// OR-ed into it.
18202 /// @return the resulting corpus, or an empty corpus pointer if the
/// binary cannot be handled.
18204 read_corpus_from_elf(read_context& ctxt, status& status)
18206 status = STATUS_UNKNOWN;
18208 // Load debug info from the elf path.
18209 if (!ctxt.load_debug_info())
18210 status |= STATUS_DEBUG_INFO_NOT_FOUND;
// The binary refers to an alternate debug info file, but that file
// was not loaded.
18213 string alt_di_path;
18214 if (refers_to_alt_debug_info(ctxt, alt_di_path) && !ctxt.alt_dwarf())
18215 status |= STATUS_ALT_DEBUG_INFO_NOT_FOUND;
// ELF properties and symbol maps are only loaded when the symbol
// table is not to be ignored.
18218 if (!get_ignore_symbol_table(ctxt))
18220 ctxt.load_elf_properties();
18221 // Read the symbols for publicly defined decls
18222 if (!ctxt.load_symbol_maps())
18223 status |= STATUS_NO_SYMBOLS_FOUND;
18226 if (// If no elf symbol was found ...
18227 status & STATUS_NO_SYMBOLS_FOUND
18228 // ... or if debug info was found but not the required alternate
18230 || ((status & STATUS_ALT_DEBUG_INFO_NOT_FOUND)
18231 && !(status & STATUS_DEBUG_INFO_NOT_FOUND)))
18232 // ... then we cannot handle the binary.
18233 return corpus_sptr();
18235 // Read the variable and function descriptions from the debug info
18236 // we have, through the dwfl handle.
18237 corpus_sptr corp = read_debug_info_into_corpus(ctxt);
// STATUS_OK is OR-ed in, so error bits set above are preserved.
18239 status |= STATUS_OK;
18244 /// Read a corpus and add it to a given @ref corpus_group.
///
/// The corpus is read with read_corpus_from_elf(). It is added to
/// @p group only if the STATUS_OK bit is set in @p status after the
/// read and if the corpus does not already belong to a group.
///
18246 /// @param ctxt the reading context to consider.
18248 /// @param group the @ref corpus_group to add the new corpus to.
18250 /// @param status output parameter. The status of the read. It is set
18251 /// by this function upon its completion.
18253 read_and_add_corpus_to_group_from_elf(read_context& ctxt,
18254 corpus_group& group,
// NOTE(review): the statements that assign and return 'result' are
// not visible in this view.
18257 corpus_sptr result;
18258 corpus_sptr corp = read_corpus_from_elf(ctxt, status);
18259 if (status & STATUS_OK)
// Do not add a corpus that is already part of a group.
18261 if (!corp->get_group())
18262 group.add_corpus(corp);
18269 /// Read all @ref abigail::translation_unit possible from the debug info
18270 /// accessible from an elf file, stuff them into a libabigail ABI
18271 /// Corpus and return it.
///
/// This overload creates a read context for @p elf_path and delegates
/// to the overload that takes a @ref read_context.
///
18273 /// @param elf_path the path to the elf file.
18275 /// @param debug_info_root_paths a vector of pointers to root paths
18276 /// under which to look for the debug info of the elf files that are
18277 /// later handled by the Dwfl. This is for cases where the debug info is
18278 /// split into a different file from the binary we want to inspect.
18279 /// On Red Hat compatible systems, this root path is usually
18280 /// /usr/lib/debug by default. If this argument is set to NULL, then
18281 /// "./debug" and /usr/lib/debug will be searched for sub-directories
18282 /// containing the debug info file.
18284 /// @param environment the environment used by the current context.
18285 /// This environment contains resources needed by the reader and by
18286 /// the types and declarations that are to be created later. Note
18287 /// that ABI artifacts that are to be compared all need to be created
18288 /// within the same environment. Also, the lifetime of the
18289 /// environment must be greater than the lifetime of the resulting
18290 /// corpus because the corpus uses resources that are allocated in the
18293 /// @param load_all_types if set to false only the types that are
18294 /// reachable from publicly exported declarations (of functions and
18295 /// variables) are read. If set to true then all types found in the
18296 /// debug information are loaded.
18298 /// @param status output parameter. The status of the read, as set by
/// the overload of read_corpus_from_elf that takes a read context.
18300 /// @return the result of the overload of read_corpus_from_elf that
/// takes a read context.
18302 read_corpus_from_elf(const std::string& elf_path,
18303 const vector<char**>& debug_info_root_paths,
18304 ir::environment* environment,
18305 bool load_all_types,
// A read context is created for this call only and is used to do the
// actual read.
18308 read_context_sptr c = create_read_context(elf_path,
18309 debug_info_root_paths,
18312 read_context& ctxt = *c;
18313 return read_corpus_from_elf(ctxt, status);
18316 /// Look into the symbol tables of a given elf file and see if we find
18317 /// a given symbol.
///
/// The file is opened read-only and an Elf handle is created on it;
/// the lookup itself is done by the overload that takes an Elf handle.
///
18319 /// @param env the environment we are operating from.
18321 /// @param elf_path the path to the elf file to consider.
18323 /// @param symbol_name the name of the symbol to look for.
18325 /// @param demangle if true, try to demangle the symbol name found in
18326 /// the symbol table.
18328 /// @param syms the vector of symbols found with the name @p symbol_name.
18330 /// @return true iff the symbol was found among the publicly exported
18331 /// symbols of the ELF file.
18333 lookup_symbol_from_elf(const environment* env,
18334 const string& elf_path,
18335 const string& symbol_name,
18337 vector<elf_symbol_sptr>& syms)
// libelf requires elf_version() to be called before any other libelf
// function.
18340 if (elf_version(EV_CURRENT) == EV_NONE)
// NOTE(review): the error paths and the cleanup of fd and elf are not
// visible in this view; check that every return taken after this
// open() closes the file descriptor.
18343 int fd = open(elf_path.c_str(), O_RDONLY);
18351 Elf* elf = elf_begin(fd, ELF_C_READ, 0);
18355 bool value = lookup_symbol_from_elf(env, elf, symbol_name,
18363 /// Look into the symbol tables of an elf file to see if a public
18364 /// function of a given name is found.
///
/// The file is opened read-only and an Elf handle is created on it;
/// the lookup itself is done by the overload that takes an Elf handle.
///
18366 /// @param env the environment we are operating from.
18368 /// @param path the path to the elf file to consider.
18370 /// @param symname the name of the function to look for.
18372 /// @param syms the vector of public function symbols found with the
18373 /// name @p symname.
18375 /// @return true iff a function with symbol name @p symname is
18378 lookup_public_function_symbol_from_elf(const environment* env,
18379 const string& path,
18380 const string& symname,
18381 vector<elf_symbol_sptr>& syms)
// libelf requires elf_version() to be called before any other libelf
// function.
18383 if (elf_version(EV_CURRENT) == EV_NONE)
// NOTE(review): the error paths and the cleanup of fd and elf are not
// visible in this view; check that every return taken after this
// open() closes the file descriptor.
18386 int fd = open(path.c_str(), O_RDONLY);
18394 Elf* elf = elf_begin(fd, ELF_C_READ, 0);
18398 bool value = lookup_public_function_symbol_from_elf(env, elf, symname, syms);
18405 /// Check if the underlying elf file refers to an alternate debug info
18406 /// file associated to it.
18408 /// Note that "alternate debug info sections" is a GNU extension as
18409 /// of DWARF4 and is described at
18410 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
18412 /// @param ctxt the context used to read the elf file.
18414 /// @param alt_di_path the path to the alternate debug info file. This is
18415 /// set iff the function returns true.
18417 /// @return true if the ELF file refers to an alternate debug info
18420 refers_to_alt_debug_info(const read_context& ctxt,
18421 string& alt_di_path)
// The read context is considered to refer to an alternate debug info
// file when it holds a non-empty path for it.
18423 if (!ctxt.alt_debug_info_path().empty())
18425 alt_di_path = ctxt.alt_debug_info_path();
18431 /// Check if the underlying elf file has an alternate debug info file
18432 /// associated to it.
18434 /// Note that "alternate debug info sections" is a GNU extension as
18435 /// of DWARF4 and is described at
18436 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
18438 /// @param ctxt the read_context to use to handle the underlying elf file.
18440 /// @param has_alt_di out parameter. This is set to true upon
18441 /// successful completion of the function iff an alternate debug info
18442 /// file was found, false otherwise. Note that this parameter is set
18443 /// only if the function returns STATUS_OK.
18445 /// @param alt_debug_info_path if the function returned STATUS_OK and
18446 /// if @p has_alt_di has been set to true, then this parameter contains
18447 /// the path to the alternate debug info file found.
18449 /// @return STATUS_OK upon successful completion,
/// STATUS_DEBUG_INFO_NOT_FOUND if the debug info could not be loaded.
18451 has_alt_debug_info(read_context& ctxt,
18453 string& alt_debug_info_path)
18455 // Load debug info from the elf path.
18456 if (!ctxt.load_debug_info())
18457 return STATUS_DEBUG_INFO_NOT_FOUND;
// An alternate debug info file was found iff the read context holds
// an alternate DWARF handle.
18459 if (ctxt.alt_dwarf())
18462 alt_debug_info_path = ctxt.alt_debug_info_path();
18465 has_alt_di = false;
18470 /// Check if a given elf file has an alternate debug info file
18471 /// associated to it.
18473 /// Note that "alternate debug info sections" is a GNU extension as
18474 /// of DWARF4 and is described at
18475 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
18477 /// @param elf_path the path to the elf file to consider.
18479 /// @param debug_info_root_path a pointer to the root directory under
/// which the split debug info
18480 /// file associated to elf_path is to be found. This has to be NULL
18481 /// if the debug info file is not in a split file.
18483 /// @param has_alt_di out parameter. This is set to true upon
18484 /// successful completion of the function iff an alternate debug info
18485 /// file was found, false otherwise. Note that this parameter is set
18486 /// only if the function returns STATUS_OK.
18488 /// @param alt_debug_info_path if the function returned STATUS_OK and
18489 /// if @p has_alt_di has been set to true, then this parameter contains
18490 /// the path to the alternate debug info file found.
18492 /// @return STATUS_OK upon successful completion,
/// STATUS_DEBUG_INFO_NOT_FOUND if the debug info could not be loaded.
18494 has_alt_debug_info(const string& elf_path,
18495 char** debug_info_root_path,
18497 string& alt_debug_info_path)
// A read context is created for this call only, with the single root
// path given and without any environment (0).
18499 vector<char**> di_roots;
18500 di_roots.push_back(debug_info_root_path);
18501 read_context_sptr c = create_read_context(elf_path, di_roots, 0);
18502 read_context& ctxt = *c;
18504 // Load debug info from the elf path.
18505 if (!ctxt.load_debug_info())
18506 return STATUS_DEBUG_INFO_NOT_FOUND;
// An alternate debug info file was found iff the read context holds
// an alternate DWARF handle.
18508 if (ctxt.alt_dwarf())
18511 alt_debug_info_path = ctxt.alt_debug_info_path();
18514 has_alt_di = false;
18519 /// Fetch the SONAME ELF property from an ELF binary file.
///
/// The program headers of the file are walked to find the PT_DYNAMIC
/// segment; the entries of the matching dynamic section are then
/// scanned for a DT_SONAME entry.
///
18521 /// @param path The path to the elf file to consider.
18523 /// @param soname out parameter. Set to the SONAME property of the
18524 /// binary file, if it is present in the ELF file.
18526 /// @return false if an error occurred while looking for the SONAME
18527 /// property in the binary, true otherwise.
18529 get_soname_of_elf_file(const string& path, string &soname)
18532 int fd = open(path.c_str(), O_RDONLY);
18536 elf_version (EV_CURRENT);
18537 Elf* elf = elf_begin (fd, ELF_C_READ_MMAP, NULL);
18539 GElf_Ehdr ehdr_mem;
18540 GElf_Ehdr* ehdr = gelf_getehdr (elf, &ehdr_mem);
// Walk the program headers, looking for the dynamic segment.
18544 for (int i = 0; i < ehdr->e_phnum; ++i)
18546 GElf_Phdr phdr_mem;
18547 GElf_Phdr* phdr = gelf_getphdr (elf, i, &phdr_mem);
18549 if (phdr != NULL && phdr->p_type == PT_DYNAMIC)
// Get the section that starts at the file offset of the segment.
18551 Elf_Scn* scn = gelf_offscn (elf, phdr->p_offset);
18552 GElf_Shdr shdr_mem;
18553 GElf_Shdr* shdr = gelf_getshdr (scn, &shdr_mem);
// Number of dynamic entries to scan; unbounded (INT_MAX) when the
// section header could not be read.
// NOTE(review): a zero sh_entsize would make this a division by zero
// -- confirm whether that can happen for the files handled here.
18554 int maxcnt = (shdr != NULL
18555 ? shdr->sh_size / shdr->sh_entsize : INT_MAX);
18556 ABG_ASSERT (shdr == NULL || shdr->sh_type == SHT_DYNAMIC);
18557 Elf_Data* data = elf_getdata (scn, NULL);
18561 for (int cnt = 0; cnt < maxcnt; ++cnt)
18564 GElf_Dyn* dyn = gelf_getdyn (data, cnt, &dynmem);
// DT_NULL is tested for first, then every entry that is not
// DT_SONAME is filtered out.
18568 if (dyn->d_tag == DT_NULL)
18571 if (dyn->d_tag != DT_SONAME)
// NOTE(review): shdr is dereferenced here although the code above
// explicitly allows it to be NULL; also elf_strptr can return NULL on
// error, which must not be assigned to a std::string -- confirm and
// guard if needed.
18574 soname = elf_strptr (elf, shdr->sh_link, dyn->d_un.d_val);
18587 /// Get the type of a given elf file.
18589 /// @param path the absolute path to the ELF file to be analyzed.
18591 /// @param type out parameter. Is set to the kind of the ELF file
18593 /// designated by @p path.
18594 /// This parameter is set iff the function returns true.
18596 /// @return true iff the file could be opened and analyzed.
18598 get_type_of_elf_file(const string& path, elf_type& type)
18600 int fd = open(path.c_str(), O_RDONLY);
18604 elf_version (EV_CURRENT);
// NOTE(review): no check of the elf_begin result is visible before it
// is passed to elf_file_type -- confirm that a NULL handle is handled.
18605 Elf *elf = elf_begin (fd, ELF_C_READ_MMAP, NULL);
18606 type = elf_file_type(elf);
18613 }// end namespace dwarf_reader
18615 }// end namespace abigail