3 // Copyright (C) 2013-2019 Red Hat, Inc.
5 // This file is part of the GNU Application Binary Interface Generic
6 // Analysis and Instrumentation Library (libabigail). This library is
7 // free software; you can redistribute it and/or modify it under the
8 // terms of the GNU Lesser General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option) any
12 // This library is distributed in the hope that it will be useful, but
13 // WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // General Lesser Public License for more details.
17 // You should have received a copy of the GNU Lesser General Public
18 // License along with this program; see the file COPYING-LGPLV3. If
19 // not, see <http://www.gnu.org/licenses/>.
21 // Author: Dodji Seketeli
25 /// This file contains the definitions of the entry points to
26 /// de-serialize an instance of @ref abigail::corpus from a file in
27 /// elf format, containing dwarf information.
30 #include <sys/types.h>
39 #include <elfutils/libdwfl.h>
48 #include "abg-cxx-compat.h"
49 #include "abg-ir-priv.h"
50 #include "abg-suppression-priv.h"
51 #include "abg-corpus-priv.h"
53 #include "abg-internal.h"
54 // <headers defining libabigail's API go under here>
55 ABG_BEGIN_EXPORT_DECLARATIONS
57 #include "abg-dwarf-reader.h"
58 #include "abg-sptr-utils.h"
59 #include "abg-tools-utils.h"
61 ABG_END_EXPORT_DECLARATIONS
62 // </headers defining libabigail's API>
65 #define UINT64_MAX 0xffffffffffffffff
75 /// The namespace for the DWARF reader.
76 namespace dwarf_reader
79 using abg_compat::dynamic_pointer_cast;
80 using abg_compat::static_pointer_cast;
81 using abg_compat::unordered_map;
82 using abg_compat::unordered_set;
87 /// Where a DIE comes from. For instance, a DIE can come from the main
88 /// debug info section, the alternate debug info section or from the
89 /// type unit section.
92 NO_DEBUG_INFO_DIE_SOURCE,
93 PRIMARY_DEBUG_INFO_DIE_SOURCE,
94 ALT_DEBUG_INFO_DIE_SOURCE,
96 NUMBER_OF_DIE_SOURCES, // This one must always be the last enumerator
100 /// Prefix increment operator for @ref die_source.
102 /// @param source the die_source to increment.
103 /// @return the incremented source.
105 operator++(die_source& source)
107 source = static_cast<die_source>(source + 1);
111 /// A functor used by @ref dwfl_sptr.
115 operator()(Dwfl* dwfl)
117 };//end struct dwfl_deleter
119 /// A convenience typedef for a shared pointer to a Dwfl.
120 typedef shared_ptr<Dwfl> dwfl_sptr;
122 /// A convenience typedef for a vector of Dwarf_Off.
123 typedef vector<Dwarf_Off> dwarf_offsets_type;
125 /// Convenience typedef for a map which key is the offset of a dwarf
126 /// die and which value is the corresponding artefact.
127 typedef unordered_map<Dwarf_Off, type_or_decl_base_sptr> die_artefact_map_type;
129 /// Convenience typedef for a map which key is the offset of a dwarf
130 /// die, (given by dwarf_dieoffset()) and which value is the
131 /// corresponding class_decl.
132 typedef unordered_map<Dwarf_Off, class_decl_sptr> die_class_map_type;
134 /// Convenience typedef for a map which key is the offset of a dwarf
135 /// die, (given by dwarf_dieoffset()) and which value is the
136 /// corresponding class_or_union_sptr.
137 typedef unordered_map<Dwarf_Off, class_or_union_sptr> die_class_or_union_map_type;
139 /// Convenience typedef for a map which key is the offset of a dwarf die
140 /// and which value is the corresponding function_decl.
141 typedef unordered_map<Dwarf_Off, function_decl_sptr> die_function_decl_map_type;
143 /// Convenience typedef for a map which key is the offset of a dwarf
144 /// die and which value is the corresponding function_type.
145 typedef unordered_map<Dwarf_Off, function_type_sptr> die_function_type_map_type;
147 /// Convenience typedef for a map which key is the offset of a
148 /// DW_TAG_compile_unit and the value is the corresponding @ref
149 /// translation_unit_sptr.
150 typedef unordered_map<Dwarf_Off, translation_unit_sptr> die_tu_map_type;
152 /// Convenience typedef for a map which key is the offset of a DIE and
153 /// the value is the corresponding qualified name of the DIE.
154 typedef unordered_map<Dwarf_Off, interned_string> die_istring_map_type;
156 /// Convenience typedef for a map which key is an interned_string and
157 /// which value is a vector of offsets.
158 typedef unordered_map<interned_string,
160 hash_interned_string>
161 istring_dwarf_offsets_map_type;
163 /// Convenience typedef for a map which key is an elf address and
164 /// which value is an elf_symbol_sptr.
165 typedef unordered_map<GElf_Addr, elf_symbol_sptr> addr_elf_symbol_sptr_map_type;
167 /// Convenience typedef for a set of ELF addresses.
168 typedef unordered_set<GElf_Addr> address_set_type;
170 typedef unordered_set<interned_string, hash_interned_string> istring_set_type;
172 /// Convenience typedef for a shared pointer to an @ref address_set_type.
173 typedef shared_ptr<address_set_type> address_set_sptr;
175 /// Convenience typedef for a shared pointer to an
176 /// addr_elf_symbol_sptr_map_type.
177 typedef shared_ptr<addr_elf_symbol_sptr_map_type> addr_elf_symbol_sptr_map_sptr;
179 /// Convenience typedef for a map that associates an @ref
180 /// interned_string to a @ref function_type_sptr.
181 typedef unordered_map<interned_string,
183 hash_interned_string> istring_fn_type_map_type;
185 /// Convenience typedef for a stack containing the scopes up to the
186 /// current point in the abigail Internal Representation (aka IR) tree
187 /// that is being built.
188 typedef stack<scope_decl*> scope_stack_type;
190 /// Convenience typedef for a map which key is a dwarf offset. The
191 /// value is also a dwarf offset.
192 typedef unordered_map<Dwarf_Off, Dwarf_Off> offset_offset_map_type;
194 /// Convenience typedef for a map which key is a string and which
195 /// value is a vector of smart pointer to a class.
196 typedef unordered_map<string, classes_type> string_classes_map;
198 /// The abstraction of the place where a partial unit has been
199 /// imported. This is what the DW_TAG_imported_unit DIE expresses.
201 /// This type thus contains:
202 /// - the offset to which the partial unit is imported
203 /// - the offset of the imported partial unit.
204 /// - the offsets of the CU DIE and of the child DIE of that imported unit.
205 struct imported_unit_point
207 Dwarf_Off offset_of_import;
208 // The member below tells where the imported unit comes from (e.g.
209 // the primary or the alternate debug info); see @ref die_source.
210 die_source imported_unit_die_source;
211 Dwarf_Off imported_unit_die_off;
212 Dwarf_Off imported_unit_cu_off;
213 Dwarf_Off imported_unit_child_off;
215 /// Default constructor for the type @ref imported_unit_point.
216 imported_unit_point ()
217 : offset_of_import(),
218 imported_unit_die_source(PRIMARY_DEBUG_INFO_DIE_SOURCE),
219 imported_unit_die_off(),
220 imported_unit_cu_off(),
221 imported_unit_child_off()
224 /// Constructor of the type @ref imported_unit_point.
226 /// @param import_off the offset of the point at which the unit has
228 imported_unit_point (Dwarf_Off import_off)
229 : offset_of_import(import_off),
230 imported_unit_die_source(PRIMARY_DEBUG_INFO_DIE_SOURCE),
231 imported_unit_die_off(),
232 imported_unit_cu_off(),
233 imported_unit_child_off()
236 /// Constructor of the type @ref imported_unit_point.
238 /// @param import_off the offset of the point at which the unit has
241 /// @param from where the imported DIE comes from.
243 /// @param imported_die the die of the unit that has been imported.
244 imported_unit_point (Dwarf_Off import_off,
245 const Dwarf_Die& imported_die,
247 : offset_of_import(import_off),
248 imported_unit_die_source(from),
249 imported_unit_die_off(dwarf_dieoffset
250 (const_cast<Dwarf_Die*>(&imported_die))),
251 imported_unit_cu_off(),
252 imported_unit_child_off()
254 Dwarf_Die imported_unit_child;
256 dwarf_child(const_cast<Dwarf_Die*>(&imported_die),
257 &imported_unit_child);
258 imported_unit_child_off =
259 dwarf_dieoffset(const_cast<Dwarf_Die*>(&imported_unit_child));
261 Dwarf_Die cu_die_memory;
264 cu_die = dwarf_diecu(const_cast<Dwarf_Die*>(&imported_unit_child),
265 &cu_die_memory, 0, 0);
266 imported_unit_cu_off = dwarf_dieoffset(cu_die);
268 }; // struct imported_unit_point
270 /// Convenience typedef for a vector of @ref imported_unit_point.
271 typedef vector<imported_unit_point> imported_unit_points_type;
273 /// Convenience typedef for a map which key is a Dwarf_Off and which value is a vector of @ref imported_unit_point.
274 typedef unordered_map<Dwarf_Off, imported_unit_points_type>
275 tu_die_imported_unit_points_map_type;
277 /// "Less than" operator for instances of @ref imported_unit_point
280 /// @param l the left hand side operand of the "Less than" operator.
282 /// @param r the right hand side operand of the "Less than" operator.
284 /// @return true iff the offset_of_import of @p l is less than that of @p r.
286 operator<(const imported_unit_point& l, const imported_unit_point& r)
287 {return l.offset_of_import < r.offset_of_import;}
290 add_symbol_to_map(const elf_symbol_sptr& sym,
291 string_elf_symbols_map_type& map);
294 find_symbol_table_section(Elf* elf_handle, Elf_Scn*& section);
297 get_symbol_versionning_sections(Elf* elf_handle,
298 Elf_Scn*& versym_section,
299 Elf_Scn*& verdef_section,
300 Elf_Scn*& verneed_section);
303 get_parent_die(const read_context& ctxt,
304 const Dwarf_Die* die,
305 Dwarf_Die& parent_die,
306 size_t where_offset);
309 get_scope_die(const read_context& ctxt,
310 const Dwarf_Die* die,
312 Dwarf_Die& scope_die);
315 die_is_anonymous(const Dwarf_Die* die);
318 die_is_type(const Dwarf_Die* die);
321 die_is_decl(const Dwarf_Die* die);
324 die_is_namespace(const Dwarf_Die* die);
327 die_is_unspecified(Dwarf_Die* die);
330 die_is_void_type(Dwarf_Die* die);
333 die_is_pointer_type(const Dwarf_Die* die);
336 pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die* die);
339 die_is_reference_type(const Dwarf_Die* die);
342 die_is_pointer_or_reference_type(const Dwarf_Die* die);
345 die_is_pointer_reference_or_typedef_type(const Dwarf_Die* die);
348 die_is_class_type(const Dwarf_Die* die);
351 die_is_qualified_type(const Dwarf_Die* die);
354 die_is_function_type(const Dwarf_Die *die);
357 die_has_object_pointer(const Dwarf_Die* die,
358 Dwarf_Die& object_pointer);
361 die_this_pointer_from_object_pointer(Dwarf_Die* die,
362 Dwarf_Die& this_pointer);
365 die_this_pointer_is_const(Dwarf_Die* die);
368 die_object_pointer_is_for_const_method(Dwarf_Die* die);
371 die_is_at_class_scope(const read_context& ctxt,
372 const Dwarf_Die* die,
374 Dwarf_Die& class_scope_die);
376 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
379 bool& is_tls_address);
381 static translation_unit::language
382 dwarf_language_to_tu_language(size_t l);
385 die_unsigned_constant_attribute(const Dwarf_Die* die,
390 die_signed_constant_attribute(const Dwarf_Die*die,
395 die_constant_attribute(const Dwarf_Die *die,
397 array_type_def::subrange_type::bound_value &value);
400 die_attribute_has_form(const Dwarf_Die* die,
405 form_is_DW_FORM_strx(unsigned form);
408 die_attribute_is_signed(const Dwarf_Die* die, unsigned attr_name);
411 die_attribute_is_unsigned(const Dwarf_Die* die, unsigned attr_name);
414 die_attribute_has_no_signedness(const Dwarf_Die* die, unsigned attr_name);
417 die_address_attribute(Dwarf_Die* die, unsigned attr_name, Dwarf_Addr& result);
420 die_name(const Dwarf_Die* die);
423 die_location(const read_context& ctxt, const Dwarf_Die* die);
426 die_location_address(Dwarf_Die* die,
428 bool& is_tls_address);
431 die_die_attribute(const Dwarf_Die* die,
434 bool look_thru_abstract_origin = true);
437 get_internal_anonymous_die_prefix_name(const Dwarf_Die *die);
440 build_internal_anonymous_die_name(const string &base_name,
441 size_t anonymous_type_index);
445 get_internal_anonymous_die_name(Dwarf_Die *die,
446 size_t anonymous_type_index);
449 die_qualified_type_name(const read_context& ctxt,
450 const Dwarf_Die* die,
454 die_qualified_decl_name(const read_context& ctxt,
455 const Dwarf_Die* die,
459 die_qualified_name(const read_context& ctxt,
460 const Dwarf_Die* die,
464 die_qualified_type_name_empty(const read_context& ctxt,
465 const Dwarf_Die* die, size_t where,
466 string &qualified_name);
469 die_return_and_parm_names_from_fn_type_die(const read_context& ctxt,
470 const Dwarf_Die* die,
473 string &return_type_name,
475 vector<string>& parm_names,
480 die_function_signature(const read_context& ctxt,
481 const Dwarf_Die *die,
482 size_t where_offset);
485 die_peel_qual_ptr(Dwarf_Die *die, Dwarf_Die& peeled_die);
488 die_function_type_is_method_type(const read_context& ctxt,
489 const Dwarf_Die *die,
491 Dwarf_Die& object_pointer_die,
492 Dwarf_Die& class_die,
496 die_pretty_print_type(read_context& ctxt,
497 const Dwarf_Die* die,
498 size_t where_offset);
501 die_pretty_print_decl(read_context& ctxt,
502 const Dwarf_Die* die,
503 size_t where_offset);
506 die_pretty_print(read_context& ctxt,
507 const Dwarf_Die* die,
508 size_t where_offset);
511 maybe_canonicalize_type(const Dwarf_Die* die,
515 maybe_canonicalize_type(const type_base_sptr& t,
519 maybe_canonicalize_type(const Dwarf_Die* die,
520 const type_base_sptr& t,
524 get_default_array_lower_bound(translation_unit::language l);
527 find_lower_bound_in_imported_unit_points(const imported_unit_points_type&,
529 imported_unit_points_type::const_iterator&);
531 static array_type_def::subrange_sptr
532 build_subrange_type(read_context& ctxt,
533 const Dwarf_Die* die,
535 bool associate_type_to_die = true);
538 build_subranges_from_array_type_die(read_context& ctxt,
539 const Dwarf_Die* die,
540 array_type_def::subranges_type& subranges,
542 bool associate_type_to_die = true);
545 compare_dies(const read_context& ctxt,
546 const Dwarf_Die *l, const Dwarf_Die *r,
547 bool update_canonical_dies_on_the_fly);
549 /// Convert an elf symbol type (given by the ELF{32,64}_ST_TYPE
550 /// macros) into an elf_symbol::type value.
552 /// Note that this function aborts when given an unexpected value.
554 /// @param stt the symbol type value to convert.
556 /// @return the converted value.
557 static elf_symbol::type
558 stt_to_elf_symbol_type(unsigned char stt)
560 elf_symbol::type t = elf_symbol::NOTYPE_TYPE;
565 t = elf_symbol::NOTYPE_TYPE;
568 t = elf_symbol::OBJECT_TYPE;
571 t = elf_symbol::FUNC_TYPE;
574 t = elf_symbol::SECTION_TYPE;
577 t = elf_symbol::FILE_TYPE;
580 t = elf_symbol::COMMON_TYPE;
583 t = elf_symbol::TLS_TYPE;
586 t = elf_symbol::GNU_IFUNC_TYPE;
589 // An unknown value that probably ought to be supported? Let's
590 // abort right here rather than yielding garbage.
591 ABG_ASSERT_NOT_REACHED;
597 /// Convert an elf symbol binding (given by the ELF{32,64}_ST_BIND
598 /// macros) into an elf_symbol::binding value.
600 /// Note that this function aborts when given an unexpected value.
602 /// @param stb the symbol binding value to convert.
604 /// @return the converted value.
605 static elf_symbol::binding
606 stb_to_elf_symbol_binding(unsigned char stb)
608 elf_symbol::binding b = elf_symbol::GLOBAL_BINDING;
613 b = elf_symbol::LOCAL_BINDING;
616 b = elf_symbol::GLOBAL_BINDING;
619 b = elf_symbol::WEAK_BINDING;
622 b = elf_symbol::GNU_UNIQUE_BINDING;
625 ABG_ASSERT_NOT_REACHED;
632 /// Convert an ELF symbol visibility given by the symbols ->st_other
633 /// data member as returned by the GELF_ST_VISIBILITY macro into a
634 /// elf_symbol::visibility value.
636 /// @param stv the value of the ->st_other data member of the ELF
639 /// @return the converted elf_symbol::visibility value.
640 static elf_symbol::visibility
641 stv_to_elf_symbol_visibility(unsigned char stv)
644 elf_symbol::visibility v = elf_symbol::DEFAULT_VISIBILITY;
649 v = elf_symbol::DEFAULT_VISIBILITY;
652 v = elf_symbol::INTERNAL_VISIBILITY;
655 v = elf_symbol::HIDDEN_VISIBILITY;
658 v = elf_symbol::PROTECTED_VISIBILITY;
661 ABG_ASSERT_NOT_REACHED;
667 /// Convert the value of the e_machine field of GElf_Ehdr into a
668 /// string. This is to get a string representing the architecture of
669 /// the elf file at hand.
671 /// @param e_machine the value of GElf_Ehdr::e_machine.
673 /// @return the string representation of GElf_Ehdr::e_machine.
675 e_machine_to_string(GElf_Half e_machine)
681 result = "elf-no-arch";
684 result = "elf-att-we-32100";
687 result = "elf-sun-sparc";
690 result = "elf-intel-80386";
693 result = "elf-motorola-68k";
696 result = "elf-motorola-88k";
699 result = "elf-intel-80860";
702 result = "elf-mips-r3000-be";
705 result = "elf-ibm-s370";
708 result = "elf-mips-r3000-le";
711 result = "elf-hp-parisc";
714 result = "elf-fujitsu-vpp500";
717 result = "elf-sun-sparc-v8plus";
720 result = "elf-intel-80960";
723 result = "elf-powerpc";
726 result = "elf-powerpc-64";
729 result = "elf-ibm-s390";
732 result = "elf-nec-v800";
735 result = "elf-fujitsu-fr20";
738 result = "elf-trw-rh32";
741 result = "elf-motorola-rce";
747 result = "elf-digital-alpha";
750 result = "elf-hitachi-sh";
753 result = "elf-sun-sparc-v9-64";
756 result = "elf-siemens-tricore";
759 result = "elf-argonaut-risc-core";
762 result = "elf-hitachi-h8-300";
765 result = "elf-hitachi-h8-300h";
768 result = "elf-hitachi-h8s";
771 result = "elf-hitachi-h8-500";
774 result = "elf-intel-ia-64";
777 result = "elf-stanford-mips-x";
780 result = "elf-motorola-coldfire";
783 result = "elf-motorola-68hc12";
786 result = "elf-fujitsu-mma";
789 result = "elf-siemens-pcp";
792 result = "elf-sony-ncpu";
795 result = "elf-denso-ndr1";
798 result = "elf-motorola-starcore";
801 result = "elf-toyota-me16";
804 result = "elf-stm-st100";
807 result = "elf-alc-tinyj";
810 result = "elf-amd-x86_64";
813 result = "elf-sony-pdsp";
816 result = "elf-siemens-fx66";
819 result = "elf-stm-st9+";
822 result = "elf-stm-st7";
825 result = "elf-motorola-68hc16";
828 result = "elf-motorola-68hc11";
831 result = "elf-motorola-68hc08";
834 result = "elf-motorola-68hc05";
837 result = "elf-sg-svx";
840 result = "elf-stm-st19";
843 result = "elf-digital-vax";
846 result = "elf-axis-cris";
849 result = "elf-infineon-javelin";
852 result = "elf-firepath";
855 result = "elf-lsi-zsp";
858 result = "elf-don-knuth-mmix";
861 result = "elf-harvard-huany";
864 result = "elf-sitera-prism";
867 result = "elf-atmel-avr";
870 result = "elf-fujistu-fr30";
873 result = "elf-mitsubishi-d10v";
876 result = "elf-mitsubishi-d30v";
879 result = "elf-nec-v850";
882 result = "elf-mitsubishi-m32r";
885 result = "elf-matsushita-mn10300";
888 result = "elf-matsushita-mn10200";
891 result = "elf-picojava";
894 result = "elf-openrisc-32";
897 result = "elf-arc-a5";
900 result = "elf-tensilica-xtensa";
903 #ifdef HAVE_EM_AARCH64_MACRO
905 result = "elf-arm-aarch64";
909 #ifdef HAVE_EM_TILEPRO_MACRO
911 result = "elf-tilera-tilepro";
915 #ifdef HAVE_EM_TILEGX_MACRO
917 result = "elf-tilera-tilegx";
922 result = "elf-last-arch-number";
925 result = "elf-non-official-alpha";
929 std::ostringstream o;
930 o << "elf-unknown-arch-value-" << e_machine;
938 /// The kind of ELF hash table found by the function
939 /// find_hash_table_section_index.
942 NO_HASH_TABLE_KIND = 0,
943 SYSV_HASH_TABLE_KIND,
947 /// Get the index of the hash table section.
949 /// @param elf_handle the elf handle to use.
951 /// @param ht_section_index this is set to the resulting index
952 /// of the hash table section. This is set only when a SHT_HASH or SHT_GNU_HASH section is found.
954 /// @param symtab_section_index the index of the section of the
955 /// symbol table the hash table refers to (its sh_link value).
956 static hash_table_kind
957 find_hash_table_section_index(Elf* elf_handle,
958 size_t& ht_section_index,
959 size_t& symtab_section_index)
962 return NO_HASH_TABLE_KIND;
964 GElf_Shdr header_mem, *section_header;
965 bool found_sysv_ht = false, found_gnu_ht = false;
966 for (Elf_Scn* section = elf_nextscn(elf_handle, 0);
968 section = elf_nextscn(elf_handle, section))
970 section_header= gelf_getshdr(section, &header_mem);
971 if (section_header->sh_type != SHT_HASH
972 && section_header->sh_type != SHT_GNU_HASH)
975 ht_section_index = elf_ndxscn(section);
976 symtab_section_index = section_header->sh_link;
978 if (section_header->sh_type == SHT_HASH)
979 found_sysv_ht = true;
980 else if (section_header->sh_type == SHT_GNU_HASH)
985 return GNU_HASH_TABLE_KIND;
986 else if (found_sysv_ht)
987 return SYSV_HASH_TABLE_KIND;
989 return NO_HASH_TABLE_KIND;
992 /// Find the symbol table.
994 /// If we are looking at a relocatable or executable file, this
995 /// function will return the .symtab symbol table (of type
996 /// SHT_SYMTAB). But if we are looking at a DSO it returns the
997 /// .dynsym symbol table (of type SHT_DYNSYM).
999 /// @param elf_handle the elf handle to consider.
1001 /// @param symtab the symbol table found.
1003 /// @return true iff the symbol table is found.
1005 find_symbol_table_section(Elf* elf_handle, Elf_Scn*& symtab)
1007 Elf_Scn* section = 0, *dynsym = 0, *sym_tab = 0;
1008 while ((section = elf_nextscn(elf_handle, section)) != 0)
1010 GElf_Shdr header_mem, *header;
1011 header = gelf_getshdr(section, &header_mem);
1012 if (header->sh_type == SHT_DYNSYM)
1014 else if (header->sh_type == SHT_SYMTAB)
1018 if (dynsym || sym_tab)
1021 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle, &eh_mem);
1022 if (elf_header->e_type == ET_REL
1023 || elf_header->e_type == ET_EXEC)
1024 symtab = sym_tab ? sym_tab : dynsym;
1026 symtab = dynsym ? dynsym : sym_tab;
1032 /// Find the index (in the section headers table) of the symbol table
1035 /// If we are looking at a relocatable or executable file, this
1036 /// function will return the index for the .symtab symbol table (of
1037 /// type SHT_SYMTAB). But if we are looking at a DSO it returns the
1038 /// index for the .dynsym symbol table (of type SHT_DYNSYM).
1040 /// @param elf_handle the elf handle to use.
1042 /// @param symtab_index the index of the symbol_table, that was found.
1044 /// @return true iff the symbol table section index was found.
1046 find_symbol_table_section_index(Elf* elf_handle,
1047 size_t& symtab_index)
1049 Elf_Scn* section = 0;
1050 if (!find_symbol_table_section(elf_handle, section))
1053 symtab_index = elf_ndxscn(section);
1057 /// Find and return a section by its name and its type.
1059 /// @param elf_handle the elf handle to use.
1061 /// @param name the name of the section.
1063 /// @param section_type the type of the section. This is the
1064 /// Elf32_Shdr::sh_type (or Elf64_Shdr::sh_type) data member.
1065 /// Examples of values of this parameter are SHT_PROGBITS or SHT_NOBITS.
1067 /// @return the section found, or nil if none was found.
1069 find_section(Elf* elf_handle, const string& name, Elf64_Word section_type)
1071 size_t section_header_string_index = 0;
1072 if (elf_getshdrstrndx (elf_handle, &section_header_string_index) < 0)
1075 Elf_Scn* section = 0;
1076 GElf_Shdr header_mem, *header;
1077 while ((section = elf_nextscn(elf_handle, section)) != 0)
1079 header = gelf_getshdr(section, &header_mem);
1080 if (header == NULL || header->sh_type != section_type)
1083 const char* section_name =
1084 elf_strptr(elf_handle, section_header_string_index, header->sh_name);
1085 if (section_name && name == section_name)
1092 /// Test if the ELF binary denoted by a given ELF handle is a Linux
1095 /// @param elf_handle the ELF handle to consider.
1097 /// @return true iff the binary denoted by @p elf_handle is a Linux
1100 binary_is_linux_kernel_module(Elf *elf_handle)
1102 return (find_section(elf_handle, ".modinfo", SHT_PROGBITS)
1103 && find_section(elf_handle,
1104 ".gnu.linkonce.this_module",
1108 /// Test if the ELF binary denoted by a given ELF handle is a Linux
1109 /// Kernel binary (either vmlinux or a kernel module).
1111 /// @param elf_handle the ELF handle to consider.
1113 /// @return true iff the binary denoted by @p elf_handle is a Linux
1116 binary_is_linux_kernel(Elf *elf_handle)
1118 return (find_section(elf_handle,
1119 "__ksymtab_strings",
1121 || binary_is_linux_kernel_module(elf_handle));
1124 /// Find and return the .text section.
1126 /// @param elf_handle the elf handle to use.
1128 /// @return the .text section found.
1130 find_text_section(Elf* elf_handle)
1131 {return find_section(elf_handle, ".text", SHT_PROGBITS);}
1133 /// Find and return the .bss section.
1135 /// @param elf_handle the elf handle to use.
1137 /// @return the .bss section found.
1139 find_bss_section(Elf* elf_handle)
1140 {return find_section(elf_handle, ".bss", SHT_NOBITS);}
1142 /// Find and return the .rodata section.
1144 /// @param elf_handle the elf handle to use.
1146 /// @return the .rodata section found.
1148 find_rodata_section(Elf* elf_handle)
1149 {return find_section(elf_handle, ".rodata", SHT_PROGBITS);}
1151 /// Find and return the .data section.
1153 /// @param elf_handle the elf handle to use.
1155 /// @return the .data section found.
1157 find_data_section(Elf* elf_handle)
1158 {return find_section(elf_handle, ".data", SHT_PROGBITS);}
1160 /// Find and return the .data1 section.
1162 /// @param elf_handle the elf handle to use.
1164 /// @return the .data1 section found.
1166 find_data1_section(Elf* elf_handle)
1167 {return find_section(elf_handle, ".data1", SHT_PROGBITS);}
1169 /// Find the __ksymtab_strings section of a Linux kernel binary.
1172 /// @return the __ksymtab_strings section of the linux kernel
1173 /// binary denoted by @p elf_handle, or nil if such a section could
1176 find_ksymtab_strings_section(Elf *elf_handle)
1178 if (binary_is_linux_kernel(elf_handle))
1179 return find_section(elf_handle, "__ksymtab_strings", SHT_PROGBITS);
1183 /// Get the address at which a given binary is loaded in memory.
1185 /// @param elf_handle the elf handle for the binary to consider.
1187 /// @param load_address the address where the binary is loaded. This
1188 /// is set by the function iff it returns true.
1190 /// @return true if the function could get the binary load address
1191 /// and assign @p load_address to it.
1193 get_binary_load_address(Elf *elf_handle,
1194 GElf_Addr &load_address)
1197 GElf_Ehdr *elf_header = gelf_getehdr(elf_handle, &eh_mem);
1198 size_t num_segments = elf_header->e_phnum;
1199 GElf_Phdr *program_header = 0;
1201 bool found_loaded_segment = false;
1204 for (unsigned i = 0; i < num_segments; ++i)
1206 program_header = gelf_getphdr(elf_handle, i, &ph_mem);
1207 if (program_header && program_header->p_type == PT_LOAD)
1209 if (!found_loaded_segment)
1211 result = program_header->p_vaddr;
1212 found_loaded_segment = true;
1215 if (program_header->p_vaddr < result)
1216 // The resulting load address we want is the lowest
1217 // load address of all the loaded segments.
1218 result = program_header->p_vaddr;
1222 if (found_loaded_segment)
1224 load_address = result;
1230 /// Find the file name of the alternate debug info file.
1232 /// @param elf_module the elf module to consider.
1234 /// @param alt_file_name output parameter. This is set to the file name
1235 /// of the alternate debug info file, iff this function returns true.
1237 /// @return true iff the location of the alternate debug info file was
1240 find_alt_debug_info_link(Dwfl_Module *elf_module,
1241 string &alt_file_name)
1244 Dwarf *dwarf = dwfl_module_getdwarf(elf_module, &bias);
1245 Elf *elf = dwarf_getelf(dwarf);
1246 GElf_Ehdr ehmem, *elf_header;
1247 elf_header = gelf_getehdr(elf, &ehmem);
1249 Elf_Scn* section = 0;
1250 while ((section = elf_nextscn(elf, section)) != 0)
1252 GElf_Shdr header_mem, *header;
1253 header = gelf_getshdr(section, &header_mem);
1254 if (header->sh_type != SHT_PROGBITS)
1257 const char *section_name = elf_strptr(elf,
1258 elf_header->e_shstrndx,
1263 size_t buildid_len = 0;
1264 if (section_name != 0
1265 && strcmp(section_name, ".gnu_debugaltlink") == 0)
1267 Elf_Data *data = elf_getdata(section, 0);
1268 if (data != 0 && data->d_size != 0)
1270 alt_name = (char*) data->d_buf;
1271 char *end_of_alt_name =
1272 (char *) memchr(alt_name, '\0', data->d_size);
1273 buildid_len = data->d_size - (end_of_alt_name - alt_name + 1);
1274 if (buildid_len == 0)
1276 buildid = end_of_alt_name + 1;
1282 if (buildid == 0 || alt_name == 0)
1285 alt_file_name = alt_name;
1292 /// Find alternate debuginfo file of a given "link" under a set of
1293 /// root directories.
1295 /// The link is a string that is read by the function
1296 /// find_alt_debug_info_link(). That link is a path that is relative
1297 /// to a given debug info file, e.g, "../../../.dwz/something.debug".
1298 /// It designates the alternate debug info file associated to a given
1299 /// debug info file.
1301 /// This function will thus try to find the .dwz/something.debug file
1302 /// under some given root directories.
1304 /// @param root_dirs the set of root directories to look from.
1306 /// @param alt_file_name a relative path to the alternate debug info
1307 /// file to look for.
1309 /// @param alt_file_path the resulting absolute path to the alternate
1310 /// debuginfo path denoted by @p alt_file_name and found under one of
1311 /// the directories in @p root_dirs. This is set iff the function
1314 /// @return true iff the function found the alternate debuginfo file.
1316 find_alt_debug_info_path(const vector<char**> root_dirs,
1317 const string &alt_file_name,
1318 string &alt_file_path)
1320 if (alt_file_name.empty())
1323 string altfile_name = tools_utils::trim_leading_string(alt_file_name, "../");
1325 for (vector<char**>::const_iterator i = root_dirs.begin();
1326 i != root_dirs.end();
1328 if (tools_utils::find_file_under_dir(**i, altfile_name, alt_file_path))
1334 /// Return the alternate debug info associated to a given main debug
1337 /// @param elf_module the elf module to consider.
1339 /// @param debug_root_dirs a set of root debuginfo directories under
1340 /// which to look for the alternate debuginfo file.
1342 /// @param alt_file_name output parameter. This is set to the file
1343 /// path of the alternate debug info file associated to @p elf_module.
1344 /// This is set iff the function returns a non-null result.
1346 /// @param alt_fd the file descriptor used to access the alternate
1347 /// debug info. If this parameter is set by the function, then the
1348 /// caller needs to close it, otherwise the file descriptor is going
1349 /// to be leaked. Note however that on recent versions of elfutils
1350 /// where libdw.h contains the function dwarf_getalt(), this parameter
1351 /// is set to 0, so it doesn't need to be closed.
1353 /// Note that the alternate debug info file is a DWARF extension as of
1354 /// DWARF 4 and is described at
1355 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
1357 /// @return the alternate debuginfo, or null. If @p alt_fd is
1358 /// non-zero, then the caller of this function needs to call
1359 /// dwarf_end() on the returned alternate debuginfo pointer,
1360 /// otherwise, it's going to be leaked.
1362 find_alt_debug_info(Dwfl_Module *elf_module,
1363 const vector<char**> debug_root_dirs,
1364 string& alt_file_name,
1367 if (elf_module == 0)
1371 find_alt_debug_info_link(elf_module, alt_file_name);
1373 #ifdef LIBDW_HAS_DWARF_GETALT
1374 // We are on recent versions of elfutils where the function
1375 // dwarf_getalt exists, so let's use it.
1376 Dwarf_Addr bias = 0;
1377 Dwarf* dwarf = dwfl_module_getdwarf(elf_module, &bias);
1378 result = dwarf_getalt(dwarf);
1381 // We are on an old version of elfutils where the function
1382 // dwarf_getalt doesn't exist yet, so let's open code its
1385 const char *file_name = 0;
1386 void **user_data = 0;
1387 Dwarf_Addr low_addr = 0;
1390 file_name = dwfl_module_info(elf_module, &user_data,
1391 &low_addr, 0, 0, 0, 0, 0);
1393 alt_fd = dwfl_standard_find_debuginfo(elf_module, user_data,
1394 file_name, low_addr,
1395 alt_name, file_name,
1398 result = dwarf_begin(alt_fd, DWARF_C_READ);
1403 // So we didn't find the alternate debuginfo file from the
1404 // information that is in the debuginfo file associated to
1405 // elf_module. Maybe the alternate debuginfo file is located
1406 // under one of the directories in debug_root_dirs. So let's
1408 string alt_file_path;
1409 if (!find_alt_debug_info_path(debug_root_dirs,
1414 // If we reach this point it means we have found the path to the
1415 // alternate debuginfo file and it's in alt_file_path. So let's
1416 // open it and read it.
1417 int fd = open(alt_file_path.c_str(), O_RDONLY);
1420 result = dwarf_begin(fd, DWARF_C_READ);
1422 #ifdef LIBDW_HAS_DWARF_GETALT
1423 Dwarf_Addr bias = 0;
1424 Dwarf* dwarf = dwfl_module_getdwarf(elf_module, &bias);
1425 dwarf_setalt(dwarf, result);
1432 /// Compare a symbol name against another name, possibly demangling
1433 /// the symbol_name before performing the comparison.
1435 /// @param symbol_name the symbol_name to take in account.
1437 /// @param name the second name to take in account.
1439 /// @param demangle if true, demangle @p symbol_name and compare the
1440 /// result of the demangling with @p name.
1442 /// @return true iff symbol_name equals name.
1444 compare_symbol_name(const string& symbol_name,
1450 string m = demangle_cplus_mangled_name(symbol_name);
1453 return symbol_name == name;
1456 /// Return the SHT_GNU_versym, SHT_GNU_verdef and SHT_GNU_verneed
1457 /// sections that are involved in symbol versionning.
1459 /// @param elf_handle the elf handle to use.
1461 /// @param versym_section the SHT_GNU_versym section found. If the
1462 /// section wasn't found, this is set to nil.
1464 /// @param verdef_section the SHT_GNU_verdef section found. If the
1465 /// section wasn't found, this is set to nil.
1467 /// @param verneed_section the SHT_GNU_verneed section found. If the
1468 /// section wasn't found, this is set to nil.
1470 /// @return true iff at least one of the sections where found.
1472 get_symbol_versionning_sections(Elf* elf_handle,
1473 Elf_Scn*& versym_section,
1474 Elf_Scn*& verdef_section,
1475 Elf_Scn*& verneed_section)
1477 Elf_Scn* section = NULL;
1479 Elf_Scn* versym = NULL, *verdef = NULL, *verneed = NULL;
1481 while ((section = elf_nextscn(elf_handle, section)) != NULL)
1483 GElf_Shdr* h = gelf_getshdr(section, &mem);
1484 if (h->sh_type == SHT_GNU_versym)
1486 else if (h->sh_type == SHT_GNU_verdef)
1488 else if (h->sh_type == SHT_GNU_verneed)
1492 if (versym || verdef || verneed)
1494 // At least one the versionning sections was found. Return it.
1495 versym_section = versym;
1496 verdef_section = verdef;
1497 verneed_section = verneed;
1504 /// Get the version definition (from the SHT_GNU_verdef section) of a
1505 /// given symbol represented by a pointer to GElf_Versym.
1507 /// @param elf_hande the elf handle to use.
1509 /// @param versym the symbol to get the version definition for.
1511 /// @param verdef_section the SHT_GNU_verdef section.
1513 /// @param version the resulting version definition. This is set iff
1514 /// the function returns true.
1516 /// @return true upon successful completion, false otherwise.
1518 get_version_definition_for_versym(Elf* elf_handle,
1519 GElf_Versym* versym,
1520 Elf_Scn* verdef_section,
1521 elf_symbol::version& version)
1523 Elf_Data* verdef_data = elf_getdata(verdef_section, NULL);
1524 GElf_Verdef verdef_mem;
1525 GElf_Verdef* verdef = gelf_getverdef(verdef_data, 0, &verdef_mem);
1526 size_t vd_offset = 0;
1528 for (;; vd_offset += verdef->vd_next)
1532 if (verdef->vd_ndx == (*versym & 0x7fff))
1533 // Found the version of the symbol.
1535 vd_offset += verdef->vd_next;
1536 verdef = (verdef->vd_next == 0
1538 : gelf_getverdef(verdef_data, vd_offset, &verdef_mem));
1543 GElf_Verdaux verdaux_mem;
1544 GElf_Verdaux *verdaux = gelf_getverdaux(verdef_data,
1545 vd_offset + verdef->vd_aux,
1547 GElf_Shdr header_mem;
1548 GElf_Shdr* verdef_section_header = gelf_getshdr(verdef_section,
1550 size_t verdef_stridx = verdef_section_header->sh_link;
1551 version.str(elf_strptr(elf_handle, verdef_stridx, verdaux->vda_name));
1552 if (*versym & 0x8000)
1553 version.is_default(false);
1555 version.is_default(true);
1558 if (!verdef || verdef->vd_next == 0)
1564 /// Get the version needed (from the SHT_GNU_verneed section) to
1565 /// resolve an undefined symbol represented by a pointer to
1568 /// @param elf_hande the elf handle to use.
1570 /// @param versym the symbol to get the version definition for.
1572 /// @param verneed_section the SHT_GNU_verneed section.
1574 /// @param version the resulting version definition. This is set iff
1575 /// the function returns true.
1577 /// @return true upon successful completion, false otherwise.
1579 get_version_needed_for_versym(Elf* elf_handle,
1580 GElf_Versym* versym,
1581 Elf_Scn* verneed_section,
1582 elf_symbol::version& version)
1584 if (versym == 0 || elf_handle == 0 || verneed_section == 0)
1587 size_t vn_offset = 0;
1588 Elf_Data* verneed_data = elf_getdata(verneed_section, NULL);
1589 GElf_Verneed verneed_mem;
1590 GElf_Verneed* verneed = gelf_getverneed(verneed_data, 0, &verneed_mem);
1592 for (;verneed; vn_offset += verneed->vn_next)
1594 size_t vna_offset = vn_offset;
1595 GElf_Vernaux vernaux_mem;
1596 GElf_Vernaux *vernaux = gelf_getvernaux(verneed_data,
1597 vn_offset + verneed->vn_aux,
1599 for (;vernaux != 0 && verneed;)
1601 if (vernaux->vna_other == *versym)
1602 // Found the version of the symbol.
1604 vna_offset += verneed->vn_next;
1605 verneed = (verneed->vn_next == 0
1607 : gelf_getverneed(verneed_data, vna_offset, &verneed_mem));
1610 if (verneed != 0 && vernaux != 0 && vernaux->vna_other == *versym)
1612 GElf_Shdr header_mem;
1613 GElf_Shdr* verneed_section_header = gelf_getshdr(verneed_section,
1615 size_t verneed_stridx = verneed_section_header->sh_link;
1616 version.str(elf_strptr(elf_handle,
1618 vernaux->vna_name));
1619 if (*versym & 0x8000)
1620 version.is_default(false);
1622 version.is_default(true);
1626 if (!verneed || verneed->vn_next == 0)
1632 /// Return the version for a symbol that is at a given index in its
1633 /// SHT_SYMTAB section.
1635 /// @param elf_handle the elf handle to use.
1637 /// @param symbol_index the index of the symbol to consider.
1639 /// @param get_def_version if this is true, it means that that we want
1640 /// the version for a defined symbol; in that case, the version is
1641 /// looked for in a section of type SHT_GNU_verdef. Otherwise, if
1642 /// this parameter is false, this means that we want the version for
1643 /// an undefined symbol; in that case, the version is the needed one
1644 /// for the symbol to be resolved; so the version is looked fo in a
1645 /// section of type SHT_GNU_verneed.
1647 /// @param version the version found for symbol at @p symbol_index.
1649 /// @return true iff a version was found for symbol at index @p
1652 get_version_for_symbol(Elf* elf_handle,
1653 size_t symbol_index,
1654 bool get_def_version,
1655 elf_symbol::version& version)
1657 Elf_Scn *versym_section = NULL,
1658 *verdef_section = NULL,
1659 *verneed_section = NULL;
1661 if (!get_symbol_versionning_sections(elf_handle,
1667 GElf_Versym versym_mem;
1668 Elf_Data* versym_data = (versym_section)
1669 ? elf_getdata(versym_section, NULL)
1671 GElf_Versym* versym = (versym_data)
1672 ? gelf_getversym(versym_data, symbol_index, &versym_mem)
1675 if (versym == 0 || *versym <= 1)
1676 // I got these value from the code of readelf.c in elfutils.
1677 // Apparently, if the symbol version entry has these values, the
1678 // symbol must be discarded. This is not documented in the
1679 // official specification.
1682 if (get_def_version)
1684 if (*versym == 0x8001)
1685 // I got this value from the code of readelf.c in elfutils
1686 // too. It's not really documented in the official
1691 && get_version_definition_for_versym(elf_handle, versym,
1692 verdef_section, version))
1698 && get_version_needed_for_versym(elf_handle, versym,
1699 verneed_section, version))
1706 /// Lookup a symbol using the SysV ELF hash table.
1708 /// Note that this function hasn't been tested. So it hasn't been
1709 /// debugged yet. IOW, it is not known to work. Or rather, it's
1710 /// almost like it's surely doesn't work ;-)
1712 /// Use it at your own risks. :-)
1714 ///@parm env the environment we are operating from.
1716 /// @param elf_handle the elf_handle to use.
1718 /// @param sym_name the symbol name to look for.
1720 /// @param ht_index the index (in the section headers table) of the
1721 /// hash table section to use.
1723 /// @param sym_tab_index the index (in the section headers table) of
1724 /// the symbol table to use.
1726 /// @param demangle if true, demangle @p sym_name before comparing it
1727 /// to names from the symbol table.
1729 /// @param syms_found a vector of symbols found with the name @p
1730 /// sym_name. table.
1732 lookup_symbol_from_sysv_hash_tab(const environment* env,
1734 const string& sym_name,
1736 size_t sym_tab_index,
1738 vector<elf_symbol_sptr>& syms_found)
1740 Elf_Scn* sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
1741 ABG_ASSERT(sym_tab_section);
1743 Elf_Data* sym_tab_data = elf_getdata(sym_tab_section, 0);
1744 ABG_ASSERT(sym_tab_data);
1746 GElf_Shdr sheader_mem;
1747 GElf_Shdr* sym_tab_section_header = gelf_getshdr(sym_tab_section,
1749 Elf_Scn* hash_section = elf_getscn(elf_handle, ht_index);
1750 ABG_ASSERT(hash_section);
1752 // Poke at the different parts of the hash table and get them ready
1754 unsigned long hash = elf_hash(sym_name.c_str());
1755 Elf_Data* ht_section_data = elf_getdata(hash_section, 0);
1756 Elf32_Word* ht_data = reinterpret_cast<Elf32_Word*>(ht_section_data->d_buf);
1757 size_t nb_buckets = ht_data[0];
1758 size_t nb_chains = ht_data[1];
1760 if (nb_buckets == 0)
1761 // An empty hash table. Not sure if that is possible, but it
1762 // would mean an empty table of exported symbols.
1765 //size_t nb_chains = ht_data[1];
1766 Elf32_Word* ht_buckets = &ht_data[2];
1767 Elf32_Word* ht_chains = &ht_buckets[nb_buckets];
1769 // Now do the real work.
1770 size_t bucket = hash % nb_buckets;
1771 size_t symbol_index = ht_buckets[bucket];
1774 const char* sym_name_str;
1776 elf_symbol::type sym_type;
1777 elf_symbol::binding sym_binding;
1778 elf_symbol::visibility sym_visibility;
1780 Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
1781 size_t strings_ndx = strings_section
1782 ? elf_ndxscn(strings_section)
1787 ABG_ASSERT(gelf_getsym(sym_tab_data, symbol_index, &symbol));
1788 sym_name_str = elf_strptr(elf_handle,
1789 sym_tab_section_header->sh_link,
1792 && compare_symbol_name(sym_name_str, sym_name, demangle))
1794 sym_type = stt_to_elf_symbol_type(GELF_ST_TYPE(symbol.st_info));
1795 sym_binding = stb_to_elf_symbol_binding(GELF_ST_BIND(symbol.st_info));
1797 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(symbol.st_other));
1798 sym_size = symbol.st_size;
1799 elf_symbol::version ver;
1800 if (get_version_for_symbol(elf_handle, symbol_index,
1801 /*get_def_version=*/true, ver))
1802 ABG_ASSERT(!ver.str().empty());
1803 elf_symbol_sptr symbol_found =
1804 elf_symbol::create(env,
1810 symbol.st_shndx != SHN_UNDEF,
1811 symbol.st_shndx == SHN_COMMON,
1812 ver, sym_visibility,
1813 symbol.st_shndx == strings_ndx);
1814 syms_found.push_back(symbol_found);
1817 symbol_index = ht_chains[symbol_index];
1818 } while (symbol_index != STN_UNDEF || symbol_index >= nb_chains);
1823 /// Get the size of the elf class, in bytes.
1825 /// @param elf_handle the elf handle to use.
1827 /// @return the size computed.
1829 get_elf_class_size_in_bytes(Elf* elf_handle)
1834 ABG_ASSERT(gelf_getehdr(elf_handle, &hdr));
1835 int c = hdr.e_ident[EI_CLASS];
1846 ABG_ASSERT_NOT_REACHED;
1852 /// Get a given word of a bloom filter, referred to by the index of
1853 /// the word. The word size depends on the current elf class and this
1854 /// function abstracts that nicely.
1856 /// @param elf_handle the elf handle to use.
1858 /// @param bloom_filter the bloom filter to consider.
1860 /// @param index the index of the bloom filter to return.
1862 bloom_word_at(Elf* elf_handle,
1863 Elf32_Word* bloom_filter,
1866 GElf_Word result = 0;
1868 ABG_ASSERT(gelf_getehdr(elf_handle, &h));
1870 c = h.e_ident[EI_CLASS];
1875 result = bloom_filter[index];
1879 GElf_Word* f= reinterpret_cast<GElf_Word*>(bloom_filter);
1890 /// The abstraction of the gnu elf hash table.
1892 /// The members of this struct are explained at
1893 /// - https://sourceware.org/ml/binutils/2006-10/msg00377.html
1894 /// - https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections.
1898 Elf32_Word* buckets;
1900 size_t first_sym_index;
1903 Elf32_Word* bloom_filter;
1906 Elf_Scn* sym_tab_section;
1907 GElf_Shdr sym_tab_section_header;
1921 }; // end struct gnu_ht
1923 /// Setup the members of the gnu hash table.
1925 /// @param elf_handle a handle on the elf file to use.
1927 /// @param ht_index the index (into the elf section headers table) of
1928 /// the hash table section to use.
1930 /// @param sym_tab_index the index (into the elf section headers
1931 /// table) of the symbol table the gnu hash table is about.
1933 /// @param ht the resulting hash table.
1935 /// @return true iff the hash table @ ht could be setup.
1937 setup_gnu_ht(Elf* elf_handle,
1939 size_t sym_tab_index,
1942 ht.sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
1943 ABG_ASSERT(ht.sym_tab_section);
1944 ABG_ASSERT(gelf_getshdr(ht.sym_tab_section, &ht.sym_tab_section_header));
1946 ht.sym_tab_section_header.sh_size / ht.sym_tab_section_header.sh_entsize;
1947 Elf_Scn* hash_section = elf_getscn(elf_handle, ht_index);
1948 ABG_ASSERT(hash_section);
1950 // Poke at the different parts of the hash table and get them ready
1952 Elf_Data* ht_section_data = elf_getdata(hash_section, 0);
1953 Elf32_Word* ht_data = reinterpret_cast<Elf32_Word*>(ht_section_data->d_buf);
1955 ht.nb_buckets = ht_data[0];
1956 if (ht.nb_buckets == 0)
1957 // An empty hash table. Not sure if that is possible, but it
1958 // would mean an empty table of exported symbols.
1960 ht.first_sym_index = ht_data[1];
1961 // The number of words used by the bloom filter. A size of a word
1963 ht.bf_nwords = ht_data[2];
1964 // The shift used by the bloom filter code.
1965 ht.shift = ht_data[3];
1966 // The data of the bloom filter proper.
1967 ht.bloom_filter = &ht_data[4];
1968 // The size of the bloom filter in 4 bytes word. This is going to
1969 // be used to index the 'bloom_filter' above, which is of type
1970 // Elf32_Word*; thus we need that bf_size be expressed in 4 bytes
1972 ht.bf_size = (get_elf_class_size_in_bytes(elf_handle) / 4) * ht.bf_nwords;
1973 // The buckets of the hash table.
1974 ht.buckets = ht.bloom_filter + ht.bf_size;
1975 // The chain of the hash table.
1976 ht.chain = ht.buckets + ht.nb_buckets;
1981 /// Look into the symbol tables of the underlying elf file and find
1982 /// the symbol we are being asked.
1984 /// This function uses the GNU hash table for the symbol lookup.
1986 /// The reference of for the implementation of this function can be
1988 /// - https://sourceware.org/ml/binutils/2006-10/msg00377.html
1989 /// - https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections.
1991 /// @param elf_handle the elf handle to use.
1993 /// @param sym_name the name of the symbol to look for.
1995 /// @param ht_index the index of the hash table header to use.
1997 /// @param sym_tab_index the index of the symbol table header to use
1998 /// with this hash table.
2000 /// @param demangle if true, demangle @p sym_name.
2002 /// @param syms_found the vector of symbols found with the name @p
2005 /// @return true if a symbol was actually found.
2007 lookup_symbol_from_gnu_hash_tab(const environment* env,
2009 const string& sym_name,
2011 size_t sym_tab_index,
2013 vector<elf_symbol_sptr>& syms_found)
2016 if (!setup_gnu_ht(elf_handle, ht_index, sym_tab_index, ht))
2019 // Now do the real work.
2021 // Compute bloom hashes (GNU hash and second bloom specific hashes).
2022 size_t h1 = elf_gnu_hash(sym_name.c_str());
2023 size_t h2 = h1 >> ht.shift;
2024 // The size of one of the words used in the bloom
2026 int c = get_elf_class_size_in_bytes(elf_handle) * 8;
2027 int n = (h1 / c) % ht.bf_nwords;
2028 unsigned char bitmask = (1 << (h1 % c)) | (1 << (h2 % c));
2030 // Test if the symbol is *NOT* present in this ELF file.
2031 if ((bloom_word_at(elf_handle, ht.bloom_filter, n) & bitmask) != bitmask)
2034 size_t i = ht.buckets[h1 % ht.nb_buckets];
2038 Elf32_Word stop_word, *stop_wordp;
2039 elf_symbol::version ver;
2041 const char* sym_name_str;
2044 elf_symbol::type sym_type;
2045 elf_symbol::binding sym_binding;
2046 elf_symbol::visibility sym_visibility;
2047 Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
2048 size_t strings_ndx = strings_section
2049 ? elf_ndxscn(strings_section)
2052 // Let's walk the hash table and record the versions of all the
2053 // symbols which name equal sym_name.
2054 for (i = ht.buckets[h1 % ht.nb_buckets],
2055 stop_wordp = &ht.chain[i - ht.first_sym_index];
2058 < ht.chain + (ht.sym_count - ht.first_sym_index));
2061 stop_word = *stop_wordp;
2062 if ((stop_word & ~ 1)!= (h1 & ~1))
2063 // A given bucket can reference several hashes. Here we
2064 // stumbled across a hash value different from the one we are
2065 // looking for. Let's keep walking.
2068 ABG_ASSERT(gelf_getsym(elf_getdata(ht.sym_tab_section, 0),
2070 sym_name_str = elf_strptr(elf_handle,
2071 ht.sym_tab_section_header.sh_link,
2074 && compare_symbol_name(sym_name_str, sym_name, demangle))
2076 // So we found a symbol (in the symbol table) that equals
2077 // sym_name. Now lets try to get its version and record it.
2078 sym_type = stt_to_elf_symbol_type(GELF_ST_TYPE(symbol.st_info));
2079 sym_binding = stb_to_elf_symbol_binding(GELF_ST_BIND(symbol.st_info));
2081 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(symbol.st_other));
2083 if (get_version_for_symbol(elf_handle, i,
2084 /*get_def_version=*/true,
2086 ABG_ASSERT(!ver.str().empty());
2088 elf_symbol_sptr symbol_found =
2089 elf_symbol::create(env, i,
2092 sym_type, sym_binding,
2093 symbol.st_shndx != SHN_UNDEF,
2094 symbol.st_shndx == SHN_COMMON,
2095 ver, sym_visibility,
2096 symbol.st_shndx == strings_ndx);
2097 syms_found.push_back(symbol_found);
2102 // The last bit of the stop_word is 1. That means we need to
2103 // stop here. We reached the end of the chain of values
2104 // referenced by the hask bucket.
2110 /// Look into the symbol tables of the underlying elf file and find
2111 /// the symbol we are being asked.
2113 /// This function uses the elf hash table (be it the GNU hash table or
2114 /// the sysv hash table) for the symbol lookup.
2116 /// @param env the environment we are operating from.
2118 /// @param elf_handle the elf handle to use.
2120 /// @param ht_kind the kind of hash table to use. This is returned by
2121 /// the function function find_hash_table_section_index.
2123 /// @param ht_index the index (in the section headers table) of the
2124 /// hash table section to use.
2126 /// @param sym_tab_index the index (in section headers table) of the
2127 /// symbol table index to use with this hash table.
2129 /// @param symbol_name the name of the symbol to look for.
2131 /// @param demangle if true, demangle @p sym_name.
2133 /// @param syms_found the symbols that were actually found with the
2134 /// name @p symbol_name.
2136 /// @return true iff the function found the symbol from the elf hash
2139 lookup_symbol_from_elf_hash_tab(const environment* env,
2141 hash_table_kind ht_kind,
2143 size_t symtab_index,
2144 const string& symbol_name,
2146 vector<elf_symbol_sptr>& syms_found)
2148 if (elf_handle == 0 || symbol_name.empty())
2151 if (ht_kind == NO_HASH_TABLE_KIND)
2154 if (ht_kind == SYSV_HASH_TABLE_KIND)
2155 return lookup_symbol_from_sysv_hash_tab(env,
2156 elf_handle, symbol_name,
2161 else if (ht_kind == GNU_HASH_TABLE_KIND)
2162 return lookup_symbol_from_gnu_hash_tab(env,
2163 elf_handle, symbol_name,
2171 /// Lookup a symbol from the symbol table directly.
2174 /// @param env the environment we are operating from.
2176 /// @param elf_handle the elf handle to use.
2178 /// @param sym_name the name of the symbol to look up.
2180 /// @param sym_tab_index the index (in the section headers table) of
2181 /// the symbol table section.
2183 /// @param demangle if true, demangle the names found in the symbol
2184 /// table before comparing them with @p sym_name.
2186 /// @param sym_name_found the actual name of the symbol found.
2188 /// @param sym_type the type of the symbol found.
2190 /// @param sym_binding the binding of the symbol found.
2192 /// @param sym_versions the versions of the symbol found.
2194 /// @return true iff the symbol was found.
2196 lookup_symbol_from_symtab(const environment* env,
2198 const string& sym_name,
2199 size_t sym_tab_index,
2201 vector<elf_symbol_sptr>& syms_found)
2203 // TODO: read all of the symbol table, store it in memory in a data
2204 // structure that associates each symbol with its versions and in
2205 // which lookups of a given symbol is fast.
2206 Elf_Scn* sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
2207 ABG_ASSERT(sym_tab_section);
2209 GElf_Shdr header_mem;
2210 GElf_Shdr * sym_tab_header = gelf_getshdr(sym_tab_section,
2213 size_t symcount = sym_tab_header->sh_size / sym_tab_header->sh_entsize;
2214 Elf_Data* symtab = elf_getdata(sym_tab_section, NULL);
2217 elf_symbol::version ver;
2219 Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
2220 size_t strings_ndx = strings_section
2221 ? elf_ndxscn(strings_section)
2224 for (size_t i = 0; i < symcount; ++i)
2227 sym = gelf_getsym(symtab, i, &sym_mem);
2228 name_str = elf_strptr(elf_handle,
2229 sym_tab_header->sh_link,
2232 if (name_str && compare_symbol_name(name_str, sym_name, demangle))
2234 elf_symbol::type sym_type =
2235 stt_to_elf_symbol_type(GELF_ST_TYPE(sym->st_info));
2236 elf_symbol::binding sym_binding =
2237 stb_to_elf_symbol_binding(GELF_ST_BIND(sym->st_info));
2238 elf_symbol::visibility sym_visibility =
2239 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(sym->st_other));
2240 bool sym_is_defined = sym->st_shndx != SHN_UNDEF;
2241 bool sym_is_common = sym->st_shndx == SHN_COMMON;
2243 if (get_version_for_symbol(elf_handle, i,
2244 /*get_def_version=*/sym_is_defined,
2246 ABG_ASSERT(!ver.str().empty());
2247 elf_symbol_sptr symbol_found =
2248 elf_symbol::create(env, i, sym->st_size,
2250 sym_binding, sym_is_defined,
2251 sym_is_common, ver, sym_visibility,
2252 sym->st_shndx == strings_ndx);
2253 syms_found.push_back(symbol_found);
2264 /// Look into the symbol tables of the underlying elf file and see
2265 /// if we find a given symbol.
2267 /// @param env the environment we are operating from.
2269 /// @param symbol_name the name of the symbol to look for.
2271 /// @param demangle if true, try to demangle the symbol name found in
2272 /// the symbol table before comparing it to @p symbol_name.
2274 /// @param syms_found the list of symbols found, with the name @p
2277 /// @param sym_type this is set to the type of the symbol found. This
2278 /// shall b a standard elf.h value for symbol types, that is SHT_OBJECT,
2279 /// STT_FUNC, STT_IFUNC, etc ...
2281 /// Note that this parameter is set iff the function returns true.
2283 /// @param sym_binding this is set to the binding of the symbol found.
2284 /// This is a standard elf.h value of the symbol binding kind, that
2285 /// is, STB_LOCAL, STB_GLOBAL, or STB_WEAK.
2287 /// @param symbol_versions the versions of the symbol @p symbol_name,
2288 /// if it was found.
2290 /// @return true iff a symbol with the name @p symbol_name was found.
2292 lookup_symbol_from_elf(const environment* env,
2294 const string& symbol_name,
2296 vector<elf_symbol_sptr>& syms_found)
2298 size_t hash_table_index = 0, symbol_table_index = 0;
2299 hash_table_kind ht_kind = NO_HASH_TABLE_KIND;
2302 ht_kind = find_hash_table_section_index(elf_handle,
2304 symbol_table_index);
2306 if (ht_kind == NO_HASH_TABLE_KIND)
2308 if (!find_symbol_table_section_index(elf_handle, symbol_table_index))
2311 return lookup_symbol_from_symtab(env,
2319 return lookup_symbol_from_elf_hash_tab(env,
2329 /// Look into the symbol tables of the underlying elf file and see if
2330 /// we find a given public (global or weak) symbol of function type.
2332 /// @param env the environment we are operating from.
2334 /// @param elf_handle the elf handle to use for the query.
2336 /// @param symbol_name the function symbol to look for.
2338 /// @param func_syms the vector of public functions symbols found, if
2341 /// @return true iff the symbol was found.
2343 lookup_public_function_symbol_from_elf(const environment* env,
2345 const string& symbol_name,
2346 vector<elf_symbol_sptr>& func_syms)
2348 vector<elf_symbol_sptr> syms_found;
2351 if (lookup_symbol_from_elf(env, elf_handle, symbol_name,
2352 /*demangle=*/false, syms_found))
2354 for (vector<elf_symbol_sptr>::const_iterator i = syms_found.begin();
2355 i != syms_found.end();
2358 elf_symbol::type type = (*i)->get_type();
2359 elf_symbol::binding binding = (*i)->get_binding();
2361 if ((type == elf_symbol::FUNC_TYPE
2362 || type == elf_symbol::GNU_IFUNC_TYPE
2363 || type == elf_symbol::COMMON_TYPE)
2364 && (binding == elf_symbol::GLOBAL_BINDING
2365 || binding == elf_symbol::WEAK_BINDING))
2367 func_syms.push_back(*i);
2376 /// Look into the symbol tables of the underlying elf file and see if
2377 /// we find a given public (global or weak) symbol of variable type.
2379 /// @param env the environment we are operating from.
2381 /// @param elf the elf handle to use for the query.
2383 /// @param symname the variable symbol to look for.
2385 /// @param var_syms the vector of public variable symbols found, if any.
2387 /// @return true iff symbol @p symname was found.
2389 lookup_public_variable_symbol_from_elf(const environment* env,
2391 const string& symname,
2392 vector<elf_symbol_sptr>& var_syms)
2394 vector<elf_symbol_sptr> syms_found;
2397 if (lookup_symbol_from_elf(env, elf, symname, /*demangle=*/false, syms_found))
2399 for (vector<elf_symbol_sptr>::const_iterator i = syms_found.begin();
2400 i != syms_found.end();
2402 if ((*i)->is_variable()
2403 && ((*i)->get_binding() == elf_symbol::GLOBAL_BINDING
2404 || (*i)->get_binding() == elf_symbol::WEAK_BINDING))
2406 var_syms.push_back(*i);
2414 /// Get data tag information of an ELF file by looking up into its
2417 /// @param elf the elf handle to use for the query.
2419 /// @param dt_tag data tag to look for in dynamic segment
2420 /// @param dt_tag_data vector of found information for a given @p data_tag
2422 /// @return true iff data tag @p data_tag was found
2425 lookup_data_tag_from_dynamic_segment(Elf* elf,
2426 Elf64_Sxword data_tag,
2427 vector<string>& dt_tag_data)
2429 size_t num_prog_headers = 0;
2431 if (elf_getphdrnum(elf, &num_prog_headers) < 0)
2434 // Cycle through each program header.
2435 for (size_t i = 0; i < num_prog_headers; ++i)
2438 GElf_Phdr *phdr = gelf_getphdr(elf, i, &phdr_mem);
2439 if (phdr == NULL || phdr->p_type != PT_DYNAMIC)
2442 // Poke at the dynamic segment like a section, so that we can
2443 // get its section header information; also we'd like to read
2444 // the data of the segment by using elf_getdata() but that
2445 // function needs a Elf_Scn data structure to act on.
2446 // Elfutils doesn't really have any particular function to
2447 // access segment data, other than the functions used to
2448 // access section data.
2449 Elf_Scn *dynamic_section = gelf_offscn(elf, phdr->p_offset);
2451 GElf_Shdr *dynamic_section_header = gelf_getshdr(dynamic_section,
2453 if (dynamic_section_header == NULL
2454 || dynamic_section_header->sh_type != SHT_DYNAMIC)
2457 // Get data of the dynamic segment (seen as a section).
2458 Elf_Data *data = elf_getdata(dynamic_section, NULL);
2462 // Get the index of the section headers string table.
2463 size_t string_table_index = 0;
2464 ABG_ASSERT (elf_getshdrstrndx(elf, &string_table_index) >= 0);
2466 size_t dynamic_section_header_entry_size = gelf_fsize(elf,
2472 gelf_getshdr(elf_getscn(elf,
2473 dynamic_section_header->sh_link),
2475 ABG_ASSERT(link != NULL);
2477 size_t num_dynamic_section_entries =
2478 dynamic_section_header->sh_size / dynamic_section_header_entry_size;
2480 // Now walk through all the DT_* data tags that are in the
2482 for (size_t j = 0; j < num_dynamic_section_entries; ++j)
2484 GElf_Dyn dynamic_section_mem;
2485 GElf_Dyn *dynamic_section = gelf_getdyn(data,
2487 &dynamic_section_mem);
2488 if (dynamic_section->d_tag == data_tag)
2490 dt_tag_data.push_back(elf_strptr(elf,
2491 dynamic_section_header->sh_link,
2492 dynamic_section->d_un.d_val));
2500 /// Convert the type of ELF file into @ref elf_type.
2502 /// @param elf the elf handle to use for the query.
2504 /// @return the @ref elf_type for a given elf type.
2506 elf_file_type(Elf* elf)
2509 GElf_Ehdr *header = gelf_getehdr (elf, &ehdr_mem);
2510 vector<string> dt_debug_data;
2512 switch (header->e_type)
2515 if (lookup_data_tag_from_dynamic_segment(elf, DT_DEBUG, dt_debug_data))
2516 return ELF_TYPE_PI_EXEC;
2518 return ELF_TYPE_DSO;
2520 return ELF_TYPE_EXEC;
2522 return ELF_TYPE_RELOCATABLE;
2524 return ELF_TYPE_UNKNOWN;
2528 // ---------------------------------------
2529 // <location expression evaluation types>
2530 // ---------------------------------------
2532 /// An abstraction of a value representing the result of the
2533 /// evaluation of a dwarf expression. This is abstraction represents
2534 /// a partial view on the possible values because we are only
2535 /// interested in extracting the latest and longuest constant
2536 /// sub-expression of a given dwarf expression.
2540 int64_t const_value_;
2548 expr_result(bool is_const)
2549 : is_const_(is_const),
2553 explicit expr_result(int64_t v)
2558 /// @return true if the value is a constant. Otherwise, return
2559 /// false, meaning the value represents a quantity for which we need
2560 /// inferior (a running program) state to determine the value.
2566 /// @param f a flag saying if the value is set to a constant or not.
2571 /// Get the current constant value iff this represents a
2574 /// @param value the out parameter. Is set to the constant value of
2575 /// the @ref expr_result. This is set iff the function return true.
2577 ///@return true if this has a constant value, false otherwise.
2579 const_value(int64_t& value)
2583 value = const_value_;
2589 /// Getter of the constant value of the current @ref expr_result.
2591 /// Note that the current @ref expr_result must be constant,
2592 /// otherwise the current process is aborted.
2594 /// @return the constant value of the current @ref expr_result.
2598 ABG_ASSERT(is_const());
2599 return const_value_;
2602 operator int64_t() const
2603 {return const_value();}
2606 operator=(const int64_t v)
2613 operator==(const expr_result& o) const
2614 {return const_value_ == o.const_value_ && is_const_ == o.is_const_;}
2617 operator>=(const expr_result& o) const
2618 {return const_value_ >= o.const_value_;}
2621 operator<=(const expr_result& o) const
2622 {return const_value_ <= o.const_value_;}
2625 operator>(const expr_result& o) const
2626 {return const_value_ > o.const_value_;}
2629 operator<(const expr_result& o) const
2630 {return const_value_ < o.const_value_;}
2633 operator+(const expr_result& v) const
2635 expr_result r(*this);
2636 r.const_value_ += v.const_value_;
2637 r.is_const_ = r.is_const_ && v.is_const_;
2642 operator+=(int64_t v)
2649 operator-(const expr_result& v) const
2651 expr_result r(*this);
2652 r.const_value_ -= v.const_value_;
2653 r.is_const_ = r.is_const_ && v.is_const_;
2658 operator%(const expr_result& v) const
2660 expr_result r(*this);
2661 r.const_value_ %= v.const_value_;
2662 r.is_const_ = r.is_const_ && v.is_const();
2667 operator*(const expr_result& v) const
2669 expr_result r(*this);
2670 r.const_value_ *= v.const_value_;
2671 r.is_const_ = r.is_const_ && v.is_const();
// Bitwise OR.  Works on a copy of *this; the result is flagged
// constant only if both operands are constant.
2676 operator|(const expr_result& v) const
2678 expr_result r(*this);
2679 r.const_value_ |= v.const_value_;
2680 r.is_const_ = r.is_const_ && v.is_const_;
// Bitwise XOR, with the same constness propagation as above.
2685 operator^(const expr_result& v) const
2687 expr_result r(*this);
2688 r.const_value_ ^= v.const_value_;
2689 r.is_const_ = r.is_const_ && v.is_const_;
// Right shift of the stored value by the value of @p v.  The result
// is flagged constant only if both operands are constant.
// NOTE(review): the shift count is not range-checked here; a negative
// count or one not smaller than the bit width of the value is
// undefined behaviour -- confirm callers only pass sane counts.
2694 operator>>(const expr_result& v) const
2696 expr_result r(*this);
2697 r.const_value_ = r.const_value_ >> v.const_value_;
2698 r.is_const_ = r.is_const_ && v.is_const_;
// Left shift; same constness propagation and same caveat about the
// shift count as for the right shift above.
2703 operator<<(const expr_result& v) const
2705 expr_result r(*this);
2706 r.const_value_ = r.const_value_ << v.const_value_;
2707 r.is_const_ = r.is_const_ && v.is_const_;
// Bitwise complement: flip every bit of the value held by a copy of
// *this.  The constness flag of the copy is not touched here.
2714 expr_result r(*this);
2715 r.const_value_ = ~r.const_value_;
// Arithmetic negation of the value held by a copy of *this.  The
// constness flag of the copy is not touched here.
2722 expr_result r(*this);
2723 r.const_value_ = -r.const_value_;
// Absolute value of the value held by a copy of *this.  The value is
// read through const_value(), so the is_const() assertion of that
// getter applies.
// NOTE(review): the value goes through long double before being
// stored back; on platforms where long double has fewer than 64
// mantissa bits, large magnitudes may lose precision -- confirm
// whether an integer-only computation would be preferable.
2730 expr_result r = *this;
2731 r.const_value_ = std::abs(static_cast<long double>(r.const_value()));
2736 operator&(const expr_result& o)
2738 expr_result r(*this);
2739 r.const_value_ &= o.const_value_;
2740 r.is_const_ = r.is_const_ && o.is_const_;
2745 operator/(const expr_result& o)
2747 expr_result r(*this);
2748 r.is_const_ = r.is_const_ && o.is_const_;
2749 return r.const_value() / o.const_value();
2751 };// class end expr_result;
2753 /// A class that implements a stack of @ref expr_result, to be used in
2754 /// the engine evaluating DWARF expressions.
// The stack is stored in a vector whose *back* is the top of the
// stack: "front" of the stack maps to elems_.back(), pushing maps to
// push_back(), and iteration uses reverse iterators so that it walks
// from the top of the stack towards the bottom.
2755 class expr_result_stack_type
2757 vector<expr_result> elems_;
// Pre-allocate room for a few entries.
2761 expr_result_stack_type()
2762 {elems_.reserve(4);}
// Index 0 designates the top of the stack, hence the mirrored index
// into the underlying vector.
2765 operator[](unsigned i)
2767 unsigned s = elems_.size();
2769 return elems_[s - 1 -i];
// The const overloads below all delegate to their non-const
// counterpart through a const_cast.
2773 operator[](unsigned i) const
2774 {return const_cast<expr_result_stack_type*>(this)->operator[](i);}
2778 {return elems_.size();}
2780 vector<expr_result>::reverse_iterator
2782 {return elems_.rbegin();}
2784 const vector<expr_result>::reverse_iterator
2786 {return const_cast<expr_result_stack_type*>(this)->begin();}
2788 vector<expr_result>::reverse_iterator
2790 {return elems_.rend();}
2792 const vector<expr_result>::reverse_iterator
2794 {return const_cast<expr_result_stack_type*>(this)->end();}
// The top of the stack is the last element of the vector.
2798 {return elems_.back();}
2802 {return const_cast<expr_result_stack_type*>(this)->front();}
// Pushing on the stack appends to the vector.
2805 push_front(expr_result e)
2806 {elems_.push_back(e);}
// Take a copy of the top element.
2811 expr_result r = front();
// i.base() designates the element *after* the one the reverse
// iterator refers to, so step back once before erasing.
2817 erase(vector<expr_result>::reverse_iterator i)
2818 {elems_.erase(--i.base());}
2823 }; // end class expr_result_stack_type
2825 /// Abstraction of the evaluation context of a dwarf expression.
// Holds the state used while evaluating one DWARF expression: an
// accumulator, the stack of intermediate results and a flag telling
// whether the result is a TLS address.
2826 struct dwarf_expr_eval_context
2829 expr_result_stack_type stack;
2830 // Is set to true if the result of the expression that got evaluated
2831 // is a TLS address.
// A freshly built context has a non-constant accumulator and a stack
// holding one expr_result built with 'true'.
2834 dwarf_expr_eval_context()
2835 : accum(/*is_const=*/false),
2838 stack.push_front(expr_result(true));
// Bring the context back to the state described above: push an
// expr_result built with 'true', make the accumulator non-constant
// and clear the TLS flag.
2845 stack.push_front(expr_result(true));
2846 accum = expr_result(false);
2847 set_tls_addr = false;
2850 /// Set a flag to tell that the result of the expression that got
2851 /// evaluated is a TLS address.
2853 /// @param f true iff the result of the expression that got
2854 /// evaluated is a TLS address, false otherwise.
2856 set_tls_address(bool f)
2859 /// Getter for the flag that tells if the result of the expression
2860 /// that got evaluated is a TLS address.  Note that this getter
2860 /// shares its name with the setter above; only the signature differs.
2862 /// @return true iff the result of the expression that got evaluated
2863 /// is a TLS address.
2865 set_tls_address() const
2866 {return set_tls_addr;}
// Take a copy of the value at the top of the stack.
2871 expr_result r = stack.front();
// Push @p v on top of the evaluation stack.
2877 push(const expr_result& v)
2878 {stack.push_front(v);}
2879 };//end struct dwarf_expr_eval_context
2881 // ---------------------------------------
2882 // </location expression evaluation types>
2883 // ---------------------------------------
2885 /// An enum for the different kinds of linux kernel specific symbol
// Identifies which of the linux kernel specific symbol tables is
// being referred to.
2887 enum kernel_symbol_table_kind
2889 /// This is for an undefined kind of kernel symbol table.
2890 KERNEL_SYMBOL_TABLE_KIND_UNDEFINED,
2892 /// The __ksymtab symbol table.
2893 KERNEL_SYMBOL_TABLE_KIND_KSYMTAB,
2895 /// The __ksymtab_gpl symbol table.
2896 KERNEL_SYMBOL_TABLE_KIND_KSYMTAB_GPL
2899 /// An enum which specifies the format of the kernel symbol table
2900 /// (__ksymtab or __ksymtab_gpl).
2903 /// This enumerator means that no __ksymtab format has been
2905 UNDEFINED_KSYMTAB_FORMAT,
2907 /// Before Linux v4.19, the format of the __ksymtab (and the
2908 /// __ksymtab_gpl) section was the following.
2910 /// It's an array of entries. Each entry describes a symbol. Each
2911 /// entry is made of two words. Each is of the word size of the
2912 /// architecture. (8-bytes on a 64 bits arch and 4-bytes on a 32
2913 /// bits arch) The first word is the address of a symbol. The
2914 /// second one is the address of a static global variable symbol
2915 /// which value is the string representing the symbol name. That
2916 /// string is in the __ksymtab_strings section.
2918 /// So we are mostly interested in the symbol address part of each
2921 /// Thus this enumerator means that we have the pre v4.19 __ksymtab
2923 PRE_V4_19_KSYMTAB_FORMAT,
2925 /// Since Linux v4.19, the format of the __ksymtab section has
2926 /// changed. The commit that changed it is
2927 /// https://github.com/torvalds/linux/commit/7290d58095712a89f845e1bca05334796dd49ed2.
2929 /// The __ksymtab and __ksymtab_gpl sections each are an array of
2930 /// entries. Each entry describes a symbol. Each entry is made of
2931 /// two words. Each word is 4-bytes length. The first word is the
2932 /// 'place-relative' address of a symbol. The second one is the
2933 /// 'place-relative' address of a static global variable symbol
2934 /// which value is the string representing the symbol name. That
2935 /// string is in the __ksymtab_strings section.
2937 /// Below is the description of what a "place-relative address"
2938 /// means. For that, we are going to define the meaning of four
2939 /// values: 'N', 'S', 'O', and 'A'.
2941 /// *** 'N' and 'O' ***
2942 /// Suppose 'N' is the value of the number stored at offset 'O' (big
2943 /// oh, not zero) in the __ksymtab section.
///
/// *** 'S' ***
2946 /// That N designates a symbol in the symtab section which value is
2947 /// S. So S is the symbol value (in the .symtab symbol table)
2948 /// referred to by the number N found at offset 'O'.
///
/// *** 'A' ***
2951 /// Also, suppose the __ksymtab section will be loaded at memory
2952 /// address A, as indicated by the 'address' field of the section
2953 /// header describing the __ksymtab section.
2955 /// So here is the formula that gives us S, from N:
///
///     S = N + O + A
///
2959 /// Storing addresses this way does away with the need to have
2960 /// relocations for the __ksymtab section. So in effect, vmlinux
2961 /// binaries implementing this new format of __ksymtab won't have
2962 /// any .rela__ksymtab relocation section for the __ksymtab section
2963 /// in particular (nor any relocation section at all).
2966 /// Note that we are mostly interested in the symbol address part of
2968 V4_19_KSYMTAB_FORMAT
2969 }; // end enum ksymtab_format
2971 /// The context used to build ABI corpus from debug info in DWARF
2974 /// This context is to be created by create_read_context(). It's then
2975 /// passed to all the routines that read specific dwarf bits as they
2976 /// get some important data from it.
2978 /// When a new data member is added to this context, it must be
2979 /// initialized by the read_context::initialize() function. So please
// Flags controlling how the corpus is read.
// When set, the special linux kernel symbol tables are considered
// when determining whether a symbol is exported.
2987 bool load_in_linux_kernel_mode;
// When set, all types found in the debug info are loaded, not only
// those reachable from publicly exported declarations.
2988 bool load_all_types;
// NOTE(review): presumably tells the reader not to rely on the ELF
// symbol table -- confirm against the code that reads this flag.
2989 bool ignore_symbol_table;
// The flags are value-initialized, i.e. they all start out false.
2995 load_in_linux_kernel_mode(),
2997 ignore_symbol_table(),
3001 };// read_context::options_type
3003 /// A set of containers that contains one container per kind of @ref
3004 /// die_source. This allows to associate DIEs to things, depending
3005 /// on the source of the DIE.
// One container of type ContainerType is kept for each actual kind of
// DIE source: primary debug info, alternate debug info and type unit.
3006 template <typename ContainerType>
3007 class die_source_dependant_container_set
3009 ContainerType primary_debug_info_container_;
3010 ContainerType alt_debug_info_container_;
3011 ContainerType type_unit_container_;
3015 /// Getter for the container associated to DIEs coming from a
3016 /// given @ref die_source.
3018 /// @param source the die_source for which we want the container.
3020 /// @return the container that associates DIEs coming from @p
3021 /// source to something.
3023 get_container(die_source source)
3025 ContainerType *result = 0;
// Pick the data member that matches the requested source.
3028 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
3029 result = &primary_debug_info_container_;
3031 case ALT_DEBUG_INFO_DIE_SOURCE:
3032 result = &alt_debug_info_container_;
3034 case TYPE_UNIT_DIE_SOURCE:
3035 result = &type_unit_container_;
// These two enumerators do not designate an actual container; asking
// for them is a programming error.
3037 case NO_DEBUG_INFO_DIE_SOURCE:
3038 case NUMBER_OF_DIE_SOURCES:
3039 ABG_ASSERT_NOT_REACHED;
3044 /// Getter for the container associated to DIEs coming from a
3045 /// given @ref die_source.
3047 /// @param source the die_source for which we want the container.
3049 /// @return the container that associates DIEs coming from @p
3050 /// source to something.
3051 const ContainerType&
3052 get_container(die_source source) const
// Delegate to the non-const overload.
3054 return const_cast<die_source_dependant_container_set*>(this)->
3055 get_container(source);
3058 /// Getter for the container associated to DIEs coming from the
3059 /// same source as a given DIE.
3061 /// @param ctxt the read context to consider.
3063 /// @param die the DIE which should have the same source as the
3064 /// source of the container we want.
3066 /// @return the container that associates DIEs coming from the
3067 /// same source as @p die.
3069 get_container(const read_context& ctxt, const Dwarf_Die *die)
// Ask the read context where the DIE comes from, then reuse the
// source-based getter.  The lookup is expected to succeed.
3071 die_source source = NO_DEBUG_INFO_DIE_SOURCE;
3072 ABG_ASSERT(ctxt.get_die_source(die, source));
3073 return get_container(source);
3076 /// Getter for the container associated to DIEs coming from the
3077 /// same source as a given DIE.
3079 /// @param ctxt the read context to consider.
3081 /// @param die the DIE which should have the same source as the
3082 /// source of the container we want.
3084 /// @return the container that associates DIEs coming from the
3085 /// same source as @p die.
3086 const ContainerType&
3087 get_container(const read_context& ctxt, const Dwarf_Die *die) const
// Delegate to the non-const overload.
3089 return const_cast<die_source_dependant_container_set*>(this)->
3090 get_container(ctxt, die);
3093 /// Clear the container set.
// Every one of the three containers is emptied.
3097 primary_debug_info_container_.clear();
3098 alt_debug_info_container_.clear();
3099 type_unit_container_.clear();
3101 }; // end die_source_dependant_container_set
// Data members of the read context.  All of them are expected to be
// (re)set by read_context::initialize().
// The suppression specifications to be used during ELF/DWARF parsing.
3103 suppr::suppressions_type supprs_;
3104 unsigned short dwarf_version_;
// Callbacks handed to the elfutils Dwfl library.
3105 Dwfl_Callbacks offline_callbacks_;
3106 // The set of directories under which to look for debug info.
3107 vector<char**> debug_info_root_paths_;
3110 // The alternate debug info. Alternate debug info sections are a
3111 // DWARF extension as of DWARF4 and are described at
3112 // http://www.dwarfstd.org/ShowIssue.php?issue=120604.1. Below are
3113 // the file descriptor used to access the alternate debug info
3114 // sections, and the representation of the DWARF debug info. Both
3115 // need to be freed after we are done using them, with fclose and
3119 string alt_debug_info_path_;
3120 // The address range of the offline elf file we are looking at.
3121 Dwfl_Module* elf_module_;
// Cached lazily by elf_handle(), hence mutable.
3122 mutable Elf* elf_handle_;
// ELF sections cached lazily by their respective getters, hence
// mutable.
3124 mutable Elf_Scn* bss_section_;
3125 mutable Elf_Scn* text_section_;
3126 mutable Elf_Scn* rodata_section_;
3127 mutable Elf_Scn* data_section_;
3128 mutable Elf_Scn* data1_section_;
3129 mutable Elf_Scn* symtab_section_;
3130 // The "Official procedure descriptor section, aka .opd", used in
3131 // ppc64 elf v1 binaries. This section contains the procedure
3132 // descriptors on that platform.
3133 Elf_Scn* opd_section_;
3134 /// The format of the special __ksymtab section from the linux
3136 mutable ksymtab_format ksymtab_format_;
3137 /// The size of one entry of the __ksymtab section.
3138 mutable size_t ksymtab_entry_size_;
3139 /// The number of entries in the __ksymtab section.
3140 mutable size_t nb_ksymtab_entries_;
3141 /// The number of entries in the __ksymtab_gpl section.
3142 mutable size_t nb_ksymtab_gpl_entries_;
3143 /// The special __ksymtab and __ksymtab_gpl sections from linux
3144 /// kernel or module binaries. The former is used to store
3145 /// references to symbols exported using the EXPORT_SYMBOL macro
3146 /// from the linux kernel. The latter is used to store references
3147 /// to symbols exported using the EXPORT_SYMBOL_GPL macro from the
3149 Elf_Scn* ksymtab_section_;
3150 Elf_Scn* ksymtab_reloc_section_;
3151 Elf_Scn* ksymtab_gpl_section_;
3152 Elf_Scn* ksymtab_gpl_reloc_section_;
3153 Elf_Scn* ksymtab_strings_section_;
// Sections related to symbol versioning, and flags telling whether
// they have been looked up and found.
3154 Elf_Scn* versym_section_;
3155 Elf_Scn* verdef_section_;
3156 Elf_Scn* verneed_section_;
3157 bool symbol_versionning_sections_loaded_;
3158 bool symbol_versionning_sections_found_;
3159 Dwarf_Die* cur_tu_die_;
3160 mutable dwarf_expr_eval_context dwarf_expr_eval_context_;
3161 // A set of maps (one per kind of die source) that associates a decl
3162 // string representation with the DIEs (offsets) representing that
3164 mutable die_source_dependant_container_set<istring_dwarf_offsets_map_type>
3165 decl_die_repr_die_offsets_maps_;
3166 // A set of maps (one per kind of die source) that associates a type
3167 // string representation with the DIEs (offsets) representing that
3169 mutable die_source_dependant_container_set<istring_dwarf_offsets_map_type>
3170 type_die_repr_die_offsets_maps_;
3171 mutable die_source_dependant_container_set<die_istring_map_type>
3172 die_qualified_name_maps_;
3173 mutable die_source_dependant_container_set<die_istring_map_type>
3174 die_pretty_repr_maps_;
3175 mutable die_source_dependant_container_set<die_istring_map_type>
3176 die_pretty_type_repr_maps_;
3177 // A set of maps (one per kind of die source) that associates the
3178 // offset of a decl die to its corresponding decl artifact.
3179 mutable die_source_dependant_container_set<die_artefact_map_type>
3180 decl_die_artefact_maps_;
3181 // A set of maps (one per kind of die source) that associates the
3182 // offset of a type die to its corresponding type artifact.
3183 mutable die_source_dependant_container_set<die_artefact_map_type>
3184 type_die_artefact_maps_;
3185 /// A set of vectors (one per kind of die source) that associates
3186 /// the offset of a type DIE to the offset of its canonical DIE.
3187 mutable die_source_dependant_container_set<offset_offset_map_type>
3188 canonical_type_die_offsets_;
3189 /// A set of vectors (one per kind of die source) that associates
3190 /// the offset of a decl DIE to the offset of its canonical DIE.
3191 mutable die_source_dependant_container_set<offset_offset_map_type>
3192 canonical_decl_die_offsets_;
3193 /// A map that associates a function type representations to
3194 /// function types, inside a translation unit.
3195 mutable istring_fn_type_map_type per_tu_repr_to_fn_type_maps_;
// The "work in progress" maps and the lists of types to canonicalize
// below come in three flavours: one for the primary debug info, one
// for the alternate debug info and one for the type units.
3197 die_class_or_union_map_type die_wip_classes_map_;
3198 die_class_or_union_map_type alternate_die_wip_classes_map_;
3199 die_class_or_union_map_type type_unit_die_wip_classes_map_;
3200 die_function_type_map_type die_wip_function_types_map_;
3201 die_function_type_map_type alternate_die_wip_function_types_map_;
3202 die_function_type_map_type type_unit_die_wip_function_types_map_;
3203 die_function_decl_map_type die_function_with_no_symbol_map_;
3204 vector<Dwarf_Off> types_to_canonicalize_;
3205 vector<Dwarf_Off> alt_types_to_canonicalize_;
3206 vector<Dwarf_Off> type_unit_types_to_canonicalize_;
3207 vector<type_base_sptr> extra_types_to_canonicalize_;
3208 string_classes_map decl_only_classes_map_;
3209 die_tu_map_type die_tu_map_;
// The corpus group, corpus and translation unit currently being
// built.
3210 corpus_group_sptr cur_corpus_group_;
3211 corpus_sptr cur_corpus_;
3212 translation_unit_sptr cur_tu_;
3213 scope_decl_sptr nil_scope_;
3214 scope_stack_type scope_stack_;
3215 offset_offset_map_type primary_die_parent_map_;
3216 // A map that associates each tu die to a vector of unit import
3217 // points, in the main debug info
3218 tu_die_imported_unit_points_map_type tu_die_imported_unit_points_map_;
3219 // A map that associates each tu die to a vector of unit import
3220 // points, in the alternate debug info
3221 tu_die_imported_unit_points_map_type alt_tu_die_imported_unit_points_map_;
3222 tu_die_imported_unit_points_map_type type_units_tu_die_imported_unit_points_map_;
3223 // A DIE -> parent map for DIEs coming from the alternate debug info
3225 offset_offset_map_type alternate_die_parent_map_;
3226 offset_offset_map_type type_section_die_parent_map_;
3227 list<var_decl_sptr> var_decls_to_add_;
3228 addr_elf_symbol_sptr_map_sptr fun_addr_sym_map_;
3229 // On PPC64, the function entry point address is different from the
3230 // GElf_Sym::st_value value, which is the address of the descriptor
3231 // of the function. The map below thus associates the address of
3232 // the entry point to the function symbol. If we are not on ppc64,
3233 // then this map ought to be empty. Only the fun_addr_sym_map_ is
3234 // used in that case. On ppc64, though, both maps are used.
3235 addr_elf_symbol_sptr_map_sptr fun_entry_addr_sym_map_;
3236 string_elf_symbols_map_sptr fun_syms_;
3237 addr_elf_symbol_sptr_map_sptr var_addr_sym_map_;
3238 string_elf_symbols_map_sptr var_syms_;
3239 string_elf_symbols_map_sptr undefined_fun_syms_;
3240 string_elf_symbols_map_sptr undefined_var_syms_;
// Address sets for the symbols exported by the linux kernel, split
// between functions and variables, and between the plain and the GPL
// flavours.
3241 address_set_sptr linux_exported_fn_syms_;
3242 address_set_sptr linux_exported_var_syms_;
3243 address_set_sptr linux_exported_gpl_fn_syms_;
3244 address_set_sptr linux_exported_gpl_var_syms_;
3245 vector<string> dt_needed_;
3247 string elf_architecture_;
3248 corpus::exported_decls_builder* exported_decls_builder_;
3249 options_type options_;
3254 /// Constructor of read_context.
3256 /// @param elf_path the path to the elf file the context is to be
3259 /// @param debug_info_root_paths a vector of pointers to the path to
3260 /// the root directory under which the debug info is to be found for
3261 /// @p elf_path. Leave this empty if the debug info is not in a
3264 /// @param environment the environment used by the current context.
3265 /// This environment contains resources needed by the reader and by
3266 /// the types and declarations that are to be created later. Note
3267 /// that ABI artifacts that are to be compared all need to be
3268 /// created within the same environment.
3270 /// Please also note that the life time of this environment object
3271 /// must be greater than the life time of the resulting @ref
3272 /// read_context the context uses resources that are allocated in
3273 /// the environment.
3275 /// @param load_all_types if set to false only the types that are
3276 /// reachable from publicly exported declarations (of functions and
3277 /// variables) are read. If set to true then all types found in the
3278 /// debug information are loaded.
3280 /// @param linux_kernel_mode if set to true, then consider the special
3281 /// linux kernel symbol tables when determining if a symbol is
3282 /// exported or not.
// All the set-up work is delegated to initialize(), which receives
// the constructor arguments unchanged.
3283 read_context(const string& elf_path,
3284 const vector<char**>& debug_info_root_paths,
3285 ir::environment* environment,
3286 bool load_all_types,
3287 bool linux_kernel_mode)
3289 initialize(elf_path, debug_info_root_paths, environment,
3290 load_all_types, linux_kernel_mode);
3293 /// Initializer of read_context.
3295 /// @param elf_path the path to the elf file the context is to be
3298 /// @param debug_info_root_paths a vector of pointers to the path to
3299 /// the root directory under which the debug info is to be found for
3300 /// @p elf_path. Leave this empty if the debug info is not in a
3303 /// @param environment the environment used by the current context.
3304 /// This environment contains resources needed by the reader and by
3305 /// the types and declarations that are to be created later. Note
3306 /// that ABI artifacts that are to be compared all need to be
3307 /// created within the same environment.
3309 /// Please also note that the life time of this environment object
3310 /// must be greater than the life time of the resulting @ref
3311 /// read_context the context uses resources that are allocated in
3312 /// the environment.
3314 /// @param load_all_types if set to false only the types that are
3315 /// reachable from publicly exported declarations (of functions and
3316 /// variables) are read. If set to true then all types found in the
3317 /// debug information are loaded.
3319 /// @param linux_kernel_mode if set to true, then consider the
3320 /// special linux kernel symbol tables when determining if a symbol
3321 /// is exported or not.
// Brings every data member back to a pristine state, then records the
// arguments.  It is the single place where the members get set.
3323 initialize(const string& elf_path,
3324 const vector<char**>& debug_info_root_paths,
3325 ir::environment* environment,
3326 bool load_all_types,
3327 bool linux_kernel_mode)
3336 elf_path_ = elf_path;
// Forget the cached ELF sections; their getters look them up again
// on demand.
3339 rodata_section_ = 0;
3342 symtab_section_ = 0;
// Nothing is known yet about the linux kernel symbol tables.
3344 ksymtab_format_ = UNDEFINED_KSYMTAB_FORMAT;
3345 ksymtab_entry_size_ = 0;
3346 nb_ksymtab_entries_ = 0;
3347 nb_ksymtab_gpl_entries_ = 0;
3348 ksymtab_section_ = 0;
3349 ksymtab_reloc_section_ = 0;
3350 ksymtab_gpl_section_ = 0;
3351 ksymtab_gpl_reloc_section_ = 0;
3352 ksymtab_strings_section_ = 0;
// Same for the symbol versioning sections; the two flags are bools,
// so assigning 0 sets them to false.
3353 versym_section_ = 0;
3354 verdef_section_ = 0;
3355 verneed_section_ = 0;
3356 symbol_versionning_sections_loaded_ = 0;
3357 symbol_versionning_sections_found_ = 0;
3359 exported_decls_builder_ = 0;
// Reset what relates to the alternate debug info.
3361 clear_alt_debug_info_data();
// Empty every DIE-related cache.
3364 decl_die_repr_die_offsets_maps_.clear();
3365 type_die_repr_die_offsets_maps_.clear();
3366 die_qualified_name_maps_.clear();
3367 die_pretty_repr_maps_.clear();
3368 die_pretty_type_repr_maps_.clear();
3369 decl_die_artefact_maps_.clear();
3370 type_die_artefact_maps_.clear();
3371 canonical_type_die_offsets_.clear();
3372 canonical_decl_die_offsets_.clear();
3373 die_wip_classes_map_.clear();
3374 alternate_die_wip_classes_map_.clear();
3375 type_unit_die_wip_classes_map_.clear();
3376 die_wip_function_types_map_.clear();
3377 alternate_die_wip_function_types_map_.clear();
3378 type_unit_die_wip_function_types_map_.clear();
3379 die_function_with_no_symbol_map_.clear();
3380 types_to_canonicalize_.clear();
3381 alt_types_to_canonicalize_.clear();
3382 type_unit_types_to_canonicalize_.clear();
3383 extra_types_to_canonicalize_.clear();
3384 decl_only_classes_map_.clear();
3385 die_tu_map_.clear();
// Drop the corpus group and corpus being built.
3386 cur_corpus_group_.reset();
3387 cur_corpus_.reset();
3389 primary_die_parent_map_.clear();
3390 tu_die_imported_unit_points_map_.clear();
3391 alt_tu_die_imported_unit_points_map_.clear();
3392 type_units_tu_die_imported_unit_points_map_.clear();
3393 alternate_die_parent_map_.clear();
3394 type_section_die_parent_map_.clear();
3395 var_decls_to_add_.clear();
// Drop the symbol maps and the sets of exported kernel symbols.
3396 fun_addr_sym_map_.reset();
3397 fun_entry_addr_sym_map_.reset();
3399 var_addr_sym_map_.reset();
3401 undefined_fun_syms_.reset();
3402 undefined_var_syms_.reset();
3403 linux_exported_fn_syms_.reset();
3404 linux_exported_var_syms_.reset();
3405 linux_exported_gpl_fn_syms_.reset();
3406 linux_exported_gpl_var_syms_.reset();
3409 elf_architecture_.clear();
3411 clear_per_translation_unit_data();
// Zero the callbacks first: create_default_dwfl() then fills in the
// ones it needs and creates the Dwfl handle.
3413 memset(&offline_callbacks_, 0, sizeof(offline_callbacks_));
3414 create_default_dwfl(debug_info_root_paths);
// Finally, record the options passed by the caller.
3415 options_.env = environment;
3416 options_.load_in_linux_kernel_mode = linux_kernel_mode;
3417 options_.load_all_types = load_all_types;
3418 load_in_linux_kernel_mode(linux_kernel_mode);
3421 /// Clear the resources related to the alternate DWARF data.
3423 clear_alt_debug_info_data()
// Release the alternate DWARF handle, then forget the path of the
// file it came from.
3431 dwarf_end(alt_dwarf_);
3434 alt_debug_info_path_.clear();
3438 /// Destructor of the @ref read_context type.
// The resources related to the alternate debug info are released
// explicitly here.
3441 clear_alt_debug_info_data();
3444 /// Clear the data that is relevant only for the current translation
3445 /// unit being read. The rest of the data is relevant for the
3446 /// entire ABI corpus.
3448 clear_per_translation_unit_data()
// Unwind the whole scope stack, one entry at a time.
3450 while (!scope_stack().empty())
3451 scope_stack().pop();
// Then empty the remaining per-translation-unit containers.
3452 var_decls_to_re_add_to_tree().clear();
3453 per_tu_repr_to_fn_type_maps().clear();
3456 /// Clear the data that is relevant for the current corpus being
3459 clear_per_corpus_data()
// Empty the caches of DIE names and representations, and the lists
// of types that were waiting to be canonicalized.
3461 die_qualified_name_maps_.clear();
3462 die_pretty_repr_maps_.clear();
3463 die_pretty_type_repr_maps_.clear();
3464 clear_types_to_canonicalize();
3467 /// Getter of the options of the read context.
3469 /// @return the options of the read context.
3474 /// Getter of the options of the read context.
3476 /// @return the options of the read context.
3481 /// Setter of the options of the read context.
3483 /// @param o the new options of the read context.
// Overload taking an options_type by const reference.
// NOTE(review): presumably replaces the current options with @p o --
// confirm against the body.
3485 options(const options_type& o)
3488 /// Getter for the current environment.
3490 /// @return the current environment.
// The environment is stored in the options of the context; the three
// accessors below all go through options_.env.
3491 const ir::environment*
3493 {return options_.env;}
3495 /// Getter for the current environment.
3497 /// @return the current environment.
3500 {return options_.env;}
3502 /// Setter for the current environment.
3504 /// @param env the new current environment.
3506 env(ir::environment* env)
3507 {options_.env = env;}
3509 /// Getter of the suppression specifications to be used during
3510 /// ELF/DWARF parsing.
3512 /// @return the suppression specifications.
// Read-only access to the suppression specifications.
3513 const suppr::suppressions_type&
3514 get_suppressions() const
3517 /// Getter of the suppression specifications to be used during
3518 /// ELF/DWARF parsing.
3520 /// @return the suppression specifications.
// This overload returns a non-const reference.
3521 suppr::suppressions_type&
3525 /// Getter for the callbacks of the Dwarf Front End library of
3526 /// elfutils that is used by this reader to read dwarf.
3528 /// @return the callbacks.
// Both overloads return the address of the offline_callbacks_ data
// member, so the pointer stays valid as long as the context does.
3529 const Dwfl_Callbacks*
3530 offline_callbacks() const
3531 {return &offline_callbacks_;}
3533 /// Getter for the callbacks of the Dwarf Front End library of
3534 /// elfutils that is used by this reader to read dwarf.
3535 /// @return the callbacks.
3538 {return &offline_callbacks_;}
3540 /// Constructor for a default Dwfl handle that knows how to load debug
3541 /// info from a library or executable elf file.
3543 /// @param debug_info_root_paths a vector of pointers to the root
3544 /// path under which to look for the debug info of the elf files
3545 /// that are later handled by the Dwfl. This is for cases where the
3546 /// debug info is split into a different file from the binary we
3547 /// want to inspect. On Red Hat compatible systems, this root path
3548 /// is usually /usr/lib/debug by default. If this argument is set
3549 /// to the empty set, then "./debug" and /usr/lib/debug will be
3550 /// searched for sub-directories containing the debug info file.
3551 /// Note that for now, elfutils wants this path to be absolute
3552 /// otherwise things just don't work and the debug info is not
3555 /// @return the constructed Dwfl handle.
3557 create_default_dwfl(const vector<char**>& debug_info_root_paths)
// Use the stock elfutils callbacks to find debug info and to compute
// section addresses.
3559 offline_callbacks()->find_debuginfo = dwfl_standard_find_debuginfo;
3560 offline_callbacks()->section_address = dwfl_offline_section_address;
// Only the first root path is installed at this point; no path at
// all is installed when the vector is empty.
3561 offline_callbacks()->debuginfo_path =
3562 debug_info_root_paths.empty() ? 0 : debug_info_root_paths.front();
// Create the Dwfl handle with these callbacks and keep it in the
// handle_ smart pointer.
3563 handle_.reset(dwfl_begin(offline_callbacks()),
// Remember the whole set of root paths.
3565 debug_info_root_paths_ = debug_info_root_paths;
// Getter of the DWARF version recorded in this context.
3569 dwarf_version() const
3570 {return dwarf_version_;}
// Setter of the DWARF version recorded in this context.
3573 dwarf_version(unsigned short v)
3574 {dwarf_version_ = v;}
3576 /// Getter for a smart pointer to a handle on the dwarf front end
3577 /// library that we use to read dwarf.
3579 /// @return the dwfl handle.
3584 /// Setter for a smart pointer to a handle on the dwarf front end
3585 /// library that we use to read dwarf.
3587 /// @param h the new dwfl handle.
// Setter overload; note that the handle is taken by non-const
// reference.
3589 dwfl_handle(dwfl_sptr& h)
// Hand back the elf_module_ data member.
3594 {return elf_module_;}
3596 /// Return the ELF descriptor for the binary we are analyzing.
3598 /// @return a pointer to the Elf descriptor representing the binary
3599 /// we are analyzing.
// The descriptor is fetched from the Dwfl module the first time it is
// needed and then kept in the mutable elf_handle_ member.
3603 if (elf_handle_ == 0)
3608 elf_handle_ = dwfl_module_getelf(elf_module(), &bias);
3614 /// Return the ELF descriptor used for DWARF access.
3616 /// This can be the same as read_context::elf_handle() above, if the
3617 /// DWARF info is in the same ELF file as the one of the binary we
3618 /// are analyzing. It is different if e.g, the debug info is split
3619 /// from the ELF file we are analyzing.
3621 /// @return a pointer to the ELF descriptor used to access debug
// Ask libdw for the ELF descriptor backing the main DWARF handle.
3624 dwarf_elf_handle() const
3625 {return dwarf_getelf(dwarf());}
3627 /// Test if the debug information is in a separate ELF file wrt the
3628 /// main ELF file of the program (application or shared library) we
3631 /// @return true if the debug information is in a separate ELF file
3632 /// compared to the main ELF file of the program (application or
3633 /// shared library) that we are looking at.
// The debug info is split when the ELF descriptor used for DWARF
// access differs from the one of the binary itself.
3635 dwarf_is_splitted() const
3636 {return dwarf_elf_handle() != elf_handle();}
3638 /// Add paths to the set of paths under which to look for split
3639 /// debuginfo files.
3641 /// @param debug_info_root_paths the paths to add.
3643 add_debug_info_root_paths(const vector<char **>& debug_info_root_paths)
// Append the new paths after the ones that are already known.
3645 debug_info_root_paths_.insert(debug_info_root_paths_.end(),
3646 debug_info_root_paths.begin(),
3647 debug_info_root_paths.end());
3650 /// Add a path to the set of paths under which to look for split
3651 /// debuginfo files.
3653 /// @param debug_info_root_path the path to add.
// The pointer itself is stored; what it points to is not copied.
3655 add_debug_info_root_path(char** debug_info_root_path)
3656 {debug_info_root_paths_.push_back(debug_info_root_path);}
3658 /// Find the alternate debuginfo file associated to a given elf file.
3660 /// @param elf_module represents the elf file to consider.
3662 /// @param alt_file_name the resulting path to the alternate
3663 /// debuginfo file found. This is set iff the function returns a
3666 find_alt_debug_info(Dwfl_Module *elf_module,
3667 string& alt_file_name,
// Delegate to the namespace-level function of the same name, passing
// it the root paths known to this context.
3671 result = dwarf_reader::find_alt_debug_info(elf_module,
3672 debug_info_root_paths_,
3673 alt_file_name, alt_fd);
3677 /// Load the debug info associated with an elf file that is at a
3680 /// @return a pointer to the DWARF debug info pointer upon
3681 /// successful debug info loading, NULL otherwise.
// Report the ELF file to the Dwfl handle as an offline module.
// NOTE(review): basename() receives the buffer of the path string
// with its constness cast away; this is only safe with a basename()
// implementation that does not modify its argument -- confirm which
// one is in scope.
3692 dwfl_report_offline(dwfl_handle().get(),
3693 basename(const_cast<char*>(elf_path().c_str())),
3696 dwfl_report_end(dwfl_handle().get(), 0, 0);
// First attempt at getting the DWARF, with the debug info path that
// is currently installed in the callbacks.
3698 Dwarf_Addr bias = 0;
3699 dwarf_ = dwfl_module_getdwarf(elf_module_, &bias);
3700 // Look for split debuginfo files under multiple possible
// The loop stops as soon as one root path yields a DWARF handle.
3702 for (vector<char**>::const_iterator i = debug_info_root_paths_.begin();
3703 dwarf_ == 0 && i != debug_info_root_paths_.end();
3706 offline_callbacks()->debuginfo_path = *i;
3707 dwarf_ = dwfl_module_getdwarf(elf_module_, &bias);
// Look for the alternate debug info; the path of the file found is
// stored in alt_debug_info_path_.
3711 alt_dwarf_ = find_alt_debug_info(elf_module_,
3712 alt_debug_info_path_,
3718 /// Return the main debug info we are looking at.
3720 /// @return the main debug info.
3725 /// Return the alternate debug info we are looking at.
3727 /// Note that "alternate debug info sections" is a GNU extension as
3728 /// of DWARF4 and is described at
3729 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
3731 /// @return the alternate debug info.
// Hand back the alt_dwarf_ data member.
3734 {return alt_dwarf_;}
3736 /// Return the correct debug info, depending on the DIE source we
3739 /// @param source the DIE source to consider.
3741 /// @return the right debug info, depending on @p source.
3743 dwarf_per_die_source(die_source source) const
// DIEs from the primary debug info and from type units are handled
// by the same case.
3748 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
3749 case TYPE_UNIT_DIE_SOURCE:
// DIEs from the alternate debug info are served by the alternate
// DWARF handle.
3752 case ALT_DEBUG_INFO_DIE_SOURCE:
3753 result = alt_dwarf();
// These two enumerators do not designate actual debug info; asking
// for them is a programming error.
3755 case NO_DEBUG_INFO_DIE_SOURCE:
3756 case NUMBER_OF_DIE_SOURCES:
3757 ABG_ASSERT_NOT_REACHED;
3762 /// Return the path to the alternate debug info as contained in the
3763 /// .gnu_debugaltlink section of the main elf file.
3765 /// Note that "alternate debug info sections" is a GNU extension as
3766 /// of DWARF4 and is described at
3767 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
3769 /// @return the path to the alternate debug info file, or an empty
3770 /// path if no alternate debug info file is associated.
// Hand back the alt_debug_info_path_ data member.
3772 alt_debug_info_path() const
3773 {return alt_debug_info_path_;}
3775 /// Return the path to the ELF path we are reading.
3777 /// @return the elf path.
3782 /// Return the bss section of the ELF file we are reading.
3784 /// The first time this function is called, the ELF file is scanned
3785 /// to look for the section we are looking for. Once the section is
3786 /// found, it's cached.
3788 /// Subsequent calls to this function just return the cached
3791 /// @return the bss section.
// Search the ELF handle for the section and remember the result in
// bss_section_.
3796 bss_section_ = find_bss_section(elf_handle());
3797 return bss_section_;
3800 /// Return the text section of the ELF file we are reading.
3802 /// The first time this function is called, the ELF file is scanned
3803 /// to look for the section we are looking for. Once the section is
3804 /// found, it's cached.
3806 /// Subsequent calls to this function just return the cached
3809 /// @return the text section.
3811 text_section() const
// Search the ELF handle for the section and remember the result in
// text_section_.
3814 text_section_ = find_text_section(elf_handle());
3815 return text_section_;
3818 /// Return the rodata section of the ELF file we are reading.
3820 /// The first time this function is called, the ELF file is scanned
3821 /// to look for the section we are looking for. Once the section is
3822 /// found, it's cached.
3824 /// Subsequent calls to this function just return the cached
3827 /// @return the rodata section.
3829 rodata_section() const
// Only scan the ELF file when nothing has been cached yet.
3831 if (!rodata_section_)
3832 rodata_section_ =find_rodata_section(elf_handle());
3833 return rodata_section_;
3836 /// Return the data section of the ELF file we are reading.
3838 /// The first time this function is called, the ELF file is scanned
3839 /// to look for the section we are looking for. Once the section is
3840 /// found, it's cached.
3842 /// Subsequent calls to this function just return the cached
3845 /// @return the data section.
3847 data_section() const
// Search the ELF handle for the section and remember the result in
// data_section_.
3850 data_section_ = find_data_section(elf_handle());
3851 return data_section_;
3854 /// Return the data1 section of the ELF file we are reading.
3856 /// The first time this function is called, the ELF file is scanned
3857 /// to look for the section we are looking for. Once the section is
3858 /// found, it's cached.
3860 /// Subsequent calls to this function just return the cached
3863 /// @return the data1 section.
3865 data1_section() const
// Only scan the ELF file when nothing has been cached yet.
3867 if (!data1_section_)
3868 data1_section_ = find_data1_section(elf_handle());
3869 return data1_section_;
// Getter: returns the cur_tu_die_ member, which the sibling setter
// stores.  Presumably this is the DIE of the translation unit
// currently being processed -- confirm against callers.
3874 {return cur_tu_die_;}
/// Setter of the cur_tu_die_ member.
///
/// @param cur_tu_die the pointer to store.  Only the pointer is
/// stored; the pointed-to DIE is not copied here.
3877 cur_tu_die(Dwarf_Die* cur_tu_die)
3878 {cur_tu_die_ = cur_tu_die;}
/// Getter of the DWARF expression evaluation context held by this
/// reader (the dwarf_expr_eval_context_ member).
///
/// NOTE(review): a non-const reference is returned from a const
/// member function; presumably the member is declared mutable --
/// confirm against the member declaration.
///
/// @return a reference to the evaluation context.
3880 dwarf_expr_eval_context&
3881 dwarf_expr_eval_ctxt() const
3882 {return dwarf_expr_eval_context_;}
3884 /// Getter of the maps set that associates a representation of a
3885 /// decl DIE to a vector of offsets of DIEs having that representation.
/// This is the const overload: the returned set is read-only.
3887 /// @return the maps set that associates a representation of a decl
3888 /// DIE to a vector of offsets of DIEs having that representation.
3889 const die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
3890 decl_die_repr_die_offsets_maps() const
3891 {return decl_die_repr_die_offsets_maps_;}
3893 /// Getter of the maps set that associates a representation of a
3894 /// decl DIE to a vector of offsets of DIEs having that representation.
/// This is the non-const overload: callers can modify the returned
/// set.
3896 /// @return the maps set that associates a representation of a decl
3897 /// DIE to a vector of offsets of DIEs having that representation.
3898 die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
3899 decl_die_repr_die_offsets_maps()
3900 {return decl_die_repr_die_offsets_maps_;}
3902 /// Getter of the maps set that associates a representation of a type
3903 /// DIE to a vector of offsets of DIEs having that representation.
/// This is the const overload: the returned set is read-only.
3905 /// @return the maps set that associates a representation of a type
3906 /// DIE to a vector of offsets of DIEs having that representation.
3907 const die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
3908 type_die_repr_die_offsets_maps() const
3909 {return type_die_repr_die_offsets_maps_;}
3911 /// Getter of the maps set that associates a representation of a type
3912 /// DIE to a vector of offsets of DIEs having that representation.
/// This is the non-const overload: callers can modify the returned
/// set.
3914 /// @return the maps set that associates a representation of a type
3915 /// DIE to a vector of offsets of DIEs having that representation.
3916 die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
3917 type_die_repr_die_offsets_maps()
3918 {return type_die_repr_die_offsets_maps_;}
3921 /// Compute the offset of the canonical DIE of a given DIE.
3923 /// @param die the DIE to consider.
3925 /// @param canonical_die_offset out parameter. This is set to the
3926 /// offset of the resulting canonical DIE that was computed.
3928 /// @param die_as_type if yes, it means @p die has to be considered
3931 compute_canonical_die_offset(const Dwarf_Die *die,
3932 Dwarf_Off &canonical_die_offset,
3933 bool die_as_type) const
// Select the offset->canonical-offset map to update: the one for
// types or the one for decls, for the source that @p die comes from.
// The const_cast is needed because this member function is const.
3935 offset_offset_map_type &canonical_dies =
3937 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
3938 get_container(*this, die)
3939 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
3940 get_container(*this, die);
// Compute the canonical DIE itself, then report only its offset.
3942 Dwarf_Die canonical_die;
3943 compute_canonical_die(die, canonical_dies, canonical_die, die_as_type);
3945 canonical_die_offset = dwarf_dieoffset(&canonical_die);
3948 /// Compute (find) the canonical DIE of a given DIE.
3950 /// @param die the DIE to consider.
3952 /// @param canonical_dies the container in which the canonical dies are
3953 /// stored. The index of each element is the offset of the DIE we
3954 /// want the canonical DIE for. And the value of the element at
3955 /// that index is the canonical DIE offset we are looking for.
3957 /// @param canonical_die out parameter. This is set to the
3958 /// resulting canonical DIE that was computed.
3960 /// @param die_as_type if yes, it means @p die has to be considered
3963 compute_canonical_die(const Dwarf_Die *die,
3964 offset_offset_map_type& canonical_dies,
3965 Dwarf_Die &canonical_die,
3966 bool die_as_type) const
// The source of the DIE must be determinable; this is asserted.
3969 ABG_ASSERT(get_die_source(die, source));
3971 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
// Delegate to the overload that works on a (offset, source) pair.
3973 compute_canonical_die(die_offset, source,
3975 canonical_die, die_as_type);
3978 /// Compute (find) the canonical DIE of a given DIE.
3980 /// @param die_offset the offset of the DIE to consider.
3982 /// @param source the source of the DIE to consider.
3984 /// @param canonical_dies the container in which the canonical dies are
3985 /// stored. The index of each element is the offset of the DIE we
3986 /// want the canonical DIE for. And the value of the element at
3987 /// that index is the canonical DIE offset we are looking for.
3989 /// @param canonical_die out parameter. This is set to the
3990 /// resulting canonical DIE that was computed.
3992 /// @param die_as_type if yes, it means @p die has to be considered
3995 compute_canonical_die(Dwarf_Off die_offset,
3997 offset_offset_map_type& canonical_dies,
3998 Dwarf_Die &canonical_die,
3999 bool die_as_type) const
4001 // The map that associates the string representation of 'die'
4002 // with a vector of offsets of potentially equivalent DIEs.
4003 istring_dwarf_offsets_map_type& map =
4005 ? (const_cast<read_context*>(this)->
4006 type_die_repr_die_offsets_maps().get_container(source))
4007 : (const_cast<read_context*>(this)->
4008 decl_die_repr_die_offsets_maps().get_container(source));
// Resolve the DIE through get_die_from_offset() rather than a bare
// dwarf_offdie(): get_die_from_offset() uses dwarf_offdie_types()
// for TYPE_UNIT_DIE_SOURCE, and it is what the rest of this function
// already uses for the very same (source, offset) pair.  It asserts
// that the lookup succeeds.
4011 get_die_from_offset(source, die_offset, &die);
4013 // The variable 'name' is the string representation of 'die'.
4015 // Even if die_as_type is true -- which means that 'die' is said
4016 // to be considered as a type -- we always consider a
4017 // DW_TAG_subprogram DIE as a decl here, as far as its string
4018 // representation is concerned.
4019 interned_string name =
4021 ? get_die_pretty_type_representation(&die, /*where=*/0)
4022 : get_die_pretty_representation(&die, /*where=*/0);
4024 Dwarf_Off canonical_die_offset = 0;
4025 istring_dwarf_offsets_map_type::iterator i = map.find(name);
// First DIE seen with this representation: it becomes its own
// canonical DIE.
4028 dwarf_offsets_type offsets;
4029 offsets.push_back(die_offset);
4030 map[name] = offsets;
4031 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
4032 get_die_from_offset(source, die_offset, &canonical_die);
4036 if (odr_is_relevant(&die))
4038 // ODR is relevant for this DIE. In this case, all types with
4039 // the same name are considered equivalent. So the array
4040 // i->second should only have one element. If not, then
4041 // the DIEs referenced in the array should all compare equal.
4042 // Otherwise, this is an ODR violation. In any case, return
4043 // the first element of the array.
4044 // ABG_ASSERT(i->second.size() == 1);
4045 canonical_die_offset = i->second.front();
4046 get_die_from_offset(source, canonical_die_offset, &canonical_die);
// NOTE(review): the cache entry written here maps die_offset to
// itself, whereas get_canonical_die() records canonical_die_offset
// in its ODR branch -- confirm which one is intended.
4047 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
// Compare 'die' against each DIE already known under this
// representation.
4051 Dwarf_Off cur_die_offset;
4052 Dwarf_Die potential_canonical_die;
4053 for (dwarf_offsets_type::const_iterator o = i->second.begin();
4054 o != i->second.end();
4057 cur_die_offset = *o;
4058 get_die_from_offset(source, cur_die_offset, &potential_canonical_die);
4059 if (compare_dies(*this, &die, &potential_canonical_die,
4060 /*update_canonical_dies_on_the_fly=*/false))
4062 canonical_die_offset = cur_die_offset;
4063 set_canonical_die_offset(canonical_dies, die_offset,
4064 canonical_die_offset);
4065 get_die_from_offset(source, canonical_die_offset, &canonical_die);
// 'die' is registered as a new candidate for this representation
// and recorded as its own canonical DIE.
4070 canonical_die_offset = die_offset;
4071 i->second.push_back(die_offset);
4072 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
4073 get_die_from_offset(source, canonical_die_offset, &canonical_die);
4076 /// Getter of the canonical DIE of a given DIE.
4078 /// @param die the DIE to consider.
4080 /// @param canonical_die output parameter. Is set to the resulting
4081 /// canonical die, if this function returns true.
4083 /// @param where the offset of the logical DIE we are supposed to be
4084 /// calling this function from. If set to zero this means this is
4087 /// @param die_as_type if set to yes, it means @p die is to be
4088 /// considered as a type DIE.
4090 /// @return true iff a canonical DIE was found for @p die.
4092 get_canonical_die(const Dwarf_Die *die,
4093 Dwarf_Die &canonical_die,
4095 bool die_as_type) const
4098 ABG_ASSERT(get_die_source(die, source));
// Select the offset->canonical-offset cache (types or decls) for
// the source of @p die.
4100 offset_offset_map_type &canonical_dies =
4102 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
4103 get_container(source)
4104 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
4105 get_container(source);
// Fast path: a non-zero cached offset means the canonical DIE is
// already known, so just materialize it.
4107 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
4108 if (Dwarf_Off canonical_die_offset =
4109 get_canonical_die_offset(canonical_dies, die_offset))
4111 get_die_from_offset(source, canonical_die_offset, &canonical_die);
4115 // The map that associates the string representation of 'die'
4116 // with a vector of offsets of potentially equivalent DIEs.
4117 istring_dwarf_offsets_map_type& map =
4119 ? (const_cast<read_context*>(this)->
4120 type_die_repr_die_offsets_maps().get_container(*this, die))
4121 : (const_cast<read_context*>(this)->
4122 decl_die_repr_die_offsets_maps().get_container(*this, die));
4124 // The variable 'name' is the string representation of 'die'.
4126 // Even if die_as_type is true -- which means that 'die' is said
4127 // to be considered as a type -- we always consider a
4128 // DW_TAG_subprogram DIE as a decl here, as far as its string
4129 // representation is concerned.
// NOTE(review): the DW_TAG_subprogram test below is commented out,
// so the choice depends on die_as_type only -- the comment above may
// be out of date.
4130 interned_string name =
4131 (die_as_type /*&& dwarf_tag(die) != DW_TAG_subprogram*/)
4132 ? get_die_pretty_type_representation(die, where)
4133 : get_die_pretty_representation(die, where);
4135 istring_dwarf_offsets_map_type::iterator i = map.find(name);
4139 if (odr_is_relevant(die))
4141 // ODR is relevant for this DIE. In this case, all types with
4142 // the same name are considered equivalent. So the array
4143 // i->second should only have one element. If not, then
4144 // the DIEs referenced in the array should all compare equal.
4145 // Otherwise, this is an ODR violation. In any case, return
4146 // the first element of the array.
4147 // ABG_ASSERT(i->second.size() == 1);
4148 Dwarf_Off canonical_die_offset = i->second.front();
4149 get_die_from_offset(source, canonical_die_offset, &canonical_die);
4150 set_canonical_die_offset(canonical_dies,
4152 canonical_die_offset);
// Otherwise compare @p die with each DIE known under this
// representation.
4156 Dwarf_Off cur_die_offset;
4157 for (dwarf_offsets_type::const_iterator o = i->second.begin();
4158 o != i->second.end();
4161 cur_die_offset = *o;
4162 get_die_from_offset(source, cur_die_offset, &canonical_die);
4163 // compare die and canonical_die.
4164 if (compare_dies(*this, die, &canonical_die,
4165 /*update_canonical_dies_on_the_fly=*/true))
4167 set_canonical_die_offset(canonical_dies,
4177 /// Retrieve the canonical DIE of a given DIE.
4179 /// The canonical DIE is a DIE that is structurally equivalent to
4182 /// Note that this function caches the canonical DIE that was
4183 /// computed. Subsequent invocations of this function on the same
4184 /// DIE return the same cached DIE.
4186 /// @param die the DIE to get a canonical type for.
4188 /// @param canonical_die the resulting canonical DIE.
4190 /// @param where the offset of the logical DIE we are supposed to be
4191 /// calling this function from. If set to zero this means this is
4194 /// @param die_as_type if true, consider DIE is a type.
4196 /// @return true if an *existing* canonical DIE was found.
4197 /// Otherwise, @p die is considered as being a canonical DIE for
4198 /// itself. @p canonical_die is thus set to the canonical die in
4201 get_or_compute_canonical_die(const Dwarf_Die* die,
4202 Dwarf_Die& canonical_die,
4204 bool die_as_type) const
4207 ABG_ASSERT(get_die_source(die, source));
// Select the offset->canonical-offset cache (types or decls) for
// the source of @p die.
4209 offset_offset_map_type &canonical_dies =
4211 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
4212 get_container(source)
4213 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
4214 get_container(source);
4216 Dwarf_Off initial_die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
// Fast path: a non-zero cached offset means the canonical DIE is
// already known.
4218 if (Dwarf_Off canonical_die_offset =
4219 get_canonical_die_offset(canonical_dies,
4220 initial_die_offset))
4222 get_die_from_offset(source, canonical_die_offset, &canonical_die);
4226 // The map that associates the string representation of 'die'
4227 // with a vector of offsets of potentially equivalent DIEs.
4228 istring_dwarf_offsets_map_type& map =
4230 ? (const_cast<read_context*>(this)->
4231 type_die_repr_die_offsets_maps().get_container(*this, die))
4232 : (const_cast<read_context*>(this)->
4233 decl_die_repr_die_offsets_maps().get_container(*this, die));
4235 // The variable 'name' is the string representation of 'die'.
4237 // Even if die_as_type is true -- which means that 'die' is said
4238 // to be considered as a type -- we always consider a
4239 // DW_TAG_subprogram DIE as a decl here, as far as its string
4240 // representation is concerned.
4241 interned_string name =
4243 ? get_die_pretty_type_representation(die, where)
4244 : get_die_pretty_representation(die, where);
4246 istring_dwarf_offsets_map_type::iterator i = map.find(name);
// First DIE seen with this representation: register it and make it
// its own canonical DIE.
4249 dwarf_offsets_type offsets;
4250 offsets.push_back(initial_die_offset);
4251 map[name] = offsets;
4252 get_die_from_offset(source, initial_die_offset, &canonical_die);
4253 set_canonical_die_offset(canonical_dies,
4255 initial_die_offset);
4259 if (odr_is_relevant(die))
4261 // ODR is relevant for this DIE. In this case, all types with
4262 // the same name are considered equivalent. So the array
4263 // i->second should only have one element. If not, then
4264 // the DIEs referenced in the array should all compare equal.
4265 // Otherwise, this is an ODR violation. In any case, return
4266 // the first element of the array.
4267 // ABG_ASSERT(i->second.size() == 1);
4268 Dwarf_Off die_offset = i->second.front();
4269 get_die_from_offset(source, die_offset, &canonical_die);
4270 set_canonical_die_offset(canonical_dies,
4276 // walk i->second without any iterator (using a while loop rather
4277 // than a for loop) because compare_dies might add new content to
4278 // the end of the i->second vector during the walking.
// Note that the size 's' is captured once, before the walk starts.
4279 dwarf_offsets_type::size_type n = 0, s = i->second.size();
4282 Dwarf_Off die_offset = i->second[n];
4283 get_die_from_offset(source, die_offset, &canonical_die);
4284 // compare die and canonical_die.
4285 if (compare_dies(*this, die, &canonical_die,
4286 /*update_canonical_dies_on_the_fly=*/true))
4288 set_canonical_die_offset(canonical_dies,
4296 // We didn't find a canonical DIE for 'die'. So let's consider
4297 // that it is its own canonical DIE.
4298 get_die_from_offset(source, initial_die_offset, &canonical_die);
4299 i->second.push_back(initial_die_offset);
4300 set_canonical_die_offset(canonical_dies,
4302 initial_die_offset);
4307 /// Get the source of the DIE.
4309 /// The function sets @p source to an enumerator value saying if the DIE comes
4310 /// from the .debug_info section of the primary debug info file, the
4311 /// .debug_info section of the alternate debug info file, or the
4312 /// .debug_types section.
4314 /// @param die the DIE to get the source of.
4316 /// @param source out parameter. The function sets this parameter
4317 /// to the source of @p die iff it returns true.
4319 /// @return true iff the source of the DIE could be determined and
4322 get_die_source(const Dwarf_Die *die, die_source &source) const
// Forward to the overload taking the DIE by reference.
4325 return get_die_source(*die, source);
4328 /// Get the source of the DIE.
4330 /// The function sets @p source to an enumerator value saying if the DIE comes
4331 /// from the .debug_info section of the primary debug info file, the
4332 /// .debug_info section of the alternate debug info file, or the
4333 /// .debug_types section.
4335 /// @param die the DIE to get the source of.
4337 /// @param source out parameter. The function sets this parameter
4338 /// to the source of @p die iff it returns true.
4340 /// @return true iff the source of the DIE could be determined and
4343 get_die_source(const Dwarf_Die &die, die_source &source) const
// Step 1: find the CU DIE that contains @p die.
4347 uint8_t address_size = 0, offset_size = 0;
4348 if (!dwarf_diecu(const_cast<Dwarf_Die*>(&die),
4349 &cu_die, &address_size,
// Step 2: query the unit header of that CU; cu_kind receives the
// unit's DIE, whose tag says what kind of unit this is.
4353 Dwarf_Half version = 0;
4354 Dwarf_Off abbrev_offset = 0;
4355 uint64_t type_signature = 0;
4356 Dwarf_Off type_offset = 0;
4357 if (!dwarf_cu_die(cu_die.cu, &cu_kind,
4358 &version, &abbrev_offset,
4359 &address_size, &offset_size,
4360 &type_signature, &type_offset))
4363 int tag = dwarf_tag(&cu_kind);
// Step 3: compile and partial units live either in the primary or
// in the alternate debug info; tell the two apart by comparing the
// Dwarf handle of the CU with dwarf() and alt_dwarf().
4365 if (tag == DW_TAG_compile_unit
4366 || tag == DW_TAG_partial_unit)
4368 Dwarf *die_dwarf = dwarf_cu_getdwarf(cu_die.cu);
4369 if (dwarf() == die_dwarf)
4370 source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
4371 else if (alt_dwarf() == die_dwarf)
4372 source = ALT_DEBUG_INFO_DIE_SOURCE;
4374 ABG_ASSERT_NOT_REACHED;
// A type unit maps to the type unit DIE source.
4376 else if (tag == DW_TAG_type_unit)
4377 source = TYPE_UNIT_DIE_SOURCE;
4384 /// Getter for the DIE designated by an offset.
/// The lookup is wrapped in ABG_ASSERT, so it is expected to succeed.
4386 /// @param source the source of the DIE to get.
4388 /// @param offset the offset of the DIE to get.
4390 /// @param die the resulting DIE. The pointer has to point to an
4391 /// allocated memory region.
4393 get_die_from_offset(die_source source, Dwarf_Off offset, Dwarf_Die *die) const
// Offsets of type unit DIEs must be resolved with
// dwarf_offdie_types(); every other source uses dwarf_offdie().
4395 if (source == TYPE_UNIT_DIE_SOURCE)
4396 ABG_ASSERT(dwarf_offdie_types(dwarf_per_die_source(source), offset, die));
4398 ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), offset, die));
4403 /// Add an entry to the relevant die->decl map.
4405 /// @param die the DIE to add to the map.
4407 /// @param decl the decl to consider.
4409 /// @param where_offset where in the DIE stream we logically are.
4411 /// @param do_associate_by_repr if true then this function
4412 /// associates the representation string of @p die with the
4413 /// declaration @p decl, in a corpus-wide manner. That is, in the
4414 /// entire current corpus, there is going to be just one declaration
4415 /// associated with a DIE of the string representation of @p die.
4417 /// NOTE(review): this comment used to document a
4418 /// do_associate_by_repr_per_tu parameter, but the signature below
4419 /// only has @p do_associate_by_repr. When that flag is false, the
4420 /// entry is keyed by the offset of @p die itself; when it is true,
4421 /// the entry is keyed by the offset of the canonical DIE returned
4422 /// by get_or_compute_canonical_die().
4424 associate_die_to_decl(Dwarf_Die* die,
4425 decl_base_sptr decl,
4426 size_t where_offset,
4427 bool do_associate_by_repr = false)
4430 ABG_ASSERT(get_die_source(die, source));
// The decl map is chosen according to the source of @p die.
4432 die_artefact_map_type& m =
4433 decl_die_artefact_maps().get_container(source);
4436 if (do_associate_by_repr)
4438 Dwarf_Die equiv_die;
4439 get_or_compute_canonical_die(die, equiv_die, where_offset,
4440 /*die_as_type=*/false);
4441 die_offset = dwarf_dieoffset(&equiv_die);
4444 die_offset = dwarf_dieoffset(die);
// An existing entry for the same offset is overwritten.
4446 m[die_offset] = decl;
4451 /// Lookup the decl for a given DIE.
4453 /// The returned decl is either the decl of the DIE that has the
4454 /// exact offset @p die_offset
4458 /// @param die_offset the offset of the DIE to consider.
4460 /// @param source where the DIE represented by @p die_offset comes
4463 /// Note that "alternate debug info sections" is a GNU extension as
4464 /// of DWARF4 and is described at
4465 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
4467 /// @return the resulting decl, or null if no decl is associated to
4468 /// the DIE represented by @p die_offset.
4470 lookup_decl_from_die_offset(Dwarf_Off die_offset, die_source source)
// Look the artifact up in the decl maps (die_as_type is false) and
// narrow it to a decl with is_decl().
4472 decl_base_sptr result =
4473 is_decl(lookup_artifact_from_die_offset(die_offset, source,
4474 /*die_as_type=*/false));
4479 /// Get the qualified name of a given DIE.
4481 /// If the name of the DIE was already computed before just return
4482 /// that name from a cache. Otherwise, build the name, cache it and
4485 /// @param die the DIE to consider.
4487 /// @param where_offset where in the DIE stream we logically are.
4489 /// @return the interned string representing the qualified name of
4492 get_die_qualified_name(Dwarf_Die *die, size_t where_offset)
// The cache is selected from the source of @p die and is keyed by
// DIE offset.
4495 die_istring_map_type& map =
4496 die_qualified_name_maps_.get_container(*this, die);
4498 size_t die_offset = dwarf_dieoffset(die);
4499 die_istring_map_type::const_iterator i = map.find(die_offset);
// Build the qualified name, intern it in the environment and store
// it in the cache.
4503 read_context& ctxt = *const_cast<read_context*>(this);
4504 string qualified_name = die_qualified_name(ctxt, die, where_offset);
4505 interned_string istr = env()->intern(qualified_name);
4506 map[die_offset] = istr;
4513 /// Get the qualified name of a given DIE.
4515 /// If the name of the DIE was already computed before just return
4516 /// that name from a cache. Otherwise, build the name, cache it and
4519 /// @param die the DIE to consider.
4521 /// @param where_offset where in the DIE stream we logically are.
4523 /// @return the interned string representing the qualified name of
4526 get_die_qualified_name(Dwarf_Die *die, size_t where_offset) const
// Const overload: casts constness away and forwards to the
// non-const overload, which may update the name cache.
4528 return const_cast<read_context*>(this)->
4529 get_die_qualified_name(die, where_offset);
4532 /// Get the qualified name of a given DIE which is considered to be
4533 /// the DIE for a type.
4535 /// For instance, for a DW_TAG_subprogram DIE, this function
4536 /// computes the name of the function *type* that corresponds to the
4539 /// If the name of the DIE was already computed before just return
4540 /// that name from a cache. Otherwise, build the name, cache it and
4543 /// @param die the DIE to consider.
4545 /// @param where_offset where in the DIE stream we logically are.
4547 /// @return the interned string representing the qualified name of
4550 get_die_qualified_type_name(const Dwarf_Die *die, size_t where_offset) const
4554 // The name of the translation unit die is "".
4555 if (die == cur_tu_die())
4556 return env()->intern("");
// NOTE(review): the cache used here is die_qualified_name_maps_,
// the same member that get_die_qualified_name() uses -- confirm
// that sharing it between decl names and type names is intended.
4558 die_istring_map_type& map =
4559 die_qualified_name_maps_.get_container(*const_cast<read_context*>(this),
4562 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
4563 die_istring_map_type::const_iterator i =
4564 map.find(die_offset);
4568 read_context& ctxt = *const_cast<read_context*>(this);
4569 string qualified_name;
4570 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// Anonymous struct, class and union DIEs get a synthetic name built
// from their source location: "unnamed-at-<location>", or
// "unnamed-at-noloc" when there is no location.
4571 if ((tag == DW_TAG_structure_type
4572 || tag == DW_TAG_class_type
4573 || tag == DW_TAG_union_type)
4574 && die_is_anonymous(die))
4576 location l = die_location(*this, die);
4577 qualified_name = l ? l.expand() : "noloc";
4578 qualified_name = "unnamed-at-" + qualified_name;
4582 die_qualified_type_name(ctxt, die, where_offset);
// Intern the name and store it in the cache.
4584 interned_string istr = env()->intern(qualified_name);
4585 map[die_offset] = istr;
4592 /// Get the pretty representation of a DIE that represents a type.
4594 /// For instance, for the DW_TAG_subprogram, this function computes
4595 /// the pretty representation of the type of the function, not the
4596 /// pretty representation of the function declaration.
4598 /// Once the pretty representation is computed, it's stored in a
4599 /// cache. Subsequent invocations of this function on the same DIE
4600 /// will yield the cached name.
4602 /// @param die the DIE to consider.
4604 /// @param where_offset where in the DIE stream we logically are.
4606 /// @return the interned_string that represents the pretty
4609 get_die_pretty_type_representation(const Dwarf_Die *die,
4610 size_t where_offset) const
// The cache (die_pretty_type_repr_maps_) is keyed by DIE offset.
4613 die_istring_map_type& map =
4614 die_pretty_type_repr_maps_.get_container(*const_cast<read_context*>(this),
4617 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
4618 die_istring_map_type::const_iterator i = map.find(die_offset);
// Compute the representation with die_pretty_print_type(), intern
// it and store it in the cache.
4622 read_context& ctxt = *const_cast<read_context*>(this);
4623 string pretty_representation =
4624 die_pretty_print_type(ctxt, die, where_offset);
4625 interned_string istr = env()->intern(pretty_representation);
4626 map[die_offset] = istr;
4633 /// Get the pretty representation of a DIE.
4635 /// Once the pretty representation is computed, it's stored in a
4636 /// cache. Subsequent invocations of this function on the same DIE
4637 /// will yield the cached name.
4639 /// @param die the DIE to consider.
4641 /// @param where_offset where in the DIE stream we logically are.
4643 /// @return the interned_string that represents the pretty
4646 get_die_pretty_representation(const Dwarf_Die *die, size_t where_offset) const
// The cache (die_pretty_repr_maps_) is keyed by DIE offset.
4650 die_istring_map_type& map =
4651 die_pretty_repr_maps_.get_container(*const_cast<read_context*>(this),
4654 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
4655 die_istring_map_type::const_iterator i = map.find(die_offset);
// Compute the representation with die_pretty_print(), intern it and
// store it in the cache.
4659 read_context& ctxt = *const_cast<read_context*>(this);
4660 string pretty_representation =
4661 die_pretty_print(ctxt, die, where_offset);
4662 interned_string istr = env()->intern(pretty_representation);
4663 map[die_offset] = istr;
4670 /// Lookup the artifact that was built to represent a type that has
4671 /// the same pretty representation as the type denoted by a given
4674 /// Note that the DIE must have previously been associated with the
4675 /// artifact using the functions associate_die_to_decl or
4676 /// associate_die_to_type.
4678 /// Also, note that the scope of the lookup is the current ABI
4681 /// @param die the DIE to consider.
4683 /// (This function takes no offset argument; @p die is its only input.)
4685 /// @return the type artifact found.
4686 type_or_decl_base_sptr
4687 lookup_type_artifact_from_die(Dwarf_Die *die) const
4689 type_or_decl_base_sptr artifact =
4690 lookup_artifact_from_die(die, /*die_as_type=*/true);
// When the artifact found is a function declaration, hand back the
// type of that function rather than the declaration.
4691 if (function_decl_sptr fn = is_function_decl(artifact))
4692 return fn->get_type();
4696 /// Lookup the artifact that was built to represent a type or a
4697 /// declaration that has the same pretty representation as the type
4698 /// denoted by a given DIE.
4700 /// Note that the DIE must have previously been associated with the
4701 /// artifact using the functions associate_die_to_decl or
4702 /// associate_die_to_type.
4704 /// Also, note that the scope of the lookup is the current ABI
4707 /// @param die the DIE to consider.
4709 /// (This function takes no offset argument; zero is passed as 'where' below.)
4711 /// @param die_as_type if true, it means the DIE is to be considered
4714 /// @return the artifact found.
4715 type_or_decl_base_sptr
4716 lookup_artifact_from_die(const Dwarf_Die *die, bool die_as_type = false) const
// Artifacts are looked up by the offset of the canonical DIE, not by
// the offset of @p die itself.
4718 Dwarf_Die equiv_die;
4719 if (!get_or_compute_canonical_die(die, equiv_die, /*where=*/0, die_as_type))
4720 return type_or_decl_base_sptr();
// Pick the type or decl artifact map for the source of the
// canonical DIE.
4722 const die_artefact_map_type& m =
4724 ? type_die_artefact_maps().get_container(*this, &equiv_die)
4725 : decl_die_artefact_maps().get_container(*this, &equiv_die);
4727 size_t die_offset = dwarf_dieoffset(&equiv_die);
4728 die_artefact_map_type::const_iterator i = m.find(die_offset);
4731 return type_or_decl_base_sptr();
4735 /// Lookup the artifact that was built to represent a type or a
4736 /// declaration that has the same pretty representation as the type
4737 /// denoted by the offset of a given DIE.
4739 /// Note that the DIE must have previously been associated with the
4740 /// artifact using either associate_die_to_decl or
4741 /// associate_die_to_type.
4743 /// Also, note that the scope of the lookup is the current ABI
4746 /// @param die_offset the offset of the DIE to consider.
4748 /// @param source where the DIE designated by @p die_offset comes from.
4750 /// @param die_as_type if true, it means the DIE is to be considered
4753 /// @return the artifact found.
4754 type_or_decl_base_sptr
4755 lookup_artifact_from_die_offset(Dwarf_Off die_offset,
4757 bool die_as_type = false) const
// Pick the type or decl artifact map for @p source, then look
// @p die_offset up in it as is (no canonicalization happens here).
4759 const die_artefact_map_type& m =
4761 ? type_die_artefact_maps().get_container(source)
4762 : decl_die_artefact_maps().get_container(source);
4764 die_artefact_map_type::const_iterator i = m.find(die_offset);
4766 return type_or_decl_base_sptr();
4770 /// Get the language used to generate a given DIE.
4772 /// @param die the DIE to consider.
4774 /// @param lang the resulting language.
4776 /// @return true iff the language of the DIE was found.
4778 get_die_language(const Dwarf_Die *die, translation_unit::language &lang) const
// The language is read from the DW_AT_language attribute of the CU
// DIE that contains @p die.  Finding that CU DIE is asserted.
4781 ABG_ASSERT(dwarf_diecu(const_cast<Dwarf_Die*>(die), &cu_die, 0, 0));
4784 if (!die_unsigned_constant_attribute(&cu_die, DW_AT_language, l))
// Translate the DWARF language code into a translation_unit
// language.
4787 lang = dwarf_language_to_tu_language(l);
4791 /// Test if a given DIE originates from a program written in the C
4794 /// @param die the DIE to consider.
4796 /// @return true iff @p die originates from a program in the C
4799 die_is_in_c(const Dwarf_Die *die) const
// Start from LANG_UNKNOWN and let get_die_language() fill it in.
4801 translation_unit::language l = translation_unit::LANG_UNKNOWN;
4802 if (!get_die_language(die, l))
4804 return is_c_language(l);
4807 /// Test if a given DIE originates from a program written in the C++
4810 /// @param die the DIE to consider.
4812 /// @return true iff @p die originates from a program in the C++
4815 die_is_in_cplus_plus(const Dwarf_Die *die) const
// Start from LANG_UNKNOWN and let get_die_language() fill it in.
4817 translation_unit::language l = translation_unit::LANG_UNKNOWN;
4818 if (!get_die_language(die, l))
4820 return is_cplus_plus_language(l);
4823 /// Test if a given DIE originates from a program written either in
4826 /// @param die the DIE to consider.
4828 /// @return true iff @p die originates from a program written either in
4831 die_is_in_c_or_cplusplus(const Dwarf_Die *die) const
// Start from LANG_UNKNOWN and let get_die_language() fill it in.
4833 translation_unit::language l = translation_unit::LANG_UNKNOWN;
4834 if (!get_die_language(die, l))
4836 return (is_cplus_plus_language(l) || is_c_language(l));
4839 /// Check if we can assume the One Definition Rule[1] to be relevant
4840 /// for the current translation unit.
4842 /// [1]: https://en.wikipedia.org/wiki/One_Definition_Rule
4844 /// This returns what the language-based overload returns for the
4845 /// language of the current translation unit. In that case, it's relevant to assume that
4846 /// we use optimizations based on the ODR.
4848 odr_is_relevant() const
4849 {return odr_is_relevant(cur_transl_unit()->get_language());}
4851 /// Check if we can assume the One Definition Rule[1] to be relevant
4852 /// for a given language.
4854 /// [1]: https://en.wikipedia.org/wiki/One_Definition_Rule
4856 /// At the moment this returns true if the language considered
4857 /// is C++, Java or Ada.
///
/// @param l the language to consider.
///
/// @return true iff @p l is a C++, Java or Ada language.
4859 odr_is_relevant(translation_unit::language l) const
4861 return (is_cplus_plus_language(l)
4862 || is_java_language(l)
4863 || is_ada_language(l));
4866 /// Check if we can assume the One Definition Rule to be relevant
4867 /// for a DIE designated by its offset and its source.
///
4869 /// @param die_offset the offset of the DIE to consider.
///
/// @param source the source of the DIE denoted by @p die_offset; it
/// selects the Dwarf handle (dwarf_per_die_source()) in which the
/// offset is resolved.
///
4871 /// @return true if the ODR is relevant for the DIE found at @p
/// die_offset.
4873 odr_is_relevant(Dwarf_Off die_offset, die_source source) const
// NOTE(review): the declaration of the local 'die' is not visible in
// this excerpt.  The assertion below fails if dwarf_offdie() cannot
// resolve the offset.
4876 ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), die_offset, &die));
4877 return odr_is_relevant(&die);
4880 /// Check if we can assume the One Definition Rule to be relevant
4881 /// for a given DIE.
///
/// The language of @p die is queried with get_die_language().  If it
/// cannot be determined, the answer computed for the current
/// translation unit is used instead.
///
4883 /// @param die the DIE to consider.
///
4885 /// @return true if the ODR is relevant for @p die.
4887 odr_is_relevant(const Dwarf_Die *die) const
4889 translation_unit::language lang;
// 'lang' is only read below when get_die_language() succeeded.
4890 if (!get_die_language(die, lang))
4891 return odr_is_relevant();
4893 return odr_is_relevant(lang);
4896 /// Getter for the maps set that associates a decl DIE offset to an
/// artefact.
///
4899 /// @return the maps set that associates a decl DIE offset to an
/// artefact, as a mutable reference to the decl_die_artefact_maps_
/// member.
4901 die_source_dependant_container_set<die_artefact_map_type>&
4902 decl_die_artefact_maps()
4903 {return decl_die_artefact_maps_;}
4905 /// Getter for the maps set that associates a decl DIE offset to an
/// artefact.  This is the const overload.
///
4908 /// @return the maps set that associates a decl DIE offset to an
/// artefact, as a const reference to the decl_die_artefact_maps_
/// member.
4910 const die_source_dependant_container_set<die_artefact_map_type>&
4911 decl_die_artefact_maps() const
4912 {return decl_die_artefact_maps_;}
4914 /// Getter for the maps set that associates a type DIE offset to an
/// artefact.
///
4917 /// @return the maps set that associates a type DIE offset to an
/// artefact, as a mutable reference to the type_die_artefact_maps_
/// member.
4919 die_source_dependant_container_set<die_artefact_map_type>&
4920 type_die_artefact_maps()
4921 {return type_die_artefact_maps_;}
4923 /// Getter for the maps set that associates a type DIE offset to an
/// artefact.  This is the const overload.
///
4926 /// @return the maps set that associates a type DIE offset to an
/// artefact, as a const reference to the type_die_artefact_maps_
/// member.
4928 const die_source_dependant_container_set<die_artefact_map_type>&
4929 type_die_artefact_maps() const
4930 {return type_die_artefact_maps_;}
4932 /// Getter of the map that associates function type representations
4933 /// to function types, inside a translation unit.
///
4935 /// @return the map that associates function type representations
4936 /// to function types, inside a translation unit, as a mutable
/// reference to the per_tu_repr_to_fn_type_maps_ member.
4937 istring_fn_type_map_type&
4938 per_tu_repr_to_fn_type_maps()
4939 {return per_tu_repr_to_fn_type_maps_;}
4941 /// Getter of the map that associates function type representations
4942 /// to function types, inside a translation unit.  This is the const
/// overload.
///
4944 /// @return the map that associates function type representations
4945 /// to function types, inside a translation unit, as a const
/// reference to the per_tu_repr_to_fn_type_maps_ member.
4946 const istring_fn_type_map_type&
4947 per_tu_repr_to_fn_type_maps() const
4948 {return per_tu_repr_to_fn_type_maps_;}
4950 /// Associate the representation of a function type DIE to a given
4951 /// function type, inside the current translation unit.
///
/// The key stored in per_tu_repr_to_fn_type_maps() is the interned
/// string returned by get_die_pretty_type_representation() for @p
/// die; an existing entry for the same key is overwritten.
///
4953 /// @param die the DIE to associate to the function type, using its
/// representation.  Only DIEs accepted by die_is_function_type() are
/// considered.
///
4956 /// @param fn_type the function type to associate to @p die.
4958 associate_die_repr_to_fn_type_per_tu(const Dwarf_Die *die,
4959 const function_type_sptr &fn_type)
// NOTE(review): the statement guarded by this test is not visible in
// this excerpt -- presumably a plain 'return'; confirm.
4961 if (!die_is_function_type(die))
4964 interned_string repr =
4965 get_die_pretty_type_representation(die, /*where=*/0);
// An empty representation is treated as a programming error.
4966 ABG_ASSERT(!repr.empty());
4968 per_tu_repr_to_fn_type_maps()[repr]= fn_type;
4971 /// Lookup the function type associated to a given function type
4972 /// DIE, in the current translation unit.
///
4974 /// @param die the DIE of function type to consider.
///
4976 /// @return the @ref function_type_sptr associated to @p die, or nil
4977 /// if no function_type is associated to @p die, or if @p die is not
/// accepted by die_is_function_type().
4979 lookup_fn_type_from_die_repr_per_tu(const Dwarf_Die *die)
4981 if (!die_is_function_type(die))
4982 return function_type_sptr();
// NOTE(review): the lookup key is built with
// get_die_pretty_representation() whereas
// associate_die_repr_to_fn_type_per_tu() builds it with
// get_die_pretty_type_representation(); the lookup only hits if both
// yield the same interned string for a function type DIE -- confirm.
4984 interned_string repr =
4985 get_die_pretty_representation(die, /*where=*/0);
4986 ABG_ASSERT(!repr.empty());
4988 istring_fn_type_map_type::const_iterator i =
4989 per_tu_repr_to_fn_type_maps().find(repr);
4991 if (i == per_tu_repr_to_fn_type_maps().end())
4992 return function_type_sptr();
// NOTE(review): the final statement returning the found entry is not
// visible in this excerpt.
4997 /// Set the canonical DIE offset of a given DIE.
///
4999 /// @param canonical_dies the map that holds canonical DIE offsets,
/// keyed by DIE offset.
///
5001 /// @param die_offset the offset of the DIE to set the canonical DIE
/// for.
///
5004 /// @param canonical_die_offset the canonical DIE offset to
5005 /// associate to @p die_offset.  Any previous association for @p
/// die_offset is overwritten.
5007 set_canonical_die_offset(offset_offset_map_type &canonical_dies,
5008 Dwarf_Off die_offset,
5009 Dwarf_Off canonical_die_offset) const
5011 canonical_dies[die_offset] = canonical_die_offset;}
5013 /// Set the canonical DIE offset of a given DIE.
///
/// The association is recorded in the container for @p source of
/// either canonical_type_die_offsets_ or canonical_decl_die_offsets_.
///
5016 /// @param die_offset the offset of the DIE to set the canonical DIE
/// for.
///
5019 /// @param source the source of the DIE denoted by @p die_offset.
///
5021 /// @param canonical_die_offset the canonical DIE offset to
5022 /// associate to @p die_offset.
///
5024 /// @param die_as_type if true, it means that @p die_offset has to
5025 /// be considered as a type.
5027 set_canonical_die_offset(Dwarf_Off die_offset,
5029 Dwarf_Off canonical_die_offset,
5030 bool die_as_type) const
// NOTE(review): the condition of the conditional expression below is
// not visible in this excerpt -- presumably 'die_as_type'; confirm.
// The const_cast is what lets this const member function obtain a
// mutable container.
5032 offset_offset_map_type &canonical_dies =
5034 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
5035 get_container(source)
5036 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
5037 get_container(source);
5039 set_canonical_die_offset(canonical_dies,
5041 canonical_die_offset);
5044 /// Set the canonical DIE offset of a given DIE.
///
/// The source and the offset of @p die are computed with
/// get_die_source() and dwarf_dieoffset(), then the overload taking
/// an offset and a source is invoked.
///
5047 /// @param die the DIE to set the canonical DIE for.
///
5049 /// @param canonical_die_offset the canonical DIE offset to
5050 /// associate to @p die.
///
5052 /// @param die_as_type if true, it means that @p die has to be
5053 /// considered as a type.
5055 set_canonical_die_offset(const Dwarf_Die *die,
5056 Dwarf_Off canonical_die_offset,
5057 bool die_as_type) const
// NOTE(review): the declaration of the local 'source' is not visible
// in this excerpt.  Failing to determine the DIE source is treated as
// a programming error.
5060 ABG_ASSERT(get_die_source(die, source));
5062 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
5064 set_canonical_die_offset(die_offset, source,
5065 canonical_die_offset,
5069 /// Get the canonical DIE offset of a given DIE.
///
5071 /// @param canonical_dies the map that contains canonical DIE
/// offsets, keyed by DIE offset.
///
5073 /// @param die_offset the offset of the DIE to consider.
///
5075 /// @return the canonical of the DIE denoted by @p die_offset, or
5076 /// zero if no canonical DIE was found.
5078 get_canonical_die_offset(offset_offset_map_type &canonical_dies,
5079 Dwarf_Off die_offset) const
// find() is used rather than operator[] so that a miss does not
// insert an entry into the map.
5081 offset_offset_map_type::const_iterator it = canonical_dies.find(die_offset);
// NOTE(review): the two return statements (miss and hit) are not
// visible in this excerpt.
5082 if (it == canonical_dies.end())
5087 /// Get the canonical DIE offset of a given DIE.
///
5089 /// @param die_offset the offset of the DIE to consider.
///
5091 /// @param source the source of the DIE denoted by @p die_offset.
///
5093 /// @param die_as_type if true, it means that @p die_offset is to be
/// considered as a type.
///
5096 /// @return the canonical of the DIE denoted by @p die_offset, or
5097 /// zero if no canonical DIE was found.
5099 get_canonical_die_offset(Dwarf_Off die_offset,
5101 bool die_as_type) const
// NOTE(review): the condition of the conditional expression below is
// not visible in this excerpt -- presumably 'die_as_type'; confirm.
5103 offset_offset_map_type &canonical_dies =
5105 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
5106 get_container(source)
5107 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
5108 get_container(source);
5110 return get_canonical_die_offset(canonical_dies, die_offset);
5113 /// Associate a DIE (representing a type) to the type that it
/// represents.
///
/// The association is keyed by the offset of the canonical DIE of @p
/// die (as obtained from get_or_compute_canonical_die()), not by the
/// offset of @p die itself.
///
5116 /// @param die the DIE to consider.
///
5118 /// @param type the type to associate the DIE to.
///
5120 /// @param where_offset where in the DIE stream we logically are.
// NOTE(review): the remaining parameter line(s) and any early-exit
// test are not visible in this excerpt; 'where' used below is
// presumably the where-offset parameter.
5122 associate_die_to_type(const Dwarf_Die *die,
5123 type_base_sptr type,
5129 Dwarf_Die equiv_die;
5130 get_or_compute_canonical_die(die, equiv_die, where, /*die_as_type=*/true);
5132 die_artefact_map_type& m =
5133 type_die_artefact_maps().get_container(*this, &equiv_die);
5135 size_t die_offset = dwarf_dieoffset(&equiv_die);
5136 m[die_offset] = type;
5139 /// Lookup the type associated to a given DIE.
///
5141 /// Note that the DIE must have been associated to type by a
5142 /// previous invocation of the function
5143 /// read_context::associate_die_to_type().
///
/// If the artifact found for @p die is a function declaration, the
/// type of that function is returned.
///
5145 /// @param die the DIE to consider.
///
5147 /// @return the type associated to the DIE or NULL if no type is
5148 /// associated to the DIE.
5150 lookup_type_from_die(const Dwarf_Die* die) const
5152 type_or_decl_base_sptr artifact =
5153 lookup_artifact_from_die(die, /*die_as_type=*/true);
5154 if (function_decl_sptr fn = is_function_decl(artifact))
5155 return fn->get_type();
5156 return is_type(artifact);
5159 /// Lookup the type associated to a DIE at a given offset, from a
/// given source.
///
5162 /// Note that the DIE must have been associated to type by a
5163 /// previous invocation of the function
5164 /// read_context::associate_die_to_type().
///
/// Three maps are consulted: the type artefact map for @p source,
/// then the map of classes being constructed, then the map of
/// function types being constructed.
///
5166 /// @param die_offset the offset of the DIE to consider.
///
5168 /// @param source the source of the DIE to consider.
///
5170 /// @return the type associated to the DIE or NULL if no type is
5171 /// associated to the DIE.
5173 lookup_type_from_die_offset(size_t die_offset, die_source source) const
5175 type_base_sptr result;
5176 const die_artefact_map_type& m =
5177 type_die_artefact_maps().get_container(source);
5178 die_artefact_map_type::const_iterator i = m.find(die_offset);
// NOTE(review): the tests guarding each of the three lookups (and the
// scopes that let 'm' and 'i' be re-declared) are not visible in this
// excerpt; 'i' is presumably checked against m.end() before being
// dereferenced -- confirm.
5181 if (function_decl_sptr fn = is_function_decl(i->second))
5182 return fn->get_type();
5183 result = is_type(i->second);
5188 // Maybe we are looking for a class type being constructed?
5189 const die_class_or_union_map_type& m = die_wip_classes_map(source);
5190 die_class_or_union_map_type::const_iterator i = m.find(die_offset);
5198 // Maybe we are looking for a function type being constructed?
5199 const die_function_type_map_type& m =
5200 die_wip_function_types_map(source);
5201 die_function_type_map_type::const_iterator i = m.find(die_offset);
5210 /// Getter of a map that associates a die that represents a
5211 /// class/struct with the declaration of the class, while the class
5212 /// is being constructed.  This const overload forwards to the
/// non-const one.
///
5214 /// @param source where the DIE is from.
///
5216 /// @return the map that associates a DIE to the class that is being
/// built.
5218 const die_class_or_union_map_type&
5219 die_wip_classes_map(die_source source) const
5220 {return const_cast<read_context*>(this)->die_wip_classes_map(source);}
5222 /// Getter of a map that associates a die that represents a
5223 /// class/struct with the declaration of the class, while the class
5224 /// is being constructed.
///
/// One map is kept per DIE source; @p source selects which one is
/// returned.
///
5226 /// @param source where the DIE comes from.
///
5228 /// @return the map that associates a DIE to the class that is being
/// built.
5230 die_class_or_union_map_type&
5231 die_wip_classes_map(die_source source)
// NOTE(review): the 'switch' head and the statement of the
// PRIMARY_DEBUG_INFO_DIE_SOURCE case are not visible in this excerpt.
5235 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5237 case ALT_DEBUG_INFO_DIE_SOURCE:
5238 return alternate_die_wip_classes_map_;
5239 case TYPE_UNIT_DIE_SOURCE:
5240 return type_unit_die_wip_classes_map_;
// These two enumerators do not designate an actual DIE source.
5241 case NO_DEBUG_INFO_DIE_SOURCE:
5242 case NUMBER_OF_DIE_SOURCES:
5243 ABG_ASSERT_NOT_REACHED;
5245 return die_wip_classes_map_;
5248 /// Getter for a map that associates a die (that represents a
5249 /// function type) with a function type, while the function type is
5250 /// being constructed (WIP == work in progress).  This const overload
/// forwards to the non-const one.
///
5252 /// @param source where the DIE comes from.
///
5254 /// @return the map of wip function types.
5255 const die_function_type_map_type&
5256 die_wip_function_types_map(die_source source) const
5257 {return const_cast<read_context*>(this)->die_wip_function_types_map(source);}
5259 /// Getter for a map that associates a die (that represents a
5260 /// function type) with a function type, while the function type is
5261 /// being constructed (WIP == work in progress).
///
/// One map is kept per DIE source; @p source selects which one is
/// returned.
///
5263 /// @param source where DIEs of the map come from.
///
5265 /// @return the map of wip function types.
5266 die_function_type_map_type&
5267 die_wip_function_types_map(die_source source)
// NOTE(review): the 'switch' head and the statement of the
// PRIMARY_DEBUG_INFO_DIE_SOURCE case are not visible in this excerpt.
5271 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5273 case ALT_DEBUG_INFO_DIE_SOURCE:
5274 return alternate_die_wip_function_types_map_;
5275 case TYPE_UNIT_DIE_SOURCE:
5276 return type_unit_die_wip_function_types_map_;
// These two enumerators do not designate an actual DIE source.
5277 case NO_DEBUG_INFO_DIE_SOURCE:
5278 case NUMBER_OF_DIE_SOURCES:
5279 ABG_ASSERT_NOT_REACHED;
5281 return die_wip_function_types_map_;
5284 /// Getter for a map that associates a die with a function decl
5285 /// which has a linkage name but no elf symbol yet.
///
5287 /// This is to fixup function decls with linkage names, but with no
5288 /// link to their underlying elf symbol.  There are some DIEs like
5289 /// that in DWARF sometimes, especially when the compiler optimizes
5290 /// stuff aggressively.
///
/// @return a mutable reference to the
/// die_function_with_no_symbol_map_ member.
5291 die_function_decl_map_type&
5292 die_function_decl_with_no_symbol_map()
5293 {return die_function_with_no_symbol_map_;}
5295 /// Return true iff a given offset is for the DIE of a class that is
5296 /// being built, but that is not fully built yet.  WIP == "work in
/// progress".
///
5299 /// @param offset the DIE offset to consider.
///
5301 /// @param source where the DIEs of the map come from.
///
5303 /// @return true iff @p offset is the offset of the DIE of a class
5304 /// that is being currently built, i.e. iff it is a key of
/// die_wip_classes_map(source).
5306 is_wip_class_die_offset(Dwarf_Off offset, die_source source) const
5308 die_class_or_union_map_type::const_iterator i =
5309 die_wip_classes_map(source).find(offset);
5310 return (i != die_wip_classes_map(source).end());
5313 /// Return true iff a given offset is for the DIE of a function type
5314 /// that is being built at the moment, but is not fully built yet.
5315 /// WIP == work in progress.
///
5317 /// @param offset DIE offset to consider.
///
5319 /// @param source where the DIE comes from.
///
5321 /// @return true iff @p offset is the offset of the DIE of a
5322 /// function type that is being currently built, i.e. iff it is a key
/// of die_wip_function_types_map(source).
5324 is_wip_function_type_die_offset(Dwarf_Off offset, die_source source) const
5326 die_function_type_map_type::const_iterator i =
5327 die_wip_function_types_map(source).find(offset);
5328 return (i != die_wip_function_types_map(source).end());
5331 /// Getter for the map of declaration-only classes that are to be
5332 /// resolved to their definition classes by the end of the corpus
/// loading.  This is the const overload.
///
5335 /// @return a map of string -> vector of classes where the key is
5336 /// the fully qualified name of the class and the value is the
5337 /// vector of declaration-only class.
5338 const string_classes_map&
5339 declaration_only_classes() const
5340 {return decl_only_classes_map_;}
5342 /// Getter for the map of declaration-only classes that are to be
5343 /// resolved to their definition classes by the end of the corpus
/// loading.
///
5346 /// @return a map of string -> vector of classes where the key is
5347 /// the fully qualified name of the class and the value is the
5348 /// vector of declaration-only class.
// NOTE(review): the return-type line is not visible in this excerpt;
// callers in this file use the result as a mutable
// string_classes_map.
5350 declaration_only_classes()
5351 {return decl_only_classes_map_;}
5353 /// If a given class is a declaration-only class then stash it on
5354 /// the side so that at the end of the corpus reading we can resolve
5355 /// it to its definition.
///
/// The class is appended to the vector that
/// declaration_only_classes() keeps for its qualified name; nothing
/// is done if the class already has a definition attached.
///
5357 /// @param klass the class to consider.
5359 maybe_schedule_declaration_only_class_for_resolution(class_decl_sptr& klass)
5361 if (klass->get_is_declaration_only()
5362 && klass->get_definition_of_declaration() == 0)
5364 string qn = klass->get_qualified_name();
5365 string_classes_map::iterator record =
5366 declaration_only_classes().find(qn);
// Either create the vector for this name, or extend the existing
// one.  NOTE(review): the 'else' line between the two statements
// below is not visible in this excerpt.
5367 if (record == declaration_only_classes().end())
5368 declaration_only_classes()[qn].push_back(klass);
5370 record->second.push_back(klass);
5374 /// Test if a given declaration-only class has been scheduled for
5375 /// resolution to a defined class.
///
/// The test is by name: it checks that the qualified name of @p
/// klass is a key of declaration_only_classes(), not that this very
/// object was recorded.
///
5377 /// @param klass the class to consider for the test.
///
5379 /// @return true iff @p klass is a declaration-only class and if
5380 /// it's been scheduled for resolution to a defined class.
5382 is_decl_only_class_scheduled_for_resolution(class_decl_sptr& klass)
5384 if (klass->get_is_declaration_only())
5385 return (declaration_only_classes().find(klass->get_qualified_name())
5386 != declaration_only_classes().end());
// NOTE(review): the fall-through return for classes that are not
// declaration-only is not visible in this excerpt.
5391 /// Walk the declaration-only classes that have been found during
5392 /// the building of the corpus and resolve them to their definitions.
///
/// For each recorded class name, candidate definitions are looked up
/// in the current corpus with lookup_class_types() and indexed by
/// the absolute path of their translation unit.  A declaration is
/// then bound, via set_definition_of_declaration(), to the candidate
/// found in its own translation unit or, failing that, to the only
/// candidate when there is exactly one.  Names pushed to the local
/// 'resolved_classes' vector are erased from
/// declaration_only_classes() afterwards.
// NOTE(review): loop increments, braces, 'continue'/'break'
// statements and the tests guarding the diagnostics are not visible
// in this excerpt; the comments added below only describe what the
// visible statements do.
5394 resolve_declaration_only_classes()
5396 vector<string> resolved_classes;
5398 for (string_classes_map::iterator i =
5399 declaration_only_classes().begin();
5400 i != declaration_only_classes().end();
// First check whether any declaration recorded under this name still
// lacks a definition.
5403 bool to_resolve = false;
5404 for (classes_type::iterator j = i->second.begin();
5405 j != i->second.end();
5407 if ((*j)->get_is_declaration_only()
5408 && ((*j)->get_definition_of_declaration() == 0))
5413 resolved_classes.push_back(i->first);
5417 // Now, for each decl-only class that have the current name
5418 // 'i->first', let's try to poke at the fully defined class
5419 // that is defined in the same translation unit as the
// declaration.
//
5422 // If we find one class (defined in the TU of the declaration)
5423 // that defines the declaration, then the declaration can be
5424 // resolved to that class.
//
5426 // If no defining class is found in the TU of the declaration,
5427 // then there are possibly three cases to consider:
//
5429 // 1/ There is exactly one class that defines the
5430 // declaration and that class is defined in another TU. In
5431 // this case, the declaration is resolved to that
// definition.
//
5434 // 2/ There are more than one class that define that
5435 // declaration and none of them is defined in the TU of the
5436 // declaration. In this case, the declaration is left
// unresolved.
//
5439 // 3/ No class defines the declaration. In this case, the
5440 // declaration is left unresolved.
//
5442 // So get the classes that might define the current
5443 // declarations which name is i->first.
5444 const type_base_wptrs_type *classes =
5445 lookup_class_types(i->first, *current_corpus());
5449 unordered_map<string, class_decl_sptr> per_tu_class_map;
5450 for (type_base_wptrs_type::const_iterator c = classes->begin();
5451 c != classes->end();
5454 class_decl_sptr klass = is_class_type(type_base_sptr(*c));
5457 klass = is_class_type(look_through_decl_only_class(klass));
5458 if (klass->get_is_declaration_only())
5461 string tu_path = klass->get_translation_unit()->get_absolute_path();
5462 if (tu_path.empty())
5465 // Build a map that associates the translation unit path
5466 // to the class (that potentially defines the declarations
5467 // that we consider) that are defined in that translation unit.
5468 per_tu_class_map[tu_path] = klass;
5471 if (!per_tu_class_map.empty())
5473 // Walk the declarations to resolve and resolve them
5474 // either to the definitions that are in the same TU as
5475 // the declaration, or to the definition found elsewhere,
5476 // if there is only one such definition.
5477 for (classes_type::iterator j = i->second.begin();
5478 j != i->second.end();
5481 if ((*j)->get_is_declaration_only()
5482 && ((*j)->get_definition_of_declaration() == 0))
5485 (*j)->get_translation_unit()->get_absolute_path();
5486 unordered_map<string, class_decl_sptr>::const_iterator e =
5487 per_tu_class_map.find(tu_path);
5488 if (e != per_tu_class_map.end())
5489 (*j)->set_definition_of_declaration(e->second);
5490 else if (per_tu_class_map.size() == 1)
5491 (*j)->set_definition_of_declaration
5492 (per_tu_class_map.begin()->second);
5495 resolved_classes.push_back(i->first);
// Diagnostics: report how many names were resolved, drop them from
// the map, then list what is left.
5499 size_t num_decl_only_classes = declaration_only_classes().size(),
5500 num_resolved = resolved_classes.size();
5502 cerr << "resolved " << num_resolved
5503 << " class declarations out of "
5504 << num_decl_only_classes
5507 for (vector<string>::const_iterator i = resolved_classes.begin();
5508 i != resolved_classes.end();
5510 declaration_only_classes().erase(*i);
5512 for (string_classes_map::iterator i = declaration_only_classes().begin();
5513 i != declaration_only_classes().end();
// The header line is emitted once, on the first remaining entry.
5518 if (i == declaration_only_classes().begin())
5519 cerr << "Here are the "
5520 << num_decl_only_classes - num_resolved
5521 << " unresolved class declarations:\n";
5523 cerr << " " << i->first << "\n";
5528 /// Some functions described by DWARF may have their linkage name
5529 /// set, but no link to their actual underlying elf symbol.  When
5530 /// these are virtual member functions, comparing the enclosing type
5531 /// against another one which has its underlying symbol properly set
5532 /// might lead to spurious type changes.
///
5534 /// If the corpus contains a symbol with the same name as the
5535 /// linkage name of the function, then set up the link between the
5536 /// function and its underlying symbol.
///
5538 /// Note that for the moment, only virtual member functions are
5539 /// fixed up like this.  This is because they really are the only
5540 /// kind of functions that can affect types (in spurious ways).
///
/// The map returned by die_function_decl_with_no_symbol_map() is
/// emptied once all its entries have been visited.
5542 fixup_functions_with_no_symbols()
// NOTE(review): the test on 'corp' and the tests guarding the
// diagnostics are not visible in this excerpt.
5544 corpus_sptr corp = current_corpus();
5548 die_function_decl_map_type &fns_with_no_symbol =
5549 die_function_decl_with_no_symbol_map();
5552 cerr << fns_with_no_symbol.size()
5553 << " functions to fixup, potentially\n";
5555 for (die_function_decl_map_type::iterator i = fns_with_no_symbol.begin();
5556 i != fns_with_no_symbol.end();
// Only functions whose linkage name matches a function symbol of the
// corpus are fixed up.
5558 if (elf_symbol_sptr sym =
5559 corp->lookup_function_symbol(i->second->get_linkage_name()))
// Every function recorded in the map is expected to be a virtual
// member function.
5561 ABG_ASSERT(is_member_function(i->second));
5562 ABG_ASSERT(get_member_function_is_virtual(i->second));
5563 i->second->set_symbol(sym);
5565 cerr << "fixed up '"
5566 << i->second->get_pretty_representation()
5567 << "' with symbol '"
5568 << sym->get_id_string()
5572 fns_with_no_symbol.clear();
5575 /// Return a reference to the vector containing the offsets of the
5576 /// types that need late canonicalizing.
///
/// One vector is kept per DIE source; @p source selects which one is
/// returned.
///
5578 /// @param source where the DIEs referred to by the offsets contained
5579 /// in the vector to return are from.
// NOTE(review): the return-type line, the 'switch' head and the
// statement of the PRIMARY_DEBUG_INFO_DIE_SOURCE case are not visible
// in this excerpt.
5581 types_to_canonicalize(die_source source)
5585 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5587 case ALT_DEBUG_INFO_DIE_SOURCE:
5588 return alt_types_to_canonicalize_;
5589 case TYPE_UNIT_DIE_SOURCE:
5590 return type_unit_types_to_canonicalize_;
// These two enumerators do not designate an actual DIE source.
5591 case NO_DEBUG_INFO_DIE_SOURCE:
5592 case NUMBER_OF_DIE_SOURCES:
5593 ABG_ASSERT_NOT_REACHED;
5595 return types_to_canonicalize_;
5598 /// Return a reference to the vector containing the offsets of the
5599 /// types that need late canonicalizing.  This const overload
/// forwards to the non-const one.
///
5601 /// @param source where the DIEs referred to by the offset in the
5602 /// returned vector are from.
///
/// @return a const reference to the vector of DIE offsets for @p
/// source.
5603 const vector<Dwarf_Off>&
5604 types_to_canonicalize(die_source source) const
5605 {return const_cast<read_context*>(this)->types_to_canonicalize(source);}
5607 /// Return a reference to the vector containing the types created
5608 /// during the binary analysis but that are not tied to a given
/// DIE.
///
5611 /// @return reference to the vector containing the types created
5612 /// during the binary analysis but that are not tied to a given
/// DIE.
5614 const vector<type_base_sptr>&
5615 extra_types_to_canonicalize() const
5616 {return extra_types_to_canonicalize_;}
5618 /// Clear the containers holding types to canonicalize.
///
/// This empties the three per-DIE-source vectors of offsets as well
/// as the vector of types that are not tied to a DIE.
5620 clear_types_to_canonicalize()
5622 types_to_canonicalize_.clear();
5623 alt_types_to_canonicalize_.clear();
5624 type_unit_types_to_canonicalize_.clear();
5625 extra_types_to_canonicalize_.clear();
5628 /// Put the offset of a DIE representing a type on a side vector so
5629 /// that when the reading of the debug info of the current
5630 /// translation unit is done, we can get back to the type DIE and
5631 /// from there, to the type it's associated to, and then
5632 /// canonicalize it.  This is what we call late canonicalization.
///
/// The offset that gets queued is the one of the canonical DIE of @p
/// die, and that canonical DIE must already be associated to a type.
///
5634 /// @param die the type DIE to schedule for late type
5635 /// canonicalization.
5637 schedule_type_for_late_canonicalization(const Dwarf_Die *die)
// NOTE(review): the declarations of the locals 'o' and 'source', and
// one argument line of the get_canonical_die() call, are not visible
// in this excerpt.
5642 Dwarf_Die equiv_die;
5643 ABG_ASSERT(get_canonical_die(die, equiv_die,
5645 /*die_as_type=*/true));
5647 ABG_ASSERT(get_die_source(&equiv_die, source));
5648 o = dwarf_dieoffset(&equiv_die);
// NOTE(review): the container is selected from @p die whereas the key
// is the offset of its canonical DIE; this presumably relies on both
// DIEs coming from the same DIE source -- confirm.
5650 const die_artefact_map_type& m =
5651 type_die_artefact_maps().get_container(*this, die);
5653 die_artefact_map_type::const_iterator i = m.find(o);
5654 ABG_ASSERT(i != m.end());
5656 // Then really do the scheduling.
5657 types_to_canonicalize(source).push_back(o);
5660 /// Types that were created but not tied to a particular DIE, must
5661 /// be scheduled for late canonicalization using this method.
///
/// The type is appended to the vector that
/// extra_types_to_canonicalize() exposes.
///
5663 /// @param t the type to schedule for late canonicalization.
5665 schedule_type_for_late_canonicalization(const type_base_sptr &t)
5667 extra_types_to_canonicalize_.push_back(t);
5670 /// Canonicalize types which DIE offsets are stored in vectors on
5671 /// the side.  This is a sub-routine of
5672 /// read_context::perform_late_type_canonicalizing().
///
/// Two sets of types are walked: the ones whose DIE offsets were
/// queued in types_to_canonicalize(source), then the ones queued in
/// extra_types_to_canonicalize(), which are not tied to a DIE.
///
5674 /// @param source where the DIE of the types to canonicalize are
/// from.
// NOTE(review): the tests guarding the diagnostics, the actual
// canonicalization calls, the timer start/stop of 'cn_timer' and the
// loop increments are not visible in this excerpt.
5677 canonicalize_types_scheduled(die_source source)
5679 tools_utils::timer cn_timer;
5682 cerr << "going to canonicalize types";
5683 corpus_sptr c = current_corpus();
5685 cerr << " of corpus " << current_corpus()->get_path();
5686 cerr << " (DIEs source: " << source << ")\n";
5690 if (!types_to_canonicalize(source).empty())
5692 tools_utils::timer single_type_cn_timer;
5693 size_t total = types_to_canonicalize(source).size();
5695 cerr << total << " types to canonicalize\n";
5696 for (size_t i = 0; i < total; ++i)
// Each queued offset is mapped back to the type associated to it.
5698 Dwarf_Off element = types_to_canonicalize(source)[i];
5700 lookup_type_from_die_offset(element, source);
5704 cerr << "canonicalizing type "
5705 << get_pretty_representation(t, false)
5706 << " [" << i + 1 << "/" << total << "]";
5707 if (corpus_sptr c = current_corpus())
5708 cerr << "@" << c->get_path();
5710 single_type_cn_timer.start();
5716 single_type_cn_timer.stop();
5717 cerr << ":" <<single_type_cn_timer << "\n";
5721 // Now canonicalize types that were created but not tied to
// any DIE.
5723 if (!extra_types_to_canonicalize().empty())
5725 tools_utils::timer single_type_cn_timer;
5726 size_t total = extra_types_to_canonicalize().size();
5728 cerr << total << " extra types to canonicalize\n";
5730 for (vector<type_base_sptr>::const_iterator it =
5731 extra_types_to_canonicalize().begin();
5732 it != extra_types_to_canonicalize().end();
// NOTE(review): this progress message prints 'i' whereas the loop
// above prints 'i + 1'; the declaration and increment of 'i' for
// this loop are not visible in this excerpt -- confirm the counter
// is 1-based here.
5737 cerr << "canonicalizing extra type "
5738 << get_pretty_representation(*it, false)
5739 << " [" << i << "/" << total << "]";
5740 if (corpus_sptr c = current_corpus())
5741 cerr << "@" << c->get_path();
5743 single_type_cn_timer.start();
5748 single_type_cn_timer.stop();
5750 << single_type_cn_timer
5760 cerr << "finished canonicalizing types";
5761 corpus_sptr c = current_corpus();
5763 cerr << " of corpus " << current_corpus()->get_path();
5764 cerr << " (DIEs source: "
5771 /// Compute the number of canonicalized and missed types in the late
5772 /// canonicalization phase.
///
/// Every offset queued in types_to_canonicalize(source) is mapped
/// back to its type, which is then tested with get_canonical_type().
///
5774 /// @param source where the DIEs of the canonicalized types are
/// from.
///
5777 /// @param canonicalized the number of types that got canonicalized
5778 /// is added to the value already present in this parameter.
///
5780 /// @param missed the number of types scheduled for late
5781 /// canonicalization and which couldn't be canonicalized (for a
5782 /// reason) is added to the value already present in this parameter.
5784 add_late_canonicalized_types_stats(die_source source,
5785 size_t& canonicalized,
5786 size_t& missed) const
5788 for (vector<Dwarf_Off>::const_iterator i =
5789 types_to_canonicalize(source).begin();
5790 i != types_to_canonicalize(source).end();
// NOTE(review): 't' is dereferenced with no null check visible in
// this excerpt; lookup_type_from_die_offset() is documented to
// return NULL when no type is associated to the DIE -- confirm every
// queued offset is guaranteed to resolve.  The two counter increments
// are not visible here either.
5793 type_base_sptr t = lookup_type_from_die_offset(*i, source);
5794 if (t->get_canonical_type())
5801 /// Compute the number of canonicalized and missed types in the late
5802 /// canonicalization phase.
///
/// The per-source overload is invoked for every DIE source from
/// PRIMARY_DEBUG_INFO_DIE_SOURCE up to, but excluding,
/// NUMBER_OF_DIE_SOURCES.
///
5804 /// @param canonicalized the number of types that got canonicalized
5805 /// is added to the value already present in this parameter.
///
5807 /// @param missed the number of types scheduled for late
5808 /// canonicalization and which couldn't be canonicalized (for a
5809 /// reason) is added to the value already present in this parameter.
5811 add_late_canonicalized_types_stats(size_t& canonicalized,
5812 size_t& missed) const
// NOTE(review): the loop increment line is not visible in this
// excerpt.
5814 for (die_source source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
5815 source < NUMBER_OF_DIE_SOURCES;
5817 add_late_canonicalized_types_stats(source, canonicalized, missed);
5820 // Look at the types that need to be canonicalized after the
5821 // translation unit has been constructed and canonicalize them.
//
// canonicalize_types_scheduled() is invoked for every DIE source from
// PRIMARY_DEBUG_INFO_DIE_SOURCE up to, but excluding,
// NUMBER_OF_DIE_SOURCES.  Statistics about canonicalized and missed
// types are then computed and printed to cerr.
5823 perform_late_type_canonicalizing()
5825 for (die_source source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
5826 source < NUMBER_OF_DIE_SOURCES;
5828 canonicalize_types_scheduled(source);
// NOTE(review): the test guarding the statistics output is not
// visible in this excerpt.
5832 size_t num_canonicalized = 0, num_missed = 0, total = 0;
5833 add_late_canonicalized_types_stats(num_canonicalized,
5835 total = num_canonicalized + num_missed;
// NOTE(review): 'total' is used as a divisor below and no test
// against zero is visible in this excerpt; it is zero when no type
// was scheduled for late canonicalization -- confirm a guard exists.
5839 cerr << " # late canonicalized types: "
5840 << num_canonicalized
5841 << " (" << num_canonicalized * 100 / total << "%)\n"
5842 << " # missed canonicalization opportunities: "
5844 << " (" << num_missed * 100 / total << "%)\n";
// NOTE(review): the doc comments and the function-name lines of the
// two getters below are not visible in this excerpt.  Both bodies
// return the die_tu_map_ member; the first one through a const
// reference to die_tu_map_type.
5849 const die_tu_map_type&
5851 {return die_tu_map_;}
// Presumably the non-const counterpart of the getter above -- confirm.
5855 {return die_tu_map_;}
5857 /// Getter for the map that associates a translation unit DIE to the
5858 /// vector of imported unit points that it contains.  This const
/// overload forwards to the non-const one.
///
5860 /// @param source where the DIEs are from.
///
5862 /// @return the map.
5863 const tu_die_imported_unit_points_map_type&
5864 tu_die_imported_unit_points_map(die_source source) const
5865 {return const_cast<read_context*>(this)->tu_die_imported_unit_points_map(source);}
5867 /// Getter for the map that associates a translation unit DIE to the
5868 /// vector of imported unit points that it contains.
///
/// One map is kept per DIE source; @p source selects which one is
/// returned.
///
5870 /// @param source where the DIEs are from.
///
5872 /// @return the map.
5873 tu_die_imported_unit_points_map_type&
5874 tu_die_imported_unit_points_map(die_source source)
// NOTE(review): the 'switch' head and the statement of the
// PRIMARY_DEBUG_INFO_DIE_SOURCE case are not visible in this excerpt.
5878 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5880 case ALT_DEBUG_INFO_DIE_SOURCE:
5881 return alt_tu_die_imported_unit_points_map_;
5882 case TYPE_UNIT_DIE_SOURCE:
5883 return type_units_tu_die_imported_unit_points_map_;
5884 case NO_DEBUG_INFO_DIE_SOURCE:
5885 case NUMBER_OF_DIE_SOURCES:
5886 // We cannot reach this point.
5887 ABG_ASSERT_NOT_REACHED;
5889 return tu_die_imported_unit_points_map_;
5892 /// Getter of the current corpus being constructed.  This is the
/// const overload.
///
5894 /// @return the current corpus, i.e. the cur_corpus_ member.
// NOTE(review): the return-type line is not visible in this excerpt.
5896 current_corpus() const
5897 {return cur_corpus_;}
5899 /// Getter of the current corpus being constructed.
///
5901 /// @return the current corpus, i.e. the cur_corpus_ member.
// NOTE(review): the return-type and function-name lines are not
// visible in this excerpt; this is presumably the non-const overload
// of current_corpus() -- confirm.
5904 {return cur_corpus_;}
5906 /// Setter of the current corpus being constructed.
///
5908 /// @param c the new corpus.
// NOTE(review): the return-type line and the whole body of this
// setter are not visible in this excerpt.
5910 current_corpus(const corpus_sptr& c)
5916 /// Reset the current corpus being constructed.
///
5918 /// This resets the cur_corpus_ smart pointer, so the reader no
/// longer references the corpus that was being constructed.
5920 reset_current_corpus()
5921 {cur_corpus_.reset();}
5923 /// Getter of the current corpus group being constructed.  This is
/// the const overload.
///
5925 /// @return the current corpus group being constructed, if any, or
/// nil.
5927 const corpus_group_sptr
5928 current_corpus_group() const
5929 {return cur_corpus_group_;}
5931 /// Getter of the current corpus group being constructed.
///
5933 /// @return the current corpus group being constructed, if any, or
/// nil.
// NOTE(review): the return-type line is not visible in this excerpt.
5936 current_corpus_group()
5937 {return cur_corpus_group_;}
5939 /// Setter of the current corpus group being constructed.
///
5941 /// @param g the new corpus group.
// NOTE(review): the return-type line and any statement preceding the
// assignment below are not visible in this excerpt.
5943 current_corpus_group(const corpus_group_sptr& g)
5946 cur_corpus_group_ = g;
5949 /// Test if there is a corpus group being built.
///
5951 /// @return true if there is a corpus group being built, i.e. if
/// cur_corpus_group_ is non-nil; false otherwise.
5953 has_corpus_group() const
5954 {return bool(cur_corpus_group_);}
5956 /// Return the main corpus from the current corpus group, if any.
///
5958 /// @return the main corpus of the current corpus group (as reported
5959 /// by its get_main_corpus()), if any; nil if no corpus group is
/// being constructed.
5961 main_corpus_from_current_group()
5963 if (cur_corpus_group_)
5964 return cur_corpus_group_->get_main_corpus();
5965 return corpus_sptr();
5968 /// Return the main corpus from the current corpus group, if any.
/// This const overload forwards to the non-const one.
///
5970 /// @return the main corpus of the current corpus group, if any, nil
5971 /// if no corpus group is being constructed.
5973 main_corpus_from_current_group() const
5974 {return const_cast<read_context*>(this)->main_corpus_from_current_group();}
5976 /// Test if the current corpus being built is the main corpus of the
5977 /// current corpus group.
5979 /// @return return true iff the current corpus being built is the
5980 /// main corpus of the current corpus group.
5982 current_corpus_is_main_corpus_from_current_group() const
5984 corpus_sptr main_corpus = main_corpus_from_current_group();
5986 if (main_corpus && main_corpus.get() == cur_corpus_.get())
5992 /// Return true if the current corpus is part of a corpus group
5993 /// being built and if it's not the main corpus of the group.
5995 /// For instance, this would return true if we are loading a linux
5996 /// kernel *module* that is part of the current corpus group that is
5997 /// being built. In this case, it means we should re-use types
5998 /// coming from the "vmlinux" binary that is the main corpus of the
6001 /// @return the corpus group the current corpus belongs to, if the
6002 /// current corpus is part of a corpus group being built. Nil otherwise.
6004 should_reuse_type_from_corpus_group() const
6006 if (has_corpus_group() && is_c_language(cur_transl_unit()->get_language()))
6007 if (corpus_sptr main_corpus = main_corpus_from_current_group())
6008 if (!current_corpus_is_main_corpus_from_current_group())
6009 return current_corpus_group();
6011 return corpus_sptr();
6014 /// Get the map that associates each DIE to its parent DIE. This is
6015 /// for DIEs coming from the main debug info sections.
6017 /// @param source where the DIEs in the map come from.
6019 /// @return the DIE -> parent map.
6020 const offset_offset_map_type&
6021 die_parent_map(die_source source) const
6022 {return const_cast<read_context*>(this)->die_parent_map(source);}
6024 /// Get the map that associates each DIE to its parent DIE. This is
6025 /// for DIEs coming from the main debug info sections.
6027 /// @param source where the DIEs in the map come from.
6029 /// @return the DIE -> parent map.
6030 offset_offset_map_type&
6031 die_parent_map(die_source source)
6035 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
6037 case ALT_DEBUG_INFO_DIE_SOURCE:
6038 return alternate_die_parent_map_;
6039 case TYPE_UNIT_DIE_SOURCE:
6040 return type_section_die_parent_map();
6041 case NO_DEBUG_INFO_DIE_SOURCE:
6042 case NUMBER_OF_DIE_SOURCES:
6043 ABG_ASSERT_NOT_REACHED;
6045 return primary_die_parent_map_;
6048 const offset_offset_map_type&
6049 type_section_die_parent_map() const
6050 {return type_section_die_parent_map_;}
6052 offset_offset_map_type&
6053 type_section_die_parent_map()
6054 {return type_section_die_parent_map_;}
6056 /// Getter of the current translation unit.
6058 /// @return the current translation unit being constructed.
6059 const translation_unit_sptr&
6060 cur_transl_unit() const
6063 /// Getter of the current translation unit.
6065 /// @return the current translation unit being constructed.
6066 translation_unit_sptr&
6070 /// Setter of the current translation unit.
6072 /// @param tu the current translation unit being constructed.
6074 cur_transl_unit(translation_unit_sptr tu)
6080 /// Return the global scope of the current translation unit.
6082 /// @return the global scope of the current translation unit.
6083 const scope_decl_sptr&
6084 global_scope() const
6085 {return cur_transl_unit()->get_global_scope();}
6087 /// Return a scope that is nil.
6089 /// @return a scope that is nil.
6090 const scope_decl_sptr&
6092 {return nil_scope_;}
6094 const scope_stack_type&
6096 {return scope_stack_;}
6100 {return scope_stack_;}
6105 if (scope_stack().empty())
6107 if (cur_transl_unit())
6108 scope_stack().push(cur_transl_unit()->get_global_scope().get());
6110 return scope_stack().top();
6113 list<var_decl_sptr>&
6114 var_decls_to_re_add_to_tree()
6115 {return var_decls_to_add_;}
6117 /// Return the type of the current elf file.
6119 /// @return the type of the current elf file.
6123 return elf_file_type(elf_handle());
6126 /// The section containing the symbol table from the current ELF
6129 /// Note that after it's first invocation, this function caches the
6130 /// symbol table that it found. Subsequent invocations just return
6131 /// the cached symbol table section.
6133 /// @return the symbol table section if found
6135 find_symbol_table_section() const
6137 if (!symtab_section_)
6138 dwarf_reader::find_symbol_table_section(elf_handle(),
6139 const_cast<read_context*>(this)->symtab_section_);
6140 return symtab_section_;
6143 /// Return the "Official Procedure descriptors section." This
6144 /// section is named .opd, and is usually present only on PPC64
6147 /// @return the .opd section, if found. Return nil otherwise.
6149 find_opd_section() const
6152 const_cast<read_context*>(this)->opd_section_=
6153 find_section(elf_handle(), ".opd", SHT_PROGBITS);
6154 return opd_section_;
6157 /// Return the __ksymtab section of a linux kernel ELF file (either
6158 /// a vmlinux binary or a kernel module).
6160 /// @return the __ksymtab section if found, nil otherwise.
6162 find_ksymtab_section() const
6164 if (!ksymtab_section_)
6165 const_cast<read_context*>(this)->ksymtab_section_ =
6166 find_section(elf_handle(), "__ksymtab", SHT_PROGBITS);
6167 return ksymtab_section_;
6170 /// Return the .rel{a,}__ksymtab section of a linux kernel ELF file (either
6171 /// a vmlinux binary or a kernel module).
6173 /// @return the .rel{a,}__ksymtab section if found, nil otherwise.
6175 find_ksymtab_reloc_section() const
6177 if (!ksymtab_reloc_section_)
6179 Elf_Scn *sec = find_section(elf_handle(), ".rela__ksymtab", SHT_RELA);
6181 sec = find_section(elf_handle(), ".rel__ksymtab", SHT_REL);
6182 const_cast<read_context*>(this)->ksymtab_reloc_section_ = sec;
6184 return ksymtab_reloc_section_;
6187 /// Return the __ksymtab_gpl section of a linux kernel ELF file
6188 /// (either a vmlinux binary or a kernel module).
6190 /// @return the __ksymtab_gpl section if found, nil otherwise.
6192 find_ksymtab_gpl_section() const
6194 if (!ksymtab_gpl_section_)
6195 const_cast<read_context*>(this)->ksymtab_gpl_section_ =
6196 find_section(elf_handle(), "__ksymtab_gpl", SHT_PROGBITS);
6197 return ksymtab_gpl_section_;
6200 /// Return the .rel{a,}__ksymtab_gpl section of a linux kernel ELF file
6201 /// (either a vmlinux binary or a kernel module).
6203 /// @return the .rel{a,}__ksymtab_gpl section if found, nil otherwise.
6205 find_ksymtab_gpl_reloc_section() const
6207 if (!ksymtab_gpl_reloc_section_)
6209 Elf_Scn *sec = find_section(elf_handle(), ".rela__ksymtab_gpl", SHT_RELA);
6211 sec = find_section(elf_handle(), ".rel__ksymtab_gpl", SHT_REL);
6212 const_cast<read_context*>(this)->ksymtab_gpl_reloc_section_ = sec;
6214 return ksymtab_gpl_reloc_section_;
6217 /// Return the __ksymtab_strings section of a linux kernel ELF file
6218 /// (either a vmlinux binary or a kernel module).
6220 /// @return the __ksymtab_strings section if found, nil otherwise.
6222 find_ksymtab_strings_section() const
6224 if (!ksymtab_strings_section_)
6225 const_cast<read_context*>(this)->ksymtab_strings_section_ =
6226 dwarf_reader::find_ksymtab_strings_section(elf_handle());
6227 return ksymtab_strings_section_;
6230 /// Return either a __ksymtab or a __ksymtab_gpl section, in case
6231 /// only the __ksymtab_gpl exists.
6233 /// @return the __ksymtab section if it exists, or the
6234 /// __ksymtab_gpl; or NULL if neither is found.
6236 find_any_ksymtab_section() const
6238 Elf_Scn *result = find_ksymtab_section();
6240 result = find_ksymtab_gpl_section();
6244 /// Return either a .rel{a,}__ksymtab or a .rel{a,}__ksymtab_gpl section
6246 /// @return the .rel{a,}__ksymtab section if it exists, or the
6247 /// .rel{a,}__ksymtab_gpl; or NULL if neither is found.
6249 find_any_ksymtab_reloc_section() const
6251 Elf_Scn *result = find_ksymtab_reloc_section();
6253 result = find_ksymtab_gpl_reloc_section();
6257 /// Return the SHT_GNU_versym, SHT_GNU_verdef and SHT_GNU_verneed
6258 /// sections that are involved in symbol versionning.
6260 /// @param versym_section the SHT_GNU_versym section found.
6262 /// @param verdef_section the SHT_GNU_verdef section found.
6264 /// @param verneed_section the SHT_GNU_verneed section found.
6266 /// @return true iff the sections where found.
6268 get_symbol_versionning_sections(Elf_Scn*& versym_section,
6269 Elf_Scn*& verdef_section,
6270 Elf_Scn*& verneed_section)
6272 if (!symbol_versionning_sections_loaded_)
6274 symbol_versionning_sections_found_ =
6275 dwarf_reader::get_symbol_versionning_sections(elf_handle(),
6279 symbol_versionning_sections_loaded_ = true;
6282 versym_section = versym_section_;
6283 verdef_section = verdef_section_;
6284 verneed_section = verneed_section_;
6285 return symbol_versionning_sections_found_;
6288 /// Return the version for a symbol that is at a given index in its
6289 /// SHT_SYMTAB section.
6291 /// The first invocation of this function caches the results and
6292 /// subsequent invocations just return the cached results.
6294 /// @param symbol_index the index of the symbol to consider.
6296 /// @param get_def_version if this is true, it means that that we want
6297 /// the version for a defined symbol; in that case, the version is
6298 /// looked for in a section of type SHT_GNU_verdef. Otherwise, if
6299 /// this parameter is false, this means that we want the version for
6300 /// an undefined symbol; in that case, the version is the needed one
6301 /// for the symbol to be resolved; so the version is looked fo in a
6302 /// section of type SHT_GNU_verneed.
6304 /// @param version the version found for symbol at @p symbol_index.
6306 /// @return true iff a version was found for symbol at index @p
6309 get_version_for_symbol(size_t symbol_index,
6310 bool get_def_version,
6311 elf_symbol::version& version)
6313 Elf_Scn *versym_section = NULL,
6314 *verdef_section = NULL,
6315 *verneed_section = NULL;
6317 if (!get_symbol_versionning_sections(versym_section,
6322 GElf_Versym versym_mem;
6323 Elf_Data* versym_data = (versym_section)
6324 ? elf_getdata(versym_section, NULL)
6326 GElf_Versym* versym = (versym_data)
6327 ? gelf_getversym(versym_data, symbol_index, &versym_mem)
6330 if (versym == 0 || *versym <= 1)
6331 // I got these value from the code of readelf.c in elfutils.
6332 // Apparently, if the symbol version entry has these values, the
6333 // symbol must be discarded. This is not documented in the
6334 // official specification.
6337 if (get_def_version)
6339 if (*versym == 0x8001)
6340 // I got this value from the code of readelf.c in elfutils
6341 // too. It's not really documented in the official
6346 && get_version_definition_for_versym(elf_handle(), versym,
6347 verdef_section, version))
6353 && get_version_needed_for_versym(elf_handle(), versym,
6354 verneed_section, version))
6361 /// Look into the symbol tables of the underlying elf file and see
6362 /// if we find a given symbol.
6364 /// @param symbol_name the name of the symbol to look for.
6366 /// @param demangle if true, demangle the symbols found in the symbol
6369 /// @param syms the vector of symbols with the name @p symbol_name
6370 /// that were found.
6372 /// @return true iff the symbol was found.
6374 lookup_symbol_from_elf(const string& symbol_name,
6376 vector<elf_symbol_sptr>& syms) const
6378 return dwarf_reader::lookup_symbol_from_elf(env(),
6385 /// Lookup an elf symbol, referred to by its index, from the .symtab
6388 /// The resulting symbol returned is an instance of a GElf_Sym, from
6389 /// the libelf library.
6391 /// @param symbol_index the index of the symbol to look up.
6393 /// @param elf_sym out parameter. This is set to the resulting ELF
6394 /// symbol iff the function returns TRUE, meaning the symbol was
6397 /// @return TRUE iff the symbol was found.
6399 lookup_native_elf_symbol_from_index(size_t symbol_index, GElf_Sym &elf_sym)
6401 Elf_Scn* symtab_section = find_symbol_table_section();
6402 if (!symtab_section)
6405 Elf_Data* symtab = elf_getdata(symtab_section, 0);
6408 if (!gelf_getsym(symtab, symbol_index, &elf_sym))
6414 /// Given the index of a symbol into the symbol table of an ELF
6415 /// file, look the symbol up, build an instace of @ref elf_symbol
6418 /// @param symbol_index the index of the symbol into the symbol
6419 /// table of the current elf file.
6421 /// @return the elf symbol found or nil if none was found.
6423 lookup_elf_symbol_from_index(size_t symbol_index)
6426 elf_symbol_sptr result =
6427 lookup_elf_symbol_from_index(symbol_index, s);
6431 /// Lookup an ELF symbol given its index into the .symtab section.
6433 /// This function returns both the native symbol (from libelf) and
6434 /// the @p abigail::ir::elf_symbol instance, which is the
6435 /// libabigail-specific representation of the symbol.
6437 /// @param symbol_index the index of the symbol to look for.
6439 /// @param native_sym output parameter. This is set to the native
6440 /// ELF symbol found iff the function returns a non-nil value.
6442 /// @return an instance of libabigail::ir::elf_symbol representing
6443 /// the ELF symbol found, iff one was found. Otherwise, returns
6446 lookup_elf_symbol_from_index(size_t symbol_index,
6447 GElf_Sym &native_sym)
6449 if (!lookup_native_elf_symbol_from_index(symbol_index, native_sym))
6450 return elf_symbol_sptr();
6452 Elf_Scn* symtab_section = find_symbol_table_section();
6453 if (!symtab_section)
6454 return elf_symbol_sptr();
6456 GElf_Shdr header_mem;
6457 GElf_Shdr* symtab_sheader = gelf_getshdr(symtab_section,
6460 Elf_Data* symtab = elf_getdata(symtab_section, 0);
6463 bool sym_is_defined = native_sym.st_shndx != SHN_UNDEF;
6464 bool sym_is_common = native_sym.st_shndx == SHN_COMMON; // this occurs in
6467 const char* name_str = elf_strptr(elf_handle(),
6468 symtab_sheader->sh_link,
6469 native_sym.st_name);
6473 elf_symbol::version ver;
6474 get_version_for_symbol(symbol_index,
6478 elf_symbol::visibility vis =
6479 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(native_sym.st_other));
6481 Elf_Scn *strings_section = find_ksymtab_strings_section();
6482 size_t strings_ndx = strings_section
6483 ? elf_ndxscn(strings_section)
6486 elf_symbol_sptr sym =
6487 elf_symbol::create(env(), symbol_index, native_sym.st_size,
6488 name_str, stt_to_elf_symbol_type
6489 (GELF_ST_TYPE(native_sym.st_info)),
6490 stb_to_elf_symbol_binding
6491 (GELF_ST_BIND(native_sym.st_info)),
6492 sym_is_defined, sym_is_common, ver, vis,
6493 native_sym.st_shndx == strings_ndx);
6497 /// Read 8 bytes and convert their value into an uint64_t.
6499 /// @param bytes the array of bytes to read the next 8 bytes from.
6500 /// Note that this array must be at least 8 bytes long.
6502 /// @param result where to store the resuting uint64_t that was read.
6504 /// @param is_big_endian if true, read the 8 bytes in Big Endian
6505 /// mode, otherwise, read them in Little Endian.
6507 /// @param true if the 8 bytes could be read, false otherwise.
6509 read_uint64_from_array_of_bytes(const uint8_t *bytes,
6511 uint64_t &result) const
6513 return read_int_from_array_of_bytes(bytes, 8, is_big_endian, result);
6516 /// Read N bytes and convert their value into an integer type T.
6518 /// Note that N cannot be bigger than 8 for now. The type passed needs to be
6519 /// at least of the size of number_of_bytes.
6521 /// @param bytes the array of bytes to read the next 8 bytes from.
6522 /// Note that this array must be at least 8 bytes long.
6524 /// @param number_of_bytes the number of bytes to read. This number
6525 /// cannot be bigger than 8.
6527 /// @param is_big_endian if true, read the 8 bytes in Big Endian
6528 /// mode, otherwise, read them in Little Endian.
6530 /// @param result where to store the resuting integer that was read.
6533 /// @param true if the 8 bytes could be read, false otherwise.
6534 template<typename T>
6536 read_int_from_array_of_bytes(const uint8_t *bytes,
6537 unsigned char number_of_bytes,
6544 ABG_ASSERT(number_of_bytes <= 8);
6545 ABG_ASSERT(number_of_bytes <= sizeof(T));
6549 const uint8_t *cur = bytes;
6552 // In Big Endian, the most significant byte is at the lowest
6554 const uint8_t* msb = cur;
6557 // Now read the remaining least significant bytes.
6558 for (uint i = 1; i < number_of_bytes; ++i)
6559 res = (res << 8) | ((T)msb[i]);
6563 // In Little Endian, the least significant byte is at the
6565 const uint8_t* lsb = cur;
6567 // Now read the remaining most significant bytes.
6568 for (uint i = 1; i < number_of_bytes; ++i)
6569 res = res | (((T)lsb[i]) << i * 8);
6576 /// Lookup the address of the function entry point that corresponds
6577 /// to the address of a given function descriptor.
6579 /// On PPC64, a function pointer is the address of a function
6580 /// descriptor. Function descriptors are located in the .opd
6581 /// section. Each function descriptor is a triplet of three
6582 /// addresses, each one on 64 bits. Among those three address only
6583 /// the first one is of any interest to us: the address of the entry
6584 /// point of the function.
6586 /// This function returns the address of the entry point of the
6587 /// function whose descriptor's address is given.
6589 /// http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-DES
6591 /// https://www.ibm.com/developerworks/community/blogs/5894415f-be62-4bc0-81c5-3956e82276f3/entry/deeply_understand_64_bit_powerpc_elf_abi_function_descriptors?lang=en
6593 /// @param fn_desc_address the address of the function descriptor to
6596 /// @return the address of the entry point of the function whose
6597 /// descriptor has the address @p fn_desc_address. If there is no
6598 /// .opd section (e.g because we are not on ppc64) or more generally
6599 /// if the function descriptor could not be found then this function
6600 /// just returns the address of the fuction descriptor.
6602 lookup_ppc64_elf_fn_entry_point_address(GElf_Addr fn_desc_address) const
6605 return fn_desc_address;
6607 if (!elf_architecture_is_ppc64())
6608 return fn_desc_address;
6610 bool is_big_endian = elf_architecture_is_big_endian();
6612 Elf_Scn *opd_section = find_opd_section();
6614 return fn_desc_address;
6616 GElf_Shdr header_mem;
6617 // The section header of the .opd section.
6618 GElf_Shdr *opd_sheader = gelf_getshdr(opd_section, &header_mem);
6620 // The offset of the function descriptor entry, in the .opd
6622 size_t fn_desc_offset = fn_desc_address - opd_sheader->sh_addr;
6623 Elf_Data *elf_data = elf_rawdata(opd_section, 0);
6625 // Ensure that the opd_section has at least 8 bytes, starting from
6626 // the offset we want read the data from.
6627 if (elf_data->d_size <= fn_desc_offset + 8)
6628 return fn_desc_address;
6630 // A pointer to the data of the .opd section, that we can actually
6631 // do something with.
6632 uint8_t * bytes = (uint8_t*) elf_data->d_buf;
6634 // The resulting address we are looking for is going to be formed
6635 // in this variable.
6636 GElf_Addr result = 0;
6637 ABG_ASSERT(read_uint64_from_array_of_bytes(bytes + fn_desc_offset,
6638 is_big_endian, result));
6643 /// Given the address of the beginning of a function, lookup the
6644 /// symbol of the function, build an instance of @ref elf_symbol out
6645 /// of it and return it.
6647 /// @param symbol_start_addr the address of the beginning of the
6648 /// function to consider.
6650 /// @param sym the resulting symbol. This is set iff the function
6653 /// @return the elf symbol found at address @p symbol_start_addr, or
6654 /// nil if none was found.
6656 lookup_elf_fn_symbol_from_address(GElf_Addr symbol_start_addr) const
6658 addr_elf_symbol_sptr_map_type::const_iterator i,
6659 nil = fun_entry_addr_sym_map().end();
6661 if ((i = fun_entry_addr_sym_map().find(symbol_start_addr)) == nil)
6662 return elf_symbol_sptr();
6667 /// Given the address of a global variable, lookup the symbol of the
6668 /// variable, build an instance of @ref elf_symbol out of it and
6671 /// @param symbol_start_addr the address of the beginning of the
6672 /// variable to consider.
6674 /// @param the symbol found, iff the function returns true.
6676 /// @return the elf symbol found or nil if none was found.
6678 lookup_elf_var_symbol_from_address(GElf_Addr symbol_start_addr) const
6680 addr_elf_symbol_sptr_map_type::const_iterator i,
6681 nil = var_addr_sym_map().end();
6683 if ((i = var_addr_sym_map().find(symbol_start_addr)) == nil)
6684 return elf_symbol_sptr();
6689 /// Lookup an elf symbol, knowing its address.
6691 /// This function first looks for a function symbol having this
6692 /// address; if it doesn't find any, then it looks for a variable
6695 /// @param symbol_addr the address of the symbol of the symbol we
6696 /// are looking for. Note that the address is a relative offset
6697 /// starting from the beginning of the .text section. Addresses
6698 /// that are presen in the symbol table (the one named .symtab).
6700 /// @return the elf symbol if found, or nil otherwise.
6702 lookup_elf_symbol_from_address(GElf_Addr symbol_addr) const
6704 elf_symbol_sptr result = lookup_elf_fn_symbol_from_address(symbol_addr);
6706 result = lookup_elf_var_symbol_from_address(symbol_addr);
6710 /// Look in the symbol tables of the underying elf file and see if
6711 /// we find a symbol of a given name of function type.
6713 /// @param sym_name the name of the symbol to look for.
6715 /// @param syms the public function symbols that were found, with
6716 /// the name @p sym_name.
6718 /// @return true iff the symbol was found.
6720 lookup_public_function_symbol_from_elf(const string& sym_name,
6721 vector<elf_symbol_sptr>& syms)
6723 return dwarf_reader::lookup_public_function_symbol_from_elf(env(),
6729 /// Look in the symbol tables of the underying elf file and see if
6730 /// we find a symbol of a given name of variable type.
6732 /// @param sym_name the name of the symbol to look for.
6734 /// @param syms the variable symbols that were found, with the name
6737 /// @return true iff the symbol was found.
6739 lookup_public_variable_symbol_from_elf(const string& sym_name,
6740 vector<elf_symbol_sptr>& syms)
6742 return dwarf_reader::lookup_public_variable_symbol_from_elf(env(),
6748 /// Test if a given function symbol has been exported.
6750 /// @param symbol_address the address of the symbol we are looking
6751 /// for. Note that this address must be a relative offset from the
6752 /// beginning of the .text section, just like the kind of addresses
6753 /// that are present in the .symtab section.
6755 /// @returnthe elf symbol if found, or nil otherwise.
6757 function_symbol_is_exported(GElf_Addr symbol_address) const
6759 elf_symbol_sptr symbol = lookup_elf_fn_symbol_from_address(symbol_address);
6763 if (!symbol->is_public())
6764 return elf_symbol_sptr();
6766 address_set_sptr set;
6767 bool looking_at_linux_kernel_binary =
6768 load_in_linux_kernel_mode() && is_linux_kernel_binary();
6770 if (looking_at_linux_kernel_binary)
6772 if ((set = linux_exported_fn_syms()))
6774 if (set->find(symbol_address) != set->end())
6777 if ((set = linux_exported_gpl_fn_syms()))
6779 if (set->find(symbol_address) != set->end())
6782 return elf_symbol_sptr();
6788 /// Test if a given variable symbol has been exported.
6790 /// @param symbol_address the address of the symbol we are looking
6791 /// for. Note that this address must be a relative offset from the
6792 /// beginning of the .text section, just like the kind of addresses
6793 /// that are present in the .symtab section.
6795 /// @returnthe elf symbol if found, or nil otherwise.
6797 variable_symbol_is_exported(GElf_Addr symbol_address) const
6799 elf_symbol_sptr symbol = lookup_elf_var_symbol_from_address(symbol_address);
6803 if (!symbol->is_public())
6804 return elf_symbol_sptr();
6806 address_set_sptr set;
6807 bool looking_at_linux_kernel_binary =
6808 load_in_linux_kernel_mode() && is_linux_kernel_binary();
6810 if (looking_at_linux_kernel_binary)
6812 if ((set = linux_exported_var_syms()))
6814 if (set->find(symbol_address) != set->end())
6817 if ((set = linux_exported_gpl_var_syms()))
6819 if (set->find(symbol_address) != set->end())
6822 return elf_symbol_sptr();
6828 /// Getter for the map of function address -> symbol.
6830 /// @return the function address -> symbol map.
6831 const addr_elf_symbol_sptr_map_sptr
6832 fun_addr_sym_map_sptr() const
6834 maybe_load_symbol_maps();
6835 return fun_addr_sym_map_;
6838 /// Getter for the map of function address -> symbol.
6840 /// @return the function address -> symbol map.
6841 addr_elf_symbol_sptr_map_sptr
6842 fun_addr_sym_map_sptr()
6844 maybe_load_symbol_maps();
6845 return fun_addr_sym_map_;
6848 /// Getter for the map of function symbol address -> function symbol
6851 /// @return the map. Note that this initializes the map once when
6853 const addr_elf_symbol_sptr_map_type&
6854 fun_addr_sym_map() const
6856 maybe_load_symbol_maps();
6857 return *fun_addr_sym_map_;
6860 /// Getter for the map of function symbol address -> function symbol
6863 /// @return the map. Note that this initializes the map once when
6865 addr_elf_symbol_sptr_map_type&
6868 maybe_load_symbol_maps();
6869 return *fun_addr_sym_map_;
6872 /// Getter for a pointer to the map that associates the address of
6873 /// an entry point of a function with the symbol of that function.
6875 /// Note that on non-"PPC64 ELFv1" binaries, this map is the same as
6876 /// the one that assciates the address of a function with the symbol
6877 /// of that function.
6879 /// @return a pointer to the map that associates the address of an
6880 /// entry point of a function with the symbol of that function.
6881 addr_elf_symbol_sptr_map_sptr&
6882 fun_entry_addr_sym_map_sptr()
6884 if (!fun_entry_addr_sym_map_ && !fun_addr_sym_map_)
6885 maybe_load_symbol_maps();
6886 if (elf_architecture_is_ppc64())
6887 return fun_entry_addr_sym_map_;
6888 return fun_addr_sym_map_;
6891 /// Getter for a pointer to the map that associates the address of
6892 /// an entry point of a function with the symbol of that function.
6894 /// Note that on non-"PPC64 ELFv1" binaries, this map is the same as
6895 /// the one that assciates the address of a function with the symbol
6896 /// of that function.
6898 /// @return a pointer to the map that associates the address of an
6899 /// entry point of a function with the symbol of that function.
6900 const addr_elf_symbol_sptr_map_sptr&
6901 fun_entry_addr_sym_map_sptr() const
6902 {return const_cast<read_context*>(this)->fun_entry_addr_sym_map_sptr();}
6905 /// Getter for the map that associates the address of an entry point
6906 /// of a function with the symbol of that function.
6908 /// Note that on non-"PPC64 ELFv1" binaries, this map is the same as
6909 /// the one that assciates the address of a function with the symbol
6910 /// of that function.
6912 /// @return the map that associates the address of an entry point of
6913 /// a function with the symbol of that function.
6914 addr_elf_symbol_sptr_map_type&
6915 fun_entry_addr_sym_map()
6916 {return *fun_entry_addr_sym_map_sptr();}
6918 /// Getter for the map that associates the address of an entry point
6919 /// of a function with the symbol of that function.
6921 /// Note that on non-"PPC64 ELFv1" binaries, this map is the same as
6922 /// the one that assciates the address of a function with the symbol
6923 /// of that function.
6925 /// @return the map that associates the address of an entry point of
6926 /// a function with the symbol of that function.
6927 const addr_elf_symbol_sptr_map_type&
6928 fun_entry_addr_sym_map() const
6929 { return *fun_entry_addr_sym_map_sptr();}
6931 /// Getter for the map of function symbols (name -> sym).
6933 /// @return a shared pointer to the map of function symbols.
6934 const string_elf_symbols_map_sptr&
6935 fun_syms_sptr() const
6937 maybe_load_symbol_maps();
6941 /// Getter for the map of function symbols (name -> sym).
6943 /// @return a shared pointer to the map of function symbols.
6944 string_elf_symbols_map_sptr&
6947 maybe_load_symbol_maps();
6951 /// Getter for the map of function symbols (name -> sym).
6953 /// @return a reference to the map of function symbols.
6954 const string_elf_symbols_map_type&
6957 maybe_load_symbol_maps();
6961 /// Getter for the map of function symbols (name -> sym).
6963 /// @return a reference to the map of function symbols.
6964 string_elf_symbols_map_type&
6967 maybe_load_symbol_maps();
6971 /// Getter for the map of variable symbols (name -> sym)
6973 /// @return a shared pointer to the map of variable symbols.
6974 const string_elf_symbols_map_sptr
6975 var_syms_sptr() const
6977 maybe_load_symbol_maps();
6981 /// Getter for the map of variable symbols (name -> sym)
6983 /// @return a shared pointer to the map of variable symbols.
6984 string_elf_symbols_map_sptr
6987 maybe_load_symbol_maps();
6991 /// Getter for the map of variable symbols (name -> sym)
6993 /// @return a reference to the map of variable symbols.
6994 const string_elf_symbols_map_type&
6997 maybe_load_symbol_maps();
7001 /// Getter for the map of variable symbols (name -> sym)
7003 /// @return a reference to the map of variable symbols.
7004 string_elf_symbols_map_type&
7007 maybe_load_symbol_maps();
7011 /// Getter for the map of undefined function symbols (name -> vector
7014 /// @return a (smart) pointer to the map of undefined function
7016 const string_elf_symbols_map_sptr&
7017 undefined_fun_syms_sptr() const
7019 maybe_load_symbol_maps();
7020 return undefined_fun_syms_;
7023 /// Getter for the map of undefined function symbols (name -> vector
7026 /// @return a (smart) pointer to the map of undefined function
7028 string_elf_symbols_map_sptr&
7029 undefined_fun_syms_sptr()
7031 maybe_load_symbol_maps();
7032 return undefined_fun_syms_;
7035 /// Getter for the map of undefined function symbols (name -> vector
7038 /// @return a reference to the map of undefined function symbols.
7039 const string_elf_symbols_map_type&
7040 undefined_fun_syms() const
7042 maybe_load_symbol_maps();
7043 return *undefined_fun_syms_;
7046 /// Getter for the map of undefined function symbols (name -> vector
7049 /// @return a reference to the map of undefined function symbols.
7050 string_elf_symbols_map_type&
7051 undefined_fun_syms()
7053 maybe_load_symbol_maps();
7054 return *undefined_fun_syms_;
7057 /// Getter for the map of undefined variable symbols (name -> vector
7060 /// @return a (smart) pointer to the map of undefined variable
7062 const string_elf_symbols_map_sptr&
7063 undefined_var_syms_sptr() const
7065 maybe_load_symbol_maps();
7066 return undefined_var_syms_;
7069 /// Getter for the map of undefined variable symbols (name -> vector
7072 /// @return a (smart) pointer to the map of undefined variable
7074 string_elf_symbols_map_sptr&
7075 undefined_var_syms_sptr()
7077 maybe_load_symbol_maps();
7078 return undefined_var_syms_;
7081 /// Getter for the map of undefined variable symbols (name -> vector
7084 /// @return a reference to the map of undefined variable symbols.
7085 const string_elf_symbols_map_type&
7086 undefined_var_syms() const
7088 maybe_load_symbol_maps();
7089 return *undefined_var_syms_;
7092 /// Getter for the map of undefined variable symbols (name -> vector
7095 /// @return a reference to the map of undefined variable symbols.
7096 string_elf_symbols_map_type&
7097 undefined_var_syms()
7099 maybe_load_symbol_maps();
7100 return *undefined_var_syms_;
7103 /// Getter for the set of addresses of function symbols that are
7104 /// explicitely exported, for a linux kernel (module) binary. These
7105 /// are the addresses of function symbols present in the __ksymtab
7108 linux_exported_fn_syms()
7109 {return linux_exported_fn_syms_;}
7111 /// Getter for the set of addresses of functions that are
7112 /// explicitly exported, for a linux kernel (module) binary. These
7113 /// are the addresses of function symbols present in the __ksymtab section.
     ///
     /// This const overload forwards to the non-const one.
     ///
7116 /// @return the set of addresses of exported function symbols.
7117 const address_set_sptr&
7118 linux_exported_fn_syms() const
7119 {return const_cast<read_context*>(this)->linux_exported_fn_syms();}
7121 /// Create an empty set of addresses of functions exported from a
7122 /// linux kernel (module) binary, or return the one that already exists.
7125 /// @return the set of addresses of exported function symbols.
7127 create_or_get_linux_exported_fn_syms()
     // Allocate the set only on first use.
7129 if (!linux_exported_fn_syms_)
7130 linux_exported_fn_syms_.reset(new address_set_type);
7131 return linux_exported_fn_syms_;
7134 /// Getter for the set of addresses of variables that are
7135 /// explicitly exported, for a linux kernel (module) binary. These
7136 /// are the addresses of variable symbols present in the __ksymtab section.
     ///
     /// The set is returned as currently stored; this getter does not
     /// create it (see create_or_get_linux_exported_var_syms()).
     ///
7139 /// @return the set of addresses of exported variable symbols.
7141 linux_exported_var_syms()
7142 {return linux_exported_var_syms_;}
7144 /// Getter for the set of addresses of variables that are
7145 /// explicitly exported, for a linux kernel (module) binary. These
7146 /// are the addresses of variable symbols present in the __ksymtab section.
     ///
     /// This const overload forwards to the non-const one.
     ///
7149 /// @return the set of addresses of exported variable symbols.
7150 const address_set_sptr&
7151 linux_exported_var_syms() const
7152 {return const_cast<read_context*>(this)->linux_exported_var_syms();}
7155 /// Create an empty set of addresses of variables exported from a
7156 /// linux kernel (module) binary, or return the one that already exists.
7159 /// @return the set of addresses of exported variable symbols.
7161 create_or_get_linux_exported_var_syms()
     // Allocate the set only on first use.
7163 if (!linux_exported_var_syms_)
7164 linux_exported_var_syms_.reset(new address_set_type);
7165 return linux_exported_var_syms_;
7169 /// Getter for the set of addresses of function symbols that are
7170 /// explicitly exported as GPL, for a linux kernel (module) binary.
7171 /// These are the addresses of function symbols present in the
7172 /// __ksymtab_gpl section.
     ///
     /// The set is returned as currently stored; this getter does not
     /// create it (see create_or_get_linux_exported_gpl_fn_syms()).
7174 linux_exported_gpl_fn_syms()
7175 {return linux_exported_gpl_fn_syms_;}
7177 /// Getter for the set of addresses of function symbols that are
7178 /// explicitly exported as GPL, for a linux kernel (module) binary.
7179 /// These are the addresses of function symbols present in the
7180 /// __ksymtab_gpl section.
     ///
     /// This const overload forwards to the non-const one.
7181 const address_set_sptr&
7182 linux_exported_gpl_fn_syms() const
7183 {return const_cast<read_context*>(this)->linux_exported_gpl_fn_syms();}
7185 /// Create an empty set of addresses of GPL functions exported from
7186 /// a linux kernel (module) binary, or return the one that already exists.
7189 /// @return the set of addresses of exported function symbols.
7191 create_or_get_linux_exported_gpl_fn_syms()
     // Allocate the set only on first use.
7193 if (!linux_exported_gpl_fn_syms_)
7194 linux_exported_gpl_fn_syms_.reset(new address_set_type);
7195 return linux_exported_gpl_fn_syms_;
7198 /// Getter for the set of addresses of variable symbols that are
7199 /// explicitly exported as GPL, for a linux kernel (module) binary.
7200 /// These are the addresses of variable symbols present in the
7201 /// __ksymtab_gpl section.
     ///
     /// The set is returned as currently stored; this getter does not
     /// create it (see create_or_get_linux_exported_gpl_var_syms()).
7203 linux_exported_gpl_var_syms()
7204 {return linux_exported_gpl_var_syms_;}
7206 /// Getter for the set of addresses of variable symbols that are
7207 /// explicitly exported as GPL, for a linux kernel (module) binary.
7208 /// These are the addresses of variable symbols present in the
7209 /// __ksymtab_gpl section.
     ///
     /// This const overload forwards to the non-const one.
7210 const address_set_sptr&
7211 linux_exported_gpl_var_syms() const
7212 {return const_cast<read_context*>(this)->linux_exported_gpl_var_syms();}
7214 /// Create an empty set of addresses of GPL variables exported from
7215 /// a linux kernel (module) binary, or return the one that already exists.
7218 /// @return the set of addresses of exported variable symbols.
7220 create_or_get_linux_exported_gpl_var_syms()
     // Allocate the set only on first use.
7222 if (!linux_exported_gpl_var_syms_)
7223 linux_exported_gpl_var_syms_.reset(new address_set_type);
7224 return linux_exported_gpl_var_syms_;
7227 /// Getter for the ELF dt_needed tag.
     ///
     /// @return the vector of strings stored in the dt_needed_ data
     /// member, which load_dt_soname_and_needed() fills from the
     /// DT_NEEDED tag.
7228 const vector<string>&
7230 {return dt_needed_;}
7232 /// Getter for the ELF dt_soname tag.
     ///
     /// @return the value of the dt_soname_ data member, which
     /// load_dt_soname_and_needed() sets from the DT_SONAME tag when
     /// that tag is found.
7235 {return dt_soname_;}
7237 /// Getter for the ELF architecture of the current file.
     ///
     /// @return the value of the elf_architecture_ data member.
7239 elf_architecture() const
7240 {return elf_architecture_;}
7242 /// Return the size of a word for the current architecture.
     ///
     /// The size is derived from the ELF class (the EI_CLASS byte of the
     /// ELF header identification): ELFCLASS32 or ELFCLASS64. Any other
     /// class is treated as unreachable.
     ///
7243 /// @return the size of a word.
7245 architecture_word_size() const
7247 unsigned char word_size = 0;
     // NOTE(review): the result of gelf_getehdr() is dereferenced
     // without a null check here, unlike in elf_architecture_is_ppc64().
7249 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
7250 if (elf_header->e_ident[EI_CLASS] == ELFCLASS32)
7252 else if (elf_header->e_ident[EI_CLASS] == ELFCLASS64)
7255 ABG_ASSERT_NOT_REACHED;
7259 /// Test if the architecture of the current binary is ppc64.
7261 /// @return true iff the architecture of the current binary is ppc64.
7263 elf_architecture_is_ppc64() const
7266 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
     // A null header (gelf_getehdr() failure) yields false.
7268 return (elf_header && elf_header->e_machine == EM_PPC64);
7271 /// Test if the endianness of the current binary is Big Endian.
7273 /// https://en.wikipedia.org/wiki/Endianness.
7275 /// @return true iff the current binary is Big Endian.
7277 elf_architecture_is_big_endian() const
7280 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
     // The data encoding is read from the EI_DATA byte of the ELF
     // header identification; ELFDATA2MSB means big endian.
7282 bool is_big_endian = (elf_header->e_ident[EI_DATA] == ELFDATA2MSB);
7285 ABG_ASSERT(elf_header->e_ident[EI_DATA] == ELFDATA2LSB);
7287 return is_big_endian;
7290 /// Test if the current elf file being read is an executable.
7292 /// @return true iff the current elf file being read is an
     /// executable, i.e. the e_type field of its ELF header is ET_EXEC.
7295 current_elf_file_is_executable() const
7298 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
7299 return elf_header->e_type == ET_EXEC;
7302 /// Test if the current elf file being read is a dynamic shared object.
7305 /// @return true iff the current elf file being read is a
7306 /// dynamic shared object, i.e. the e_type field of its ELF header is ET_DYN.
7308 current_elf_file_is_dso() const
7311 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
7312 return elf_header->e_type == ET_DYN;
7315 /// Getter for the map of global variables symbol address -> global
7316 /// variable symbol.
7318 /// @return the map. This const overload forwards to the non-const one.
7320 const addr_elf_symbol_sptr_map_type&
7321 var_addr_sym_map() const
7322 {return const_cast<read_context*>(this)->var_addr_sym_map();}
7324 /// Getter for the map of global variables symbol address -> global
7325 /// variable symbol.
7327 /// @return the map. Note that the map is loaded the first time it is requested.
7329 addr_elf_symbol_sptr_map_type&
     // Only trigger loading when the map has not been allocated yet.
7332 if (!var_addr_sym_map_)
7333 maybe_load_symbol_maps();
7334 return *var_addr_sym_map_;
7337 /// Load the maps address -> function symbol, address -> variable
7338 /// symbol and the maps of function and variable undefined symbols.
     ///
     /// The symbols are read by walking every entry of the section
     /// returned by find_symbol_table_section().
     ///
7340 /// @param load_fun_map whether to load the address to function map.
7342 /// @param load_var_map whether to load the address to variable map.
7344 /// @param load_undefined_fun_map whether to load the undefined function symbols map.
7347 /// @param load_undefined_var_map whether to load the undefined variable symbols map.
7350 /// @return true iff the maps have been loaded.
7352 load_symbol_maps_from_symtab_section(bool load_fun_map,
7354 bool load_undefined_fun_map,
7355 bool load_undefined_var_map)
7357 Elf_Scn* symtab_section = find_symbol_table_section();
7358 if (!symtab_section)
7361 GElf_Shdr header_mem;
7362 GElf_Shdr* symtab_sheader = gelf_getshdr(symtab_section,
     // Number of symbols = section size / size of one entry.
7364 size_t nb_syms = symtab_sheader->sh_size / symtab_sheader->sh_entsize;
7366 Elf_Data* symtab = elf_getdata(symtab_section, 0);
7369 GElf_Ehdr elf_header;
7370 ABG_ASSERT(gelf_getehdr(elf_handle(), &elf_header));
7372 bool is_ppc64 = elf_architecture_is_ppc64();
7374 for (size_t i = 0; i < nb_syms; ++i)
7376 GElf_Sym* sym, sym_mem;
7377 sym = gelf_getsym(symtab, i, &sym_mem);
     // Function symbols (STT_FUNC or STT_GNU_IFUNC).
7380 if ((load_fun_map || load_undefined_fun_map)
7381 && (GELF_ST_TYPE(sym->st_info) == STT_FUNC
7382 || GELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC))
7384 elf_symbol_sptr symbol = lookup_elf_symbol_from_index(i);
7386 ABG_ASSERT(symbol->is_function());
7389 if (load_fun_map && symbol->is_public())
     // Register the symbol in the name -> symbols map, creating
     // the entry for that name if it does not exist yet.
7392 string_elf_symbols_map_type::iterator it =
7393 fun_syms_->find(symbol->get_name());
7394 if (it == fun_syms_->end())
7396 (*fun_syms_)[symbol->get_name()] = elf_symbols();
7397 it = fun_syms_->find(symbol->get_name());
     // NOTE(review): 'name' is not referenced in the visible code
     // that follows; it looks like a leftover.
7399 string name = symbol->get_name();
7400 it->second.push_back(symbol);
     // Register the symbol in the address -> symbol map. If a symbol
     // is already registered at that address, the new one is added
     // as an alias of the main symbol of the existing one.
7404 GElf_Addr symbol_value =
7405 maybe_adjust_et_rel_sym_addr_to_abs_addr(sym);
7407 addr_elf_symbol_sptr_map_type::const_iterator it =
7408 fun_addr_sym_map_->find(symbol_value);
7409 if (it == fun_addr_sym_map_->end())
7410 (*fun_addr_sym_map_)[symbol_value] = symbol;
7411 else //if (sym->st_value != 0)
7412 it->second->get_main_symbol()->add_alias(symbol);
7416 // For ppc64 ELFv1 binaries, we need to build a
7417 // function entry point address -> function
7418 // symbol map. This is in addition to the
7419 // function pointer -> symbol map. This is
7420 // because on ppc64 ELFv1, a function pointer is
7421 // different from a function entry point address.
7424 // On ppc64 ELFv1, the DWARF DIE of a function
7425 // references the address of the entry point of
7426 // the function symbol; whereas the value of the
7427 // function symbol is the function pointer. As
7428 // these addresses are different, if we want
7429 // to get to the symbol of a function from its
7430 // entry point address (as referenced by DWARF
7431 // function DIEs) we must have the two maps I
7432 // mentioned right above.
7434 // In other words, we need a map that associates
7435 // a function entry point address with the symbol
7436 // of that function, to be able to get the
7437 // function symbol that corresponds to a given
7438 // function DIE, on ppc64.
7440 // The value of the function pointer (the value
7441 // of the symbol) usually refers to the offset
7442 // of a table in the .opd section. But
7443 // sometimes, for a symbol named "foo", the
7444 // corresponding symbol named ".foo" (note the
7445 // dot before foo) which value is the entry
7446 // point address of the function; that entry
7447 // point address refers to a region in the .text section.
7450 // So we are only interested in values of the
7451 // symbol that are in the .opd section.
7452 GElf_Addr fn_desc_addr = sym->st_value;
7453 GElf_Addr fn_entry_point_addr =
7454 lookup_ppc64_elf_fn_entry_point_address(fn_desc_addr);
7455 addr_elf_symbol_sptr_map_type::const_iterator it2 =
7456 fun_entry_addr_sym_map().find(fn_entry_point_addr);
7458 if (it2 == fun_entry_addr_sym_map().end())
7459 fun_entry_addr_sym_map()[fn_entry_point_addr] = symbol;
7460 else if (address_is_in_opd_section(fn_desc_addr))
7464 // 'symbol' must have been registered as an
7465 // alias for it2->second->get_main_symbol(),
7466 // right before the "if (ppc64)" statement.
7470 // if the name of 'symbol' is foo, then the
7471 // name of it2->second is ".foo". That is,
7472 // foo is the name of the symbol when it
7473 // refers to the function descriptor in the
7474 // .opd section and ".foo" is an internal
7475 // name for the address of the entry point
7478 // In the latter case, we just want to keep
7479 // a reference to "foo" as .foo is an internal name.
7482 bool two_symbols_alias =
7483 it2->second->get_main_symbol()->does_alias(*symbol);
7484 bool symbol_is_foo_and_prev_symbol_is_dot_foo =
7485 (it2->second->get_name()
7486 == string(".") + symbol->get_name());
7488 ABG_ASSERT(two_symbols_alias
7489 || symbol_is_foo_and_prev_symbol_is_dot_foo);
7491 if (symbol_is_foo_and_prev_symbol_is_dot_foo)
7492 // Let's just keep a reference of the
7493 // symbol that the user sees in the source
7494 // code (the one named foo). The symbol
7495 // which name is prefixed with a "dot" is
7496 // an artificial one.
7497 fun_entry_addr_sym_map()[fn_entry_point_addr] = symbol;
7502 else if (load_undefined_fun_map && !symbol->is_defined())
7504 string_elf_symbols_map_type::iterator it =
7505 undefined_fun_syms_->find(symbol->get_name());
7506 if (it == undefined_fun_syms_->end())
7508 (*undefined_fun_syms_)[symbol->get_name()] = elf_symbols();
7509 it = undefined_fun_syms_->find(symbol->get_name());
7511 it->second.push_back(symbol);
     // Variable symbols (STT_OBJECT or STT_TLS).
7514 else if ((load_var_map || load_undefined_var_map)
7515 && (GELF_ST_TYPE(sym->st_info) == STT_OBJECT
7516 || GELF_ST_TYPE(sym->st_info) == STT_TLS)
7517 // If the symbol is for an OBJECT, the index of the
7518 // section it refers to cannot be absolute.
7519 // Otherwise that OBJECT is not a variable.
7520 && (sym->st_shndx != SHN_ABS
7521 || GELF_ST_TYPE(sym->st_info) != STT_OBJECT ))
7523 elf_symbol_sptr symbol = lookup_elf_symbol_from_index(i);
7525 ABG_ASSERT(symbol->is_variable());
7527 if (load_var_map && symbol->is_public())
7530 string_elf_symbols_map_type::iterator it =
7531 var_syms_->find(symbol->get_name());
7532 if (it == var_syms_->end())
7534 (*var_syms_)[symbol->get_name()] = elf_symbols();
7535 it = var_syms_->find(symbol->get_name());
     // NOTE(review): 'name' is not referenced in the visible code
     // that follows; it looks like a leftover.
7537 string name = symbol->get_name();
7538 it->second.push_back(symbol);
     // For a common symbol, every instance after the first one
     // registered under the same name is attached to that first
     // instance.
7541 if (symbol->is_common_symbol())
7543 string_elf_symbols_map_type::iterator it =
7544 var_syms_->find(symbol->get_name());
7545 ABG_ASSERT(it != var_syms_->end());
7546 const elf_symbols& common_sym_instances = it->second;
7547 ABG_ASSERT(!common_sym_instances.empty());
7548 if (common_sym_instances.size() > 1)
7550 elf_symbol_sptr main_common_sym =
7551 common_sym_instances[0];
7552 ABG_ASSERT(main_common_sym->get_name()
7553 == symbol->get_name());
7554 ABG_ASSERT(main_common_sym->is_common_symbol());
7555 ABG_ASSERT(symbol.get() != main_common_sym.get());
7556 main_common_sym->add_common_instance(symbol);
7561 GElf_Addr symbol_value =
7562 maybe_adjust_et_rel_sym_addr_to_abs_addr(sym);
7563 addr_elf_symbol_sptr_map_type::const_iterator it =
7564 var_addr_sym_map_->find(symbol_value);
7565 if (it == var_addr_sym_map_->end())
7566 (*var_addr_sym_map_)[symbol_value] = symbol;
7568 it->second->get_main_symbol()->add_alias(symbol);
7571 else if (load_undefined_var_map && !symbol->is_defined())
7573 string_elf_symbols_map_type::iterator it =
7574 undefined_var_syms_->find(symbol->get_name());
7575 if (it == undefined_var_syms_->end())
7577 (*undefined_var_syms_)[symbol->get_name()] = elf_symbols();
7578 it = undefined_var_syms_->find(symbol->get_name());
7580 it->second.push_back(symbol);
7587 /// Try reading the first __ksymtab section entry as if it is in the
7588 /// pre-v4_19 format and lookup a symbol from the .symbol section to
7589 /// see if that succeeds. If it does, then we can assume the
7590 /// __ksymtab section is in the pre-v4_19 format.
7592 /// @return the symbol resulting from the lookup of the symbol
7593 /// address we got from reading the first entry of the ksymtab
7594 /// section assuming the pre-v4.19 format. If nil, it means the
7595 /// __ksymtab section is not in the pre-v4.19 format.
7597 try_reading_first_ksymtab_entry_using_pre_v4_19_format() const
7599 Elf_Scn *section = find_any_ksymtab_section();
7600 Elf_Data *elf_data = elf_rawdata(section, 0);
7601 uint8_t *bytes = reinterpret_cast<uint8_t*>(elf_data->d_buf);
7602 bool is_big_endian = elf_architecture_is_big_endian();
7603 elf_symbol_sptr symbol;
     // In this format the symbol address is as wide as one word of the
     // architecture.
7604 unsigned char symbol_value_size = architecture_word_size();
7606 GElf_Addr symbol_address = 0, adjusted_symbol_address = 0;
7607 ABG_ASSERT(read_int_from_array_of_bytes(bytes,
7611 adjusted_symbol_address = maybe_adjust_fn_sym_address(symbol_address);
7612 symbol = lookup_elf_symbol_from_address(adjusted_symbol_address);
7616 /// Try reading the first __ksymtab section entry as if it is in the
7617 /// v4_19 format and lookup a symbol from the .symbol section to see
7618 /// if that succeeds. If it does, then we can assume the __ksymtab
7619 /// section is in the v4_19 format.
7621 /// @return the symbol resulting from the lookup of the symbol
7622 /// address we got from reading the first entry of the ksymtab
7623 /// section assuming the v4.19 format. If nil, it means the
7624 /// __ksymtab section is not in the v4.19 format.
7626 try_reading_first_ksymtab_entry_using_v4_19_format() const
7628 Elf_Scn *section = find_any_ksymtab_section();
7629 Elf_Data *elf_data = elf_rawdata(section, 0);
7630 uint8_t *bytes = reinterpret_cast<uint8_t*>(elf_data->d_buf);
7631 bool is_big_endian = elf_architecture_is_big_endian();
7632 elf_symbol_sptr symbol;
     // In this format the value read from the entry has the size of
     // 'offset', not the size of an architecture word.
7635 const unsigned char symbol_value_size = sizeof(offset);
7636 GElf_Addr symbol_address = 0, adjusted_symbol_address = 0;
7637 ABG_ASSERT(read_int_from_array_of_bytes(bytes,
7642 GElf_Shdr *section_header = gelf_getshdr(section, &mem);
     // The symbol address is the offset added to the address of the
     // section (sh_addr).
7643 symbol_address = offset + section_header->sh_addr;
7645 adjusted_symbol_address = maybe_adjust_fn_sym_address(symbol_address);
7646 symbol = lookup_elf_symbol_from_address(adjusted_symbol_address);
7650 /// Try to determine the format of the __ksymtab and __ksymtab_gpl
7651 /// sections of Linux kernel modules.
7653 /// This is important because we need to know the format of these
7654 /// sections to be able to read from them.
     ///
     /// The guess is based on the relocation section associated with a
     /// ksymtab section: either it is empty, or the type of its first
     /// relocation is inspected.
     ///
7656 /// @return the format of the __ksymtab[_gpl] sections, or
     /// UNDEFINED_KSYMTAB_FORMAT if the relocation type is not recognized.
7658 get_ksymtab_format_module() const
7660 Elf_Scn *section = find_any_ksymtab_reloc_section();
7662 ABG_ASSERT(section);
7664 // Libdwfl has a weird quirk where, in the process of obtaining an Elf
7665 // descriptor via dwfl_module_getelf(), it will apply all relocations it
7666 // knows how to and it will zero the relocation info after applying it. If
7667 // the .rela__ksymtab* section contained only simple (absolute) relocations,
7668 // they will have been all applied and sh_size will be 0. For arches that
7669 // support relative ksymtabs, simple relocations only appear in pre-4.19 kernel modules.
7671 GElf_Shdr section_mem;
7672 GElf_Shdr *section_shdr = gelf_getshdr(section, &section_mem);
7673 if (section_shdr->sh_size == 0)
7674 return PRE_V4_19_KSYMTAB_FORMAT;
7676 bool is_relasec = (section_shdr->sh_type == SHT_RELA);
7678 // If we still have a normal non-zeroed relocation section, we can guess
7679 // what format the ksymtab is in depending on what types of relocs it contains.
7683 Elf_Data *section_data = elf_getdata(section, 0);
     // Only the first relocation entry (index 0) is inspected, read
     // either as a RELA or as a REL entry.
7687 gelf_getrela(section_data, 0, &rela);
7688 type = GELF_R_TYPE(rela.r_info);
7693 gelf_getrel(section_data, 0, &rel);
7694 type = GELF_R_TYPE(rel.r_info);
7697 // Sigh, I dislike the arch-dependent code here, but this seems to be a
7698 // reliable heuristic for kernel modules for now. Relative ksymtabs are only
7699 // supported on x86 and arm64 as of v4.19.
7700 ksymtab_format format;
7703 case R_X86_64_64: // Same as R_386_32, fallthrough
7704 #ifdef HAVE_R_AARCH64_ABS64_MACRO
7705 case R_AARCH64_ABS64:
7707 format = PRE_V4_19_KSYMTAB_FORMAT;
7709 case R_X86_64_PC32: // Same as R_386_PC32, fallthrough
7710 #ifdef HAVE_R_AARCH64_PREL32_MACRO
7711 case R_AARCH64_PREL32:
7713 format = V4_19_KSYMTAB_FORMAT;
7716 // Fall back to other methods of determining the ksymtab format.
7717 format = UNDEFINED_KSYMTAB_FORMAT;
7723 /// Determine the format of the __ksymtab and __ksymtab_gpl sections.
7726 /// This is important because we need to know the format of these
7727 /// sections to be able to read from them.
     ///
     /// The result is cached in ksymtab_format_.
     ///
7729 /// @return the format of the __ksymtab[_gpl] sections.
7731 get_ksymtab_format() const
     // Without any ksymtab section, the format stays undefined.
7733 if (!find_any_ksymtab_section())
7734 ksymtab_format_ = UNDEFINED_KSYMTAB_FORMAT;
7737 if (ksymtab_format_ == UNDEFINED_KSYMTAB_FORMAT)
7739 // Since Linux kernel modules are relocatable, we can first try
7740 // using a heuristic based on relocations to guess the ksymtab format.
7741 if (is_linux_kernel_module())
7743 ksymtab_format_ = get_ksymtab_format_module();
7744 if (ksymtab_format_ != UNDEFINED_KSYMTAB_FORMAT)
7745 return ksymtab_format_;
7748 // If it's not a kernel module or we couldn't determine its format
7749 // with relocations, fall back to the heuristics below.
7751 // OK this is a dirty little heuristic to determine the
7752 // format of the ksymtab section.
7754 // We try to read the first ksymtab entry assuming a
7755 // pre-v4.19 format. If that succeeds then we are in the
7756 // pre-v4.19 format. Otherwise, try reading it assuming a
7757 // v4.19 format. For now, we just support
7758 // PRE_V4_19_KSYMTAB_FORMAT and V4_19_KSYMTAB_FORMAT.
7759 if (try_reading_first_ksymtab_entry_using_pre_v4_19_format())
7760 ksymtab_format_ = PRE_V4_19_KSYMTAB_FORMAT;
7761 else if (try_reading_first_ksymtab_entry_using_v4_19_format())
7762 ksymtab_format_ = V4_19_KSYMTAB_FORMAT;
7764 // If a new format emerges, then we need to add its support here.
7766 ABG_ASSERT_NOT_REACHED;
7769 return ksymtab_format_;
7772 /// Getter of the size of the symbol value part of an entry of the
7773 /// ksymtab section.
     ///
     /// The size depends on the ksymtab format returned by
     /// get_ksymtab_format(); for the pre-v4.19 format it is the word
     /// size of the architecture.
     ///
7775 /// @return the size of the symbol value part of the entry of the
7776 /// ksymtab section.
7778 get_ksymtab_symbol_value_size() const
7780 unsigned char result = 0;
7781 ksymtab_format format = get_ksymtab_format();
7782 if (format == UNDEFINED_KSYMTAB_FORMAT)
7784 else if (format == PRE_V4_19_KSYMTAB_FORMAT)
7785 result = architecture_word_size();
7786 else if (format == V4_19_KSYMTAB_FORMAT)
7789 ABG_ASSERT_NOT_REACHED;
7794 /// Getter of the size of one entry of the ksymtab section.
     ///
     /// The value is computed on first use and cached in
     /// ksymtab_entry_size_.
     ///
7796 /// @return the size of one entry of the ksymtab section.
7798 get_ksymtab_entry_size() const
7800 if (ksymtab_entry_size_ == 0)
7801 // The entry size is 2 * symbol_value_size.
7802 ksymtab_entry_size_ = 2 * get_ksymtab_symbol_value_size();
7804 return ksymtab_entry_size_;
7807 /// Getter of the number of entries that are present in the ksymtab section.
     ///
     /// The value is computed while nb_ksymtab_entries_ is still zero and
     /// is then cached in that data member.
     ///
7810 /// @return the number of entries that are present in the ksymtab section.
7813 get_nb_ksymtab_entries() const
7815 if (nb_ksymtab_entries_ == 0)
7817 Elf_Scn *section = find_ksymtab_section();
7820 GElf_Shdr header_mem;
7821 GElf_Shdr *section_header = gelf_getshdr(section, &header_mem);
7822 size_t entry_size = get_ksymtab_entry_size();
     // Guard the division below against a zero entry size.
7823 ABG_ASSERT(entry_size);
7824 nb_ksymtab_entries_ = section_header->sh_size / entry_size;
7827 return nb_ksymtab_entries_;
7830 /// Getter of the number of entries that are present in the
7831 /// ksymtab_gpl section.
     ///
     /// The value is computed while nb_ksymtab_gpl_entries_ is still zero
     /// and is then cached in that data member.
     ///
7833 /// @return the number of entries that are present in the
7834 /// ksymtab_gpl section.
7836 get_nb_ksymtab_gpl_entries()
7838 if (nb_ksymtab_gpl_entries_ == 0)
7840 Elf_Scn *section = find_ksymtab_gpl_section();
7843 GElf_Shdr header_mem;
7844 GElf_Shdr *section_header = gelf_getshdr(section, &header_mem);
7845 size_t entry_size = get_ksymtab_entry_size();
     // Guard the division below against a zero entry size.
7846 ABG_ASSERT(entry_size);
7847 nb_ksymtab_gpl_entries_ = section_header->sh_size / entry_size;
7850 return nb_ksymtab_gpl_entries_;
7853 /// Populate the symbol map by reading exported symbols from the
7854 /// ksymtab directly.
7856 /// @param section the ksymtab section to read from
7858 /// @param exported_fns_set the set of exported functions
7860 /// @param exported_vars_set the set of exported variables
7862 /// @param nb_entries the number of ksymtab entries to read
7864 /// @return true upon successful completion, false otherwise.
7866 populate_symbol_map_from_ksymtab(Elf_Scn *section,
7867 address_set_sptr exported_fns_set,
7868 address_set_sptr exported_vars_set,
7871 // The data of the section.
7872 Elf_Data *elf_data = elf_rawdata(section, 0);
7874 // An array-of-bytes view of the elf data above. Something we can
7875 // actually program with. Phew.
7876 uint8_t *bytes = reinterpret_cast<uint8_t*>(elf_data->d_buf);
7878 // This is where to store an address of a symbol that we read from the section.
7880 GElf_Addr symbol_address = 0, adjusted_symbol_address = 0;
7882 // So the section is an array of entries. Each entry describes a
7883 // symbol. Each entry is made of two words.
7885 // The first word is the address of a symbol. The second one is
7886 // the address of a static global variable symbol whose value is
7887 // the string representing the symbol name. That string is in the
7888 // __ksymtab_strings section. Here, we are only interested in the first word.
7891 // Let's thus walk the array of entries, and let's read just the
7892 // symbol address part of each entry.
7893 bool is_big_endian = elf_architecture_is_big_endian();
7894 elf_symbol_sptr symbol;
7895 unsigned char symbol_value_size = get_ksymtab_symbol_value_size();
7897 for (size_t i = 0, entry_offset = 0;
7899 ++i, entry_offset = get_ksymtab_entry_size() * i)
7902 ABG_ASSERT(read_int_from_array_of_bytes(&bytes[entry_offset],
7907 // Starting from linux kernel v4.19, it can happen that the
7908 // address value read from the ksymtab[_gpl] section might
7909 // need some decoding to get the real symbol address that has
7910 // a meaning in the .symbol section.
7912 maybe_adjust_sym_address_from_v4_19_ksymtab(symbol_address,
7913 entry_offset, section);
7915 // We might also want to adjust the symbol address, depending
7916 // on if we are looking at an ET_REL, an executable or a
7917 // shared object binary.
7918 adjusted_symbol_address = maybe_adjust_fn_sym_address(symbol_address);
7920 if (adjusted_symbol_address == 0)
7921 // The resulting symbol address is zero, not sure this is
7922 // valid; ignore it.
7925 // OK now the symbol address should be in a suitable form to
7926 // be used to look the symbol up in the usual .symbol section
7927 // (aka ELF symbol table).
7928 symbol = lookup_elf_symbol_from_address(adjusted_symbol_address);
7931 adjusted_symbol_address =
7932 maybe_adjust_var_sym_address(symbol_address);
7933 symbol = lookup_elf_symbol_from_address(adjusted_symbol_address);
7935 // This must be a symbol that is of type neither FUNC
7936 // (function) nor OBJECT (variable). There are for instance,
7937 // symbols of type 'NOTYPE' in the ksymtab symbol table. I
7938 // am not sure what those are.
     // Record the address in the set that matches the kind of the
     // symbol: functions in exported_fns_set, variables in
     // exported_vars_set.
7942 address_set_sptr set;
7943 if (symbol->is_function())
7945 ABG_ASSERT(lookup_elf_fn_symbol_from_address
7946 (adjusted_symbol_address));
7947 set = exported_fns_set;
7949 else if (symbol->is_variable())
7951 ABG_ASSERT(lookup_elf_var_symbol_from_address
7952 (adjusted_symbol_address));
7953 set = exported_vars_set;
7956 ABG_ASSERT_NOT_REACHED;
7957 set->insert(adjusted_symbol_address);
7962 /// Populate the symbol map by extracting the exported symbols from a
7963 /// ksymtab relocation (REL or RELA) section.
7965 /// @param reloc_section the ksymtab relocation section to read from
7967 /// @param exported_fns_set the set of exported functions
7969 /// @param exported_vars_set the set of exported variables
7971 /// @return true upon successful completion, false otherwise.
7973 populate_symbol_map_from_ksymtab_reloc(Elf_Scn *reloc_section,
7974 address_set_sptr exported_fns_set,
7975 address_set_sptr exported_vars_set)
7977 GElf_Shdr reloc_section_mem;
7978 GElf_Shdr *reloc_section_shdr = gelf_getshdr(reloc_section,
7979 &reloc_section_mem);
     // Number of relocations = section size / size of one entry.
7980 size_t reloc_count =
7981 reloc_section_shdr->sh_size / reloc_section_shdr->sh_entsize;
7983 Elf_Data *reloc_section_data = elf_getdata(reloc_section, 0);
7985 bool is_relasec = (reloc_section_shdr->sh_type == SHT_RELA);
7986 elf_symbol_sptr symbol;
7987 GElf_Sym native_symbol;
     // Each relocation references a symbol by its index in the symbol
     // table (GELF_R_SYM of r_info); that symbol is the one looked up.
7988 for (unsigned int i = 0; i < reloc_count; i++)
7993 gelf_getrela(reloc_section_data, i, &rela);
7994 symbol = lookup_elf_symbol_from_index(GELF_R_SYM(rela.r_info),
8000 gelf_getrel(reloc_section_data, i, &rel);
8001 symbol = lookup_elf_symbol_from_index(GELF_R_SYM(rel.r_info),
8007 // If the symbol is a linux string constant then ignore it.
8008 if (symbol->get_is_linux_string_cst())
     // Symbols that are neither functions nor variables are reported
     // on cerr below (NOTYPE and SECTION symbols).
8011 if (!symbol->is_function() && !symbol->is_variable())
8015 if (symbol->get_type() == elf_symbol::NOTYPE_TYPE)
8016 cerr << "skipping NOTYPE symbol "
8017 << symbol->get_name()
8019 << symbol->get_index()
8023 else if (symbol->get_type() == elf_symbol::SECTION_TYPE)
8024 cerr << "skipping SECTION symbol "
8026 << symbol->get_index()
8034 // If we are looking at an ET_REL (relocatable) binary, then
8035 // the symbol value of native_symbol is relative to the
8036 // section that symbol is defined in. We need to translate it
8037 // into an absolute (okay, binary-relative, rather) address.
8038 GElf_Addr symbol_address =
8039 maybe_adjust_et_rel_sym_addr_to_abs_addr (&native_symbol);
8041 address_set_sptr set;
8042 if (symbol->is_function())
8044 ABG_ASSERT(lookup_elf_fn_symbol_from_address(symbol_address));
8045 set = exported_fns_set;
8047 else if (symbol->is_variable())
8049 ABG_ASSERT(lookup_elf_var_symbol_from_address(symbol_address));
8050 set = exported_vars_set;
8053 ABG_ASSERT_NOT_REACHED;
8054 set->insert(symbol_address);
8059 /// Load a given kernel symbol table.
8061 /// One can thus retrieve the resulting symbols by using the
8062 /// accessors read_context::linux_exported_fn_syms(),
8063 /// read_context::linux_exported_var_syms(),
8064 /// read_context::linux_exported_gpl_fn_syms(), or
8065 /// read_context::linux_exported_gpl_var_syms().
8067 /// @param kind the kind of kernel symbol table to load.
8069 /// @return true upon successful completion, false otherwise.
8071 load_kernel_symbol_table(kernel_symbol_table_kind kind)
8073 size_t nb_entries = 0;
8074 Elf_Scn *section = 0, *reloc_section = 0;
8075 address_set_sptr linux_exported_fns_set, linux_exported_vars_set;
     // Pick the section, its relocation section, the entry count and
     // the destination sets that match the requested kind of table.
8079 case KERNEL_SYMBOL_TABLE_KIND_UNDEFINED:
8081 case KERNEL_SYMBOL_TABLE_KIND_KSYMTAB:
8082 section = find_ksymtab_section();
8083 reloc_section = find_ksymtab_reloc_section();
8084 nb_entries = get_nb_ksymtab_entries();
8085 linux_exported_fns_set = create_or_get_linux_exported_fn_syms();
8086 linux_exported_vars_set = create_or_get_linux_exported_var_syms();
8088 case KERNEL_SYMBOL_TABLE_KIND_KSYMTAB_GPL:
8089 section = find_ksymtab_gpl_section();
8090 reloc_section = find_ksymtab_gpl_reloc_section();
8091 nb_entries = get_nb_ksymtab_gpl_entries();
8092 linux_exported_fns_set = create_or_get_linux_exported_gpl_fn_syms();
8093 linux_exported_vars_set = create_or_get_linux_exported_gpl_var_syms();
8097 if (!linux_exported_vars_set
8098 || !linux_exported_fns_set
8103 ksymtab_format format = get_ksymtab_format();
8105 // Although pre-v4.19 kernel modules can have a relocation section for the
8106 // __ksymtab section, libdwfl zeroes the rela section after applying
8107 // "simple" absolute relocations via dwfl_module_getelf(). For v4.19 and
8108 // above, we get PC-relative relocations so dwfl_module_getelf() doesn't
8109 // apply those relocations and we're safe to read the relocation section to
8110 // determine which exported symbols are in the ksymtab.
8111 if (!reloc_section || format == PRE_V4_19_KSYMTAB_FORMAT)
8112 return populate_symbol_map_from_ksymtab(section, linux_exported_fns_set,
8113 linux_exported_vars_set,
8116 return populate_symbol_map_from_ksymtab_reloc(reloc_section,
8117 linux_exported_fns_set,
8118 linux_exported_vars_set);
8121 /// Load the special __ksymtab section. This is for linux kernel
     /// (module) binaries. It delegates to load_kernel_symbol_table()
     /// with the KERNEL_SYMBOL_TABLE_KIND_KSYMTAB kind.
     ///
8124 /// @return true upon successful completion, false otherwise.
8126 load_ksymtab_symbols()
8128 return load_kernel_symbol_table(KERNEL_SYMBOL_TABLE_KIND_KSYMTAB);
8131 /// Load the special __ksymtab_gpl section. This is for linux kernel
     /// (module) binaries. It delegates to load_kernel_symbol_table()
     /// with the KERNEL_SYMBOL_TABLE_KIND_KSYMTAB_GPL kind.
     ///
8134 /// @return true upon successful completion, false otherwise.
8136 load_ksymtab_gpl_symbols()
8138 return load_kernel_symbol_table(KERNEL_SYMBOL_TABLE_KIND_KSYMTAB_GPL);
8141 /// Load linux kernel (module) specific exported symbol sections.
     ///
     /// Each of the two tables (__ksymtab and __ksymtab_gpl) is loaded
     /// only when at least one of its two address sets has not been
     /// created yet. The results of the loads are OR-ed together.
     ///
8143 /// @return true upon successful completion, false otherwise.
8145 load_linux_specific_exported_symbol_maps()
8147 bool loaded = false;
8148 if (!linux_exported_fn_syms_
8149 || !linux_exported_var_syms_)
8150 loaded |= load_ksymtab_symbols();
8152 if (!linux_exported_gpl_fn_syms_
8153 || !linux_exported_gpl_var_syms_)
8154 loaded |= load_ksymtab_gpl_symbols();
8159 /// Load the maps of function symbol address -> function symbol,
8160 /// global variable symbol address -> variable symbol and also the
8161 /// maps of function and variable undefined symbols.
8163 /// All these maps are loaded only if they are not loaded already.
8165 /// @return true iff everything went fine.
     // Record which maps are missing *before* allocating them below:
     // these flags are derived from the pointers still being null.
8169 bool load_fun_map = !fun_addr_sym_map_ ;
8170 bool load_var_map = !var_addr_sym_map_;
8171 bool load_undefined_fun_map = !undefined_fun_syms_;
8172 bool load_undefined_var_map = !undefined_var_syms_;
8175 fun_syms_.reset(new string_elf_symbols_map_type);
8177 if (!fun_addr_sym_map_)
8178 fun_addr_sym_map_.reset(new addr_elf_symbol_sptr_map_type);
     // The entry point address -> symbol map is only allocated for
     // ppc64 binaries.
8180 if (!fun_entry_addr_sym_map_ && elf_architecture_is_ppc64())
8181 fun_entry_addr_sym_map_.reset(new addr_elf_symbol_sptr_map_type);
8184 var_syms_.reset(new string_elf_symbols_map_type);
8186 if (!var_addr_sym_map_)
8187 var_addr_sym_map_.reset(new addr_elf_symbol_sptr_map_type);
8189 if (!undefined_fun_syms_)
8190 undefined_fun_syms_.reset(new string_elf_symbols_map_type);
8192 if (!undefined_var_syms_)
8193 undefined_var_syms_.reset(new string_elf_symbols_map_type);
     // The symbol table is only read when the options do not ask to
     // ignore it.
8195 if (!options_.ignore_symbol_table)
8197 if (load_symbol_maps_from_symtab_section(load_fun_map,
8199 load_undefined_fun_map,
8200 load_undefined_var_map))
8202 if (load_in_linux_kernel_mode() && is_linux_kernel_binary())
8203 return load_linux_specific_exported_symbol_maps();
8211 /// Return true if an address is in the ".opd" section that is
8212 /// present on the ppc64 platform.
8214 /// @param addr the address to consider.
8216 /// @return true iff @p addr designates a word that is in the
8219 address_is_in_opd_section(Dwarf_Addr addr)
     // Look the section up with find_opd_section(), then reuse the
     // generic address_is_in_section() range test on it.
8221 Elf_Scn * opd_section = find_opd_section();
8224 if (address_is_in_section(addr, opd_section))
8229 /// Load the symbol maps if necessary.
8231 /// @return true iff the symbol maps have been loaded by this
8234 maybe_load_symbol_maps() const
     // A load is triggered as soon as any one of the containers tested
     // here is still unset.
8236 if (!fun_addr_sym_map_
8237 || !var_addr_sym_map_
8240 || !undefined_fun_syms_
8241 || !undefined_var_syms_)
     // This method is const; the const_cast strips the constness of
     // `this` so that load_symbol_maps() can be invoked from here.
8242 return const_cast<read_context*>(this)->load_symbol_maps();
8246 /// Load the DT_NEEDED and DT_SONAME elf TAGS.
8249 load_dt_soname_and_needed()
     // The DT_NEEDED entries are collected straight into dt_needed_.
8251 lookup_data_tag_from_dynamic_segment(elf_handle(), DT_NEEDED, dt_needed_);
     // The DT_SONAME entries go into a temporary vector; only the first
     // one, if any, is kept as dt_soname_.
8253 vector<string> dt_tag_data;
8254 lookup_data_tag_from_dynamic_segment(elf_handle(), DT_SONAME, dt_tag_data);
8255 if (!dt_tag_data.empty())
8256 dt_soname_ = dt_tag_data[0];
8259 /// Read the string representing the architecture of the current ELF
8262 load_elf_architecture()
8268 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
     // The architecture string is derived from the e_machine field of
     // the ELF header by e_machine_to_string().
     // NOTE(review): elf_header is dereferenced below and no check of
     // the gelf_getehdr() result is visible in this excerpt -- confirm
     // it cannot be null here.
8270 elf_architecture_ = e_machine_to_string(elf_header->e_machine);
8273 /// Load various ELF data.
8275 /// This function loads ELF data that are not symbol maps or debug
8276 /// info. That is, things like various tags, elf architecture and
8279 load_elf_properties()
     // Runs the two ELF property loaders in sequence: first the
     // DT_SONAME/DT_NEEDED tags, then the architecture string.
8281 load_dt_soname_and_needed();
8282 load_elf_architecture();
8285 /// Convert the value of the symbol address part of a post V4.19
8286 /// ksymtab entry (that contains place-relative addresses) into its
8287 /// corresponding symbol value in the .symtab section. The value of
8288 /// the symbol in .symtab equals to addr_offset + address-of-ksymtab
8291 /// @param addr the address read from the ksymtab section.
8293 /// @param addr_offset the offset at which @p addr was read.
8295 /// @param ksymtab_section the ksymtab section @p addr was read
8298 maybe_adjust_sym_address_from_v4_19_ksymtab(GElf_Addr addr,
8300 Elf_Scn *ksymtab_section) const
     // By default the input address is returned untouched; it is only
     // recomputed for the V4_19_KSYMTAB_FORMAT format.
8302 GElf_Addr result = addr;
8304 if (get_ksymtab_format() == V4_19_KSYMTAB_FORMAT)
     // The value read from the ksymtab is narrowed to a signed 32-bit
     // integer by this assignment.
8306 int32_t offset = addr;
8308 GElf_Shdr *section_header = gelf_getshdr(ksymtab_section, &mem);
     // Symbol value = signed offset + address of the ksymtab section
     // + offset at which the entry was read.
     // NOTE(review): section_header is dereferenced without a visible
     // null check on the gelf_getshdr() result -- confirm it cannot
     // fail here.
8309 result = offset + section_header->sh_addr + addr_offset;
8315 /// This is a sub-routine of maybe_adjust_fn_sym_address and
8316 /// maybe_adjust_var_sym_address.
8318 /// Given an address that we got by looking at some debug
8319 /// information (e.g, a symbol's address referred to by a DWARF
8320 /// TAG), If the ELF file we are interested in is a shared library
8321 /// or an executable, then adjust the address to be coherent with
8322 /// where the executable (or shared library) is loaded. That way,
8323 /// the address can be used to look for symbols in the executable or
8326 /// @return the adjusted address, or the same address as @p addr if
8327 /// it didn't need any adjustment.
8329 maybe_adjust_address_for_exec_or_dyn(Dwarf_Addr addr) const
8335 GElf_Ehdr *elf_header = gelf_getehdr(elf_handle(), &eh_mem);
     // Only shared libraries (ET_DYN) and executables (ET_EXEC) are
     // candidates for the adjustment below.
8337 if (elf_header->e_type == ET_DYN || elf_header->e_type == ET_EXEC)
8339 Dwarf_Addr dwarf_elf_load_address = 0, elf_load_address = 0;
     // NOTE(review): both get_binary_load_address() calls, which fill
     // the two variables above, are made inside ABG_ASSERT; this relies
     // on the macro always evaluating its argument -- confirm against
     // the ABG_ASSERT definition.
8340 ABG_ASSERT(get_binary_load_address(dwarf_elf_handle(),
8341 dwarf_elf_load_address));
8342 ABG_ASSERT(get_binary_load_address(elf_handle(),
8344 if (dwarf_is_splitted()
8345 && (dwarf_elf_load_address != elf_load_address))
8346 // This means that in theory the DWARF and the executable are
8347 // not loaded at the same address. And addr is meaningful
8348 // only in the context of the DWARF.
8350 // So let's transform addr into an offset relative to where
8351 // the DWARF is loaded, and let's add that relative offset
8352 // to the load address of the executable. That way, addr
8353 // becomes meaningful in the context of the executable and
8354 // can thus be used to compare against the address of
8355 // symbols of the executable, for instance.
8356 addr = addr - dwarf_elf_load_address + elf_load_address;
8362 /// For a relocatable (*.o) elf file, this function expects an
8363 /// absolute address, representing a function symbol, and leaves it
8364 /// unchanged: the ELF symbol addresses stored in the lookup tables
8365 /// are themselves translated from section-relative to absolute, so
8366 /// no adjustment is needed.
8368 /// For executable or shared library, this function expects an
8369 /// address of a function symbol that was retrieved by looking at a
8370 /// DWARF "file". The function thus adjusts the address to make it
8371 /// be meaningful in the context of the ELF file.
8373 /// In both cases, the address can then be compared against the
8374 /// st_value field of a function symbol from the ELF file.
8376 /// @param addr an address for a function symbol that was retrieved
8377 /// from a DWARF file.
8379 /// @return the (possibly) adjusted address, or just @p addr if no
8380 /// adjustment took place.
8382 maybe_adjust_fn_sym_address(Dwarf_Addr addr) const
8387 Elf* elf = elf_handle();
8389 GElf_Ehdr* elf_header = gelf_getehdr(elf, &eh_mem);
     // The kind of ELF file (e_type) decides whether any adjustment is
     // attempted.
8391 if (elf_header->e_type == ET_REL)
8392 // We are looking at a relocatable file. In this case, we don't
8393 // do anything because:
8395 // 1/ the addresses from DWARF are absolute (relative to the
8396 // beginning of the relocatable file)
8398 // 2/ The ELF symbol addresses that we store in our lookup
8399 // tables are translated from section-relative to absolute as
8400 // well. So we don't have anything to do at this point for
     // Otherwise, fall back to the adjustment done for executables and
     // shared libraries.
8404 addr = maybe_adjust_address_for_exec_or_dyn(addr);
8409 /// Translate a section-relative symbol address (i.e, symbol value)
8410 /// into an absolute symbol address by adding the address of the
8411 /// section the symbol belongs to, to the address value.
8413 /// This is useful when looking at symbol values coming from
8414 /// relocatable files (of ET_REL kind). If the binary is not
8415 /// ET_REL, then the function does nothing and returns the input
8416 /// address unchanged.
8418 /// @param addr the symbol address to possibly translate.
8420 /// @param section the section the symbol which value is @p addr
8423 /// @return the section-relative address, translated into an
8424 /// absolute address, if @p section is an ET_REL binary. Otherwise,
8425 /// return @p addr, unchanged.
8427 maybe_adjust_et_rel_sym_addr_to_abs_addr(GElf_Addr addr, Elf_Scn *section)
8432 Elf* elf = elf_handle();
8433 GElf_Ehdr elf_header;
     // The ELF header is needed to learn the file type; the case where
     // it cannot be read is handled first.
8435 if (!gelf_getehdr(elf, &elf_header))
     // Only relocatable files (ET_REL) carry section-relative symbol
     // values that need translating.
8438 if (elf_header.e_type != ET_REL)
8441 GElf_Shdr section_header;
     // gelf_getshdr() fills section_header through the pointer it is
     // given, so the address of the local must be passed.
8442 if (!gelf_getshdr(section, &section_header))
     // Absolute address = section-relative value + address of the
     // section (sh_addr).
8445 return addr + section_header.sh_addr;
8448 /// Translate a section-relative symbol address (i.e, symbol value)
8449 /// into an absolute symbol address by adding the address of the
8450 /// section the symbol belongs to, to the address value.
8452 /// This is useful when looking at symbol values coming from
8453 /// relocatable files (of ET_REL kind). If the binary is not
8454 /// ET_REL, then the function does nothing and returns the input
8455 /// address unchanged.
8457 /// @param sym the symbol whose address possibly needs to be
8460 /// @return the section-relative address, translated into an
8461 /// absolute address, if @p sym is from an ET_REL binary.
8462 /// Otherwise, return the address of @p sym, unchanged.
8464 maybe_adjust_et_rel_sym_addr_to_abs_addr(GElf_Sym *sym)
     // Find the section the symbol refers to through its st_shndx
     // index; that section is asserted to exist.
8466 Elf_Scn *symbol_section = elf_getscn(elf_handle(), sym->st_shndx);
8467 ABG_ASSERT(symbol_section);
     // Start from the raw symbol value and let the (address, section)
     // overload do the actual translation.
8468 GElf_Addr result = sym->st_value;
8469 result = maybe_adjust_et_rel_sym_addr_to_abs_addr(result, symbol_section);
8473 /// Test if a given address is in a given section.
8475 /// @param addr the address to consider.
8477 /// @param section the section to consider.
8479 address_is_in_section(Dwarf_Addr addr, Elf_Scn* section) const
8484 GElf_Shdr sheader_mem;
8485 GElf_Shdr* sheader = gelf_getshdr(section, &sheader_mem);
     // The address matches when it lies in [sh_addr, sh_addr + sh_size].
     // NOTE(review): the upper bound is inclusive, so the address one
     // past the last byte of the section also matches -- confirm this
     // is intended.
8487 if (sheader->sh_addr <= addr && addr <= sheader->sh_addr + sheader->sh_size)
8493 /// Get the section which a global variable address comes from.
8495 /// @param var_addr the address for the variable.
8497 /// @return the ELF section the @p var_addr comes from, or nil if no
8498 /// section was found for that variable address.
8500 get_data_section_for_variable_address(Dwarf_Addr var_addr) const
8502 // There are several potential "data sections" from which a
8503 // variable address can come: .bss, .data, .data1 and .rodata.
8504 // Let's try them all in sequence.
     // Each candidate is only tried when the address was not found in
     // the previous one.
8506 Elf_Scn* data_scn = bss_section();
8507 if (!address_is_in_section(var_addr, data_scn))
8509 data_scn = data_section();
8510 if (!address_is_in_section(var_addr, data_scn))
8512 data_scn = data1_section();
8513 if (!address_is_in_section(var_addr, data_scn))
8515 data_scn = rodata_section();
8516 if (!address_is_in_section(var_addr, data_scn))
8524 /// For a relocatable (*.o) elf file, this function expects an
8525 /// absolute address, representing a global variable symbol, and
8526 /// leaves it unchanged: the ELF symbol addresses stored in the
8527 /// lookup tables are themselves translated from section-relative to
8528 /// absolute, so no adjustment is needed.
8530 /// For executable or shared library, this function expects an
8531 /// address of a variable symbol that was retrieved by looking at a
8532 /// DWARF "file". The function thus adjusts the address to make it
8533 /// be meaningful in the context of the ELF file.
8535 /// In both cases, the address can then be compared against the
8536 /// st_value field of a variable symbol from the ELF file.
8538 /// @param addr an address for a global variable symbol that was
8539 /// retrieved from a DWARF file.
8541 /// @return the (possibly) adjusted address, or just @p addr if no
8542 /// adjustment took place.
8544 maybe_adjust_var_sym_address(Dwarf_Addr addr) const
8546 Elf* elf = elf_handle();
8548 GElf_Ehdr* elf_header = gelf_getehdr(elf, &eh_mem);
     // The kind of ELF file (e_type) decides whether any adjustment is
     // attempted.
8550 if (elf_header->e_type == ET_REL)
8551 // We are looking at a relocatable file. In this case, we don't
8552 // do anything because:
8554 // 1/ the addresses from DWARF are absolute (relative to the
8555 // beginning of the relocatable file)
8557 // 2/ The ELF symbol addresses that we store in our lookup
8558 // tables are translated from section-relative to absolute as
8559 // well. So we don't have anything to do at this point for
     // Otherwise, fall back to the adjustment done for executables and
     // shared libraries.
8563 addr = maybe_adjust_address_for_exec_or_dyn(addr);
8568 /// Get the first exported function address in the set of addresses
8569 /// referred to by the DW_AT_ranges attribute of a given DIE.
8571 /// @param die the DIE we are considering.
8573 /// @param address output parameter. This is set to the first
8574 /// address found in the sequence pointed to by the DW_AT_ranges
8575 /// attribute found on the DIE @p die, iff the function returns
8576 /// true. Otherwise, no value is set into this output parameter.
8578 /// @return true iff the DIE @p die does have a DW_AT_ranges
8579 /// attribute and an address of an exported function was found in
8580 /// its sequence value.
8582 get_first_exported_fn_address_from_DW_AT_ranges(Dwarf_Die* die,
8583 Dwarf_Addr& address) const
8586 Dwarf_Addr end_addr;
     // Cursor for dwarf_ranges(): each call is given the offset returned
     // by the previous one, starting from 0.
8587 ptrdiff_t offset = 0;
8591 Dwarf_Addr addr, fn_addr;
8592 if ((offset = dwarf_ranges(die, offset, &base, &addr, &end_addr)) >= 0)
     // The start address of the range is adjusted before being tested
     // against the exported function symbols.
8594 fn_addr = maybe_adjust_fn_sym_address(addr);
8595 if (function_symbol_is_exported(fn_addr))
     // The walk goes on for as long as dwarf_ranges() keeps returning a
     // positive offset.
8601 } while (offset > 0);
8605 /// Get the address of the function.
8607 /// The address of the function is considered to be the value of the
8608 /// DW_AT_low_pc attribute, possibly adjusted (in relocatable files
8609 /// only) to not point to an absolute address anymore, but rather to
8610 /// the address of the function inside the .text segment.
8612 /// @param function_die the die of the function to consider.
8614 /// @param address the resulting address iff the function returns
8617 /// @return true if the function address was found.
8619 get_function_address(Dwarf_Die* function_die, Dwarf_Addr& address) const
     // DW_AT_low_pc is tried first; DW_AT_ranges is only the fallback.
8621 if (!die_address_attribute(function_die, DW_AT_low_pc, address))
8622 // So no DW_AT_low_pc was found. Let's see if the function DIE
8623 // has got a DW_AT_ranges attribute instead. If it does, the
8624 // first address of the set of addresses represented by the
8625 // value of that DW_AT_ranges represents the function (symbol)
8626 // address we are looking for.
8627 if (!get_first_exported_fn_address_from_DW_AT_ranges(function_die,
     // The address that was found is passed through
     // maybe_adjust_fn_sym_address() before being handed back.
8631 address = maybe_adjust_fn_sym_address(address);
8635 /// Get the address of the global variable.
8637 /// The address of the global variable is considered to be the value
8638 /// of the DW_AT_location attribute, possibly adjusted (in
8639 /// relocatable files only) to not point to an absolute address
8640 /// anymore, but rather to the address of the global variable inside
8641 /// the data segment.
8643 /// @param variable_die the die of the variable to consider.
8645 /// @param address the resulting address iff this function returns
8648 /// @return true if the variable address was found.
8650 get_variable_address(Dwarf_Die* variable_die,
8651 Dwarf_Addr& address) const
     // die_location_address() yields the address and also reports, via
     // is_tls_address, whether it is a TLS address.
8653 bool is_tls_address = false;
8654 if (!die_location_address(variable_die, address, is_tls_address))
     // Addresses flagged as TLS are left as they are; all others are
     // passed through maybe_adjust_var_sym_address().
8656 if (!is_tls_address)
8657 address = maybe_adjust_var_sym_address(address);
8661 /// Tests if a suppression specification can match ABI artifacts
8662 /// coming from the binary being analyzed.
8664 /// This tests if the suppression matches the soname and the binary
8665 /// name of the ELF binary being analyzed.
8667 /// @param s the suppression specification to consider.
8669 suppression_can_match(const suppr::suppression_base& s) const
     // Both conditions must hold: the suppression has to match the
     // DT_SONAME of the binary *and* the path of the binary.
8671 if (s.priv_->matches_soname(dt_soname())
8672 && s.priv_->matches_binary_name(elf_path()))
8677 /// Test whether a given function suppression matches a function
8678 /// designated by a regular expression that describes its linkage
8679 /// name (symbol name).
8681 /// @param s the suppression specification to evaluate to see if it
8682 /// matches a given function linkage name
8684 /// @param fn_linkage_name the linkage name of the function of interest.
8686 /// @return true iff the suppression specification @p s matches the
8687 /// function whose linkage name is @p fn_linkage_name.
8689 suppression_matches_function_sym_name(const suppr::function_suppression_sptr& s,
8690 const string& fn_linkage_name) const
     // Smart-pointer convenience overload: dereferences s and forwards
     // to the overload taking the suppression by reference.
8694 return suppression_matches_function_sym_name(*s,fn_linkage_name);
8697 /// Test whether a given function suppression matches a function
8698 /// designated by a regular expression that describes its linkage
8699 /// name (symbol name).
8701 /// @param s the suppression specification to evaluate to see if it
8702 /// matches a given function linkage name
8704 /// @param fn_linkage_name the linkage name of the function of interest.
8706 /// @return true iff the suppression specification @p s matches the
8707 /// function whose linkage name is @p fn_linkage_name.
8709 suppression_matches_function_sym_name(const suppr::function_suppression& s,
8710 const string& fn_linkage_name) const
     // First make sure the suppression is applicable to the binary
     // being analyzed at all.
8712 if (!suppression_can_match(s))
     // The actual name matching is done by the suppr:: free function of
     // the same name.
8715 return suppr::suppression_matches_function_sym_name(s, fn_linkage_name);
8718 /// Test whether a given function suppression matches a function
8719 /// designated by a regular expression that describes its name.
8721 /// @param s the suppression specification to evaluate to see if it
8722 /// matches a given function name.
8724 /// @param fn_name the name of the function of interest. Note that
8725 /// this name must be *non* qualified.
8727 /// @return true iff the suppression specification @p s matches the
8728 /// function whose name is @p fn_name.
8730 suppression_matches_function_name(const suppr::function_suppression_sptr& s,
8731 const string& fn_name) const
     // Smart-pointer convenience overload: dereferences s and forwards
     // to the overload taking the suppression by reference.
8735 return suppression_matches_function_name(*s, fn_name);
8738 /// Test whether a given function suppression matches a function
8739 /// designated by a regular expression that describes its name.
8741 /// @param s the suppression specification to evaluate to see if it
8742 /// matches a given function name.
8744 /// @param fn_name the name of the function of interest. Note that
8745 /// this name must be *non* qualified.
8747 /// @return true iff the suppression specification @p s matches the
8748 /// function whose name is @p fn_name.
8750 suppression_matches_function_name(const suppr::function_suppression& s,
8751 const string& fn_name) const
     // First make sure the suppression is applicable to the binary
     // being analyzed at all.
8753 if (!suppression_can_match(s))
     // The actual name matching is done by the suppr:: free function of
     // the same name.
8756 return suppr::suppression_matches_function_name(s, fn_name);
8759 /// Test whether a given variable suppression specification
8760 /// matches a variable denoted by its name.
8762 /// @param s the variable suppression specification to consider.
8764 /// @param var_name the name of the variable to consider.
8766 /// @return true iff the suppression specification @p s matches the
8767 /// variable whose name is @p var_name.
8769 suppression_matches_variable_name(const suppr::variable_suppression& s,
8770 const string& var_name) const
     // First make sure the suppression is applicable to the binary
     // being analyzed at all.
8772 if (!suppression_can_match(s))
     // The actual name matching is done by the suppr:: free function of
     // the same name.
8775 return suppr::suppression_matches_variable_name(s, var_name);
8778 /// Test whether a given variable suppression specification
8779 /// matches a variable denoted by its linkage name.
8781 /// @param s the variable suppression specification to consider.
8783 /// @param var_linkage_name the linkage name of the variable to consider.
8785 /// @return true iff variable suppression specification @p s matches
8786 /// the variable denoted by linkage name @p var_linkage_name.
8788 suppression_matches_variable_sym_name(const suppr::variable_suppression& s,
8789 const string& var_linkage_name) const
     // First make sure the suppression is applicable to the binary
     // being analyzed at all.
8791 if (!suppression_can_match(s))
     // The actual linkage-name matching is done by the suppr:: free
     // function of the same name.
8794 return suppr::suppression_matches_variable_sym_name(s, var_linkage_name);
8797 /// Test if a given type suppression specification matches a type
8798 /// designated by its name and location.
8800 /// @param s the suppression specification to consider.
8802 /// @param type_name the fully qualified type name to consider.
8804 /// @param type_location the type location to consider.
8806 /// @return true iff the type suppression specification matches a
8807 /// type of a given name and location.
8809 suppression_matches_type_name_or_location(const suppr::type_suppression& s,
8810 const string& type_name,
8811 const location& type_location) const
     // First make sure the suppression is applicable to the binary
     // being analyzed at all.
8813 if (!suppression_can_match(s))
     // The actual matching is done by the suppr:: free function of the
     // same name.
8816 return suppr::suppression_matches_type_name_or_location(s, type_name,
8820 /// Test if a type suppression specification matches the name of a
8821 /// type within a given scope.
8823 /// @param s the type suppression specification to consider.
8825 /// @param type_scope the type scope to consider.
8827 /// @param type the type to consider.
8829 /// @return true iff the type suppression specification matches
8830 /// the name of type @p type.
8832 suppression_matches_type_name(const suppr::type_suppression& s,
8833 const scope_decl* type_scope,
8834 const type_base_sptr& type) const
     // First make sure the suppression is applicable to the binary
     // being analyzed at all.
8836 if (!suppression_can_match(s))
     // The actual matching is done by the suppr:: free function of the
     // same name.
8838 return suppr::suppression_matches_type_name(s, type_scope, type);
8841 /// Getter of the exported decls builder object.
8843 /// @return the exported decls builder.
8844 corpus::exported_decls_builder*
8845 exported_decls_builder()
     // Plain accessor: hands back the raw pointer stored in
     // exported_decls_builder_.
8846 {return exported_decls_builder_;}
8848 /// Setter of the exported decls builder object.
8850 /// Note that this @ref read_context is not responsible for the
8851 /// lifetime of the exported_decls_builder object. The corpus is.
8853 /// @param b the new builder.
8855 exported_decls_builder(corpus::exported_decls_builder* b)
     // Only the pointer is stored; nothing is copied or released here.
8856 {exported_decls_builder_ = b;}
8858 /// Getter of the "load_all_types" flag. This flag tells if all the
8859 /// types (including those not reachable by public declarations) are
8860 /// to be read and represented in the final ABI corpus.
8862 /// @return the load_all_types flag.
8864 load_all_types() const
     // Reads the load_all_types field of options_.
8865 {return options_.load_all_types;}
8867 /// Setter of the "load_all_types" flag. This flag tells if all the
8868 /// types (including those not reachable by public declarations) are
8869 /// to be read and represented in the final ABI corpus.
8871 /// @param f the new load_all_types flag.
8873 load_all_types(bool f)
     // Stores f into the load_all_types field of options_.
8874 {options_.load_all_types = f;}
8877 load_in_linux_kernel_mode() const
     // Getter of the "load_in_linux_kernel_mode" flag: reads the field
     // of the same name in options_.
8878 {return options_.load_in_linux_kernel_mode;}
8881 load_in_linux_kernel_mode(bool f)
     // Setter of the "load_in_linux_kernel_mode" flag: stores f into
     // the field of the same name in options_.
8882 {options_.load_in_linux_kernel_mode = f;}
8884 /// Guess if the current binary is a Linux Kernel or a Linux Kernel module.
8886 /// To guess that, the function looks for the presence of the
8887 /// special "__ksymtab_strings" section in the binary.
8890 is_linux_kernel_binary() const
     // True when a SHT_PROGBITS section named "__ksymtab_strings" is
     // found, or else when the binary is recognized as a kernel module.
8892 return find_section(elf_handle(), "__ksymtab_strings", SHT_PROGBITS)
8893 || is_linux_kernel_module();
8896 /// Guess if the current binary is a Linux Kernel module.
8898 /// To guess that, the function looks for the presence of the special
8899 /// ".modinfo" and ".gnu.linkonce.this_module" sections in the binary.
8902 is_linux_kernel_module() const
     // Both SHT_PROGBITS sections must be found for the binary to be
     // considered a kernel module.
8904 return find_section(elf_handle(), ".modinfo", SHT_PROGBITS)
8905 && find_section(elf_handle(), ".gnu.linkonce.this_module", SHT_PROGBITS);
8908 /// Getter of the "show_stats" flag.
8910 /// This flag tells if we should emit statistics about various
8913 /// @return the value of the flag.
8916 {return options_.show_stats;} // Reads the show_stats field of options_.
8918 /// Setter of the "show_stats" flag.
8920 /// This flag tells if we should emit statistics about various
8923 /// @param f the value of the flag.
8926 {options_.show_stats = f;} // Stores f into the show_stats field of options_.
8928 /// Getter of the "do_log" flag.
8930 /// This flag tells if we should log about various internal
8933 /// return the "do_log" flag.
8936 {return options_.do_log;} // Reads the do_log field of options_.
8938 /// Setter of the "do_log" flag.
8940 /// This flag tells if we should log about various internal details.
8942 /// @param f the new value of the flag.
8945 {options_.do_log = f;} // Stores f into the do_log field of options_.
8947 /// If a given function decl is suitable for the set of exported
8948 /// functions of the current corpus, this function adds it to that
8951 /// @param fn the function to consider for inclusion into the set of
8952 /// exported functions of the current corpus.
8954 maybe_add_fn_to_exported_decls(function_decl* fn)
     // Nothing is added when no exported decls builder is set; when
     // there is one, it decides whether fn ends up in the exported
     // functions.
8957 if (corpus::exported_decls_builder* b = exported_decls_builder())
8958 b->maybe_add_fn_to_exported_fns(fn);
8961 /// If a given variable decl is suitable for the set of exported
8962 /// variables of the current corpus, this function adds it to that
8965 /// @param var the variable to consider for inclusion into the set of
8966 /// exported variables of the current corpus.
8968 maybe_add_var_to_exported_decls(var_decl* var)
     // Nothing is added when no exported decls builder is set; when
     // there is one, it decides whether var ends up in the exported
     // variables.
8971 if (corpus::exported_decls_builder* b = exported_decls_builder())
8972 b->maybe_add_var_to_exported_vars(var);
8975 /// Walk the DIEs under a given die and for each child, populate the
8976 /// die -> parent map to record the child -> parent relationship
8978 /// exists between the child and the given die.
8980 /// The function also builds the vector of places where units are
8983 /// This is done recursively as for each child DIE, this function
8984 /// walks its children as well.
8986 /// @param die the DIE whose children to walk recursively.
8988 /// @param source where the DIE @p die comes from.
8990 /// @param imported_units a vector containing all the offsets of the
8991 /// points where unit have been imported, under @p die.
8993 build_die_parent_relations_under(Dwarf_Die* die,
8995 imported_unit_points_type & imported_units)
     // The child -> parent map that gets filled is the one associated
     // with the DIE source given by the caller.
9000 offset_offset_map_type& parent_of = die_parent_map(source);
     // dwarf_child() returning non-zero means there is no child DIE to
     // walk.
9003 if (dwarf_child(die, &child) != 0)
     // Record the relation as "offset of child -> offset of die".
9008 parent_of[dwarf_dieoffset(&child)] = dwarf_dieoffset(die);
     // For DW_TAG_imported_unit children, additionally record an
     // imported unit point built from the DIE that the DW_AT_import
     // attribute refers to.
9009 if (dwarf_tag(&child) == DW_TAG_imported_unit)
9011 Dwarf_Die imported_unit;
9012 if (die_die_attribute(&child, DW_AT_import, imported_unit))
9014 die_source imported_unit_die_source = NO_DEBUG_INFO_DIE_SOURCE;
     // NOTE(review): get_die_source() sets imported_unit_die_source and
     // is called inside ABG_ASSERT; this relies on the macro always
     // evaluating its argument -- confirm against its definition.
9015 ABG_ASSERT(get_die_source(imported_unit, imported_unit_die_source));
9016 imported_units.push_back
9017 (imported_unit_point(dwarf_dieoffset(&child),
9019 imported_unit_die_source));
     // Recurse into the child so that its own children are recorded
     // too.
9022 build_die_parent_relations_under(&child, source, imported_units);
     // Move on to the next sibling, until dwarf_siblingof() stops
     // returning 0.
9024 while (dwarf_siblingof(&child, &child) == 0);
9028 /// Determine if we do have to build a DIE -> parent map, depending
9029 /// on a given language.
9031 /// Some languages like C++, Ada etc, do have the concept of
9032 /// namespace and yet, the DIE data structure doesn't provide us
9033 /// with a way to get the parent namespace of a given DIE. So for
9034 /// those languages, we need to build a DIE -> parent map so that we
9035 /// can get the namespace DIE (or more generally the scope DIE) of a given
9036 /// DIE as we need it.
9038 /// But then some more basic languages like C or assembly don't have
9041 /// This function, depending on the language, tells us if we need to
9042 /// build the DIE -> parent map or not.
9044 /// @param lang the language to consider.
9046 /// @return true iff we need to build the DIE -> parent map for this
9049 do_we_build_die_parent_maps(translation_unit::language lang)
     // The C family of languages is tested first, through
     // is_c_language().
9051 if (is_c_language(lang))
     // Remaining languages are told apart case by case.  The
     // LANG_Mips_Assembler case is only compiled in when
     // HAVE_DW_LANG_Mips_Assembler_enumerator is defined.
     // NOTE(review): the value returned for each case is not visible in
     // this excerpt.
9056 case translation_unit::LANG_UNKNOWN:
9057 #ifdef HAVE_DW_LANG_Mips_Assembler_enumerator
9058 case translation_unit::LANG_Mips_Assembler:
9067 /// Walk all the DIEs accessible in the debug info (and in the
9068 /// alternate debug info as well) and build maps representing the
9069 /// relationship DIE -> parent. That is, make it so that we can get
9070 /// the parent for a given DIE.
9072 /// Note that the goal of this map is to be able to get the parent
9073 /// of a given DIE. This is mainly to handle namespaces. For instance,
9074 /// when we get a DIE of a type, and we want to build an internal
9075 /// representation for it, we need to get its fully qualified name.
9076 /// For that, we need to know what is the parent DIE of that type
9077 /// DIE, so that we can know what the namespace of that type is.
9079 /// Note that as the C language doesn't have namespaces (all types
9080 /// are defined in the same global namespace), this function doesn't
9081 /// build the DIE -> parent map if the current translation unit
9082 /// comes from C. This saves time on big C ELF files with a lot of
9085 build_die_parent_maps()
9087 bool we_do_have_to_build_die_parent_map = false;
9088 uint8_t address_size = 0;
9089 size_t header_size = 0;
9090 // Get the DIE of the current translation unit, look at it to get
9091 // its language. If that language is in C, then all types are in
9092 // the global namespace so we don't need to build the DIE ->
9093 // parent map. So we don't build it in that case.
     // First pass: walk the units of the main debug info only to decide
     // whether at least one of them needs the map.
     // NOTE(review): no early exit is visible once the flag is set, so
     // the remaining units still appear to be scanned -- confirm.
9094 for (Dwarf_Off offset = 0, next_offset = 0;
9095 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
9096 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
9097 offset = next_offset)
     // The unit DIE sits right after the unit header.
9099 Dwarf_Off die_offset = offset + header_size;
9101 if (!dwarf_offdie(dwarf(), die_offset, &cu))
9105 die_unsigned_constant_attribute(&cu, DW_AT_language, l);
9106 translation_unit::language lang = dwarf_language_to_tu_language(l);
9107 if (do_we_build_die_parent_maps(lang))
9108 we_do_have_to_build_die_parent_map = true;
9111 if (!we_do_have_to_build_die_parent_map)
9114 // Build the DIE -> parent relation for DIEs coming from the
9115 // .debug_info section in the alternate debug info file.
9116 die_source source = ALT_DEBUG_INFO_DIE_SOURCE;
9117 for (Dwarf_Off offset = 0, next_offset = 0;
9118 (dwarf_next_unit(alt_dwarf(), offset, &next_offset, &header_size,
9119 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
9120 offset = next_offset)
9122 Dwarf_Off die_offset = offset + header_size;
9124 if (!dwarf_offdie(alt_dwarf(), die_offset, &cu))
     // Start this unit with an empty list of imported unit points,
     // keyed by the offset of the unit DIE; the walk below fills it.
9128 imported_unit_points_type& imported_units =
9129 tu_die_imported_unit_points_map(source)[die_offset] =
9130 imported_unit_points_type();
9131 build_die_parent_relations_under(&cu, source, imported_units);
9134 // Build the DIE -> parent relation for DIEs coming from the
9135 // .debug_info section of the main debug info file.
9136 source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
9139 for (Dwarf_Off offset = 0, next_offset = 0;
9140 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
9141 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
9142 offset = next_offset)
9144 Dwarf_Off die_offset = offset + header_size;
9146 if (!dwarf_offdie(dwarf(), die_offset, &cu))
9149 imported_unit_points_type& imported_units =
9150 tu_die_imported_unit_points_map(source)[die_offset] =
9151 imported_unit_points_type();
9152 build_die_parent_relations_under(&cu, source, imported_units);
9155 // Build the DIE -> parent relation for DIEs coming from the
9156 // .debug_types section.
9157 source = TYPE_UNIT_DIE_SOURCE;
     // Unlike the loops above, this one passes output slots for the
     // type signature and type offset to dwarf_next_unit(), and looks
     // the unit DIE up with dwarf_offdie_types().
9160 uint64_t type_signature = 0;
9161 Dwarf_Off type_offset;
9162 for (Dwarf_Off offset = 0, next_offset = 0;
9163 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
9164 NULL, NULL, &address_size, NULL,
9165 &type_signature, &type_offset) == 0);
9166 offset = next_offset)
9168 Dwarf_Off die_offset = offset + header_size;
9171 if (!dwarf_offdie_types(dwarf(), die_offset, &cu))
9174 imported_unit_points_type& imported_units =
9175 tu_die_imported_unit_points_map(source)[die_offset] =
9176 imported_unit_points_type();
9177 build_die_parent_relations_under(&cu, source, imported_units);
9180 };// end class read_context.
// Forward declarations of file-local (static) helpers that take a
// read_context.  Their definitions are not visible in this excerpt.

// Two overloads of build_ir_node_from_die(); the first one has an
// extra, defaulted, is_required_decl_spec parameter.
9182 static type_or_decl_base_sptr
9183 build_ir_node_from_die(read_context& ctxt,
9186 bool called_from_public_decl,
9187 size_t where_offset,
9188 bool is_required_decl_spec = false);
9190 static type_or_decl_base_sptr
9191 build_ir_node_from_die(read_context& ctxt,
9193 bool called_from_public_decl,
9194 size_t where_offset);
// Class and union type helpers; each takes an existing class_decl_sptr
// / union_decl_sptr argument.
9196 static class_decl_sptr
9197 add_or_update_class_type(read_context& ctxt,
9201 class_decl_sptr klass,
9202 bool called_from_public_decl,
9203 size_t where_offset);
9205 static union_decl_sptr
9206 add_or_update_union_type(read_context& ctxt,
9209 union_decl_sptr union_type,
9210 bool called_from_public_decl,
9211 size_t where_offset);
9213 static decl_base_sptr
9214 build_ir_node_for_void_type(read_context& ctxt);
// Function declaration helpers.
9216 static function_decl_sptr
9217 build_function_decl(read_context& ctxt,
9219 size_t where_offset,
9220 function_decl_sptr fn);
9223 function_is_suppressed(const read_context& ctxt,
9224 const scope_decl* scope,
9225 Dwarf_Die *function_die);
9227 static function_decl_sptr
9228 build_or_get_fn_decl_if_not_suppressed(read_context& ctxt,
9231 size_t where_offset,
9232 function_decl_sptr f = function_decl_sptr());
// Variable declaration helpers.
9234 static var_decl_sptr
9235 build_var_decl(read_context& ctxt,
9237 size_t where_offset,
9238 var_decl_sptr result = var_decl_sptr());
9240 static var_decl_sptr
9241 build_or_get_var_decl_if_not_suppressed(read_context& ctxt,
9244 size_t where_offset,
9245 var_decl_sptr res = var_decl_sptr(),
9246 bool is_required_decl_spec = false);
9248 variable_is_suppressed(const read_context& ctxt,
9249 const scope_decl* scope,
9250 Dwarf_Die *variable_die,
9251 bool is_required_decl_spec = false);
// Member function helper.
9254 finish_member_function_reading(Dwarf_Die* die,
9255 const function_decl_sptr& f,
9256 const class_or_union_sptr& klass,
9257 read_context& ctxt);
9259 /// Setter of the debug info root path for a dwarf reader context.
9261 /// @param ctxt the dwarf reader context to consider.
9263 /// @param path the new debug info root path. This must be a pointer to a
9264 /// character string which life time should be greater than the life
9265 /// time of the read context.
9267 set_debug_info_root_path(read_context& ctxt, char** path)
9268 {ctxt.offline_callbacks()->debuginfo_path = path;}
9270 /// Getter of the debug info root path for a dwarf reader context.
9272 /// @param ctxt the dwarf reader context to consider.
9274 /// @return a pointer to the debug info root path.
9276 /// The pointed-to string must outlive the read context.
9278 get_debug_info_root_path(read_context& ctxt)
9279 {return ctxt.offline_callbacks()->debuginfo_path;}
9281 /// Getter of the "show_stats" flag.
9283 /// This flag tells if we should emit statistics about various
9286 /// @param ctxt the read context to consider for this flag.
9288 /// @return the value of the flag.
9290 get_show_stats(read_context& ctxt)
9291 {return ctxt.show_stats();}
9293 /// Setter of the "show_stats" flag.
9295 /// This flag tells if we should emit statistics about various
9298 /// @param ctxt the read context to consider for this flag.
9300 /// @param f the value of the flag.
9302 set_show_stats(read_context& ctxt, bool f)
9303 {ctxt.show_stats(f);}
9305 /// Setter of the "do_log" flag.
9307 /// This flag tells if we should emit verbose logs for various
9308 /// internal things related to DWARF reading.
9310 /// @param ctxt the DWARF reading context to consider.
9312 /// @param f the new value of the flag.
9314 set_do_log(read_context& ctxt, bool f)
9317 /// Setter of the "ignore_symbol_table" flag.
9319 /// This flag tells if we should load information about ELF symbol
9320 /// tables. Not loading the symbol tables is a speed optimization
9321 /// that is done when the set of symbols we care about is provided
9322 /// off-hand. This is the case when we are supposed to analyze a
9323 /// Linux kernel binary. In that case, because we have the white list
9324 /// of functions/variable symbols we care about, we don't need to
9325 /// analyze the symbol table; things are thus faster in that case.
9327 /// By default, the symbol table is analyzed so this boolean is set to
9330 /// @param ctxt the read context to consider.
9332 /// @param f the new value of the flag.
9334 set_ignore_symbol_table(read_context &ctxt, bool f)
9335 {ctxt.options_.ignore_symbol_table = f;}
9337 /// Getter of the "ignore_symbol_table" flag.
9339 /// This flag tells if we should load information about ELF symbol
9340 /// tables. Not loading the symbol tables is a speed optimization
9341 /// that is done when the set of symbols we care about is provided
9342 /// off-hand. This is the case when we are supposed to analyze a
9343 /// Linux kernel binary. In that case, because we have the white list
9344 /// of functions/variable symbols we care about, we don't need to
9345 /// analyze the symbol table; things are thus faster in that case.
9347 /// By default, the symbol table is analyzed so this boolean is set to
9350 /// @param ctxt the read context to consider.
9352 /// @return the value of the flag.
9354 get_ignore_symbol_table(const read_context& ctxt)
9355 {return ctxt.options_.ignore_symbol_table;}
9357 /// Test if a given DIE is anonymous
9359 /// @param die the DIE to consider.
9361 /// @return true iff @p die is anonymous.
9363 die_is_anonymous(const Dwarf_Die* die)
9365 Dwarf_Attribute attr;
9366 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), DW_AT_name, &attr))
9371 /// Get the value of an attribute that is supposed to be a string, or
9372 /// an empty string if the attribute could not be found.
9374 /// @param die the DIE to get the attribute value from.
9376 /// @param attr_name the attribute name. Must come from dwarf.h and
9377 /// be an enumerator representing an attribute like, e.g, DW_AT_name.
9379 /// @return the string representing the value of the attribute, or an
9380 /// empty string if no string attribute could be found.
9382 die_string_attribute(const Dwarf_Die* die, unsigned attr_name)
9387 Dwarf_Attribute attr;
9388 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
9391 const char* str = dwarf_formstring(&attr);
9392 return str ? str : "";
9395 /// Get the value of an attribute that is supposed to be an unsigned
9398 /// @param die the DIE to read the information from.
9400 /// @param attr_name the DW_AT_* name of the attribute. Must come
9401 /// from dwarf.h and be an enumerator representing an attribute like,
9402 /// e.g, DW_AT_decl_line.
9404 /// @param cst the output parameter that is set to the value of the
9405 /// attribute @p attr_name. This parameter is set iff the function
9408 /// @return true if there was an attribute of the name @p attr_name
9409 /// and with a value that is a constant, false otherwise.
9411 die_unsigned_constant_attribute(const Dwarf_Die* die,
9418 Dwarf_Attribute attr;
9419 Dwarf_Word result = 0;
9420 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
9421 || dwarf_formudata(&attr, &result))
9428 /// Read a signed constant value from a given attribute.
9430 /// The signed constant expected must be of form DW_FORM_sdata.
9432 /// @param die the DIE to get the attribute from.
9434 /// @param attr_name the attribute name.
9436 /// @param cst the resulting signed constant read.
9438 /// @return true iff a signed constant attribute of the name @p
9439 /// attr_name was found on the DIE @p die.
9441 die_signed_constant_attribute(const Dwarf_Die *die,
9448 Dwarf_Attribute attr;
9449 Dwarf_Sword result = 0;
9450 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
9451 || dwarf_formsdata(&attr, &result))
9458 /// Read the value of a constant attribute that is either signed or
9459 /// unsigned into an array_type_def::subrange_type::bound_value value.
9461 /// The bound_value instance will capture the actual signedness of the
9464 /// @param die the DIE from which to read the value of the attribute.
9466 /// @param attr_name the attribute name to consider.
9468 /// @param value the resulting value read from attribute @p attr_name
9471 /// @return true iff DIE @p die has an attribute named @p attr_name
9472 /// with a constant value.
9474 die_constant_attribute(const Dwarf_Die *die,
9476 array_type_def::subrange_type::bound_value &value)
9478 if (die_attribute_is_unsigned(die, attr_name)
9479 || die_attribute_has_no_signedness(die, attr_name))
9482 if (!die_unsigned_constant_attribute(die, attr_name, l))
9484 value.set_unsigned(l);
9489 if (!die_signed_constant_attribute(die, attr_name, l))
9491 value.set_signed(l);
9496 /// Test if a given attribute on a DIE has a particular form.
9498 /// @param die the DIE to consider.
9500 /// @param attr_name the attribute name to consider on DIE @p die.
9502 /// @param attr_form the attribute form that we expect attribute @p
9503 /// attr_name has on DIE @p die.
9505 /// @return true iff the attribute named @p attr_name on DIE @p die
9506 /// has the form @p attr_form.
9508 die_attribute_has_form(const Dwarf_Die *die,
9510 unsigned int attr_form)
9512 Dwarf_Attribute attr;
9513 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
9516 return dwarf_hasform(&attr, attr_form);
9519 /// Test if a given DWARF form is DW_FORM_strx{1,4}.
9521 /// Unfortunately, the DW_FORM_strx{1,4} are enumerators of an untagged
9522 /// enum in dwarf.h so we have to use an unsigned int for the form,
9525 /// @param form the form to consider.
9527 /// @return true iff @p form is DW_FORM_strx{1,4}.
9529 form_is_DW_FORM_strx(unsigned form)
9533 #if defined HAVE_DW_FORM_strx1 \
9534 && defined HAVE_DW_FORM_strx2 \
9535 && defined HAVE_DW_FORM_strx3 \
9536 && defined HAVE_DW_FORM_strx4
9537 if (form == DW_FORM_strx1
9538 || form == DW_FORM_strx2
9539 || form == DW_FORM_strx3
9540 ||form == DW_FORM_strx4)
9547 /// Test if a given DIE attribute is signed.
9549 /// @param die the DIE to consider.
9551 /// @param attr_name the attribute name to consider.
9553 /// @return true iff the attribute named @p attr_name on DIE @p die is
9556 die_attribute_is_signed(const Dwarf_Die* die, unsigned attr_name)
9558 if (die_attribute_has_form(die, attr_name, DW_FORM_sdata))
9563 /// Test if a given DIE attribute is unsigned.
9565 /// @param die the DIE to consider.
9567 /// @param attr_name the attribute name to consider.
9569 /// @return true iff the attribute named @p attr_name on DIE @p die is
9572 die_attribute_is_unsigned(const Dwarf_Die* die, unsigned attr_name)
9574 if (die_attribute_has_form(die, attr_name, DW_FORM_udata))
9579 /// Test if a given DIE attribute is neither explicitly signed nor
9580 /// unsigned. Usually this is the case for attribute of the form
9583 /// @param die the DIE to consider.
9585 /// @param attr_name the name of the attribute to consider.
9587 /// @return true iff the attribute named @p attr_name of DIE @p die is
9588 /// neither specifically signed nor unsigned.
9590 die_attribute_has_no_signedness(const Dwarf_Die *die, unsigned attr_name)
9592 return (!die_attribute_is_unsigned(die, attr_name)
9593 && !die_attribute_is_signed(die, attr_name));
9596 /// Get the value of a DIE attribute; that value is meant to be a
9599 /// @param die the DIE to get the attribute from.
9601 /// @param attr_name the DW_AT_* name of the attribute. Must come
9602 /// from dwarf.h and be an enumerator representing an attribute like,
9603 /// e.g, DW_AT_external.
9605 /// @param flag the output parameter to store the flag value into.
9606 /// This is set iff the function returns true.
9608 /// @return true if the DIE has a flag attribute named @p attr_name,
9609 /// false otherwise.
9611 die_flag_attribute(Dwarf_Die* die, unsigned attr_name, bool& flag)
9613 Dwarf_Attribute attr;
9615 if (!dwarf_attr_integrate(die, attr_name, &attr)
9616 || dwarf_formflag(&attr, &f))
9623 /// Get the mangled name from a given DIE.
9625 /// @param die the DIE to read the mangled name from.
9627 /// @return the mangled name if it's present in the DIE, or just an
9628 /// empty string if it's not.
9630 die_linkage_name(const Dwarf_Die* die)
9635 string linkage_name = die_string_attribute(die, DW_AT_linkage_name);
9636 if (linkage_name.empty())
9637 linkage_name = die_string_attribute(die, DW_AT_MIPS_linkage_name);
9638 return linkage_name;
9641 /// Get the file path that is the value of the DW_AT_decl_file
9642 /// attribute on a given DIE, if the DIE is a decl DIE having that
9645 /// @param die the DIE to consider.
9647 /// @return a string containing the file path that is the logical
9648 /// value of the DW_AT_decl_file attribute. If the DIE @p die
9649 /// doesn't have a DW_AT_decl_file attribute, then the return value is
9650 /// just an empty string.
9652 die_decl_file_attribute(const Dwarf_Die* die)
9657 const char* str = dwarf_decl_file(const_cast<Dwarf_Die*>(die));
9659 return str ? str : "";
9662 /// Get the value of an attribute which value is supposed to be a
9663 /// reference to a DIE.
9665 /// @param die the DIE to read the value from.
9667 /// @param die_is_in_alt_di true if @p die comes from alternate debug
9670 /// @param attr_name the DW_AT_* attribute name to read.
9672 /// @param result the DIE resulting from reading the attribute value.
9673 /// This is set iff the function returns true.
9675 /// @param look_thru_abstract_origin if yes, the function looks
9676 /// through the possible DW_AT_abstract_origin attribute all the way
9677 /// down to the initial DIE that is cloned and look on that DIE to see
9678 /// if it has the @p attr_name attribute.
9680 /// @return true if the DIE @p die contains an attribute named @p
9681 /// attr_name that is a DIE reference, false otherwise.
9683 die_die_attribute(const Dwarf_Die* die,
9686 bool look_thru_abstract_origin)
9688 Dwarf_Attribute attr;
9689 if (look_thru_abstract_origin)
9691 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
9696 if (!dwarf_attr(const_cast<Dwarf_Die*>(die), attr_name, &attr))
9699 bool r = dwarf_formref_die(&attr, &result);
9703 /// Read and return a DW_FORM_addr attribute from a given DIE.
9705 /// @param die the DIE to consider.
9707 /// @param attr_name the name of the DW_FORM_addr attribute to read
9710 /// @param result the resulting address.
9712 /// @return true iff the attribute could be read, was of the expected
9713 /// DW_FORM_addr and could thus be translated into the @p result.
9715 die_address_attribute(Dwarf_Die* die, unsigned attr_name, Dwarf_Addr& result)
9717 Dwarf_Attribute attr;
9718 if (!dwarf_attr_integrate(die, attr_name, &attr))
9720 return dwarf_formaddr(&attr, &result) == 0;
9723 /// Returns the source location associated with a decl DIE.
9725 /// @param ctxt the @ref read_context to use.
9727 /// @param die the DIE to read the source location from.
9729 /// @return the location associated with @p die.
9731 die_location(const read_context& ctxt, const Dwarf_Die* die)
9736 string file = die_decl_file_attribute(die);
9738 die_unsigned_constant_attribute(die, DW_AT_decl_line, line);
9740 if (!file.empty() && line != 0)
9742 translation_unit_sptr tu = ctxt.cur_transl_unit();
9743 location l = tu->get_loc_mgr().create_new_location(file, line, 1);
9749 /// Return a copy of the name of a DIE.
9751 /// @param die the DIE to consider.
9753 /// @return a copy of the name of the DIE.
9755 die_name(const Dwarf_Die* die)
9757 string name = die_string_attribute(die, DW_AT_name);
9761 /// Return the location, the name and the mangled name of a given DIE.
9763 /// @param ctxt the read context to use.
9765 /// @param die the DIE to read location and names from.
9767 /// @param loc the location output parameter to set.
9769 /// @param name the name output parameter to set.
9771 /// @param linkage_name the linkage_name output parameter to set.
9773 die_loc_and_name(const read_context& ctxt,
9777 string& linkage_name)
9779 loc = die_location(ctxt, die);
9780 name = die_name(die);
9781 linkage_name = die_linkage_name(die);
9784 /// Get the size of a (type) DIE as the value for the parameter
9785 /// DW_AT_byte_size or DW_AT_bit_size.
9787 /// @param die the DIE to read the information from.
9789 /// @param size the resulting size in bits. This is set iff the
9790 /// function returns true.
9792 /// @return true if the size attribute was found.
9794 die_size_in_bits(const Dwarf_Die* die, uint64_t& size)
9799 uint64_t byte_size = 0, bit_size = 0;
9801 if (!die_unsigned_constant_attribute(die, DW_AT_byte_size, byte_size))
9803 if (!die_unsigned_constant_attribute(die, DW_AT_bit_size, bit_size))
9807 bit_size = byte_size * 8;
9814 /// Get the access specifier (from the DW_AT_accessibility attribute
9815 /// value) of a given DIE.
9817 /// @param die the DIE to consider.
9819 /// @param access the resulting access. This is set iff the function
9822 /// @return true if the DIE contains the DW_AT_accessibility attribute.
9824 die_access_specifier(Dwarf_Die * die, access_specifier& access)
9830 if (!die_unsigned_constant_attribute(die, DW_AT_accessibility, a))
9833 access_specifier result = private_access;
9837 case private_access:
9838 result = private_access;
9841 case protected_access:
9842 result = protected_access;
9846 result = public_access;
9857 /// Test whether a given DIE represents a decl that is public. That
9858 /// is, one with the DW_AT_external attribute set.
9860 /// @param die the DIE to consider for testing.
9862 /// @return true if a DW_AT_external attribute is present and its
9863 /// value is set to true; return false otherwise.
9865 die_is_public_decl(Dwarf_Die* die)
9867 bool is_public = false;
9868 die_flag_attribute(die, DW_AT_external, is_public);
9872 /// Test whether a given DIE represents a declaration-only DIE.
9874 /// That is, if the DIE has the DW_AT_declaration flag set.
9876 /// @param die the DIE to consider.
9878 /// @return true if a DW_AT_declaration is present, false otherwise.
9880 die_is_declaration_only(Dwarf_Die* die)
9882 bool is_declaration_only = false;
9883 die_flag_attribute(die, DW_AT_declaration, is_declaration_only);
9884 return is_declaration_only;
9887 /// Tests whether a given DIE is artificial.
9889 /// @param die the DIE to test for.
9891 /// @return true if the DIE has a DW_AT_artificial flag attribute, false otherwise.
9893 die_is_artificial(Dwarf_Die* die)
9896 return die_flag_attribute(die, DW_AT_artificial, is_artificial);
9899 ///@return true if a tag represents a type, false otherwise.
9901 ///@param tag the tag to consider.
9903 is_type_tag(unsigned tag)
9905 bool result = false;
9909 case DW_TAG_array_type:
9910 case DW_TAG_class_type:
9911 case DW_TAG_enumeration_type:
9912 case DW_TAG_pointer_type:
9913 case DW_TAG_reference_type:
9914 case DW_TAG_string_type:
9915 case DW_TAG_structure_type:
9916 case DW_TAG_subroutine_type:
9917 case DW_TAG_typedef:
9918 case DW_TAG_union_type:
9919 case DW_TAG_ptr_to_member_type:
9920 case DW_TAG_set_type:
9921 case DW_TAG_subrange_type:
9922 case DW_TAG_base_type:
9923 case DW_TAG_const_type:
9924 case DW_TAG_file_type:
9925 case DW_TAG_packed_type:
9926 case DW_TAG_thrown_type:
9927 case DW_TAG_volatile_type:
9928 case DW_TAG_restrict_type:
9929 case DW_TAG_interface_type:
9930 case DW_TAG_unspecified_type:
9931 case DW_TAG_shared_type:
9932 case DW_TAG_rvalue_reference_type:
9944 /// Test if a given DIE is a type to be canonicalized. note that a
9945 /// function DIE (DW_TAG_subprogram) is considered to be a
9946 /// canonicalize-able type too because we can consider that DIE as
9947 /// being the type of the function, as well as the function decl
9950 /// @param tag the tag of the DIE to consider.
9952 /// @return true iff the DIE of tag @p tag is a canonicalize-able DIE.
9954 is_canonicalizeable_type_tag(unsigned tag)
9956 bool result = false;
9960 case DW_TAG_array_type:
9961 case DW_TAG_class_type:
9962 case DW_TAG_enumeration_type:
9963 case DW_TAG_pointer_type:
9964 case DW_TAG_reference_type:
9965 case DW_TAG_structure_type:
9966 case DW_TAG_subroutine_type:
9967 case DW_TAG_subprogram:
9968 case DW_TAG_typedef:
9969 case DW_TAG_union_type:
9970 case DW_TAG_base_type:
9971 case DW_TAG_const_type:
9972 case DW_TAG_volatile_type:
9973 case DW_TAG_restrict_type:
9974 case DW_TAG_rvalue_reference_type:
9986 /// Test if a DIE tag represents a declaration.
9988 /// @param tag the DWARF tag to consider.
9990 /// @return true iff @p tag is for a declaration.
9992 is_decl_tag(unsigned tag)
9996 case DW_TAG_formal_parameter:
9997 case DW_TAG_imported_declaration:
9999 case DW_TAG_unspecified_parameters:
10000 case DW_TAG_subprogram:
10001 case DW_TAG_variable:
10002 case DW_TAG_namespace:
10003 case DW_TAG_GNU_template_template_param:
10004 case DW_TAG_GNU_template_parameter_pack:
10005 case DW_TAG_GNU_formal_parameter_pack:
10011 /// Test if a DIE represents a type DIE.
10013 /// @param die the DIE to consider.
10015 /// @return true if @p die represents a type, false otherwise.
10017 die_is_type(const Dwarf_Die* die)
10021 return is_type_tag(dwarf_tag(const_cast<Dwarf_Die*>(die)));
10024 /// Test if a DIE represents a declaration.
10026 /// @param die the DIE to consider.
10028 /// @return true if @p die represents a decl, false otherwise.
10030 die_is_decl(const Dwarf_Die* die)
10034 return is_decl_tag(dwarf_tag(const_cast<Dwarf_Die*>(die)));
10037 /// Test if a DIE represents a namespace.
10039 /// @param die the DIE to consider.
10041 /// @return true if @p die represents a namespace, false otherwise.
10043 die_is_namespace(const Dwarf_Die* die)
10047 return (dwarf_tag(const_cast<Dwarf_Die*>(die)) == DW_TAG_namespace);
10050 /// Test if a DIE has tag DW_TAG_unspecified_type.
10052 /// @param die the DIE to consider.
10054 /// @return true if @p die has tag DW_TAG_unspecified_type.
10056 die_is_unspecified(Dwarf_Die* die)
10060 return (dwarf_tag(die) == DW_TAG_unspecified_type);
10063 /// Test if a DIE represents a void type.
10065 /// @param die the DIE to consider.
10067 /// @return true if @p die represents a void type, false otherwise.
10069 die_is_void_type(Dwarf_Die* die)
10071 if (!die || dwarf_tag(die) != DW_TAG_base_type)
10074 string name = die_name(die);
10075 if (name == "void")
10081 /// Test if a DIE represents a pointer type.
10083 /// @param die the die to consider.
10085 /// @return true iff @p die represents a pointer type.
10087 die_is_pointer_type(const Dwarf_Die* die)
10092 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10093 if (tag == DW_TAG_pointer_type)
10099 /// Test if a DIE is for a pointer, reference or qualified type to
10100 /// anonymous class or struct.
10102 /// @param die the DIE to consider.
10104 /// @return true iff @p die is for a pointer, reference or qualified type
10105 /// to anonymous class or struct.
10107 pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die* die)
10109 if (!die_is_pointer_or_reference_type(die)
10110 && !die_is_qualified_type(die))
10113 Dwarf_Die underlying_type_die;
10114 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
10117 if (!die_is_class_type(&underlying_type_die))
10120 string name = die_name(&underlying_type_die);
10122 return name.empty();
10125 /// Test if a DIE represents a reference type.
10127 /// @param die the die to consider.
10129 /// @return true iff @p die represents a reference type.
10131 die_is_reference_type(const Dwarf_Die* die)
10136 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10137 if (tag == DW_TAG_reference_type || tag == DW_TAG_rvalue_reference_type)
10143 /// Test if a DIE represents an array type.
10145 /// @param die the die to consider.
10147 /// @return true iff @p die represents an array type.
10149 die_is_array_type(const Dwarf_Die* die)
10154 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10155 if (tag == DW_TAG_array_type)
10161 /// Test if a DIE represents a pointer, reference or array type.
10163 /// @param die the die to consider.
10165 /// @return true iff @p die represents a pointer, reference or array type.
10167 die_is_pointer_or_reference_type(const Dwarf_Die* die)
10168 {return (die_is_pointer_type(die)
10169 || die_is_reference_type(die)
10170 || die_is_array_type(die));}
10172 /// Test if a DIE represents a pointer, a reference or a typedef type.
10174 /// @param die the die to consider.
10176 /// @return true iff @p die represents a pointer, a reference or a
10179 die_is_pointer_reference_or_typedef_type(const Dwarf_Die* die)
10180 {return (die_is_pointer_or_reference_type(die)
10181 || dwarf_tag(const_cast<Dwarf_Die*>(die)) == DW_TAG_typedef);}
10183 /// Test if a DIE represents a class type.
10185 /// @param die the die to consider.
10187 /// @return true iff @p die represents a class type.
10189 die_is_class_type(const Dwarf_Die* die)
10191 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10193 if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
10199 /// Test if a DIE is for a qualified type.
10201 /// @param die the DIE to consider.
10203 /// @return true iff @p die is for a qualified type.
10205 die_is_qualified_type(const Dwarf_Die* die)
10207 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10208 if (tag == DW_TAG_const_type
10209 || tag == DW_TAG_volatile_type
10210 || tag == DW_TAG_restrict_type)
10216 /// Test if a DIE is for a function type.
10218 /// @param die the DIE to consider.
10220 /// @return true iff @p die is for a function type.
10222 die_is_function_type(const Dwarf_Die *die)
10224 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10225 if (tag == DW_TAG_subprogram || tag == DW_TAG_subroutine_type)
10231 /// Test if a DIE for a function pointer or member function has a
10232 /// DW_AT_object_pointer attribute.
10234 /// @param die the DIE to consider.
10236 /// @param object_pointer out parameter. It's set to the DIE for the
10237 /// object pointer iff the function returns true.
10239 /// @return true iff the DIE @p die has an object pointer. In that
10240 /// case, the parameter @p object_pointer is set to the DIE of that
10241 /// object pointer.
10243 die_has_object_pointer(const Dwarf_Die* die, Dwarf_Die& object_pointer)
10248 if (die_die_attribute(die, DW_AT_object_pointer, object_pointer))
10254 /// When given the object pointer DIE of a function type or member
10255 /// function DIE, this function returns the "this" pointer that points
10256 /// to the associated class.
10258 /// @param die the DIE of the object pointer of the function or member
10259 /// function to consider.
10261 /// @param this_pointer_die out parameter. This is set to the DIE of
10262 /// the "this" pointer iff the function returns true.
10264 /// @return true iff the function found the "this" pointer from the
10265 /// object pointer DIE @p die. In that case, the parameter @p
10266 /// this_pointer_die is set to the DIE of that "this" pointer.
10268 die_this_pointer_from_object_pointer(Dwarf_Die* die,
10269 Dwarf_Die& this_pointer_die)
10272 ABG_ASSERT(dwarf_tag(die) == DW_TAG_formal_parameter);
10274 if (die_die_attribute(die, DW_AT_type, this_pointer_die))
10280 /// Test if a given "this" pointer that points to a particular class
10281 /// type is for a const class or not. If it's for a const class, then
10282 /// it means the function type or the member function associated to
10283 /// that "this" pointer is const.
10285 /// @param die the DIE of the "this" pointer to consider.
10287 /// @return true iff @p die points to a const class type.
10289 die_this_pointer_is_const(Dwarf_Die* die)
10293 if (dwarf_tag(die) == DW_TAG_pointer_type)
10295 Dwarf_Die pointed_to_type_die;
10296 if (die_die_attribute(die, DW_AT_type, pointed_to_type_die))
10297 if (dwarf_tag(&pointed_to_type_die) == DW_TAG_const_type)
10304 /// Test if an object pointer (referred-to via a DW_AT_object_pointer
10305 /// attribute) points to a const implicit class and so is for a const
10306 /// method or a const member function type.
10308 /// @param die the DIE of the object pointer to consider.
10310 /// @return true iff the object pointer represented by @p die is for a
10311 /// const method or const member function type.
10313 die_object_pointer_is_for_const_method(Dwarf_Die* die)
10316 ABG_ASSERT(dwarf_tag(die) == DW_TAG_formal_parameter);
10318 Dwarf_Die this_pointer_die;
10319 if (die_this_pointer_from_object_pointer(die, this_pointer_die))
10320 if (die_this_pointer_is_const(&this_pointer_die))
10326 /// Test if a DIE represents an entity that is at class scope.
10328 /// @param ctxt the read context to use.
10330 /// @param die the DIE to consider.
10332 /// @param where_offset where we are logically at in the DIE stream.
10334 /// @param class_scope_die out parameter. Set to the DIE of the
10335 /// containing class iff @p die happens to be at class scope; that is,
10336 /// iff the function returns true.
10338 /// @return true iff @p die is at class scope. In that case, @p
10339 /// class_scope_die is set to the DIE of the class that contains @p
10342 die_is_at_class_scope(const read_context& ctxt,
10343 const Dwarf_Die* die,
10344 size_t where_offset,
10345 Dwarf_Die& class_scope_die)
10347 if (!get_scope_die(ctxt, die, where_offset, class_scope_die))
10350 int tag = dwarf_tag(&class_scope_die);
10352 return (tag == DW_TAG_structure_type
10353 || tag == DW_TAG_class_type
10354 || tag == DW_TAG_union_type);
10357 /// Return the leaf object under a pointer, reference or qualified
10360 /// @param die the DIE of the type to consider.
10362 /// @param peeled_die out parameter. Set to the DIE of the leaf
10363 /// object iff the function actually peeled anything.
10365 /// @return true upon successful completion.
10367 die_peel_qual_ptr(Dwarf_Die *die, Dwarf_Die& peeled_die)
10372 int tag = dwarf_tag(die);
10374 if (tag == DW_TAG_const_type
10375 || tag == DW_TAG_volatile_type
10376 || tag == DW_TAG_restrict_type
10377 || tag == DW_TAG_pointer_type
10378 || tag == DW_TAG_reference_type
10379 || tag == DW_TAG_rvalue_reference_type)
10381 if (!die_die_attribute(die, DW_AT_type, peeled_die))
10387 while (tag == DW_TAG_const_type
10388 || tag == DW_TAG_volatile_type
10389 || tag == DW_TAG_restrict_type
10390 || tag == DW_TAG_pointer_type
10391 || tag == DW_TAG_reference_type
10392 || tag == DW_TAG_rvalue_reference_type)
10394 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
10396 tag = dwarf_tag(&peeled_die);
10402 /// Return the leaf object under a typedef type DIE.
10404 /// @param die the DIE of the type to consider.
10406 /// @param peeled_die out parameter. Set to the DIE of the leaf
10407 /// object iff the function actually peeled anything.
10409 /// @return true upon successful completion.
10411 die_peel_typedef(Dwarf_Die *die, Dwarf_Die& peeled_die)
10416 int tag = dwarf_tag(die);
10418 if (tag == DW_TAG_typedef)
10420 if (!die_die_attribute(die, DW_AT_type, peeled_die))
10426 while (tag == DW_TAG_typedef)
10428 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
10430 tag = dwarf_tag(&peeled_die);
10437 /// Return the leaf DIE under a pointer, a reference or a typedef DIE.
10439 /// @param die the DIE to consider.
10441 /// @param peeled_die the resulting peeled (or leaf) DIE. This is set
10442 /// iff the function returned true.
10444 /// @return true iff the function could peel @p die.
10446 die_peel_pointer_and_typedef(const Dwarf_Die *die, Dwarf_Die& peeled_die)
10451 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
10453 if (tag == DW_TAG_pointer_type
10454 || tag == DW_TAG_reference_type
10455 || tag == DW_TAG_rvalue_reference_type
10456 || tag == DW_TAG_typedef)
10458 if (!die_die_attribute(die, DW_AT_type, peeled_die))
10464 while (tag == DW_TAG_pointer_type
10465 || tag == DW_TAG_reference_type
10466 || tag == DW_TAG_rvalue_reference_type
10467 || tag == DW_TAG_typedef)
10469 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
10471 tag = dwarf_tag(&peeled_die);
10476 /// Test if a DIE for a function type represents a method type.
10478 /// @param ctxt the read context.
10480 /// @param die the DIE to consider.
10482 /// @param where_offset where we logically are in the stream of DIEs.
10484 /// @param object_pointer_die out parameter. This is set by the
10485 /// function to the DIE that refers to the formal function parameter
10486 /// which holds the implicit "this" pointer of the method. That die
10487 /// is called the object pointer DIE. This is set iff the function
10489 /// @param class_die out parameter. This is set by the function to
10490 /// the DIE that represents the class of the method type. This is set
10491 /// iff the function returns true.
10493 /// @param is_static out parameter. This is set to true by the
10494 /// function if @p die is a static method. This is set iff the
10495 /// function returns true.
10497 /// @return true iff @p die is a DIE for a method type.
10499 die_function_type_is_method_type(const read_context& ctxt,
10500 const Dwarf_Die *die,
10501 size_t where_offset,
10502 Dwarf_Die& object_pointer_die,
10503 Dwarf_Die& class_die,
10509 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// Only function-type and function DIEs are accepted here; any other
// tag trips this assertion.
10510 ABG_ASSERT(tag == DW_TAG_subroutine_type || tag == DW_TAG_subprogram);
10512 bool has_object_pointer = false;
10514 if (tag == DW_TAG_subprogram)
// A DW_TAG_subprogram may refer to another DIE through
// DW_AT_specification or DW_AT_abstract_origin (tried in that
// order).  When it does, the referenced DIE is the one queried for
// the object pointer and for the class scope.
10516 Dwarf_Die spec_or_origin_die;
10517 if (die_die_attribute(die, DW_AT_specification,
10518 spec_or_origin_die)
10519 || die_die_attribute(die, DW_AT_abstract_origin,
10520 spec_or_origin_die))
10522 if (die_has_object_pointer(&spec_or_origin_die,
10523 object_pointer_die))
10524 has_object_pointer = true;
10527 if (die_is_at_class_scope(ctxt, &spec_or_origin_die,
10528 where_offset, class_die))
// NOTE(review): presumably the branch taken when neither attribute
// could be read, in which case @p die itself is queried -- the
// enclosing branch keyword is not visible in this excerpt; confirm.
10536 if (die_has_object_pointer(die, object_pointer_die))
10537 has_object_pointer = true;
10540 if (die_is_at_class_scope(ctxt, die, where_offset, class_die))
// NOTE(review): presumably the non-DW_TAG_subprogram case (i.e. a
// DW_TAG_subroutine_type), which is queried directly -- confirm.
10549 if (die_has_object_pointer(die, object_pointer_die))
10550 has_object_pointer = true;
// From this point on an object pointer DIE is required.
10557 ABG_ASSERT(has_object_pointer);
10558 // The object pointer die points to a DW_TAG_formal_parameter which
10559 // is the "this" parameter. The type of the "this" parameter is a
10560 // pointer. Let's get that pointer type.
10561 Dwarf_Die this_type_die;
10562 if (!die_die_attribute(&object_pointer_die, DW_AT_type, this_type_die))
10565 // So the class type is the type pointed to by the type of the "this"
10567 if (!die_peel_qual_ptr(&this_type_die, class_die))
10570 // And make sure we return a class type, rather than a typedef to a
10572 die_peel_typedef(&class_die, class_die);
// Enumerators that die_virtuality() stores into its `virt` out
// parameter.
10580 VIRTUALITY_NOT_VIRTUAL,
10581 VIRTUALITY_VIRTUAL,
10582 VIRTUALITY_PURE_VIRTUAL
10585 /// Get the virtual-ness of a given DIE, that is, the value of the
10586 /// DW_AT_virtuality attribute.
10588 /// @param die the DIE to read from.
10590 /// @param virt the resulting virtuality attribute. This is set iff
10591 /// the function returns true.
10593 /// @return true if the virtual-ness could be determined.
10595 die_virtuality(const Dwarf_Die* die, virtuality& virt)
// Read the raw DW_AT_virtuality constant into v.
10601 die_unsigned_constant_attribute(die, DW_AT_virtuality, v);
// Translate the DWARF constant into the matching local enumerator.
10603 if (v == DW_VIRTUALITY_virtual)
10604 virt = VIRTUALITY_VIRTUAL;
10605 else if (v == DW_VIRTUALITY_pure_virtual)
10606 virt = VIRTUALITY_PURE_VIRTUAL;
// NOTE(review): presumably the fallback for every other value of v --
// the line introducing this branch is not visible here; confirm.
10608 virt = VIRTUALITY_NOT_VIRTUAL;
10613 /// Test whether the DIE represents either a virtual base or function.
10615 /// @param die the DIE to consider.
10617 /// @return true if the DIE represents a virtual base or function,
10618 /// false otherwise.
10620 die_is_virtual(const Dwarf_Die* die)
// Delegate to die_virtuality() to obtain the virtuality of the DIE.
10623 if (!die_virtuality(die, v))
// Both "virtual" and "pure virtual" count as virtual.
10626 return v == VIRTUALITY_PURE_VIRTUAL || v == VIRTUALITY_VIRTUAL;
10629 /// Test if the DIE represents an entity that was declared inlined.
10631 /// @param die the DIE to test for.
10633 /// @return true if the DIE represents an entity that was declared
10636 die_is_declared_inline(Dwarf_Die* die)
10638 uint64_t inline_value = 0;
// Read DW_AT_inline as an unsigned constant.
10639 if (!die_unsigned_constant_attribute(die, DW_AT_inline, inline_value))
// Only DW_INL_declared_inlined yields true; any other value of the
// attribute yields false.
10641 return inline_value == DW_INL_declared_inlined;
10644 /// This function is a fast routine (optimization) to compare the
10645 /// values of two string attributes of two DIEs.
10647 /// @param l the first DIE to consider.
10649 /// @param r the second DIE to consider.
10651 /// @param attr_name the name of the attribute to compare, on the two
10654 /// @param result out parameter. This is set to the result of the
10655 /// comparison. If the value of attribute @p attr_name on DIE @p l
10656 /// equals the value of attribute @p attr_name on DIE @p r, then
10657 /// the argument of this parameter is set to true. Otherwise, it's
10658 /// set to false. Note that the argument of this parameter is set iff
10659 /// the function returned true.
10661 /// @return true iff the comparison could be performed. There are
10662 /// cases in which the comparison cannot be performed. For instance,
10663 /// if one of the DIEs does not have the attribute @p attr_name. In
10664 /// any case, if this function returns true, then the parameter @p
10665 /// result is set to the result of the comparison.
10667 compare_dies_string_attribute_value(const Dwarf_Die *l, const Dwarf_Die *r,
10668 unsigned attr_name,
// Fetch the attribute on both DIEs.
10671 Dwarf_Attribute l_attr, r_attr;
10672 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(l), attr_name, &l_attr)
10673 || !dwarf_attr_integrate(const_cast<Dwarf_Die*>(r), attr_name, &r_attr))
// Both attributes are required to use one of the string forms.
10676 ABG_ASSERT(l_attr.form == DW_FORM_strp
10677 || l_attr.form == DW_FORM_string
10678 || l_attr.form == DW_FORM_GNU_strp_alt
10679 || form_is_DW_FORM_strx(l_attr.form));
10681 ABG_ASSERT(r_attr.form == DW_FORM_strp
10682 || r_attr.form == DW_FORM_string
10683 || r_attr.form == DW_FORM_GNU_strp_alt
10684 || form_is_DW_FORM_strx(r_attr.form));
// The fast path only applies when both sides use the same kind of
// indirect string form: strp/strp, GNU_strp_alt/GNU_strp_alt or
// strx/strx.
10686 if ((l_attr.form == DW_FORM_strp
10687 && r_attr.form == DW_FORM_strp)
10688 || (l_attr.form == DW_FORM_GNU_strp_alt
10689 && r_attr.form == DW_FORM_GNU_strp_alt)
10690 || (form_is_DW_FORM_strx(l_attr.form)
10691 && form_is_DW_FORM_strx(r_attr.form)))
10693 // So these string attributes are actually pointers into a
10694 // string table. The string table is most likely de-duplicated
10695 // so comparing the *values* of the pointers should be enough.
10697 // This is the fast path.
10698 if (l_attr.valp == r_attr.valp)
// NOTE(review): when the pointers differ, this compares what the two
// `valp` pointers dereference to, not the whole strings -- confirm
// that this is sufficient for the forms handled here.
10700 else if (l_attr.valp && r_attr.valp)
10701 result = *l_attr.valp == *r_attr.valp;
10707 // If we reached this point it means we couldn't use the fast path
10708 // because the string attributes are strings that are "inline" in
10709 // the debug info section. Let's just compare them the slow and
10711 string l_str = die_string_attribute(l, attr_name),
10712 r_str = die_string_attribute(r, attr_name);
10713 result = l_str == r_str;
10718 /// Compare the file path of the compilation units (aka CUs)
10719 /// associated to two DIEs.
10721 /// If the DIEs are for pointers or typedefs, this function also
10722 /// compares the file paths of the CUs of the leaf DIEs (underlying
10723 /// DIEs of the pointer or the typedef).
10725 /// @param l the first type DIE to consider.
10727 /// @param r the second type DIE to consider.
10729 /// @return true iff the file paths of the DIEs of the two types are
10732 compare_dies_cu_decl_file(const Dwarf_Die* l, const Dwarf_Die *r, bool &result)
// Find the compilation unit DIE of each of the two DIEs.
10734 Dwarf_Die l_cu, r_cu;
10735 if (!dwarf_diecu(const_cast<Dwarf_Die*>(l), &l_cu, 0, 0)
10736 ||!dwarf_diecu(const_cast<Dwarf_Die*>(r), &r_cu, 0, 0))
// Compare a string attribute of the two CU DIEs (the attribute name
// argument is not visible in this excerpt).
10740 compare_dies_string_attribute_value(&l_cu, &r_cu,
// When both DIEs are pointer, reference or typedef types that can be
// peeled, the CU lookup and the comparison are repeated on the peeled
// DIEs.
10745 Dwarf_Die peeled_l, peeled_r;
10746 if (die_is_pointer_reference_or_typedef_type(l)
10747 && die_is_pointer_reference_or_typedef_type(r)
10748 && die_peel_pointer_and_typedef(l, peeled_l)
10749 && die_peel_pointer_and_typedef(r, peeled_r))
10751 if (!dwarf_diecu(&peeled_l, &l_cu, 0, 0)
10752 ||!dwarf_diecu(&peeled_r, &r_cu, 0, 0))
10755 compare_dies_string_attribute_value(&l_cu, &r_cu,
10764 // -----------------------------------
10765 // <location expression evaluation>
10766 // -----------------------------------
10768 /// Get the value of a given DIE attribute, knowing that it must be a
10769 /// location expression.
10771 /// @param die the DIE to read the attribute from.
10773 /// @param attr_name the name of the attribute to read the value for.
10775 /// @param expr the pointer to allocate and fill with the resulting
10776 /// array of operators + operands forming a dwarf expression. This is
10777 /// set iff the function returns true.
10779 /// @param expr_len the length of the resulting dwarf expression.
10780 /// This is set iff the function returns true.
10782 /// @return true if the attribute exists and has a dwarf expression as
10783 /// value. In that case the expr and expr_len arguments are set to
10784 /// the resulting dwarf expression.
10786 die_location_expr(const Dwarf_Die* die,
10787 unsigned attr_name,
10789 uint64_t* expr_len)
// Look the attribute up on the DIE.
10794 Dwarf_Attribute attr;
10795 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
// The lookup of the location expression is considered successful when
// dwarf_getlocation() returns 0.
10799 bool result = (dwarf_getlocation(&attr, expr, &len) == 0);
10807 /// If the current operation in the dwarf expression represents a push
10808 /// of a constant value onto the dwarf expr virtual machine (aka
10809 /// DEVM), perform the operation and update the DEVM.
10811 /// If the result of the operation is a constant, update the DEVM
10812 /// accumulator with its value. Otherwise, the DEVM accumulator is
10813 /// left with its previous value.
10815 /// @param ops the array of the dwarf expression operations to consider.
10817 /// @param ops_len the length of the @p ops array above.
10819 /// @param index the index of the operation to interpret, in @p ops.
10821 /// @param next_index the index of the operation to interpret at the
10822 /// next step, after this function completed and returned. This is
10823 /// an output parameter that is set iff the function returns true.
10825 /// @param ctxt the DEVM evaluation context.
10827 /// @return true if the current operation actually pushes a constant
10828 /// value onto the DEVM stack, false otherwise.
10830 op_pushes_constant_value(Dwarf_Op* ops,
10833 uint64_t& next_index,
10834 dwarf_expr_eval_context& ctxt)
// The operation to interpret must lie inside the expression.
10836 ABG_ASSERT(index < ops_len);
10838 Dwarf_Op& op = ops[index];
10844 value = ops[index].number;
// The fixed-size constant operators listed here take their value from
// the `number` operand of the operation.
10847 case DW_OP_const1u:
10848 case DW_OP_const1s:
10849 case DW_OP_const2u:
10850 case DW_OP_const2s:
10851 case DW_OP_const4u:
10852 case DW_OP_const4s:
10853 case DW_OP_const8u:
10854 case DW_OP_const8s:
10857 value = ops[index].number;
// Wrap the computed constant into an expr_result.
10961 expr_result r(value);
// Exactly one operation was consumed.
10964 next_index = index + 1;
10969 /// If the current operation in the dwarf expression represents a push
10970 /// of a non-constant value onto the dwarf expr virtual machine (aka
10971 /// DEVM), perform the operation and update the DEVM. A non-constant
10972 /// is namely a quantity for which we need inferior (a running program
10973 /// image) state to know the exact value.
10975 /// Upon successful completion, as the result of the operation is a
10976 /// non-constant the DEVM accumulator value is left to its state as of
10977 /// before the invocation of this function.
10979 /// @param ops the array of the dwarf expression operations to consider.
10981 /// @param ops_len the length of the @p ops array above.
10983 /// @param index the index of the operation to interpret, in @p ops.
10985 /// @param next_index the index of the operation to interpret at the
10986 /// next step, after this function completed and returned. This is
10987 /// an output parameter that is set iff the function returns true.
10989 /// @param ctxt the DEVM evaluation context.
10991 /// @return true if the current operation actually pushes a
10992 /// non-constant value onto the DEVM stack, false otherwise.
10994 op_pushes_non_constant_value(Dwarf_Op* ops,
10997 uint64_t& next_index,
10998 dwarf_expr_eval_context& ctxt)
// The operation to interpret must lie inside the expression.
11000 ABG_ASSERT(index < ops_len);
11001 Dwarf_Op& op = ops[index];
// NOTE(review): most operators handled here advance by one operation
// and one advances by two; the case labels selecting each of these
// assignments are not visible in this excerpt.
11037 next_index = index + 1;
11072 next_index = index + 1;
11076 next_index = index + 2;
11080 next_index = index + 1;
11084 next_index = index + 1;
// NOTE(review): presumably constructing the result from `false` marks
// it as non-constant -- confirm against expr_result's constructors.
11091 expr_result r(false);
11097 /// If the current operation in the dwarf expression represents a
11098 /// manipulation of the stack of the DWARF Expression Virtual Machine
11099 /// (aka DEVM), this function performs the operation and updates the
11100 /// state of the DEVM. If the result of the operation represents a
11101 /// constant value, then the accumulator of the DEVM is set to that
11102 /// result's value, Otherwise, the DEVM accumulator is left with its
11103 /// previous value.
11105 /// @param expr the array of the dwarf expression operations to consider.
11107 /// @param expr_len the length of the @p expr array above.
11109 /// @param index the index of the operation to interpret, in @p expr.
11111 /// @param next_index the index of the operation to interpret at the
11112 /// next step, after this function completed and returned. This is
11113 /// an output parameter that is set iff the function returns true.
11115 /// @param ctxt the DEVM evaluation context.
11117 /// @return true if the current operation actually manipulates the
11118 /// DEVM stack, false otherwise.
11120 op_manipulates_stack(Dwarf_Op* expr,
11123 uint64_t& next_index,
11124 dwarf_expr_eval_context& ctxt)
// NOTE(review): unlike the sibling op_* helpers, no
// `index < expr_len` assertion is visible before this access --
// confirm that callers guarantee it.
11126 Dwarf_Op& op = expr[index];
11132 v = ctxt.stack.front();
11137 v = ctxt.stack.front();
// The assertions below check that the stack is deep enough for the
// operator being handled before the stack is touched.
11142 ABG_ASSERT(ctxt.stack.size() > 1);
11148 ABG_ASSERT(index + 1 < expr_len);
11154 ABG_ASSERT(ctxt.stack.size() > 1);
11156 ctxt.stack.erase(ctxt.stack.begin() + 1);
11161 ABG_ASSERT(ctxt.stack.size() > 2);
11163 ctxt.stack.erase(ctxt.stack.begin() + 2);
11168 case DW_OP_deref_size:
11169 ABG_ASSERT(ctxt.stack.size() > 0);
11176 case DW_OP_xderef_size:
11177 ABG_ASSERT(ctxt.stack.size() > 1);
11184 case DW_OP_push_object_address:
11189 case DW_OP_form_tls_address:
11190 case DW_OP_GNU_push_tls_address:
11191 ABG_ASSERT(ctxt.stack.size() > 0);
11193 if (op.atom == DW_OP_form_tls_address)
11198 case DW_OP_call_frame_cfa:
// Record on the context whether the operator just handled was one of
// the two TLS-address operators.
11210 if (op.atom == DW_OP_form_tls_address
11211 || op.atom == DW_OP_GNU_push_tls_address)
11212 ctxt.set_tls_address(true);
11214 ctxt.set_tls_address(false);
// Exactly one operation was consumed.
11216 next_index = index + 1;
11221 /// If the current operation in the dwarf expression represents a push
11222 /// of an arithmetic or logic operation onto the dwarf expr virtual
11223 /// machine (aka DEVM), perform the operation and update the DEVM.
11225 /// If the result of the operation is a constant, update the DEVM
11226 /// accumulator with its value. Otherwise, the DEVM accumulator is
11227 /// left with its previous value.
11229 /// @param expr the array of the dwarf expression operations to consider.
11231 /// @param expr_len the length of the @p expr array above.
11233 /// @param index the index of the operation to interpret, in @p expr.
11235 /// @param next_index the index of the operation to interpret at the
11236 /// next step, after this function completed and returned. This is
11237 /// an output parameter that is set iff the function returns true.
11239 /// @param ctxt the DEVM evaluation context.
11241 /// @return true if the current operation actually represents an
11242 /// arithmetic or logic operation.
11244 op_is_arith_logic(Dwarf_Op* expr,
11247 uint64_t& next_index,
11248 dwarf_expr_eval_context& ctxt)
// The operation to interpret must lie inside the expression.
11250 ABG_ASSERT(index < expr_len);
11252 Dwarf_Op& op = expr[index];
11253 expr_result val1, val2;
// Binary operators need two operands on the stack.
11264 ABG_ASSERT(ctxt.stack.size() > 1);
11267 ctxt.push(val1 & val2);
// NOTE(review): val1 is the divisor below; how a non-constant val1 is
// handled is not visible in this excerpt.
11273 if (!val1.is_const())
// For the non-commutative operators the operand order is
// `val2 <op> val1`.
11275 ctxt.push(val2 / val1);
11281 ctxt.push(val2 - val1);
11287 ctxt.push(val2 % val1);
11293 ctxt.push(val2 * val1);
11309 ctxt.push(val1 | val2);
11315 ctxt.push(val2 + val1);
11318 case DW_OP_plus_uconst:
11327 ctxt.push(val2 << val1);
11334 ctxt.push(val2 >> val1);
11340 ctxt.push(val2 ^ val1);
// When the new top of the stack is a constant, copy it into the
// accumulator; otherwise the accumulator is left untouched.
11347 if (ctxt.stack.front().is_const())
11348 ctxt.accum = ctxt.stack.front();
// Exactly one operation was consumed.
11350 next_index = index + 1;
11354 /// If the current operation in the dwarf expression represents a push
11355 /// of a control flow operation onto the dwarf expr virtual machine
11356 /// (aka DEVM), perform the operation and update the DEVM.
11358 /// If the result of the operation is a constant, update the DEVM
11359 /// accumulator with its value. Otherwise, the DEVM accumulator is
11360 /// left with its previous value.
11362 /// @param expr the array of the dwarf expression operations to consider.
11364 /// @param expr_len the length of the @p expr array above.
11366 /// @param index the index of the operation to interpret, in @p expr.
11368 /// @param next_index the index of the operation to interpret at the
11369 /// next step, after this function completed and returned. This is
11370 /// an output parameter that is set iff the function returns true.
11372 /// @param ctxt the DEVM evaluation context.
11374 /// @return true if the current operation actually represents a
11375 /// control flow operation, false otherwise.
11377 op_is_control_flow(Dwarf_Op* expr,
11380 uint64_t& next_index,
11381 dwarf_expr_eval_context& ctxt)
// The operation to interpret must lie inside the expression.
11383 ABG_ASSERT(index < expr_len);
11385 Dwarf_Op& op = expr[index];
11386 expr_result val1, val2;
// Comparison operators: evaluate `val2 <op> val1`.
11400 if (op.atom == DW_OP_eq)
11401 value = val2 == val1;
11402 else if (op.atom == DW_OP_ge)
11403 value = val2 >= val1;
11404 else if (op.atom == DW_OP_gt)
11405 value = val2 > val1;
11406 else if (op.atom == DW_OP_le)
11407 value = val2 <= val1;
11408 else if (op.atom == DW_OP_lt)
11409 value = val2 < val1;
11410 else if (op.atom == DW_OP_ne)
11411 value = val2 != val1;
// The boolean outcome becomes the constant 1 or 0.
11413 val1 = value ? 1 : 0;
// The two adjustments of `index` below subtract one because
// `next_index = index + 1` at the end adds it back.
11420 index += op.number - 1;
11426 index += val1.const_value() - 1;
11431 case DW_OP_call_ref:
// When the top of the stack is a constant, copy it into the
// accumulator; otherwise the accumulator is left untouched.
11439 if (ctxt.stack.front().is_const())
11440 ctxt.accum = ctxt.stack.front();
11442 next_index = index + 1;
11446 /// This function quickly evaluates a DWARF expression that is a
11449 /// This is a "fast path" function that quickly evaluates a DWARF
11450 /// expression that is only made of a DW_OP_plus_uconst operator.
11452 /// This is a sub-routine of die_member_offset.
11454 /// @param expr the DWARF expression to evaluate.
11456 /// @param expr_len the length of the expression @p expr.
11458 /// @param value out parameter. This is set to the result of the
11459 /// evaluation of @p expr, iff this function returns true.
11461 /// @return true iff the evaluation of @p expr went OK.
11463 eval_quickly(Dwarf_Op* expr,
// Fast path: the expression is made of exactly one DW_OP_plus_uconst
// operation, whose operand is the result.
11467 if (expr_len == 1 && (expr[0].atom == DW_OP_plus_uconst))
11469 value = expr[0].number;
11475 /// Evaluate the value of the last sub-expression that is a constant,
11476 /// inside a given DWARF expression.
11478 /// @param expr the DWARF expression to consider.
11480 /// @param expr_len the length of the expression to consider.
11482 /// @param value the resulting value of the last constant
11483 /// sub-expression of the DWARF expression. This is set iff the
11484 /// function returns true.
11486 /// @param is_tls_address out parameter. This is set to true iff
11487 /// the resulting value of the evaluation is a TLS (thread local
11488 /// storage) address.
11490 /// @param eval_ctxt the evaluation context to (re)use. Note that
11491 /// this function initializes this context before using it.
11493 /// @return true if the function could find a constant sub-expression
11494 /// to evaluate, false otherwise.
11496 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
11499 bool& is_tls_address,
11500 dwarf_expr_eval_context &eval_ctxt)
11502 // Reset the evaluation context before evaluating the constant sub
11503 // expression contained in the DWARF expression 'expr'.
11506 uint64_t index = 0, next_index = 0;
// Try each family of operators in turn.  The first helper that
// recognises the operation at `index` handles it, which
// short-circuits the remaining helpers.
11509 if (op_is_arith_logic(expr, expr_len, index,
11510 next_index, eval_ctxt)
11511 || op_pushes_constant_value(expr, expr_len, index,
11512 next_index, eval_ctxt)
11513 || op_manipulates_stack(expr, expr_len, index,
11514 next_index, eval_ctxt)
11515 || op_pushes_non_constant_value(expr, expr_len, index,
11516 next_index, eval_ctxt)
11517 || op_is_control_flow(expr, expr_len, index,
11518 next_index, eval_ctxt))
// NOTE(review): presumably the fallback used when no helper
// recognised the operation, which simply steps to the next one --
// the line introducing this branch is not visible here; confirm.
11521 next_index = index + 1;
// The loop must always make forward progress.
11523 ABG_ASSERT(next_index > index);
11524 index = next_index;
11525 } while (index < expr_len);
// Report the TLS-address state recorded on the evaluation context.
11527 is_tls_address = eval_ctxt.set_tls_address();
// The result is taken from the accumulator, and only when the
// accumulator holds a constant.
11528 if (eval_ctxt.accum.is_const())
11530 value = eval_ctxt.accum;
11536 /// Evaluate the value of the last sub-expression that is a constant,
11537 /// inside a given DWARF expression.
11539 /// @param expr the DWARF expression to consider.
11541 /// @param expr_len the length of the expression to consider.
11543 /// @param value the resulting value of the last constant
11544 /// sub-expression of the DWARF expression. This is set iff the
11545 /// function returns true.
11547 /// @return true if the function could find a constant sub-expression
11548 /// to evaluate, false otherwise.
11550 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
11553 bool& is_tls_address)
// Convenience overload: evaluate with a fresh evaluation context that
// is local to this call.
11555 dwarf_expr_eval_context eval_ctxt;
11556 return eval_last_constant_dwarf_sub_expr(expr, expr_len, value,
11557 is_tls_address, eval_ctxt);
11560 // -----------------------------------
11561 // </location expression evaluation>
11562 // -----------------------------------
11564 /// Get the offset of a struct/class member as represented by the
11565 /// value of the DW_AT_data_member_location attribute.
11567 /// There is a huge gotcha in here. The value of the
11568 /// DW_AT_data_member_location is not necessarily a constant that one
11569 /// would just read and be done with it. Rather, it can be a DWARF
11570 /// expression that one has to interpret. In general, the offset can
11571 /// be given by the DW_AT_bit_offset attribute. In that case the
11572 /// offset is a constant. But it can also be given by the
11573 /// DW_AT_data_member_location attribute. In that case it's a DWARF
11574 /// location expression.
11576 /// When it's the DW_AT_data_member_location attribute that is present,
11577 /// there are three cases to possibly take into account:
11579 /// 1/ The offset in the vtable where the offset of a virtual base
11580 /// can be found, aka vptr offset. Given the address of a
11581 /// given object O, the vptr offset for B is given by the
11582 /// (DWARF) expression:
11584 /// address(O) + *(*address(O) - VIRTUAL_OFFSET)
11586 /// where VIRTUAL_OFFSET is a constant value; In this case,
11587 /// this function returns the constant VIRTUAL_OFFSET, as this
11588 /// is enough to detect changes in a given virtual base
11589 /// relative to the other virtual bases.
11591 /// 2/ The offset of a regular data member. Given the address of
11592 /// a struct object named O, the memory location for a
11593 /// particular data member is given by the (DWARF) expression:
11595 /// address(O) + OFFSET
11597 /// where OFFSET is a constant. In this case, this function
11598 /// returns the OFFSET constant.
11600 /// 3/ The offset of a virtual member function in the virtual
11601 /// pointer. The DWARF expression is a constant that designates
11602 /// the offset of the function in the vtable. In this case this
11603 /// function returns that constant.
11605 ///@param ctxt the read context to consider.
11607 ///@param die the DIE to read the information from.
11609 ///@param offset the resulting constant offset, in bits. This
11610 ///argument is set iff the function returns true.
11612 die_member_offset(const read_context& ctxt,
11613 const Dwarf_Die* die,
11616 Dwarf_Op* expr = NULL;
11617 uint64_t expr_len = 0;
// DW_AT_bit_offset is looked at first.
11620 if (die_unsigned_constant_attribute(die, DW_AT_bit_offset, off))
11622 // The DW_AT_bit_offset is present. If it contains a non-zero
11623 // value, let's read that one.
// Otherwise fetch the location expression held by
// DW_AT_data_member_location.
11631 if (!die_location_expr(die, DW_AT_data_member_location, &expr, &expr_len))
11634 // Otherwise, the DW_AT_data_member_location attribute is present.
11635 // In that case, let's evaluate it and get its constant
11636 // sub-expression and return that one.
// Try the single-operator fast path first, then fall back to the
// generic evaluator, reusing the evaluation context provided by the
// read context.
11638 if (!eval_quickly(expr, expr_len, offset))
11640 bool is_tls_address = false;
11641 if (!eval_last_constant_dwarf_sub_expr(expr, expr_len,
11642 offset, is_tls_address,
11643 ctxt.dwarf_expr_eval_ctxt()))
11651 /// Read the value of the DW_AT_location attribute from a DIE,
11652 /// evaluate the resulting DWARF expression and, if it's a constant
11653 /// expression, return it.
11655 /// @param die the DIE to consider.
11657 /// @param address the resulting constant address. This is set iff
11658 /// the function returns true.
11660 /// @return true iff the whole sequence of actions described above
11661 /// could be completed normally.
11663 die_location_address(Dwarf_Die* die,
11664 Dwarf_Addr& address,
11665 bool& is_tls_address)
11667 Dwarf_Op* expr = NULL;
11668 uint64_t expr_len = 0;
// Give the out flag a defined value before the attribute is looked up.
11670 is_tls_address = false;
// Fetch the location expression held by DW_AT_location.
11671 if (!die_location_expr(die, DW_AT_location, &expr, &expr_len))
// Evaluate it down to its last constant sub-expression.
11675 if (!eval_last_constant_dwarf_sub_expr(expr, expr_len, addr, is_tls_address))
11683 /// Return the index of a function in its virtual table. That is,
11684 /// return the value of the DW_AT_vtable_elem_location attribute.
11686 /// @param die the DIE of the function to consider.
11688 /// @param vindex the resulting index. This is set iff the function
11691 /// @return true if the DIE has a DW_AT_vtable_elem_location
11694 die_virtual_function_index(Dwarf_Die* die,
11700 Dwarf_Op* expr = NULL;
11701 uint64_t expr_len = 0;
// Fetch the location expression held by DW_AT_vtable_elem_location.
11702 if (!die_location_expr(die, DW_AT_vtable_elem_location,
// Evaluate it down to its last constant sub-expression.
11707 bool is_tls_addr = false;
11708 if (!eval_last_constant_dwarf_sub_expr(expr, expr_len, i, is_tls_addr))
11715 /// Test if a given DIE represents an anonymous type.
11717 /// Anonymous types we are interested in are classes, unions and
11720 /// @param die the DIE to consider.
11722 /// @return true iff @p die represents an anonymous type.
11724 is_anonymous_type_die(Dwarf_Die *die)
11726 int tag = dwarf_tag(die);
// Only class, struct, union and enum DIEs are considered; for those,
// the answer is whatever die_is_anonymous() says.
11728 if (tag == DW_TAG_class_type
11729 || tag == DW_TAG_structure_type
11730 || tag == DW_TAG_union_type
11731 || tag == DW_TAG_enumeration_type)
11732 return die_is_anonymous(die);
11737 /// Return the base of the internal name to represent an anonymous
11740 /// Typically, anonymous enums would be named
11741 /// __anonymous_enum__<number>, anonymous struct or classes would be
11742 /// named __anonymous_struct__<number> and anonymous unions would be
11743 /// named __anonymous_union__<number>. The first part of these
11744 /// anonymous names (i.e, __anonymous_{enum,struct,union}__ is called
11745 /// the base name. This function returns that base name, depending on
11746 /// the kind of type DIE we are looking at.
11748 /// @param die the type DIE to look at. This function expects a type
11749 /// DIE with an empty DW_AT_name property value (anonymous).
11751 /// @return a string representing the base of the internal anonymous
11754 get_internal_anonymous_die_prefix_name(const Dwarf_Die *die)
// Preconditions: @p die is a type DIE whose DW_AT_name is empty.
11756 ABG_ASSERT(die_is_type(die));
11757 ABG_ASSERT(die_string_attribute(die, DW_AT_name) == "");
11759 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// Classes and structs share the "struct" prefix; unions and enums
// each have their own.
11761 if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
11762 type_name = tools_utils::get_anonymous_struct_internal_name_prefix();
11763 else if (tag == DW_TAG_union_type)
11764 type_name = tools_utils::get_anonymous_union_internal_name_prefix();
11765 else if (tag == DW_TAG_enumeration_type)
11766 type_name = tools_utils::get_anonymous_enum_internal_name_prefix();
11771 /// Build a full internal anonymous type name.
11773 /// @param base_name this is the base name as returned by the function
11774 /// @ref get_internal_anonymous_die_prefix_name.
11776 /// @param anonymous_type_index this is the index of the anonymous
11777 /// type in its scope. That is, if there is more than one anonymous
11778 /// type of a given kind in a scope, this index is what tells them
11779 /// apart, starting from 0.
11781 /// @return the built string, which is a concatenation of @p base_name
11782 /// and @p anonymous_type_index.
11784 build_internal_anonymous_die_name(const string &base_name,
11785 size_t anonymous_type_index)
// The name starts out as the bare base name.
11787 string name = base_name;
// Only a non-zero index combined with a non-empty base name leads to
// the index being appended to the base name.
11788 if (anonymous_type_index && !base_name.empty())
11790 std::ostringstream o;
11791 o << base_name << anonymous_type_index;
11797 /// Build a full internal anonymous type name.
11799 /// @param die the DIE representing the anonymous type to consider.
11801 /// @param anonymous_type_index the index of the anonymous type
11802 /// represented by @p DIE, in its scope. That is, if there are
11803 /// several different anonymous types of the same kind as @p die, this
11804 /// index is what tells them apart.
11806 /// @return the internal name of the anonymous type represented by @p
11809 get_internal_anonymous_die_name(Dwarf_Die *die,
11810 size_t anonymous_type_index)
// First the prefix that matches the kind of the DIE, then the full
// name built from that prefix and the index.
11812 string name = get_internal_anonymous_die_prefix_name(die);
11813 name = build_internal_anonymous_die_name(name, anonymous_type_index);
11817 // ------------------------------------
11818 // <DIE pretty printer>
11819 // ------------------------------------
11821 /// Compute the qualified name of a DIE that represents a type.
11823 /// For instance, if the DIE tag is DW_TAG_subprogram then this
11824 /// function computes the name of the function *type*.
11826 /// @param ctxt the read context.
11828 /// @param die the DIE to consider.
11830 /// @param where_offset where we logically are in the DIE
11833 /// @return a copy of the qualified name of the type.
11835 die_qualified_type_name(const read_context& ctxt,
11836 const Dwarf_Die* die,
11837 size_t where_offset)
11842 int tag = dwarf_tag (const_cast<Dwarf_Die*>(die));
// Unit DIEs (compile, partial and type units) are special-cased
// before anything else is computed.
11843 if (tag == DW_TAG_compile_unit
11844 || tag == DW_TAG_partial_unit
11845 || tag == DW_TAG_type_unit)
11848 string name = die_name(die);
// The qualified name of the enclosing scope is used as the prefix of
// the names built below.
11850 Dwarf_Die scope_die;
11851 if (!get_scope_die(ctxt, die, where_offset, scope_die))
11854 string parent_name = die_qualified_name(ctxt, &scope_die, where_offset);
// Types and namespaces are joined to their parent with "::", anything
// else with ".".
11855 bool colon_colon = die_is_type(die) || die_is_namespace(die);
11856 string separator = colon_colon ? "::" : ".";
11862 case DW_TAG_unspecified_type:
11865 case DW_TAG_base_type:
11867 abigail::ir::integral_type int_type;
11868 if (parse_integral_type(name, int_type))
// Named types: typedefs, enums, structs, classes and unions.
11875 case DW_TAG_typedef:
11876 case DW_TAG_enumeration_type:
11877 case DW_TAG_structure_type:
11878 case DW_TAG_class_type:
11879 case DW_TAG_union_type:
11881 if (tag == DW_TAG_typedef)
11883 // If the underlying type of the typedef is unspecified,
11884 // bail out as we don't support that yet.
11885 Dwarf_Die underlying_type_die;
11886 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
11888 string n = die_qualified_type_name(ctxt, &underlying_type_die,
11890 if (die_is_unspecified(&underlying_type_die)
11897 // TODO: handle cases where there is more than one
11898 // anonymous type of the same kind in the same scope. In
11899 // that case, their name must be built with the function
11900 // get_internal_anonymous_die_name or something of the same
11902 name = get_internal_anonymous_die_prefix_name(die);
11904 ABG_ASSERT(!name.empty());
11905 repr = parent_name.empty() ? name : parent_name + separator + name;
// CV-qualified and restrict-qualified types.
11909 case DW_TAG_const_type:
11910 case DW_TAG_volatile_type:
11911 case DW_TAG_restrict_type:
11913 Dwarf_Die underlying_type_die;
11914 bool has_underlying_type_die =
11915 die_die_attribute(die, DW_AT_type, underlying_type_die);
11917 if (has_underlying_type_die && die_is_unspecified(&underlying_type_die))
11920 if (tag == DW_TAG_const_type)
11922 if (has_underlying_type_die
11923 && die_is_reference_type(&underlying_type_die))
11924 // A reference is always const. So, to lower false
11925 // positive reports in diff computations, we consider a
11926 // const reference just as a reference. But we need to
11927 // keep the qualified-ness of the type. So we introduce
11928 // a 'no-op' qualifier here. Please remember that this
11929 // has to be kept in sync with what is done in
11930 // get_name_of_qualified_type. So if you change this
11931 // here, you have to change that code there too.
11933 else if (!has_underlying_type_die
11934 || die_is_void_type(&underlying_type_die))
11942 else if (tag == DW_TAG_volatile_type)
11944 else if (tag == DW_TAG_restrict_type)
11947 ABG_ASSERT_NOT_REACHED;
// A qualified type without an underlying type DIE is treated as
// qualifying "void".
11949 string underlying_type_repr;
11950 if (has_underlying_type_die)
11951 underlying_type_repr =
11952 die_qualified_type_name(ctxt, &underlying_type_die, where_offset);
11954 underlying_type_repr = "void";
11956 if (underlying_type_repr.empty())
// For pointers and references the qualifier goes after the underlying
// type representation; for everything else it goes before it.
11960 if (has_underlying_type_die
11961 && die_is_pointer_or_reference_type(&underlying_type_die))
11962 repr = underlying_type_repr + " " + repr;
11964 repr += " " + underlying_type_repr;
// Pointers, references and rvalue references.
11969 case DW_TAG_pointer_type:
11970 case DW_TAG_reference_type:
11971 case DW_TAG_rvalue_reference_type:
11973 Dwarf_Die pointed_to_type_die;
11974 if (!die_die_attribute(die, DW_AT_type, pointed_to_type_die))
11976 if (tag == DW_TAG_pointer_type)
11981 if (die_is_unspecified(&pointed_to_type_die))
// The representation starts with the name of the pointed-to type.
11984 string pointed_type_repr =
11985 die_qualified_type_name(ctxt, &pointed_to_type_die, where_offset);
11987 repr = pointed_type_repr;
11991 if (tag == DW_TAG_pointer_type)
11993 else if (tag == DW_TAG_reference_type)
11995 else if (tag == DW_TAG_rvalue_reference_type)
11998 ABG_ASSERT_NOT_REACHED;
12002 case DW_TAG_subrange_type:
12004 // In Ada, this one can be generated on its own, that is, not
12005 // as a sub-type of an array. So we need to support it on its
12006 // own. Note that when it's emitted as the sub-type of an
12007 // array like in C and C++, this is handled differently, for
12008 // now. But we try to make this usable by other languages
12009 // that are not Ada, even if we modelled it after Ada.
12011 // So we build a subrange type for the sole purpose of using
12012 // the ::as_string() method of that type. So we don't add
12013 // that type to the current type tree being built.
12014 array_type_def::subrange_sptr s =
12015 build_subrange_type(const_cast<read_context&>(ctxt),
12017 /*associate_die_to_type=*/false);
12018 repr += s->as_string();
// Arrays: the element type name followed by the textual form of the
// subranges.
12022 case DW_TAG_array_type:
12024 Dwarf_Die element_type_die;
12025 if (!die_die_attribute(die, DW_AT_type, element_type_die))
12027 string element_type_name =
12028 die_qualified_type_name(ctxt, &element_type_die, where_offset);
12029 if (element_type_name.empty())
12032 array_type_def::subranges_type subranges;
12033 build_subranges_from_array_type_die(const_cast<read_context&>(ctxt),
12034 die, subranges, where_offset,
12035 /*associate_type_to_die=*/false);
12037 repr = element_type_name;
12038 repr += array_type_def::subrange_type::vector_as_string(subranges);
// Function types and functions: the return type, an optional class
// name for methods, then the parameters.
12042 case DW_TAG_subroutine_type:
12043 case DW_TAG_subprogram:
12045 string return_type_name;
12047 vector<string> parm_names;
12048 bool is_const = false;
12049 bool is_static = false;
12051 die_return_and_parm_names_from_fn_type_die(ctxt, die, where_offset,
12052 /*pretty_print=*/true,
12053 return_type_name, class_name,
12054 parm_names, is_const,
// An empty return type name is printed as "void".
12056 if (return_type_name.empty())
12057 return_type_name = "void";
12059 repr = return_type_name;
12061 if (!class_name.empty())
12063 // This is a method, so print the class name.
12064 repr += " (" + class_name + "::*)";
12069 for (vector<string>::const_iterator i = parm_names.begin();
12070 i != parm_names.end();
12073 if (i != parm_names.begin())
// NOTE(review): no dedicated handling is visible in this excerpt for
// the tags listed below.
12082 case DW_TAG_string_type:
12083 case DW_TAG_ptr_to_member_type:
12084 case DW_TAG_set_type:
12085 case DW_TAG_file_type:
12086 case DW_TAG_packed_type:
12087 case DW_TAG_thrown_type:
12088 case DW_TAG_interface_type:
12089 case DW_TAG_shared_type:
12096 /// Compute the qualified name of a decl represented by a given DIE.
12098 /// For instance, for a DIE of tag DW_TAG_subprogram this function
12099 /// computes the signature of the function *declaration*.
///
/// For a DW_TAG_namespace, DW_TAG_member or DW_TAG_variable DIE, the
/// name of the DIE is prefixed with the qualified name of its scope
/// and "::", when that scope name is not empty.
12101 /// @param ctxt the read context.
12103 /// @param die the DIE to consider.
12105 /// @param where_offset where we are logically at in the DIE stream.
12107 /// @return a copy of the computed name.
12109 die_qualified_decl_name(const read_context& ctxt,
12110 const Dwarf_Die* die,
12111 size_t where_offset)
// Only non-null DIEs that represent a declaration are handled.
12113 if (!die || !die_is_decl(die))
12116 string name = die_name(die);
12118 Dwarf_Die scope_die;
12119 if (!get_scope_die(ctxt, die, where_offset, scope_die))
// The scope itself may be a type or a decl, hence the generic
// die_qualified_name() call.
12122 string scope_name = die_qualified_name(ctxt, &scope_die, where_offset);
12123 string separator = "::";
12127 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// Named entities: prepend the scope name, if there is one.
12130 case DW_TAG_namespace:
12131 case DW_TAG_member:
12132 case DW_TAG_variable:
12133 repr = scope_name.empty() ? name : scope_name + separator + name;
// Functions are represented by their signature.
12135 case DW_TAG_subprogram:
12136 repr = die_function_signature(ctxt, die, where_offset);
12139 case DW_TAG_unspecified_parameters:
12143 case DW_TAG_formal_parameter:
12144 case DW_TAG_imported_declaration:
12145 case DW_TAG_GNU_template_template_param:
12146 case DW_TAG_GNU_template_parameter_pack:
12147 case DW_TAG_GNU_formal_parameter_pack:
12153 /// Compute the qualified name of the artifact represented by a given
/// DIE.
12156 /// If the DIE represents a type, then the function computes the name
12157 /// of the type. Otherwise, if the DIE represents a decl then the
12158 /// function computes the name of the decl. Note that a DIE of tag
12159 /// DW_TAG_subprogram is going to be considered as a "type" -- just
12160 /// like if it was a DW_TAG_subroutine_type.
12162 /// @param ctxt the read context.
12164 /// @param die the DIE to consider.
12166 /// @param where where we are logically at in the DIE stream.
12168 /// @return a copy of the computed name.
12170 die_qualified_name(const read_context& ctxt, const Dwarf_Die* die, size_t where)
// The type test is tried first; the decl test only runs when it fails.
12172 if (die_is_type(die))
12173 return die_qualified_type_name(ctxt, die, where);
12174 else if (die_is_decl(die))
12175 return die_qualified_decl_name(ctxt, die, where);
12179 /// Test if the qualified name of a given type should be empty.
12181 /// The reason why the name of a DIE with a given tag would be empty
12182 /// is that libabigail's internal representation doesn't yet support
12183 /// that tag; or if the DIE's qualified name is built from names of
12184 /// sub-types DIEs whose tags are not yet supported.
12186 /// @param ctxt the reading context.
12188 /// @param die the DIE to consider.
12190 /// @param where where we are logically at, in the DIE stream.
12192 /// @param qualified_name the qualified name of the DIE. This is set
12193 /// if and only if the function returns false.
12195 /// @return true if the qualified name of the DIE is empty.
12197 die_qualified_type_name_empty(const read_context& ctxt,
12198 const Dwarf_Die* die,
12199 size_t where, string &qualified_name)
12204 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
// These tags designate types that refer to another type through
// DW_AT_type; the name of that referred-to type is looked at as well.
12207 if (tag == DW_TAG_typedef
12208 || tag == DW_TAG_pointer_type
12209 || tag == DW_TAG_reference_type
12210 || tag == DW_TAG_rvalue_reference_type
12211 || tag == DW_TAG_array_type
12212 || tag == DW_TAG_const_type
12213 || tag == DW_TAG_volatile_type
12214 || tag == DW_TAG_restrict_type)
12216 Dwarf_Die underlying_type_die;
12217 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
12220 die_qualified_type_name(ctxt, &underlying_type_die, where);
12227 string name = die_qualified_type_name(ctxt, die, where);
12232 qname = die_qualified_type_name(ctxt, die, where);
// Hand the computed name back to the caller.
12236 qualified_name = qname;
12240 /// Given the DIE that represents a function type, compute the names
12241 /// of the following properties of the function's type:
/// - return type
12244 /// - enclosing class (if the function is a member function)
12245 /// - function parameter types
12247 /// When the function we are looking at is a member function, it also
12248 /// tells if it's const.
12250 /// @param ctxt the reading context.
12252 /// @param die the DIE of the function or function type we are looking
/// at.
12255 /// @param where_offset where we are logically at in the DIE stream.
12257 /// @param pretty_print if set to true, the type names are going to be
12258 /// pretty-printed names; otherwise, they are just qualified type
/// names.
12261 /// @param return_type_name out parameter. This contains the name of
12262 /// the return type of the function.  It is cleared for constructors
/// and destructors.
12264 /// @param class_name out parameter. If the function is a member
12265 /// function, this contains the name of the enclosing class.
12267 /// @param parm_names out parameter. This vector is set to the names
12268 /// of the types of the parameters of the function.
12270 /// @param is_const out parameter. If the function is a member
12271 /// function, this is set to true iff the member function is const.
12273 /// @param is_static out parameter. If the function is a static
12274 /// member function, then this is set to true.
12276 die_return_and_parm_names_from_fn_type_die(const read_context& ctxt,
12277 const Dwarf_Die* die,
12278 size_t where_offset,
12280 string &return_type_name,
12281 string &class_name,
12282 vector<string>& parm_names,
// A function DIE that carries no DW_AT_type attribute returns void.
12287 Dwarf_Die ret_type_die;
12288 if (!die_die_attribute(die, DW_AT_type, ret_type_die))
12289 return_type_name = "void";
12293 ? ctxt.get_die_pretty_representation(&ret_type_die, where_offset)
12294 : ctxt.get_die_qualified_type_name(&ret_type_die, where_offset);
// An empty return type name is reported as "void" too.
12296 if (return_type_name.empty())
12297 return_type_name = "void";
12299 Dwarf_Die object_pointer_die, class_die;
12300 bool is_method_type =
12301 die_function_type_is_method_type(ctxt, die, where_offset,
12302 object_pointer_die,
12303 class_die, is_static);
12306 if (is_method_type)
12308 class_name = ctxt.get_die_qualified_type_name(&class_die, where_offset);
// Follow the object pointer to its type, then to the type that one
// points to, and check whether the latter is const-qualified.
12310 Dwarf_Die this_pointer_die;
12311 Dwarf_Die pointed_to_die;
12313 && die_die_attribute(&object_pointer_die, DW_AT_type,
12315 if (die_die_attribute(&this_pointer_die, DW_AT_type, pointed_to_die))
12316 if (dwarf_tag(&pointed_to_die) == DW_TAG_const_type)
// Constructors (same name as the class) and destructors (name
// starting with '~') get no return type name.
// NOTE(review): the constructor test also holds when both names are
// empty -- confirm that this is intended.
12319 string fn_name = die_name(die);
12320 string non_qualified_class_name = die_name(&class_die);
12321 bool is_ctor = fn_name == non_qualified_class_name;
12322 bool is_dtor = !fn_name.empty() && fn_name[0] == '~';
12324 if (is_ctor || is_dtor)
12325 return_type_name.clear();
// Walk the children of the function DIE to collect the names of the
// parameter types.
12328 if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
12331 int child_tag = dwarf_tag(&child);
12332 if (child_tag == DW_TAG_formal_parameter)
12334 Dwarf_Die parm_type_die;
12335 if (!die_die_attribute(&child, DW_AT_type, parm_type_die))
12337 string qualified_name =
12339 ? ctxt.get_die_pretty_representation(&parm_type_die, where_offset)
12340 : ctxt.get_die_qualified_type_name(&parm_type_die, where_offset);
12342 if (qualified_name.empty())
12344 parm_names.push_back(qualified_name);
12346 else if (child_tag == DW_TAG_unspecified_parameters)
12348 // This is a variadic function parameter.
12349 parm_names.push_back("variadic parameter type");
12350 // After a DW_TAG_unspecified_parameters tag, we shouldn't
12351 // keep reading for parameters. The
12352 // unspecified_parameters TAG should be the last parameter
12353 // that we record. For instance, if there are multiple
12354 // DW_TAG_unspecified_parameters DIEs then we should care
12355 // only for the first one.
12359 while (dwarf_siblingof(&child, &child) == 0);
// No class name was found above: look at the parent DIE and, when it
// is a class type, fetch its qualified type name.
12361 if (class_name.empty())
12363 Dwarf_Die parent_die;
12364 if (get_parent_die(ctxt, die, parent_die, where_offset))
12366 if (die_is_class_type(&parent_die))
12368 ctxt.get_die_qualified_type_name(&parent_die, where_offset);
12373 /// This computes the signature of a function declaration
12374 /// represented by a DIE.
///
/// The signature starts with "function" or "method" (the latter when
/// an enclosing class name was found), followed by " virtual" for
/// virtual functions, the return type name when there is one, and
/// the scope-qualified function name.
12376 /// @param ctxt the reading context.
12378 /// @param fn_die the DIE of the function to consider.
12380 /// @param where_offset where we are logically at in the stream of
/// DIEs.
12383 /// @return a copy of the computed function signature string.
12385 die_function_signature(const read_context& ctxt,
12386 const Dwarf_Die *fn_die,
12387 size_t where_offset)
12390 translation_unit::language lang;
12391 bool has_lang = false;
12392 if ((has_lang = ctxt.get_die_language(fn_die, lang)))
12394 // In a binary originating from the C language, it's OK to use
12395 // the linkage name of the function as a key for the map which
12396 // is meant to reduce the number of DIE comparisons involved
12397 // during DIE canonicalization computation.
12398 if (is_c_language(lang))
// Prefer the linkage name; fall back to the plain name when the DIE
// has no linkage name.
12400 string fn_name = die_linkage_name(fn_die);
12401 if (fn_name.empty())
12402 fn_name = die_name(fn_die);
12407 // TODO: When we can structurally compare DIEs originating from C++
12408 // as well, we can use the linkage name of functions in C++ too, to
12409 // reduce the number of comparisons involved during DIE
12410 // canonicalization.
// NOTE(review): die_return_and_parm_names_from_fn_type_die(), called
// further down, receives return_type_name as an out parameter and
// appears to assign it itself, which would make this lookup
// redundant -- confirm before removing.
12412 string return_type_name;
12413 Dwarf_Die ret_type_die;
12414 if (die_die_attribute(fn_die, DW_AT_type, ret_type_die))
12415 return_type_name = ctxt.get_die_qualified_type_name(&ret_type_die,
12418 if (return_type_name.empty())
12419 return_type_name = "void";
// Qualify the function name with the name of its scope, if any.
12421 Dwarf_Die scope_die;
12423 if (get_scope_die(ctxt, fn_die, where_offset, scope_die))
12424 scope_name = ctxt.get_die_qualified_name(&scope_die, where_offset);
12425 string fn_name = die_name(fn_die);
12426 if (!scope_name.empty())
12427 fn_name = scope_name + "::" + fn_name;
12430 vector<string> parm_names;
12431 bool is_const = false;
12432 bool is_static = false;
12434 die_return_and_parm_names_from_fn_type_die(ctxt, fn_die, where_offset,
12435 /*pretty_print=*/false,
12436 return_type_name, class_name,
12437 parm_names, is_const, is_static);
12439 bool is_virtual = die_is_virtual(fn_die);
// A non-empty class name means we are looking at a member function.
12441 string repr = class_name.empty() ? "function" : "method";
12443 repr += " virtual";
// The return type name can be empty here, e.g. when the callee above
// cleared it for a constructor or a destructor.
12445 if (!return_type_name.empty())
12446 repr += " " + return_type_name;
12448 repr += " " + fn_name;
12452 bool some_parm_emitted = false;
12453 for (vector<string>::const_iterator i = parm_names.begin();
12454 i != parm_names.end();
12457 if (i != parm_names.begin())
12459 if (some_parm_emitted)
12463 if (!is_static && !class_name.empty())
12464 // We are printing a non-static method name, skip the implicit "this"
// parameter type.
12468 some_parm_emitted = true;
12474 ABG_ASSERT(!class_name.empty());
12481 /// Return a pretty string representation of a type, for internal purposes.
12483 /// By internal purpose, we mean things like key-ing types for lookup
12484 /// purposes and so on.
12486 /// Note that this function is also used to pretty print functions.
12487 /// For functions, it prints the *type* of the function.
12489 /// @param ctxt the context to use.
12491 /// @param die the DIE of the type to pretty print.
12493 /// @param where_offset where we logically are placed when calling
12494 /// this. It's useful to handle inclusion of DW_TAG_compile_unit
/// entries.
12497 /// @return the resulting pretty representation.
12499 die_pretty_print_type(read_context& ctxt,
12500 const Dwarf_Die* die,
12501 size_t where_offset)
// Besides type DIEs, DW_TAG_subprogram DIEs are accepted as well.
12504 || (!die_is_type(die)
12505 && dwarf_tag(const_cast<Dwarf_Die*>(die)) != DW_TAG_subprogram))
12510 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
12513 case DW_TAG_string_type:
12514 // For now, we won't try to go get the actual representation of
12515 // the string because this would make things more complicated;
12516 // for that we'd need to interpret some location expressions to
12517 // get the length of the string. And for dynamically allocated
12518 // strings, the result of the location expression evaluation
12519 // might not even be a constant. So at the moment I consider
12520 // this to be a lot of hassle for no great return. Until proven
12521 // otherwise, of course.
12522 repr = "string type";
12524 case DW_TAG_unspecified_type:
12525 case DW_TAG_ptr_to_member_type:
12528 case DW_TAG_namespace:
12529 repr = "namespace " + ctxt.get_die_qualified_type_name(die, where_offset);
// Base types are represented by their qualified name alone.
12532 case DW_TAG_base_type:
12533 repr = ctxt.get_die_qualified_type_name(die, where_offset);
12536 case DW_TAG_typedef:
12538 string qualified_name;
12539 if (!die_qualified_type_name_empty(ctxt, die,
12542 repr = "typedef " + qualified_name;
// Qualified, pointer and reference types are represented by their
// qualified name alone, without any keyword prefix.
12546 case DW_TAG_const_type:
12547 case DW_TAG_volatile_type:
12548 case DW_TAG_restrict_type:
12549 case DW_TAG_pointer_type:
12550 case DW_TAG_reference_type:
12551 case DW_TAG_rvalue_reference_type:
12552 repr = ctxt.get_die_qualified_type_name(die, where_offset);
12555 case DW_TAG_enumeration_type:
12557 string qualified_name =
12558 ctxt.get_die_qualified_type_name(die, where_offset);
12559 repr = "enum " + qualified_name;
// Structures and classes share the same "class " prefix.
12563 case DW_TAG_structure_type:
12564 case DW_TAG_class_type:
12566 string qualified_name =
12567 ctxt.get_die_qualified_type_name(die, where_offset);
12568 repr = "class " + qualified_name;
12572 case DW_TAG_union_type:
12574 string qualified_name =
12575 ctxt.get_die_qualified_type_name(die, where_offset);
12576 repr = "union " + qualified_name;
// Arrays are represented by the name of their element type followed
// by the string form of their subranges.
12580 case DW_TAG_array_type:
12582 Dwarf_Die element_type_die;
12583 if (!die_die_attribute(die, DW_AT_type, element_type_die))
12585 string element_type_name =
12586 ctxt.get_die_qualified_type_name(&element_type_die, where_offset);
12587 if (element_type_name.empty())
12590 array_type_def::subranges_type subranges;
12591 build_subranges_from_array_type_die(ctxt, die, subranges, where_offset,
12592 /*associate_type_to_die=*/false);
12594 repr = element_type_name;
12595 repr += array_type_def::subrange_type::vector_as_string(subranges);
12599 case DW_TAG_subrange_type:
12601 // So this can be generated by Ada, on its own; that is, not
12602 // as a subtype of an array. In that case we need to handle
// it here.
12605 // For now, we consider that the pretty printed name of the
12606 // subrange type is its name. We might need something more
12607 // advanced, should the needs of the users get more
// complicated.
12609 repr += die_qualified_type_name(ctxt, die, where_offset);
12613 case DW_TAG_subroutine_type:
12614 case DW_TAG_subprogram:
12616 string return_type_name;
12618 vector<string> parm_names;
12619 bool is_const = false;
12620 bool is_static = false;
12622 die_return_and_parm_names_from_fn_type_die(ctxt, die, where_offset,
12623 /*pretty_print=*/true,
12624 return_type_name, class_name,
12625 parm_names, is_const,
// The prefix depends on whether an enclosing class name was found.
12627 if (class_name.empty())
12628 repr = "function type";
12630 repr = "method type";
12631 repr += " " + ctxt.get_die_qualified_type_name(die, where_offset);
// These tags are not expected to reach this function.
12635 case DW_TAG_set_type:
12636 case DW_TAG_file_type:
12637 case DW_TAG_packed_type:
12638 case DW_TAG_thrown_type:
12639 case DW_TAG_interface_type:
12640 case DW_TAG_shared_type:
12641 ABG_ASSERT_NOT_REACHED;
12647 /// Return a pretty string representation of a declaration, for
12648 /// internal purposes.
12650 /// By internal purpose, we mean things like key-ing declarations for
12651 /// lookup purposes and so on.
12653 /// Note that this function is also used to pretty print functions.
12654 /// For functions, it prints the signature of the function.
12656 /// @param ctxt the context to use.
12658 /// @param die the DIE of the declaration to pretty print.
12660 /// @param where_offset where we logically are placed when calling
12661 /// this. It's useful to handle inclusion of DW_TAG_compile_unit
/// entries.
12664 /// @return the resulting pretty representation.
12666 die_pretty_print_decl(read_context& ctxt,
12667 const Dwarf_Die* die,
12668 size_t where_offset)
// Only non-null DIEs that represent a declaration are handled.
12670 if (!die || !die_is_decl(die))
12675 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
12678 case DW_TAG_namespace:
12679 repr = "namespace " + die_qualified_name(ctxt, die, where_offset);
// For data members and variables, the representation of the type is
// put in front of the qualified name.  The type representation
// starts out as "void" and is replaced when the DIE has a DW_AT_type
// attribute.
12682 case DW_TAG_member:
12683 case DW_TAG_variable:
12685 string type_repr = "void";
12686 Dwarf_Die type_die;
12687 if (die_die_attribute(die, DW_AT_type, type_die))
12688 type_repr = die_qualified_type_name(ctxt, &type_die, where_offset);
12689 repr = die_qualified_name(ctxt, die, where_offset);
12691 repr = type_repr + " " + repr;
// Functions are represented by their signature.
12695 case DW_TAG_subprogram:
12696 repr = die_function_signature(ctxt, die, where_offset);
12705 /// Compute the pretty printed representation of an artifact
12706 /// represented by a DIE.
12708 /// If the DIE is a type, compute its pretty representation as a
12709 /// type; otherwise, if it's a declaration, compute its pretty
12710 /// representation as a declaration. Note, for instance, that a
12711 /// DW_TAG_subprogram DIE is going to be represented as a function
/// type.  NOTE(review): this relies on die_is_type() accepting
/// DW_TAG_subprogram DIEs -- confirm.
12714 /// @param ctxt the reading context.
12716 /// @param die the DIE to consider.
12718 /// @param where_offset where in the DIE stream we are logically at.
12720 /// @return a copy of the pretty printed artifact.
12722 die_pretty_print(read_context& ctxt, const Dwarf_Die* die, size_t where_offset)
// The type test is tried first; the decl test only runs when it fails.
12724 if (die_is_type(die))
12725 return die_pretty_print_type(ctxt, die, where_offset);
12726 else if (die_is_decl(die))
12727 return die_pretty_print_decl(ctxt, die, where_offset);
12731 // -----------------------------------
12732 // </die pretty printer>
12733 // -----------------------------------
12736 // ----------------------------------
12737 // <die comparison engine>
12738 // ---------------------------------
12740 /// Compares two decl DIEs.
12742 /// This works only for DIEs emitted by the C language.
12744 /// This implementation doesn't yet support namespaces.
12746 /// This is a subroutine of compare_dies.
///
/// @param l the left operand of the comparison.  Must not be null.
///
/// @param r the right operand of the comparison.  Must not be null.
12748 /// @return true iff @p l equals @p r.
12750 compare_as_decl_dies(const Dwarf_Die *l, const Dwarf_Die *r)
12752 ABG_ASSERT(l && r);
// DIEs of different kinds are compared by tag first.
12754 int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l));
12755 int r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
12756 if (l_tag != r_tag)
12759 bool result = false;
12761 if (l_tag == DW_TAG_subprogram || l_tag == DW_TAG_variable)
12763 // Fast path for functions and global variables.
// Both spellings of the linkage name attribute are tried.
12764 if (compare_dies_string_attribute_value(l, r, DW_AT_linkage_name,
12766 || compare_dies_string_attribute_value(l, r, DW_AT_MIPS_linkage_name,
12773 if (compare_dies_string_attribute_value(l, r, DW_AT_name,
12782 // Fast path for types.
12783 if (compare_dies_string_attribute_value(l, r, DW_AT_name,
12789 /// Compares two type DIEs.
///
/// Apart from the special handling of DW_TAG_string_type DIEs, the
/// comparison is based on the size in bits of the two types.
12791 /// This is a subroutine of compare_dies.
12793 /// @param l the left operand of the comparison operator.
12795 /// @param r the right operand of the comparison operator.
12797 /// @return true iff @p l equals @p r.
12799 compare_as_type_dies(const Dwarf_Die *l, const Dwarf_Die *r)
12801 ABG_ASSERT(l && r);
12802 ABG_ASSERT(die_is_type(l));
12803 ABG_ASSERT(die_is_type(r));
// Two string type DIEs located at different offsets get special
// treatment.
12805 if (dwarf_tag(const_cast<Dwarf_Die*>(l)) == DW_TAG_string_type
12806 && dwarf_tag(const_cast<Dwarf_Die*>(r)) == DW_TAG_string_type
12807 && (dwarf_dieoffset(const_cast<Dwarf_Die*>(l))
12808 != dwarf_dieoffset(const_cast<Dwarf_Die*>(r))))
12809 // For now, we cannot compare DW_TAG_string_type because of its
12810 // string_length attribute that is a location descriptor that is
12811 // not necessarily a constant. So it's super hard to evaluate it
12812 // in a libabigail context. So for now, we just say that all
12813 // DW_TAG_string_type DIEs are different, by default.
// The results of die_size_in_bits() are not checked: a size that is
// not written by the call stays at its initial value of 0.
12816 uint64_t l_size = 0, r_size = 0;
12817 die_size_in_bits(l, l_size);
12818 die_size_in_bits(r, r_size);
12820 return l_size == r_size;
12823 /// Test if two DIEs representing function declarations have the same
12824 /// linkage name, and thus are considered equal if they are C or C++,
12825 /// because the two DIEs represent functions in the same binary.
12827 /// If the DIEs don't have a linkage name, the function compares their
12828 /// name. But in that case, the caller of the function must know that
12829 /// in C++ for instance, that doesn't imply that the two functions are
/// equal.
12832 /// @param ctxt the @ref read_context to consider.
12834 /// @param l the first function DIE to consider.
12836 /// @param r the second function DIE to consider.
12838 /// @return true iff the function represented by @p l has the same
12839 /// linkage name as the function represented by @p r.
12841 fn_die_equal_by_linkage_name(const read_context &ctxt,
12842 const Dwarf_Die *l,
12843 const Dwarf_Die *r)
// Both DIEs must be DW_TAG_subprogram DIEs.
12851 int tag = dwarf_tag(const_cast<Dwarf_Die*>(l));
12852 ABG_ASSERT(tag == DW_TAG_subprogram);
12853 tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
12854 ABG_ASSERT(tag == DW_TAG_subprogram);
12856 string lname = die_name(l), rname = die_name(r);
12857 string llinkage_name = die_linkage_name(l),
12858 rlinkage_name = die_linkage_name(r);
12860 if (ctxt.die_is_in_c_or_cplusplus(l)
12861 && ctxt.die_is_in_c_or_cplusplus(r))
// Both linkage names are available: they decide the outcome.
12863 if (!llinkage_name.empty() && !rlinkage_name.empty())
12864 return llinkage_name == rlinkage_name;
// Exactly one of the two DIEs has a linkage name.
12865 else if (!!llinkage_name.empty() != !!rlinkage_name.empty())
// Neither DIE has a linkage name: compare the plain names.
12868 return lname == rname;
// Outside of C and C++, only two non-empty and identical linkage
// names make the functions equal.
12871 return (!llinkage_name.empty()
12872 && !rlinkage_name.empty()
12873 && llinkage_name == rlinkage_name);
12876 /// Compare two DIEs emitted by a C compiler.
12878 /// @param ctxt the read context used to load the DWARF information.
12880 /// @param l the left-hand-side argument of this comparison operator.
12882 /// @param r the right-hand-side argument of this comparison operator.
12884 /// @param aggregates_being_compared this holds the names of the set
12885 /// of aggregates being compared. It's used by the comparison
12886 /// function to avoid recursing infinitely when faced with types
12887 /// referencing themselves through pointers or references. By
12888 /// default, just pass an empty instance of @ref istring_set_type to
12891 /// @param update_canonical_dies_on_the_fly if true, when two
12892 /// sub-types compare equal (during the comparison of @p l and @p r)
12893 /// update their canonical type. That way, two types of the same name
12894 /// are structurally compared to each other only once. So the
12895 /// non-linear structural comparison of two types of the same name
12896 /// only happens once.
12898 /// @return true iff @p l equals @p r.
12900 compare_dies(const read_context& ctxt,
12901 const Dwarf_Die *l, const Dwarf_Die *r,
12902 istring_set_type& aggregates_being_compared,
12903 bool update_canonical_dies_on_the_fly)
12908 int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l)),
12909 r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
12911 if (l_tag != r_tag)
12914 Dwarf_Off l_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(l)),
12915 r_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(r));
12916 Dwarf_Off l_canonical_die_offset = 0, r_canonical_die_offset = 0;
12917 die_source l_die_source, r_die_source;
12918 ABG_ASSERT(ctxt.get_die_source(l, l_die_source));
12919 ABG_ASSERT(ctxt.get_die_source(r, r_die_source));
12921 // If 'l' and 'r' already have canonical DIEs, then just compare the
12922 // offsets of their canonical DIEs.
12923 bool l_has_canonical_die_offset =
12924 (l_canonical_die_offset =
12925 ctxt.get_canonical_die_offset(l_offset, l_die_source,
12926 /*die_as_type=*/true));
12928 bool r_has_canonical_die_offset =
12929 (r_canonical_die_offset =
12930 ctxt.get_canonical_die_offset(r_offset, r_die_source,
12931 /*die_as_type=*/true));
12933 if (l_has_canonical_die_offset && r_has_canonical_die_offset)
12934 return l_canonical_die_offset == r_canonical_die_offset;
12936 bool result = true;
12940 case DW_TAG_base_type:
12941 case DW_TAG_string_type:
12942 if (!compare_as_type_dies(l, r)
12943 || !compare_as_decl_dies(l, r))
12947 case DW_TAG_typedef:
12948 case DW_TAG_pointer_type:
12949 case DW_TAG_reference_type:
12950 case DW_TAG_rvalue_reference_type:
12951 case DW_TAG_const_type:
12952 case DW_TAG_volatile_type:
12953 case DW_TAG_restrict_type:
12955 if (!compare_as_type_dies(l, r))
12961 bool from_the_same_tu = false;
12962 if (!pointer_or_qual_die_of_anonymous_class_type(l)
12963 && compare_dies_cu_decl_file(l, r, from_the_same_tu)
12964 && from_the_same_tu)
12966 // These two typedefs, pointer, reference, or qualified
12967 // types have the same name and are defined in the same TU.
12968 // They thus ought to be the same.
12970 // Note that pointers, reference or qualified types to
12971 // anonymous types are not taken into account here because
12972 // those always need to be structurally compared.
12979 // No fancy optimization in this case. We need to
12980 // structurally compare the two DIEs.
12981 Dwarf_Die lu_type_die, ru_type_die;
12982 bool lu_is_void, ru_is_void;
12984 lu_is_void = !die_die_attribute(l, DW_AT_type, lu_type_die);
12985 ru_is_void = !die_die_attribute(r, DW_AT_type, ru_type_die);
12987 if (lu_is_void && ru_is_void)
12989 else if (lu_is_void != ru_is_void)
12992 result = compare_dies(ctxt, &lu_type_die, &ru_type_die,
12993 aggregates_being_compared,
12994 update_canonical_dies_on_the_fly);
12998 case DW_TAG_enumeration_type:
12999 if (!compare_as_type_dies(l, r)
13000 || !compare_as_decl_dies(l, r))
13004 // Walk the enumerators.
13005 Dwarf_Die l_enumtor, r_enumtor;
13006 bool found_l_enumtor, found_r_enumtor;
13008 for (found_l_enumtor = dwarf_child(const_cast<Dwarf_Die*>(l),
13010 found_r_enumtor = dwarf_child(const_cast<Dwarf_Die*>(r),
13012 found_l_enumtor && found_r_enumtor;
13013 found_l_enumtor = dwarf_siblingof(&l_enumtor, &l_enumtor) == 0,
13014 found_r_enumtor = dwarf_siblingof(&r_enumtor, &r_enumtor) == 0)
13016 int l_tag = dwarf_tag(&l_enumtor), r_tag = dwarf_tag(&r_enumtor);
13017 if ( l_tag != r_tag)
13023 if (l_tag != DW_TAG_enumerator)
13026 uint64_t l_val = 0, r_val = 0;
13027 die_unsigned_constant_attribute(&l_enumtor,
13030 die_unsigned_constant_attribute(&r_enumtor,
13033 if (l_val != r_val)
13039 if (found_l_enumtor != found_r_enumtor )
13045 case DW_TAG_structure_type:
13046 case DW_TAG_union_type:
13048 interned_string ln = ctxt.get_die_pretty_type_representation(l, 0);
13049 interned_string rn = ctxt.get_die_pretty_type_representation(r, 0);
13051 if ((aggregates_being_compared.find(ln)
13052 != aggregates_being_compared.end())
13053 || (aggregates_being_compared.find(rn)
13054 != aggregates_being_compared.end()))
13056 else if (!compare_as_decl_dies(l, r))
13058 else if (!compare_as_type_dies(l, r))
13062 aggregates_being_compared.insert(ln);
13063 aggregates_being_compared.insert(rn);
13065 Dwarf_Die l_member, r_member;
13066 bool found_l_member, found_r_member;
13067 for (found_l_member = dwarf_child(const_cast<Dwarf_Die*>(l),
13069 found_r_member = dwarf_child(const_cast<Dwarf_Die*>(r),
13071 found_l_member && found_r_member;
13072 found_l_member = dwarf_siblingof(&l_member, &l_member) == 0,
13073 found_r_member = dwarf_siblingof(&r_member, &r_member) == 0)
13075 int l_tag = dwarf_tag(&l_member), r_tag = dwarf_tag(&r_member);
13076 if (l_tag != r_tag)
13082 if (l_tag != DW_TAG_member && l_tag != DW_TAG_variable)
13085 if (!compare_dies(ctxt, &l_member, &r_member,
13086 aggregates_being_compared,
13087 update_canonical_dies_on_the_fly))
13093 if (found_l_member != found_r_member)
13096 aggregates_being_compared.erase(ln);
13097 aggregates_being_compared.erase(rn);
13102 case DW_TAG_array_type:
13104 Dwarf_Die l_child, r_child;
13105 bool found_l_child, found_r_child;
13106 for (found_l_child = dwarf_child(const_cast<Dwarf_Die*>(l),
13108 found_r_child = dwarf_child(const_cast<Dwarf_Die*>(r),
13110 found_l_child && found_r_child;
13111 found_l_child = dwarf_siblingof(&l_child, &l_child) == 0,
13112 found_r_child = dwarf_siblingof(&r_child, &r_child) == 0)
13114 int l_child_tag = dwarf_tag(&l_child),
13115 r_child_tag = dwarf_tag(&r_child);
13116 if (l_child_tag == DW_TAG_subrange_type
13117 || r_child_tag == DW_TAG_subrange_type)
13118 if (!compare_dies(ctxt, &l_child, &r_child,
13119 aggregates_being_compared,
13120 update_canonical_dies_on_the_fly))
13126 if (found_l_child != found_r_child)
13131 case DW_TAG_subrange_type:
13133 uint64_t l_lower_bound = 0, r_lower_bound = 0,
13134 l_upper_bound = 0, r_upper_bound = 0;
13135 die_unsigned_constant_attribute(l, DW_AT_lower_bound, l_lower_bound);
13136 die_unsigned_constant_attribute(r, DW_AT_lower_bound, r_lower_bound);
13137 if (!die_unsigned_constant_attribute(l, DW_AT_upper_bound,
13140 uint64_t l_count = 0;
13141 if (die_unsigned_constant_attribute(l, DW_AT_count, l_count))
13143 l_upper_bound = l_lower_bound + l_count;
13148 if (!die_unsigned_constant_attribute(r, DW_AT_upper_bound,
13151 uint64_t r_count = 0;
13152 if (die_unsigned_constant_attribute(l, DW_AT_count, r_count))
13154 r_upper_bound = r_lower_bound + r_count;
13160 if ((l_lower_bound != r_lower_bound)
13161 || (l_upper_bound != r_upper_bound))
13166 case DW_TAG_subroutine_type:
13167 case DW_TAG_subprogram:
13169 interned_string ln = ctxt.get_die_pretty_type_representation(l, 0);
13170 interned_string rn = ctxt.get_die_pretty_type_representation(r, 0);
13172 if ((aggregates_being_compared.find(ln)
13173 != aggregates_being_compared.end())
13174 || (aggregates_being_compared.find(rn)
13175 != aggregates_being_compared.end()))
13177 else if (l_tag == DW_TAG_subroutine_type)
13179 // The string reprs of l and r are already equal. Now let's
13180 // just check if they both come from the same TU.
13181 bool from_the_same_tu = false;
13182 if (compare_dies_cu_decl_file(l, r, from_the_same_tu)
13183 && from_the_same_tu)
13188 if (!fn_die_equal_by_linkage_name(ctxt, l, r))
13194 if (!ctxt.die_is_in_c(l) && !ctxt.die_is_in_c(r))
13196 // In C, we cannot have two different functions with the
13197 // same linkage name in a given binary. But here we are
13198 // looking at DIEs that don't originate from C. So we
13199 // need to compare return types and parameter types.
13200 Dwarf_Die l_return_type, r_return_type;
13201 bool l_return_type_is_void = !die_die_attribute(l, DW_AT_type,
13203 bool r_return_type_is_void = !die_die_attribute(r, DW_AT_type,
13205 if (l_return_type_is_void != r_return_type_is_void
13206 || (!l_return_type_is_void
13207 && !compare_dies(ctxt,
13208 &l_return_type, &r_return_type,
13209 aggregates_being_compared,
13210 update_canonical_dies_on_the_fly)))
13214 Dwarf_Die l_child, r_child;
13215 bool found_l_child, found_r_child;
13216 for (found_l_child = dwarf_child(const_cast<Dwarf_Die*>(l),
13218 found_r_child = dwarf_child(const_cast<Dwarf_Die*>(r),
13220 found_l_child && found_r_child;
13221 found_l_child = dwarf_siblingof(&l_child,
13223 found_r_child = dwarf_siblingof(&r_child,
13226 int l_child_tag = dwarf_tag(&l_child);
13227 int r_child_tag = dwarf_tag(&r_child);
13228 if (l_child_tag != r_child_tag
13229 || (l_child_tag == DW_TAG_formal_parameter
13230 && !compare_dies(ctxt, &l_child, &r_child,
13231 aggregates_being_compared,
13232 update_canonical_dies_on_the_fly)))
13238 if (found_l_child != found_r_child)
13243 aggregates_being_compared.erase(ln);
13244 aggregates_being_compared.erase(rn);
13249 case DW_TAG_formal_parameter:
13251 Dwarf_Die l_type, r_type;
13252 bool l_type_is_void = !die_die_attribute(l, DW_AT_type, l_type);
13253 bool r_type_is_void = !die_die_attribute(r, DW_AT_type, r_type);
13254 if ((l_type_is_void != r_type_is_void)
13255 || !compare_dies(ctxt, &l_type, &r_type,
13256 aggregates_being_compared,
13257 update_canonical_dies_on_the_fly))
13262 case DW_TAG_variable:
13263 case DW_TAG_member:
13264 if (compare_as_decl_dies(l, r))
13266 // Compare the offsets of the data members
13267 if (l_tag == DW_TAG_member)
13269 int64_t l_offset_in_bits = 0, r_offset_in_bits = 0;
13270 die_member_offset(ctxt, l, l_offset_in_bits);
13271 die_member_offset(ctxt, r, r_offset_in_bits);
13272 if (l_offset_in_bits != r_offset_in_bits)
13277 // Compare the types of the data members or variables.
13278 Dwarf_Die l_type, r_type;
13279 ABG_ASSERT(die_die_attribute(l, DW_AT_type, l_type));
13280 ABG_ASSERT(die_die_attribute(r, DW_AT_type, r_type));
13281 if (aggregates_being_compared.size () < 5)
13283 if (!compare_dies(ctxt, &l_type, &r_type,
13284 aggregates_being_compared,
13285 update_canonical_dies_on_the_fly))
13290 if (!compare_as_type_dies(&l_type, &r_type)
13291 ||!compare_as_decl_dies(&l_type, &r_type))
13300 case DW_TAG_class_type:
13301 case DW_TAG_enumerator:
13302 case DW_TAG_packed_type:
13303 case DW_TAG_set_type:
13304 case DW_TAG_file_type:
13305 case DW_TAG_ptr_to_member_type:
13306 case DW_TAG_thrown_type:
13307 case DW_TAG_interface_type:
13308 case DW_TAG_unspecified_type:
13309 case DW_TAG_shared_type:
13310 case DW_TAG_compile_unit:
13311 case DW_TAG_namespace:
13312 case DW_TAG_module:
13313 case DW_TAG_constant:
13314 case DW_TAG_partial_unit:
13315 case DW_TAG_imported_unit:
13316 case DW_TAG_dwarf_procedure:
13317 case DW_TAG_imported_declaration:
13318 case DW_TAG_entry_point:
13320 case DW_TAG_lexical_block:
13321 case DW_TAG_unspecified_parameters:
13322 case DW_TAG_variant:
13323 case DW_TAG_common_block:
13324 case DW_TAG_common_inclusion:
13325 case DW_TAG_inheritance:
13326 case DW_TAG_inlined_subroutine:
13327 case DW_TAG_with_stmt:
13328 case DW_TAG_access_declaration:
13329 case DW_TAG_catch_block:
13330 case DW_TAG_friend:
13331 case DW_TAG_namelist:
13332 case DW_TAG_namelist_item:
13333 case DW_TAG_template_type_parameter:
13334 case DW_TAG_template_value_parameter:
13335 case DW_TAG_try_block:
13336 case DW_TAG_variant_part:
13337 case DW_TAG_imported_module:
13338 case DW_TAG_condition:
13339 case DW_TAG_type_unit:
13340 case DW_TAG_template_alias:
13341 case DW_TAG_lo_user:
13342 case DW_TAG_MIPS_loop:
13343 case DW_TAG_format_label:
13344 case DW_TAG_function_template:
13345 case DW_TAG_class_template:
13346 case DW_TAG_GNU_BINCL:
13347 case DW_TAG_GNU_EINCL:
13348 case DW_TAG_GNU_template_template_param:
13349 case DW_TAG_GNU_template_parameter_pack:
13350 case DW_TAG_GNU_formal_parameter_pack:
13351 case DW_TAG_GNU_call_site:
13352 case DW_TAG_GNU_call_site_parameter:
13353 case DW_TAG_hi_user:
13354 ABG_ASSERT_NOT_REACHED;
13358 && update_canonical_dies_on_the_fly
13359 && is_canonicalizeable_type_tag(l_tag))
13361 // If 'l' has no canonical DIE and if 'r' has one, then propagate
13362 // the canonical DIE of 'r' to 'l'.
13364 // In case 'r' has no canonical DIE, then compute it, and then
13365 // propagate that canonical DIE to 'l'.
13366 die_source l_source = NO_DEBUG_INFO_DIE_SOURCE,
13367 r_source = NO_DEBUG_INFO_DIE_SOURCE;
13368 ABG_ASSERT(ctxt.get_die_source(l, l_source));
13369 ABG_ASSERT(ctxt.get_die_source(r, r_source));
13370 if (!l_has_canonical_die_offset
13371 // A DIE can be equivalent only to another DIE of the same
13373 && l_source == r_source)
13375 if (!r_has_canonical_die_offset)
13376 ctxt.compute_canonical_die_offset(r, r_canonical_die_offset,
13377 /*die_as_type=*/true);
13378 ABG_ASSERT(r_canonical_die_offset);
13379 ctxt.set_canonical_die_offset(l, r_canonical_die_offset,
13380 /*die_as_type=*/true);
13386 /// Compare two DIEs emitted by a C compiler.
///
/// This overload is the top-level entry point of a comparison: it
/// creates an empty set of aggregates being compared and forwards to
/// the compare_dies() overload that takes that set as an extra
/// argument.
13388 /// @param ctxt the read context used to load the DWARF information.
13390 /// @param l the left-hand-side argument of this comparison operator.
13392 /// @param r the right-hand-side argument of this comparison operator.
13394 /// @param update_canonical_dies_on_the_fly if yes, then this function
13395 /// updates the canonical DIEs of sub-type DIEs of 'l' and 'r', while
13396 /// comparing l and r. This helps in making so that sub-type DIEs of
13397 /// 'l' and 'r' are compared structurally only once. This is how we
13398 /// turn this exponential comparison problem into a problem that is
13399 /// closer to a linear one.
13401 /// @return true iff @p l equals @p r.
13403 compare_dies(const read_context& ctxt,
13404 const Dwarf_Die *l,
13405 const Dwarf_Die *r,
13406 bool update_canonical_dies_on_the_fly)
// The set is local and starts empty: nothing is being compared yet
// when a comparison is started from here.
13408 istring_set_type aggregates_being_compared;
13409 return compare_dies(ctxt, l, r, aggregates_being_compared,
13410 update_canonical_dies_on_the_fly);
13413 // ----------------------------------
13414 // </die comparison engine>
13415 // ---------------------------------
13417 /// Get the point where a DW_AT_import DIE is used to import a given
13418 /// (unit) DIE, between two DIEs.
///
/// The search is driven by the imported unit points recorded in @p
/// ctxt for the unit at @p first_die_cu_offset. Within the selected
/// range, direct imports of @p partial_unit_offset are looked for
/// first; then each imported unit of the range is searched by calling
/// this function recursively.
13420 /// @param ctxt the dwarf reading context to consider.
13422 /// @param partial_unit_offset the imported unit for which we want to
13423 /// know the insertion point. This is usually a partial unit (with
13424 /// tag DW_TAG_partial_unit) but it does not necessarily have to be one.
13427 /// @param first_die_offset the offset of the DIE from which this
13428 /// function starts looking for the import point of
13429 /// @p partial_unit_offset. Note that this offset is excluded from the
13430 /// set of potential solutions.
13432 /// @param first_die_cu_offset the offset of the (compilation) unit
13433 /// that @p first_die_offset belongs to.
13435 /// @param source where the DIE of first_die_cu_offset unit comes from.
13438 /// @param last_die_offset the offset of the DIE up to
13439 /// which this function looks for the import point of @p
13440 /// partial_unit_offset. Note that this offset is excluded from the
13441 /// set of potential solutions. The value static_cast<size_t>(-1)
/// means that no upper limit is looked up.
13443 /// @param imported_point_offset. The resulting
13444 /// imported_point_offset. Note that if the imported DIE @p
13445 /// partial_unit_offset is not found between @p first_die_offset and
13446 /// @p last_die_offset, this parameter is left untouched by this function.
13449 /// @return true iff an imported unit is found between @p
13450 /// first_die_offset and @p last_die_offset.
13452 find_import_unit_point_between_dies(const read_context& ctxt,
13453 size_t partial_unit_offset,
13454 Dwarf_Off first_die_offset,
13455 Dwarf_Off first_die_cu_offset,
13457 size_t last_die_offset,
13458 size_t& imported_point_offset)
// Fetch the per-unit table of import points for the given DIE source,
// then the entry of the unit that contains the first DIE.
13460 const tu_die_imported_unit_points_map_type& tu_die_imported_unit_points_map =
13461 ctxt.tu_die_imported_unit_points_map(source);
13463 tu_die_imported_unit_points_map_type::const_iterator iter =
13464 tu_die_imported_unit_points_map.find(first_die_cu_offset);
// The unit is required to have an entry in the map.
13466 ABG_ASSERT(iter != tu_die_imported_unit_points_map.end());
13468 const imported_unit_points_type& imported_unit_points = iter->second;
// Nothing was imported into this unit, so there is nothing to find.
13469 if (imported_unit_points.empty())
// [b, e) starts out as the whole vector and is then narrowed by the
// lower-bound lookups below.
13472 imported_unit_points_type::const_iterator b = imported_unit_points.begin();
13473 imported_unit_points_type::const_iterator e = imported_unit_points.end();
// NOTE(review): the remaining arguments of the two lookups below are
// not visible in this listing; presumably they narrow 'b' using
// first_die_offset and 'e' using last_die_offset -- confirm.
13475 find_lower_bound_in_imported_unit_points(imported_unit_points,
13479 if (last_die_offset != static_cast<size_t>(-1))
13480 find_lower_bound_in_imported_unit_points(imported_unit_points,
// 'e' differs from end(): walk backward from 'e' down to 'b', so that
// the import point closest to the upper limit is found first.
13484 if (e != imported_unit_points.end())
// NOTE(review): with the condition 'i >= b' and the step '--i', the
// iterator is decremented once more after visiting 'b'. If 'b' is
// begin(), that moves it before the start of the container, which is
// undefined behavior for standard containers. The same applies to
// the second backward loop below.
13486 for (imported_unit_points_type::const_iterator i = e; i >= b; --i)
13487 if (i->imported_unit_die_off == partial_unit_offset)
13489 imported_point_offset = i->offset_of_import ;
// No direct import was found: look inside each imported unit.
13493 for (imported_unit_points_type::const_iterator i = e; i >= b; --i)
13495 if (find_import_unit_point_between_dies(ctxt,
13496 partial_unit_offset,
13497 i->imported_unit_child_off,
13498 i->imported_unit_cu_off,
13499 i->imported_unit_die_source,
13501 imported_point_offset))
// Otherwise walk forward over [b, e): direct imports first ...
13507 for (imported_unit_points_type::const_iterator i = b; i != e; ++i)
13508 if (i->imported_unit_die_off == partial_unit_offset)
13510 imported_point_offset = i->offset_of_import ;
// ... then recursively inside each imported unit.
13514 for (imported_unit_points_type::const_iterator i = b; i != e; ++i)
13516 if (find_import_unit_point_between_dies(ctxt,
13517 partial_unit_offset,
13518 i->imported_unit_child_off,
13519 i->imported_unit_cu_off,
13520 i->imported_unit_die_source,
13522 imported_point_offset))
13530 /// In the current translation unit, get the last point where a
13531 /// DW_AT_import DIE is used to import a given (unit) DIE, before a
13532 /// given DIE is found. That given DIE is called the limit DIE.
13534 /// Said otherwise, this function returns the last import point of a
13535 /// unit, before a limit.
///
/// The search starts at the first child of the current translation
/// unit DIE and is delegated to
/// find_import_unit_point_between_dies(), always with the primary
/// debug info as the DIE source.
13537 /// @param ctxt the dwarf reading context to consider.
13539 /// @param partial_unit_offset the imported unit for which we want to
13540 /// know the insertion point of. This is usually a partial unit (with
13541 /// tag DW_TAG_partial_unit) but it does not necessarily have to be one.
13544 /// @param where_offset the offset of the limit DIE.
13546 /// @param imported_point_offset. The resulting imported_point_offset.
13547 /// Note that if the imported DIE @p partial_unit_offset is not found
13548 /// before @p where_offset, this is set to the last @p
13549 /// partial_unit_offset found under @p parent_die.
/// NOTE(review): this function has no parameter named parent_die;
/// the sentence above probably dates from an older signature.
13551 /// @return true iff an imported unit is found before @p where_offset.
13552 /// Note that if an imported unit is found after @p where_offset then @p
13553 /// imported_point_offset is set and the function return false.
13555 find_import_unit_point_before_die(const read_context& ctxt,
13556 size_t partial_unit_offset,
13557 size_t where_offset,
13558 size_t& imported_point_offset)
// Local result; 0 is used below as the "nothing found" value.
13560 size_t import_point_offset = 0;
13561 Dwarf_Die first_die_of_tu;
// dwarf_child() returns non-zero when the current TU DIE has no child
// (or on error); there is nothing to search in that case.
13563 if (dwarf_child(const_cast<Dwarf_Die*>(ctxt.cur_tu_die()),
13564 &first_die_of_tu) != 0)
13567 Dwarf_Die cu_die_memory;
// Get the unit DIE that contains the first child DIE.
13570 cu_die = dwarf_diecu(const_cast<Dwarf_Die*>(&first_die_of_tu),
13571 &cu_die_memory, 0, 0);
13573 if (find_import_unit_point_between_dies(ctxt, partial_unit_offset,
13574 dwarf_dieoffset(&first_die_of_tu),
13575 dwarf_dieoffset(cu_die),
13576 /*source=*/PRIMARY_DEBUG_INFO_DIE_SOURCE,
13578 import_point_offset))
13580 imported_point_offset = import_point_offset;
// The search reported no match, but a non-zero offset was still
// recorded: hand it to the caller as well.
13584 if (import_point_offset)
13586 imported_point_offset = import_point_offset;
13593 /// Return the parent DIE for a given DIE.
13595 /// Note that the function build_die_parent_map() must have been
13596 /// called before this one can work. This function either succeeds or
13597 /// aborts the current process.
///
/// When the parent found is a DW_TAG_partial_unit, the logical parent
/// is resolved instead: either the current translation unit DIE, or
/// the parent of the DIE that imports that partial unit.
13599 /// @param ctxt the read context to consider.
13601 /// @param die the DIE for which we want the parent.
13603 /// @param parent_die the output parameter set to the parent die of
13604 /// @p die. Its memory must be allocated and handled by the caller.
13606 /// @param where_offset the offset of the DIE where we are "logically"
13607 /// positioned at, in the DIE tree. This is useful when @p die is
13608 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree. A value of 0 makes a partial-unit parent resolve to
/// the current translation unit DIE.
13611 /// @return true if the function could get a parent DIE, false
/// otherwise.
13614 get_parent_die(const read_context& ctxt,
13615 const Dwarf_Die* die,
13616 Dwarf_Die& parent_die,
13617 size_t where_offset)
13619 ABG_ASSERT(ctxt.dwarf());
// NOTE(review): get_die_source() here and dwarf_offdie*() below are
// called inside ABG_ASSERT for their side effects (they fill 'source'
// and 'parent_die'). This assumes ABG_ASSERT always evaluates its
// argument -- confirm against the macro definition.
13621 die_source source = NO_DEBUG_INFO_DIE_SOURCE;
13622 ABG_ASSERT(ctxt.get_die_source(die, source));
// Look the DIE up in the child-offset -> parent-offset map of its
// source.
13624 const offset_offset_map_type& m = ctxt.die_parent_map(source);
13625 offset_offset_map_type::const_iterator i =
13626 m.find(dwarf_dieoffset(const_cast<Dwarf_Die*>(die)));
// Materialize the parent DIE from its offset, using the DWARF handle
// that matches the source of the DIE.
13633 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
13634 ABG_ASSERT(dwarf_offdie(ctxt.dwarf(), i->second, &parent_die));
13636 case ALT_DEBUG_INFO_DIE_SOURCE:
13637 ABG_ASSERT(dwarf_offdie(ctxt.alt_dwarf(), i->second, &parent_die));
13639 case TYPE_UNIT_DIE_SOURCE:
13640 ABG_ASSERT(dwarf_offdie_types(ctxt.dwarf(), i->second, &parent_die));
13642 case NO_DEBUG_INFO_DIE_SOURCE:
13643 case NUMBER_OF_DIE_SOURCES:
13644 ABG_ASSERT_NOT_REACHED;
// A partial unit is not a meaningful parent by itself: resolve where
// it is logically included.
13647 if (dwarf_tag(&parent_die) == DW_TAG_partial_unit)
// No logical position was given: use the current TU DIE.
13649 if (where_offset == 0)
13651 parent_die = *ctxt.cur_tu_die();
13654 size_t import_point_offset = 0;
13656 find_import_unit_point_before_die(ctxt,
13657 dwarf_dieoffset(&parent_die),
13659 import_point_offset);
13661 // It looks like parent_die (which comes from the alternate
13662 // debug info file) hasn't been imported into this TU. So,
13663 // Let's assume its logical parent is the DIE of the current
// TU.
13665 parent_die = *ctxt.cur_tu_die();
// An import point was found: the logical parent is the parent of the
// importing DIE, which lives in the primary debug info.
13668 ABG_ASSERT(import_point_offset);
13669 Dwarf_Die import_point_die;
13670 ABG_ASSERT(dwarf_offdie(ctxt.dwarf(),
13671 import_point_offset,
13672 &import_point_die));
13673 return get_parent_die(ctxt, &import_point_die,
13674 parent_die, where_offset);
13681 /// Get the DIE representing the scope of a given DIE.
13683 /// Please note that when the DIE we are looking at has a
13684 /// DW_AT_specification or DW_AT_abstract_origin attribute, the scope
13685 /// DIE is the parent DIE of the DIE referred to by that attribute.
13686 /// This is the only case where a scope DIE is different from the
13687 /// parent DIE of a given DIE.
///
/// Parents tagged DW_TAG_subprogram, DW_TAG_subroutine_type or
/// DW_TAG_array_type are skipped: their own scope is used instead.
13689 /// Also note that if the current translation unit is from C, then
13690 /// this returns the global scope.
13692 /// @param ctxt the reading context to use.
13694 /// @param die the DIE to consider.
13696 /// @param where_offset where we are logically at in the DIE stream.
13698 /// @param scope_die out parameter. This is set to the resulting
13699 /// scope DIE iff the function returns true.
13701 get_scope_die(const read_context& ctxt,
13702 const Dwarf_Die* die,
13703 size_t where_offset,
13704 Dwarf_Die& scope_die)
// In C the scope is the unit DIE containing 'die'. The pointer
// returned by dwarf_diecu() is used as the success indicator.
13706 if (is_c_language(ctxt.cur_transl_unit()->get_language()))
13708 ABG_ASSERT(dwarf_tag(const_cast<Dwarf_Die*>(die)) != DW_TAG_member);
13709 return dwarf_diecu(const_cast<Dwarf_Die*>(die), &scope_die, 0, 0);
// If 'die' refers to another DIE through DW_AT_specification or
// DW_AT_abstract_origin, the scope is the one of that other DIE.
13712 Dwarf_Die logical_parent_die;
13713 if (die_die_attribute(die, DW_AT_specification,
13714 logical_parent_die, false)
13715 || die_die_attribute(die, DW_AT_abstract_origin,
13716 logical_parent_die, false))
13717 return get_scope_die(ctxt, &logical_parent_die, where_offset, scope_die);
13719 if (!get_parent_die(ctxt, die, scope_die, where_offset))
// Functions, function types and array types are not used as scopes
// here: climb to their own scope. Note that scope_die is passed both
// as the input DIE and as the output parameter of the recursive call.
13722 if (dwarf_tag(&scope_die) == DW_TAG_subprogram
13723 || dwarf_tag(&scope_die) == DW_TAG_subroutine_type
13724 || dwarf_tag(&scope_die) == DW_TAG_array_type)
13725 return get_scope_die(ctxt, &scope_die, where_offset, scope_die);
13730 /// Return the abigail IR node representing the scope of a given DIE.
13732 /// Note that it is the logical scope that is returned. That is, if
13733 /// the DIE has a DW_AT_specification or DW_AT_abstract_origin
13734 /// attribute, it's the scope of the referred-to DIE (via these
13735 /// attributes) that is returned.
13737 /// Also note that if the current translation unit is from C, then
13738 /// this returns the global scope.
13740 /// @param ctxt the dwarf reading context to use.
13742 /// @param die the DIE to get the scope for.
13744 /// @param called_for_public_decl is true if this function has been
13745 /// initially called within the context of a public decl.
13747 /// @param where_offset the offset of the DIE where we are "logically"
13748 /// positioned at, in the DIE tree. This is useful when @p die is
13749 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
///
/// @return the scope found, or the nil scope of @p ctxt when no
/// parent DIE could be found or when the parent is not a scope.
13751 static scope_decl_sptr
13752 get_scope_for_die(read_context& ctxt,
13754 bool called_for_public_decl,
13755 size_t where_offset)
// NOTE(review): source_of_die has no initializer; it is only given a
// value by get_die_source(), which is called inside ABG_ASSERT.
// Confirm that ABG_ASSERT always evaluates its argument.
13757 die_source source_of_die;
13758 ABG_ASSERT(ctxt.get_die_source(die, source_of_die));
// In C everything lives in the global scope.
13760 if (is_c_language(ctxt.cur_transl_unit()->get_language()))
13762 ABG_ASSERT(dwarf_tag(die) != DW_TAG_member);
13763 return ctxt.global_scope();
// Follow DW_AT_specification / DW_AT_abstract_origin: the logical
// scope is the one of the DIE referred to.
13766 Dwarf_Die cloned_die;
13767 if (die_die_attribute(die, DW_AT_specification, cloned_die, false)
13768 || die_die_attribute(die, DW_AT_abstract_origin, cloned_die, false))
13769 return get_scope_for_die(ctxt, &cloned_die,
13770 called_for_public_decl,
13773 Dwarf_Die parent_die;
13775 if (!get_parent_die(ctxt, die, parent_die, where_offset))
13776 return ctxt.nil_scope();
// The parent is a unit DIE: the scope is a global scope.
13778 if (dwarf_tag(&parent_die) == DW_TAG_compile_unit
13779 || dwarf_tag(&parent_die) == DW_TAG_partial_unit
13780 || dwarf_tag(&parent_die) == DW_TAG_type_unit)
// Partial and type units are only expected for DIEs coming from the
// alternate debug info or from a type unit; they map to the global
// scope of the current translation unit.
13782 if (dwarf_tag(&parent_die) == DW_TAG_partial_unit
13783 || dwarf_tag(&parent_die) == DW_TAG_type_unit)
13785 ABG_ASSERT(source_of_die == ALT_DEBUG_INFO_DIE_SOURCE
13786 || source_of_die == TYPE_UNIT_DIE_SOURCE);
13787 return ctxt.cur_transl_unit()->get_global_scope();
13790 // For top level DIEs like DW_TAG_compile_unit, we just want to
13791 // return the global scope for the corresponding translation
13792 // unit. This must have been set by
13793 // build_translation_unit_and_add_to_ir if we already started to
13794 // build the translation unit of parent_die. Otherwise, just
13795 // return the global scope of the current translation unit.
13796 die_tu_map_type::const_iterator i =
13797 ctxt.die_tu_map().find(dwarf_dieoffset(&parent_die));
13798 if (i != ctxt.die_tu_map().end())
13799 return i->second->get_global_scope();
13800 return ctxt.cur_transl_unit()->get_global_scope();
13804 type_or_decl_base_sptr d;
13805 if (dwarf_tag(&parent_die) == DW_TAG_subprogram
13806 || dwarf_tag(&parent_die) == DW_TAG_array_type)
13807 // this is an entity defined in a scope that is a function.
13808 // Normally, I would say that this should be dropped. But I have
13809 // seen a case where a typedef DIE needed by a function parameter
13810 // was defined right before the parameter, under the scope of the
13811 // function. Yeah, weird. So if I drop the typedef DIE, I'd drop
13812 // the function parm too. So for that case, let's say that the
13813 // scope is the scope of the function itself. Note that this is
13814 // an error of the DWARF emitter. We should never see this DIE in
// this context.
13817 scope_decl_sptr s = get_scope_for_die(ctxt, &parent_die,
13818 called_for_public_decl,
13820 if (is_anonymous_type_die(die))
13821 // For anonymous type that have nothing to do in a function or
13822 // array type context, let's put it in the containing
13823 // namespace. That is, do not let it be in a containing class
13824 // or union where it has nothing to do.
13825 while (is_class_or_union_type(s))
// Climb one level: parent_die is replaced by its own parent, and the
// scope is computed again from there.
13827 if (!get_parent_die(ctxt, &parent_die, parent_die, where_offset))
13828 return ctxt.nil_scope();
13829 s = get_scope_for_die(ctxt, &parent_die,
13830 called_for_public_decl,
// General case: build (or get) the IR node of the parent DIE and use
// it as the scope, provided it really is a scope_decl.
13836 d = build_ir_node_from_die(ctxt, &parent_die,
13837 called_for_public_decl,
13839 s = dynamic_pointer_cast<scope_decl>(d);
13841 // this is an entity defined in something that is not a scope.
13843 return ctxt.nil_scope();
// If the scope is a declaration-only class, look at the definition
// of that declaration.
13845 class_decl_sptr cl = dynamic_pointer_cast<class_decl>(d);
13846 if (cl && cl->get_is_declaration_only())
13848 scope_decl_sptr scop (cl->get_definition_of_declaration());
13857 /// Convert a DWARF constant representing the value of the
13858 /// DW_AT_language property into the translation_unit::language
/// enumerator.
///
/// Languages whose DW_LANG_* constant may be missing from the DWARF
/// headers in use are only handled when the matching
/// HAVE_DW_LANG_*_enumerator macro is defined.
13861 /// @param l the DWARF constant to convert.
13863 /// @return the resulting translation_unit::language enumerator.
13864 static translation_unit::language
13865 dwarf_language_to_tu_language(size_t l)
// One-to-one mapping from DW_LANG_* constants to
// translation_unit::LANG_* enumerators.
13870 return translation_unit::LANG_C89;
13872 return translation_unit::LANG_C;
13873 case DW_LANG_Ada83:
13874 return translation_unit::LANG_Ada83;
13875 case DW_LANG_C_plus_plus:
13876 return translation_unit::LANG_C_plus_plus;
13877 case DW_LANG_Cobol74:
13878 return translation_unit::LANG_Cobol74;
13879 case DW_LANG_Cobol85:
13880 return translation_unit::LANG_Cobol85;
13881 case DW_LANG_Fortran77:
13882 return translation_unit::LANG_Fortran77;
13883 case DW_LANG_Fortran90:
13884 return translation_unit::LANG_Fortran90;
13885 case DW_LANG_Pascal83:
13886 return translation_unit::LANG_Pascal83;
13887 case DW_LANG_Modula2:
13888 return translation_unit::LANG_Modula2;
13890 return translation_unit::LANG_Java;
13892 return translation_unit::LANG_C99;
13893 case DW_LANG_Ada95:
13894 return translation_unit::LANG_Ada95;
13895 case DW_LANG_Fortran95:
13896 return translation_unit::LANG_Fortran95;
13898 return translation_unit::LANG_PL1;
13900 return translation_unit::LANG_ObjC;
13901 case DW_LANG_ObjC_plus_plus:
13902 return translation_unit::LANG_ObjC_plus_plus;
// The cases below are conditionally compiled, one guard macro per
// language constant.
13904 #ifdef HAVE_DW_LANG_Rust_enumerator
13906 return translation_unit::LANG_Rust;
13909 #ifdef HAVE_DW_LANG_UPC_enumerator
13911 return translation_unit::LANG_UPC;
13914 #ifdef HAVE_DW_LANG_D_enumerator
13916 return translation_unit::LANG_D;
13919 #ifdef HAVE_DW_LANG_Python_enumerator
13920 case DW_LANG_Python:
13921 return translation_unit::LANG_Python;
13924 #ifdef HAVE_DW_LANG_Go_enumerator
13926 return translation_unit::LANG_Go;
13929 #ifdef HAVE_DW_LANG_C11_enumerator
13931 return translation_unit::LANG_C11;
13934 #ifdef HAVE_DW_LANG_C_plus_plus_03_enumerator
13935 case DW_LANG_C_plus_plus_03:
13936 return translation_unit::LANG_C_plus_plus_03;
13939 #ifdef HAVE_DW_LANG_C_plus_plus_11_enumerator
13940 case DW_LANG_C_plus_plus_11:
13941 return translation_unit::LANG_C_plus_plus_11;
13944 #ifdef HAVE_DW_LANG_C_plus_plus_14_enumerator
13945 case DW_LANG_C_plus_plus_14:
13946 return translation_unit::LANG_C_plus_plus_14;
13949 #ifdef HAVE_DW_LANG_Mips_Assembler_enumerator
13950 case DW_LANG_Mips_Assembler:
13951 return translation_unit::LANG_Mips_Assembler;
// Presumably the fallback for any value not handled above -- confirm
// that this sits under a default label.
13955 return translation_unit::LANG_UNKNOWN;
13959 /// Get the default array lower bound value as defined by the DWARF
13960 /// specification, version 4, depending on the language of the
13961 /// translation unit.
///
/// Languages are grouped by case labels; each group shares one
/// result.
13963 /// @param l the language of the translation unit.
13965 /// @return the default array lower bound value.
13967 get_default_array_lower_bound(translation_unit::language l)
// NOTE(review): the value produced for each group below is not
// visible in this listing. Check each group against the default
// lower bound table of the DWARF specification.
13972 case translation_unit::LANG_UNKNOWN:
// COBOL dialects.
13975 case translation_unit::LANG_Cobol74:
13976 case translation_unit::LANG_Cobol85:
// C, C++ and Objective-C dialects, plus Rust.
13979 case translation_unit::LANG_C89:
13980 case translation_unit::LANG_C99:
13981 case translation_unit::LANG_C11:
13982 case translation_unit::LANG_C:
13983 case translation_unit::LANG_C_plus_plus_03:
13984 case translation_unit::LANG_C_plus_plus_11:
13985 case translation_unit::LANG_C_plus_plus_14:
13986 case translation_unit::LANG_C_plus_plus:
13987 case translation_unit::LANG_ObjC:
13988 case translation_unit::LANG_ObjC_plus_plus:
13989 case translation_unit::LANG_Rust:
// Fortran, Ada, Pascal and Modula-2.
13992 case translation_unit::LANG_Fortran77:
13993 case translation_unit::LANG_Fortran90:
13994 case translation_unit::LANG_Fortran95:
13995 case translation_unit::LANG_Ada83:
13996 case translation_unit::LANG_Ada95:
13997 case translation_unit::LANG_Pascal83:
13998 case translation_unit::LANG_Modula2:
14001 case translation_unit::LANG_Java:
14004 case translation_unit::LANG_PL1:
// Remaining languages, handled as one group.
14007 case translation_unit::LANG_UPC:
14008 case translation_unit::LANG_D:
14009 case translation_unit::LANG_Python:
14010 case translation_unit::LANG_Go:
14011 case translation_unit::LANG_Mips_Assembler:
14019 /// For a given offset, find the lower bound of a sorted vector of
14020 /// imported unit point offset.
14022 /// The lower bound is the smallest point (the point with the smallest
14023 /// offset) which is not less than a given offset, as computed by
/// std::lower_bound. NOTE(review): this relies on the ordering
/// defined for imported_unit_point, which is not visible here;
/// presumably points are ordered by offset -- confirm.
14025 /// @param p the sorted vector of imported unit points.
14028 /// @param val the offset to consider when looking for the lower
/// bound.
14031 /// @param r an iterator to the lower bound found. This parameter is
14032 /// set iff the function returns true.
14034 /// @return true iff the lower bound has been found.
14036 find_lower_bound_in_imported_unit_points(const imported_unit_points_type& p,
14038 imported_unit_points_type::const_iterator& r)
// Wrap the offset in an imported_unit_point so that it can be
// compared against the elements of the vector.
14040 imported_unit_point v(val);
14041 imported_unit_points_type::const_iterator result =
14042 std::lower_bound(p.begin(), p.end(), v);
// std::lower_bound returns p.end() when every element is less than
// 'v'; that is the "not found" case.
14044 bool is_ok = result != p.end();
14052 /// Given a DW_TAG_compile_unit, build and return the corresponding
14053 /// abigail::translation_unit ir node. Note that this function
14054 /// recursively reads the children dies of the current DIE and
14055 /// populates the resulting translation unit.
///
/// After the children have been read, the variable declarations
/// queued in ctxt.var_decls_to_re_add_to_tree() are processed: those
/// whose demangled linkage name designates a member of a class found
/// in the current translation unit are attached to that class and
/// flagged as static data members.
14057 /// @param ctxt the read_context to use.
14059 /// @param die the DW_TAG_compile_unit DIE to consider.
14061 /// @param address_size the size of the addresses expressed in this
14062 /// translation unit in general.
14064 /// @return a pointer to the resulting translation_unit.
14065 static translation_unit_sptr
14066 build_translation_unit_and_add_to_ir(read_context& ctxt,
14070 translation_unit_sptr result;
14074 ABG_ASSERT(dwarf_tag(die) == DW_TAG_compile_unit);
14076 // Clear the part of the context that is dependent on the translation
14077 // unit we are reading.
14078 ctxt.clear_per_translation_unit_data();
// Record this DIE as the one of the translation unit being read.
14080 ctxt.cur_tu_die(die);
14082 string path = die_string_attribute(die, DW_AT_name);
14083 string compilation_dir = die_string_attribute(die, DW_AT_comp_dir);
14085 // See if the same translation unit exists already in the current
14086 // corpus. Sometimes, the same translation unit can be present
14087 // several times in the same debug info. The content of the
14088 // different instances of the translation unit are different. So to
14089 // represent that, we are going to re-use the same translation
14090 // unit. That is, it's going to be the union of all the translation
14091 // units of the same path.
// NOTE(review): the two strings are joined unconditionally; if
// DW_AT_name already holds an absolute path the result is not a real
// path. That is harmless if abs_path is only a lookup key -- confirm.
14093 string abs_path = compilation_dir + "/" + path;
14094 result = ctxt.current_corpus()->find_translation_unit(abs_path);
// Presumably only done when the lookup above found nothing: create
// the translation unit, add it to the corpus and set its language
// from DW_AT_language -- confirm the guarding condition.
14099 result.reset(new translation_unit(ctxt.env(),
14102 result->set_compilation_dir_path(compilation_dir);
14103 ctxt.current_corpus()->add(result);
14105 die_unsigned_constant_attribute(die, DW_AT_language, l);
14106 result->set_language(dwarf_language_to_tu_language(l));
// Make it the current translation unit and remember which
// translation unit this DIE offset maps to.
14109 ctxt.cur_transl_unit(result);
14110 ctxt.die_tu_map()[dwarf_dieoffset(die)] = result;
// A unit DIE without children has no content to read.
14113 if (dwarf_child(die, &child) != 0)
// The translation unit is flagged as not constructed while its
// content is being built.
14116 result->set_is_constructed(false);
// Build an IR node for each child DIE of the unit, in order.
14119 build_ir_node_from_die(ctxt, &child,
14120 die_is_public_decl(&child),
14121 dwarf_dieoffset(&child));
14122 while (dwarf_siblingof(&child, &child) == 0);
// Process the variable declarations that were queued for
// re-insertion while reading the children.
14124 if (!ctxt.var_decls_to_re_add_to_tree().empty())
14125 for (list<var_decl_sptr>::const_iterator v =
14126 ctxt.var_decls_to_re_add_to_tree().begin();
14127 v != ctxt.var_decls_to_re_add_to_tree().end();
// Variables that are already members need no fixing (presumably
// skipped here -- the statement under this test is not visible).
14130 if (is_member_decl(*v))
14133 ABG_ASSERT((*v)->get_scope());
// Recover the qualified name from the mangled linkage name.
14134 string demangled_name =
14135 demangle_cplus_mangled_name((*v)->get_linkage_name());
14136 if (!demangled_name.empty())
// The last component is the member name; the remaining components,
// if any, form the name of the containing class.
14138 std::list<string> fqn_comps;
14139 fqn_to_components(demangled_name, fqn_comps);
14140 string mem_name = fqn_comps.back();
14141 fqn_comps.pop_back();
14142 class_decl_sptr class_type;
14144 if (!fqn_comps.empty())
14146 ty_name = components_to_type_name(fqn_comps);
14148 lookup_class_type(ty_name, *ctxt.cur_transl_unit());
14152 // So we are seeing a member variable for which there
14153 // is a global variable definition DIE not having a
14154 // reference attribute pointing back to the member
14155 // variable declaration DIE. Thus remove the global
14156 // variable definition from its current non-class
// scope.
14159 if ((d = lookup_var_decl_in_scope(mem_name, class_type)))
14160 // This is the data member with the same name in cl.
14161 // We just need to flag it as static.
14165 // In this case there is no data member with the
14166 // same name in cl already. Let's add it there then
14168 remove_decl_from_scope(*v);
14169 d = add_decl_to_scope(*v, class_type);
14172 ABG_ASSERT(dynamic_pointer_cast<var_decl>(d));
14173 // Let's flag the data member as static.
14174 set_member_is_static(d, true);
// The queue has been processed; empty it.
14178 ctxt.var_decls_to_re_add_to_tree().clear();
14180 result->set_is_constructed(true);
14185 /// Build a abigail::namespace_decl out of a DW_TAG_namespace or
14186 /// DW_TAG_module (for fortran) DIE.
14188 /// Note that this function connects the DW_TAG_namespace to the IR
14189 /// being currently created, reads the children of the DIE and
14190 /// connects them to the IR as well.
14192 /// @param ctxt the read context to use.
14194 /// @param die the DIE to read from. Must be either DW_TAG_namespace
14195 /// or DW_TAG_module.
14197 /// @param where_offset the offset of the DIE where we are "logically"
14198 /// positioned at, in the DIE tree. This is useful when @p die is
14199 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
14202 /// @return the resulting @ref abigail::namespace_decl or NULL if it
14203 /// couldn't be created.
14204 static namespace_decl_sptr
14205 build_namespace_decl_and_add_to_ir(read_context& ctxt,
14207 size_t where_offset)
14209 namespace_decl_sptr result;
14215 ABG_ASSERT(ctxt.get_die_source(die, source));
// Only namespace and module DIEs are handled here.
14217 unsigned tag = dwarf_tag(die);
14218 if (tag != DW_TAG_namespace && tag != DW_TAG_module)
// Find the IR scope that the new namespace belongs to.
14221 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
14222 /*called_for_public_decl=*/false,
14225 string name, linkage_name;
14227 die_loc_and_name(ctxt, die, loc, name, linkage_name);
// Create the namespace, attach it to its scope and associate the DIE
// to it.
14229 result.reset(new namespace_decl(ctxt.env(), name, loc));
14230 add_decl_to_scope(result, scope.get());
14231 ctxt.associate_die_to_decl(die, result, where_offset);
// A namespace DIE without children needs no further reading.
14234 if (dwarf_child(die, &child) != 0)
// The namespace is the top of the scope stack while its children are
// being read.
14237 ctxt.scope_stack().push(result.get());
14239 build_ir_node_from_die(ctxt, &child,
14240 /*called_from_public_decl=*/false,
14242 while (dwarf_siblingof(&child, &child) == 0);
14243 ctxt.scope_stack().pop();
14248 /// Build a @ref type_decl out of a DW_TAG_base_type DIE.
///
/// An existing type with the same name is looked up first, either in
/// the corpus group whose types should be reused or in the current
/// corpus; a new type_decl is created otherwise.
14250 /// @param ctxt the read context to use.
14252 /// @param die the DW_TAG_base_type to consider.
14254 /// @param where_offset where we are logically at in the DIE stream.
14256 /// @return the resulting type_decl_sptr.
14257 static type_decl_sptr
14258 build_type_decl(read_context& ctxt, Dwarf_Die* die, size_t where_offset)
14260 type_decl_sptr result;
14264 ABG_ASSERT(dwarf_tag(die) == DW_TAG_base_type);
// The size is read from DW_AT_byte_size, or from DW_AT_bit_size when
// the former is absent.
14266 uint64_t byte_size = 0, bit_size = 0;
14267 if (!die_unsigned_constant_attribute(die, DW_AT_byte_size, byte_size))
14268 if (!die_unsigned_constant_attribute(die, DW_AT_bit_size, bit_size))
14271 if (bit_size == 0 && byte_size != 0)
14272 // Update the bit size.
14273 bit_size = byte_size * 8;
14275 string type_name, linkage_name;
14277 die_loc_and_name(ctxt, die, loc, type_name, linkage_name);
// NOTE(review): byte_size is also 0 when only DW_AT_bit_size was
// present, so a type described only by its bit size takes this
// zero-size branch too -- confirm this is intended.
14279 if (byte_size == 0)
14281 // The size of the type is zero, that must mean that we are
14282 // looking at the definition of the void type.
14283 if (type_name == "void")
14284 result = is_type_decl(build_ir_node_for_void_type(ctxt));
14286 // A type of size zero that is not void? Hmmh, I am not sure
14287 // what that means. Return nil for now.
// When types are reused from a corpus group, integral type names are
// normalized before the lookup.
14291 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
14293 string normalized_type_name = type_name;
14294 integral_type int_type;
14295 if (parse_integral_type(type_name, int_type))
14296 normalized_type_name = int_type.to_string();
14297 result = lookup_basic_type(normalized_type_name, *corp);
// Look the type up, by its name as is, in the current corpus.
14301 if (corpus_sptr corp = ctxt.current_corpus())
14302 result = lookup_basic_type(type_name, *corp);
// Create a new type with alignment 0, then associate the DIE to the
// resulting type.
14304 result.reset(new type_decl(ctxt.env(), type_name, bit_size,
14305 /*alignment=*/0, loc, linkage_name));
14306 ctxt.associate_die_to_type(die, result, where_offset);
14310 /// Build an enum_type_decl from a DW_TAG_enumeration_type DIE.
14312 /// @param ctxt the read context to use.
14314 /// @param die the DIE to read from.
14316 /// @param scope the scope of the final enum.  Note that this function
14317 /// does *NOT* add the built type to this scope.  The scope is just so
14318 /// that the function knows how to name anonymous enums.
///
/// @param where_offset where we are logically at in the DIE stream.
14320 /// @return the built enum_type_decl or NULL if it could not be built.
14321 static enum_type_decl_sptr
14322 build_enum_type(read_context& ctxt,
14325 size_t where_offset)
14327 enum_type_decl_sptr result;
14331 unsigned tag = dwarf_tag(die);
14332 if (tag != DW_TAG_enumeration_type)
14335 string name, linkage_name;
14337 die_loc_and_name(ctxt, die, loc, name, linkage_name);
14339 bool enum_is_anonymous = false;
14340 // If the enum is anonymous, let's give it a name.
14343 name = get_internal_anonymous_die_prefix_name(die);
14344 ABG_ASSERT(!name.empty());
14345 // But we remember that the type is anonymous.
14346 enum_is_anonymous = true;
// If the scope already has anonymous member enums, their count is used
// to build the internal name of this one.
14348 if (size_t s = scope->get_num_anonymous_member_enums())
14349 name = build_internal_anonymous_die_name(name, s);
14352 bool use_odr = ctxt.odr_is_relevant(die);
14353 // If the type has location, then associate it to its
14354 // representation.  This way, all occurrences of types with the same
14355 // representation (name) and location can be later detected as being
14356 // for the same type.
14358 if (!enum_is_anonymous)
14362 if (enum_type_decl_sptr pre_existing_enum =
14363 is_enum_type(ctxt.lookup_artifact_from_die(die)))
14364 result = pre_existing_enum;
14366 else if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
14369 result = lookup_enum_type_per_location(loc.expand(), *corp);
// Here a pre-existing enum is only re-used if its location is the same
// as the location of this DIE.
14373 if (enum_type_decl_sptr pre_existing_enum =
14374 is_enum_type(ctxt.lookup_artifact_from_die(die)))
14375 if (pre_existing_enum->get_location() == loc)
14376 result = pre_existing_enum;
14381 ctxt.associate_die_to_type(die, result, where_offset);
14385 // TODO: for anonymous enums, maybe have a map of loc -> enums so that
14386 // we can look them up?
14389 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
14392 // For now we consider that underlying types of enums are all anonymous.
// NOTE(review): the flag is unconditionally true here, so the
// "enum-<name>-underlying-type" naming further down looks unreachable
// for now -- confirm.
14393 bool enum_underlying_type_is_anonymous= true;
14394 string underlying_type_name;
14395 if (enum_underlying_type_is_anonymous)
14397 underlying_type_name = "unnamed-enum";
14398 enum_underlying_type_is_anonymous = true;
14401 underlying_type_name = string("enum-") + name;
14402 underlying_type_name += "-underlying-type";
// Collect the enumerators: each DW_TAG_enumerator child contributes its
// name and its DW_AT_const_value.
14404 enum_type_decl::enumerators enms;
14406 if (dwarf_child(die, &child) == 0)
14410 if (dwarf_tag(&child) != DW_TAG_enumerator)
14415 die_loc_and_name(ctxt, &child, l, n, m);
// NOTE(review): the result of reading DW_AT_const_value is not checked
// here, and the value is read as an unsigned constant -- confirm that
// this is what is wanted for enumerators with negative values.
14417 die_unsigned_constant_attribute(&child, DW_AT_const_value, val);
14418 enms.push_back(enum_type_decl::enumerator(ctxt.env(), n, val));
14420 while (dwarf_siblingof(&child, &child) == 0);
14423 // DWARF up to version 4 (at least) doesn't seem to carry the
14424 // underlying type, so let's create an artificial one here, whose
14425 // sole purpose is to be passed to the constructor of the
14426 // enum_type_decl type.
// NOTE(review): 'size' is passed twice below; build_type_decl() labels
// the argument in that second position as /*alignment=*/ -- confirm that
// using the size as the alignment is intended.
14427 type_decl_sptr t(new type_decl(ctxt.env(), underlying_type_name,
14428 size, size, location()));
14429 t->set_is_anonymous(enum_underlying_type_is_anonymous);
14430 translation_unit_sptr tu = ctxt.cur_transl_unit();
// The artificial underlying type is added to the global scope of the
// current translation unit.
14432 add_decl_to_scope(t, tu->get_global_scope().get());
14435 t = dynamic_pointer_cast<type_decl>(d);
14437 result.reset(new enum_type_decl(name, loc, t, enms, linkage_name));
14438 result->set_is_anonymous(enum_is_anonymous);
14439 ctxt.associate_die_to_type(die, result, where_offset);
14443 /// Once a function_decl has been built and added to a class as a
14444 /// member function, this function updates the information of the
14445 /// function_decl concerning the properties of its relationship with
14446 /// the member class.  That is, it updates properties like
14447 /// virtualness, access, constness, cdtorness, etc ...
14449 /// @param die the DIE of the function_decl that has been just built.
14451 /// @param f the function_decl that has just been built from @p die.
14453 /// @param klass the @ref class_or_union that @p f belongs to.
14455 /// @param ctxt the context used to read the ELF/DWARF information.
14457 finish_member_function_reading(Dwarf_Die* die,
14458 const function_decl_sptr& f,
14459 const class_or_union_sptr& klass,
14460 read_context& ctxt)
14464 method_decl_sptr m = is_method_decl(f);
14467 method_type_sptr method_t = is_method_type(m->get_type());
14468 ABG_ASSERT(method_t);
// A constructor is recognized by having the same name as the class; a
// destructor is recognized by a name starting with '~'.
14470 bool is_ctor = (f->get_name() == klass->get_name());
14471 bool is_dtor = (!f->get_name().empty()
14472 && static_cast<string>(f->get_name())[0] == '~');
14473 bool is_virtual = die_is_virtual(die);
14474 int64_t vindex = -1;
14476 die_virtual_function_index(die, vindex);
// The default access is private, or public when the class is a struct.
// die_access_specifier() is then given the chance to override that
// default from the DIE.
14477 access_specifier access = private_access;
14478 if (class_decl_sptr c = is_class_type(klass))
14479 if (c->is_struct())
14480 access = public_access;
14481 die_access_specifier(die, access);
14483 bool is_static = false;
14485 // Let's see if the first parameter is a pointer to an instance of
14486 // the same class type as the current class and has a
14487 // DW_AT_artificial attribute flag set.  We are not looking at
14488 // DW_AT_object_pointer (for DWARF 3) because it wasn't being
14489 // emitted in GCC 4_4, which was already DWARF 3.
14490 function_decl::parameter_sptr first_parm;
14491 if (!f->get_parameters().empty())
14492 first_parm = f->get_parameters()[0];
14494 bool is_artificial =
14495 first_parm && first_parm->get_artificial();;
14496 pointer_type_def_sptr this_ptr_type;
14497 type_base_sptr other_klass;
14500 this_ptr_type = is_pointer_type(first_parm->get_type());
14502 other_klass = this_ptr_type->get_pointed_to_type();
14503 // Sometimes, other_klass can be qualified; e.g, volatile.  In
14504 // that case, let's get the unqualified version of other_klass.
14505 if (qualified_type_def_sptr q = is_qualified_type(other_klass))
14506 other_klass = q->get_underlying_type();
// The pointed-to type is compared to the class by qualified name.
14509 && get_type_name(other_klass) == klass->get_qualified_name())
// Record all the properties gathered above on the method.
14514 set_member_access_specifier(m, access);
14516 set_member_function_vtable_offset(m, vindex);
14517 set_member_function_is_virtual(m, is_virtual);
14518 set_member_is_static(m, is_static);
14519 set_member_function_is_ctor(m, is_ctor);
14520 set_member_function_is_dtor(m, is_dtor);
14521 set_member_function_is_const(m, method_t->get_is_const());
14523 ABG_ASSERT(is_member_function(m));
14525 if (is_virtual && !f->get_linkage_name().empty() && !f->get_symbol())
14527 // This is a virtual member function which has a linkage name
14528 // but has no underlying symbol set.
14530 // The underlying elf symbol to set to this function can show up
14531 // later in the DWARF input or it can be that, because of some
14532 // compiler optimization, the relation between this function and
14533 // its underlying elf symbol is simply not emitted in the DWARF.
14535 // Let's thus schedule this function for a later fixup pass
14537 // read_context::fixup_functions_with_no_symbols()) that will
14538 // set its underlying symbol.
14540 // Note that if the underlying symbol is encountered later in the
14541 // DWARF input, then the part of build_function_decl() that
14542 // updates the function to set its underlying symbol will
14543 // de-schedule this function wrt fixup pass.
14544 Dwarf_Off die_offset = dwarf_dieoffset(die);
14545 die_function_decl_map_type &fns_with_no_symbol =
14546 ctxt.die_function_decl_with_no_symbol_map();
14547 die_function_decl_map_type::const_iterator i =
14548 fns_with_no_symbol.find(die_offset);
// Only the first function seen for a given DIE offset is recorded.
14549 if (i == fns_with_no_symbol.end())
14550 fns_with_no_symbol[die_offset] = f;
14555 /// If a function DIE has attributes which have not yet been read and
14556 /// added to the internal representation that represents that function
14557 /// then read those extra attributes and update the internal
14558 /// representation.
14560 /// @param ctxt the read context to use.
14562 /// @param die the function DIE to consider.
14564 /// @param where_offset where we are logically, currently, in the stream
14565 /// of DIEs.  If you don't know what this is, you can just set it to zero.
14567 /// @param existing_fn the representation of the function to update.
14569 /// @return the updated function representation.
14570 static function_decl_sptr
14571 maybe_finish_function_decl_reading(read_context& ctxt,
14573 size_t where_offset,
14574 const function_decl_sptr& existing_fn)
// The update itself is delegated to build_function_decl().
14576 function_decl_sptr result = build_function_decl(ctxt, die,
14583 /// Look up a class or a typedef with a given qualified name in the
14584 /// corpus that a given scope belongs to.
14586 /// @param scope the scope to consider.  It is dereferenced
/// unconditionally, so it must not be NULL.
14588 /// @param type_name the name of the type to look for.  It is
/// qualified with the name of @p scope before the lookup is performed.
14590 /// @return the typedef or class type found.
14591 static type_base_sptr
14592 lookup_class_or_typedef_from_corpus(scope_decl* scope, const string& type_name)
14594 string qname = build_qualified_name(scope, type_name);
14595 corpus* corp = scope->get_corpus();
14596 type_base_sptr result = lookup_class_or_typedef_type(qname, *corp);
14600 /// Look up a class or typedef type from the current corpus.
14603 /// The type being looked for has the same name as a given DIE.
14605 /// @param ctxt the reading context to use.
14607 /// @param die the DIE which has the same name as the type we are looking for.
14610 /// @param called_for_public_decl whether this function is being
14611 /// called from a publicly defined declaration.
14613 /// @param where_offset where we are logically at in the DIE stream.
14615 /// @return the type found, or a nil pointer when no lookup could be
/// performed (e.g, when the DIE has no DW_AT_name).
14616 static type_base_sptr
14617 lookup_class_or_typedef_from_corpus(read_context& ctxt,
14619 bool called_for_public_decl,
14620 size_t where_offset)
14623 return class_decl_sptr();
14625 string class_name = die_string_attribute(die, DW_AT_name);
// A DIE without a name cannot be looked up by name.
14626 if (class_name.empty())
14627 return class_decl_sptr();
// Get the scope of the DIE and perform the lookup in the corpus of
// that scope.
14629 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
14630 called_for_public_decl,
14633 return lookup_class_or_typedef_from_corpus(scope.get(), class_name);
14635 return type_base_sptr();
14638 /// Look up a class, typedef or enum type with a given qualified name
14639 /// in the corpus that a given scope belongs to.
14641 /// @param scope the scope to consider.  It is dereferenced
/// unconditionally, so it must not be NULL.
14643 /// @param type_name the name of the type to look for.  It is
/// qualified with the name of @p scope before the lookup is performed.
14645 /// @return the typedef, enum or class type found.
14646 static type_base_sptr
14647 lookup_class_typedef_or_enum_type_from_corpus(scope_decl* scope,
14648 const string& type_name)
14650 string qname = build_qualified_name(scope, type_name);
14651 corpus* corp = scope->get_corpus();
14652 type_base_sptr result = lookup_class_typedef_or_enum_type(qname, *corp);
14656 /// Look up a class, typedef or enum type in a given scope, in the
14657 /// corpus that scope belongs to.
14659 /// @param die the DIE of the class, typedef or enum to look up.
14661 /// @param anonymous_member_type_idx if @p die represents an anonymous
14662 /// type, this is the index of that anonymous type in its scope, in
14663 /// case there are several anonymous types of the same kind in that scope.
14666 /// @param scope the scope in which to look for the type.
14668 /// @return the typedef, enum or class type found, or a nil pointer
/// when no type name could be determined for @p die.
14669 static type_base_sptr
14670 lookup_class_typedef_or_enum_type_from_corpus(Dwarf_Die* die,
14671 size_t anonymous_member_type_idx,
14675 return class_decl_sptr();
14677 string type_name = die_string_attribute(die, DW_AT_name);
// For an anonymous type, the internal name built from its index in the
// scope is involved in the lookup.
14678 if (is_anonymous_type_die(die))
14680 get_internal_anonymous_die_name(die, anonymous_member_type_idx);
14682 if (type_name.empty())
14683 return class_decl_sptr();
14685 return lookup_class_typedef_or_enum_type_from_corpus(scope, type_name);
14688 /// Test if a DIE represents a function that is a member of a given class.
14691 /// @param ctxt the reading context.
14693 /// @param function_die the DIE of the function to consider.
14695 /// @param class_type the class type to consider.
14697 /// (Note that this function takes no where_offset parameter.)
14699 /// @return the method declaration corresponding to the member
14700 /// function of @p class_type, iff @p function_die is for a member
14701 /// function of @p class_type.
14702 static method_decl_sptr
14703 is_function_for_die_a_member_of_class(read_context& ctxt,
14704 Dwarf_Die* function_die,
14705 const class_or_union_sptr& class_type)
// Look for an IR artifact that was already built for this DIE.
14707 type_or_decl_base_sptr artifact = ctxt.lookup_artifact_from_die(function_die);
14710 return method_decl_sptr();
// The artifact can be either a method declaration or a method type; in
// both cases, what matters is the method type.
14712 method_decl_sptr method = is_method_decl(artifact);
14713 method_type_sptr method_type;
14716 method_type = method->get_type();
14718 method_type = is_method_type(artifact);
14719 ABG_ASSERT(method_type);
14721 class_or_union_sptr method_class = method_type->get_class_type();
14722 ABG_ASSERT(method_class);
// The class of the method and @p class_type are compared by qualified
// name, not by pointer identity.
14724 string method_class_name = method_class->get_qualified_name(),
14725 class_type_name = class_type->get_qualified_name();
14727 if (method_class_name == class_type_name)
14729 //ABG_ASSERT(class_type.get() == method_class.get());
14733 return method_decl_sptr();
14736 /// If a given function DIE represents an existing member function of
14737 /// a given class, then update that member function with new
14738 /// properties present in the DIE.  Otherwise, if the DIE represents a
14739 /// new member function that is not already present in the class then
14740 /// add that new member function to the class.
14742 /// @param ctxt the reading context.
14744 /// @param function_die the DIE of the potential member function to consider.
14747 /// @param class_type the class type to consider.
14749 /// @param called_from_public_decl is true iff this function was
14750 /// called from a publicly defined and exported declaration.
14752 /// @param where_offset where we are logically at in the DIE stream.
14754 /// @return the method decl representing the member function, or a nil
/// pointer if no method could be obtained for @p function_die.
14755 static method_decl_sptr
14756 add_or_update_member_function(read_context& ctxt,
14757 Dwarf_Die* function_die,
14758 const class_or_union_sptr& class_type,
14759 bool called_from_public_decl,
14760 size_t where_offset)
// First, look for a method of class_type that was already built for
// this DIE.
14762 method_decl_sptr method =
14763 is_function_for_die_a_member_of_class(ctxt, function_die, class_type);
// Build the method from the DIE (presumably only when the lookup above
// found nothing -- TODO confirm).
14766 method = is_method_decl(build_ir_node_from_die(ctxt, function_die,
14768 called_from_public_decl,
14771 return method_decl_sptr();
// Set the properties of the method that relate to its class.
14773 finish_member_function_reading(function_die,
14774 is_function_decl(method),
14779 /// Build an IR node for class type from a DW_TAG_structure_type or
14780 /// DW_TAG_class_type DIE and add that node to the ABI corpus being
14781 /// currently built.
14783 /// If the DIE represents a class type that already exists, then update
14784 /// the existing class type with the new properties found in the DIE.
14786 /// It means that this function can also update an existing
14787 /// class_decl node with data members, member functions and other
14788 /// properties coming from the DIE.
14790 /// @param ctxt the read context to consider.
14792 /// @param die the DIE to read information from.  Must be either a
14793 /// DW_TAG_structure_type or a DW_TAG_class_type.
14795 /// @param scope a pointer to the scope_decl* under which this class
14796 /// is to be added to.
14798 /// @param is_struct whether the class was declared as a struct.
14800 /// @param klass if non-null, this is a klass to append the members
14801 /// to.  Otherwise, this function just builds the class from scratch.
14803 /// @param called_from_public_decl set to true if this class is being
14804 /// called from a "Public declaration like vars or public symbols".
14806 /// @param where_offset the offset of the DIE where we are "logically"
14807 /// positioned at, in the DIE tree.  This is useful when @p die is
14808 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
14811 /// @return the resulting class_type.
14812 static class_decl_sptr
14813 add_or_update_class_type(read_context& ctxt,
14817 class_decl_sptr klass,
14818 bool called_from_public_decl,
14819 size_t where_offset)
14821 class_decl_sptr result;
// NOTE(review): get_die_source() is called from within ABG_ASSERT and
// 'source' is used below; this relies on ABG_ASSERT always evaluating
// its argument -- confirm.
14826 ABG_ASSERT(ctxt.get_die_source(die, source));
14828 unsigned tag = dwarf_tag(die);
14830 if (tag != DW_TAG_class_type && tag != DW_TAG_structure_type)
// Look for a class that is currently being built ("work in progress")
// for this very DIE.
14834 die_class_or_union_map_type::const_iterator i =
14835 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
14836 if (i != ctxt.die_wip_classes_map(source).end())
14838 class_decl_sptr class_type = is_class_type(i->second);
14839 ABG_ASSERT(class_type);
14844 if (!ctxt.die_is_in_cplus_plus(die))
14845 // In c++, a given class might be put together "piecewise".  That
14846 // is, in a translation unit, some data members of that class
14847 // might be defined; then in another later, some member types
14848 // might be defined.  So we can't just re-use a class "verbatim"
14849 // just because we've seen previously.  So in c++, re-using the
14850 // class is a much more clever process.  In the other languages however
14851 // (like in C) we can re-use a class definition verbatim.
14852 if (class_decl_sptr class_type =
14853 is_class_type(ctxt.lookup_type_from_die(die)))
14854 if (!class_type->get_is_declaration_only())
14857 string name, linkage_name;
14859 die_loc_and_name(ctxt, die, loc, name, linkage_name);
14860 bool is_declaration_only = die_is_declaration_only(die);
14862 bool is_anonymous = false;
14865 // So we are looking at an anonymous struct.  Let's give it a name.
14867 name = get_internal_anonymous_die_prefix_name(die);
14868 ABG_ASSERT(!name.empty());
14869 // But we remember that the type is anonymous.
14870 is_anonymous = true;
// If the scope already has anonymous member classes, their count is
// used to build the internal name of this one.
14872 if (size_t s = scope->get_num_anonymous_member_classes())
14873 name = build_internal_anonymous_die_name(name, s);
14878 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
14881 // TODO: if there is only one class defined in the corpus
14882 // for this location, then re-use it.  But if there are
14883 // more than one, then do not re-use it, for now.
14884 result = lookup_class_type_per_location(loc.expand(), *corp);
14886 // TODO: if there is just one class for that name defined,
14887 // then re-use it.  Otherwise, don't.
14888 result = lookup_class_type(name, *corp);
14890 // If we are seeing a declaration of a definition we
14891 // already had, or if we are seeing a type with the same
14892 // declaration-only-ness that we had before, then keep
14893 // the one we already had.
14894 && (result->get_is_declaration_only() == is_declaration_only
14895 || (!result->get_is_declaration_only()
14896 && is_declaration_only)))
14898 ctxt.associate_die_to_type(die, result, where_offset);
14902 // We might be seeing the definition of a declaration we
14903 // already had.  In that case, keep the definition and
14904 // drop the declaration.
14909 // If we've already seen the same class as 'die', then let's re-use
14910 // that one, unless it's an anonymous class.  We can't really safely
14911 // re-use anonymous classes as they have no name, by construction.
14912 // What we can do, rather, is to reuse the typedef that names them,
14913 // when they do have a naming typedef.
14915 if (class_decl_sptr pre_existing_class =
14916 is_class_type(ctxt.lookup_type_artifact_from_die(die)))
14917 klass = pre_existing_class;
14920 die_size_in_bits(die, size);
14923 bool has_child = (dwarf_child(die, &child) == 0);
14925 decl_base_sptr res;
// Either update the class we were given (or found) ...
14928 res = result = klass;
14930 result->set_location(loc);
// ... or create a new class and add it to the scope.
14934 result.reset(new class_decl(ctxt.env(), name, size,
14935 /*alignment=*/0, is_struct, loc,
14936 decl_base::VISIBILITY_DEFAULT));
14937 result->set_is_anonymous(is_anonymous);
14939 if (is_declaration_only)
14940 result->set_is_declaration_only(true);
14942 res = add_decl_to_scope(result, scope);
14943 result = dynamic_pointer_cast<class_decl>(res);
14944 ABG_ASSERT(result);
14948 result->set_size_in_bits(size);
14950 ctxt.associate_die_to_type(die, result, where_offset);
14952 ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
14955 // TODO: set the access specifier for the declaration-only class
// Flag the class as being "work in progress" while its members are
// read; the entry is erased from the map at the end of this function.
14959 ctxt.die_wip_classes_map(source)[dwarf_dieoffset(die)] = result;
14961 scope_decl_sptr scop =
14962 dynamic_pointer_cast<scope_decl>(res);
14964 ctxt.scope_stack().push(scop.get());
// These indexes start at -1 so that the first anonymous member type of
// each kind gets index 0 when the index is pre-incremented below.
14968 int anonymous_member_class_index = -1;
14969 int anonymous_member_union_index = -1;
14970 int anonymous_member_enum_index = -1;
14974 tag = dwarf_tag(&child);
14976 // Handle base classes.
14977 if (tag == DW_TAG_inheritance)
14979 result->set_is_declaration_only(false);
14981 Dwarf_Die type_die;
14982 if (!die_die_attribute(&child, DW_AT_type, type_die))
14985 type_base_sptr base_type;
14987 lookup_class_or_typedef_from_corpus(ctxt, &type_die,
14988 called_from_public_decl,
14992 is_type(build_ir_node_from_die(ctxt, &type_die,
14993 called_from_public_decl,
14996 // Sometimes base_type can be a typedef.  Let's make
14997 // sure that typedef is compatible with a class type.
14998 class_decl_sptr b = is_compatible_with_class_type(base_type);
15002 access_specifier access =
15007 die_access_specifier(&child, access);
15009 bool is_virt= die_is_virtual(&child);
15010 int64_t offset = 0;
15011 bool is_offset_present =
15012 die_member_offset(ctxt, &child, offset);
// An offset of -1 is passed when the DIE carries no member offset.
15014 class_decl::base_spec_sptr base(new class_decl::base_spec
15016 is_offset_present ? offset : -1,
15018 if (b->get_is_declaration_only())
15019 ABG_ASSERT(ctxt.is_decl_only_class_scheduled_for_resolution(b));
// Test if the class already has a base class of that qualified name.
15020 if (result->find_base_class(b->get_qualified_name()))
15022 result->add_base_specifier(base);
15024 // Handle data members.
15025 else if (tag == DW_TAG_member
15026 || tag == DW_TAG_variable)
15028 Dwarf_Die type_die;
15029 if (!die_die_attribute(&child, DW_AT_type, type_die))
15034 die_loc_and_name(ctxt, &child, loc, n, m);
15035 /// For now, we skip the hidden vtable pointer.
15036 /// Currently, we're looking for a member starting with
15037 /// "_vptr[^0-9a-zA-Z_]", which is what Clang and GCC
15038 /// use as a name for the hidden vtable pointer.
// NOTE(review): if the member is named exactly "_vptr", n.at(5) is out
// of range and std::string::at() throws std::out_of_range -- confirm
// whether a check of n.size() is needed before calling at(5).
15039 if (n.substr(0, 5) == "_vptr"
15040 && !std::isalnum(n.at(5))
15044 // Test if the variable is already a member of this class.
15046 if (lookup_var_decl_in_scope(n, result))
15049 int64_t offset_in_bits = 0;
15050 bool is_laid_out = die_member_offset(ctxt, &child,
15052 // For now, is_static == !is_laid_out.  When we have
15053 // templates, we'll try to be more specific.  For now,
15054 // this approximation should do OK.
15055 bool is_static = !is_laid_out;
15057 if (is_static && variable_is_suppressed(ctxt,
15062 decl_base_sptr ty = is_decl(
15063 build_ir_node_from_die(ctxt, &type_die,
15064 called_from_public_decl,
15066 type_base_sptr t = is_type(ty);
15070 // The call to build_ir_node_from_die above could have
15071 // triggered the adding of a data member named 'n' into
15072 // result.  So let's check again if the variable is
15073 // already a member of this class.
15074 if (lookup_var_decl_in_scope(n, result))
15078 // We have a non-static data member.  So this class
15079 // cannot be a declaration-only class anymore, even if
15080 // some DWARF emitters might consider it otherwise.
15081 result->set_is_declaration_only(false);
15082 access_specifier access =
15087 die_access_specifier(&child, access);
15089 var_decl_sptr dm(new var_decl(n, t, loc, m));
15090 result->add_data_member(dm, access, is_laid_out,
15091 is_static, offset_in_bits);
15092 ABG_ASSERT(has_scope(dm));
15093 ctxt.associate_die_to_decl(&child, dm, where_offset,
15094 /*associate_by_repr=*/false);
15096 // Handle member functions;
15097 else if (tag == DW_TAG_subprogram)
15100 add_or_update_member_function(ctxt, &child, result,
15101 called_from_public_decl,
15103 if (function_decl_sptr f = is_function_decl(r))
15104 ctxt.associate_die_to_decl(&child, f, where_offset,
15105 /*associate_by_repr=*/true);
15107 // Handle member types
15108 else if (die_is_type(&child))
15110 // Track the anonymous type index in the current
15111 // scope.  Look for what this means by reading the
15112 // comment of the function
15113 // build_internal_anonymous_die_name.
15114 int anonymous_member_type_index = 0;
15115 if (is_anonymous_type_die(&child))
15117 // Update the anonymous type index.
15118 if (die_is_class_type(&child))
15119 anonymous_member_type_index =
15120 ++anonymous_member_class_index;
15121 else if (dwarf_tag(&child) == DW_TAG_union_type)
15122 anonymous_member_type_index =
15123 ++anonymous_member_union_index;
15124 else if (dwarf_tag(&child) == DW_TAG_enumeration_type)
15125 anonymous_member_type_index =
15126 ++anonymous_member_enum_index;
15128 // If the type is not already a member of this class,
15129 // then add it to the class.
15130 if (!lookup_class_typedef_or_enum_type_from_corpus
15131 (&child, anonymous_member_type_index, result.get()))
15132 build_ir_node_from_die(ctxt, &child, result.get(),
15133 called_from_public_decl,
15136 } while (dwarf_siblingof(&child, &child) == 0);
15139 ctxt.scope_stack().pop();
// The class is not "work in progress" anymore: remove it from the map,
// after propagating the access specifier of the map entry if that entry
// is a member type.
15142 die_class_or_union_map_type::const_iterator i =
15143 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
15144 if (i != ctxt.die_wip_classes_map(source).end())
15146 if (is_member_type(i->second))
15147 set_member_access_specifier(res,
15148 get_member_access_specifier(i->second));
15149 ctxt.die_wip_classes_map(source).erase(i);
15153 ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
15157 /// Build an @ref union_decl from a DW_TAG_union_type DIE.
15159 /// @param ctxt the read context to use.
15161 /// @param die the DIE to read from.
15163 /// @param scope the scope the resulting @ref union_decl belongs to.
15165 /// @param union_type if this parameter is non-nil, then this function
15166 /// updates the @ref union_decl that it points to, rather than
15167 /// creating a new @ref union_decl.
15169 /// @param called_from_public_decl is true if this function has been
15170 /// initially called within the context of a public decl.
15172 /// @param where_offset the offset of the DIE where we are "logically"
15173 /// positioned at, in the DIE tree.  This is useful when @p die is
15174 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
///
/// @return the resulting @ref union_decl, as a union_decl_sptr.
15176 static union_decl_sptr
15177 add_or_update_union_type(read_context& ctxt,
15180 union_decl_sptr union_type,
15181 bool called_from_public_decl,
15182 size_t where_offset)
15184 union_decl_sptr result;
15188 unsigned tag = dwarf_tag(die);
15190 if (tag != DW_TAG_union_type)
// NOTE(review): get_die_source() is called from within ABG_ASSERT and
// 'source' is used below; this relies on ABG_ASSERT always evaluating
// its argument -- confirm.
15194 ABG_ASSERT(ctxt.get_die_source(die, source));
// Look for a union that is currently being built ("work in progress")
// for this very DIE.
15196 die_class_or_union_map_type::const_iterator i =
15197 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
15198 if (i != ctxt.die_wip_classes_map(source).end())
15200 union_decl_sptr u = is_union_type(i->second);
15206 string name, linkage_name;
15208 die_loc_and_name(ctxt, die, loc, name, linkage_name);
15209 bool is_declaration_only = die_is_declaration_only(die);
15211 bool is_anonymous = false;
15214 // So we are looking at an anonymous union.  Let's give it a name.
15216 name = get_internal_anonymous_die_prefix_name(die);
15217 ABG_ASSERT(!name.empty());
15218 // But we remember that the type is anonymous.
15219 is_anonymous = true;
// If the scope already has anonymous member unions, their count is used
// to build the internal name of this one.
15221 if (size_t s = scope->get_num_anonymous_member_unions())
15222 name = build_internal_anonymous_die_name(name, s);
15225 // If the type has location, then associate it to its
15226 // representation.  This way, all occurrences of types with the same
15227 // representation (name) and location can be later detected as being
15228 // for the same type.
15232 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
15235 result = lookup_union_type_per_location(loc.expand(), *corp);
15237 result = lookup_union_type(name, *corp);
15241 ctxt.associate_die_to_type(die, result, where_offset);
15247 // If we've already seen the same union as 'die' then
15248 // let's re-use that one.  We can't really safely re-use anonymous
15249 // unions as they have no name, by construction.  What we can do,
15250 // rather, is to reuse the typedef that names them, when they do have
15251 // a naming typedef.
15253 if (union_decl_sptr pre_existing_union =
15254 is_union_type(ctxt.lookup_artifact_from_die(die)))
15255 union_type = pre_existing_union;
15258 die_size_in_bits(die, size);
// Either update the union we were given (or found) ...
15262 result = union_type;
15263 result->set_location(loc);
// ... or create a new union and add it to the scope.
15267 result.reset(new union_decl(ctxt.env(), name, size,
15268 loc, decl_base::VISIBILITY_DEFAULT));
15269 result->set_is_anonymous(is_anonymous);
15270 if (is_declaration_only)
15271 result->set_is_declaration_only(true);
15272 result = is_union_type(add_decl_to_scope(result, scope));
15273 ABG_ASSERT(result);
15278 result->set_size_in_bits(size);
15279 result->set_is_declaration_only(false);
15282 ctxt.associate_die_to_type(die, result, where_offset);
15284 // TODO: maybe schedule declaration-only union for result like we do
// for classes:
15286 // ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
15289 bool has_child = (dwarf_child(die, &child) == 0);
// Flag the union as being "work in progress" while its members are
// read; the entry is erased from the map at the end of this function.
15293 ctxt.die_wip_classes_map(source)[dwarf_dieoffset(die)] = result;
15295 scope_decl_sptr scop =
15296 dynamic_pointer_cast<scope_decl>(result);
15298 ctxt.scope_stack().push(scop.get());
15304 tag = dwarf_tag(&child);
15305 // Handle data members.
15306 if (tag == DW_TAG_member || tag == DW_TAG_variable)
15308 Dwarf_Die type_die;
15309 if (!die_die_attribute(&child, DW_AT_type, type_die))
15314 die_loc_and_name(ctxt, &child, loc, n, m);
// Test if the variable is already a member of this union.
15316 if (lookup_var_decl_in_scope(n, result))
15319 ssize_t offset_in_bits = 0;
15320 decl_base_sptr ty =
15321 is_decl(build_ir_node_from_die(ctxt, &type_die,
15322 called_from_public_decl,
15324 type_base_sptr t = is_type(ty);
15328 // We have a non-static data member.  So this union
15329 // cannot be a declaration-only union anymore, even if
15330 // some DWARF emitters might consider it otherwise.
15331 result->set_is_declaration_only(false);
15332 access_specifier access = private_access;
15334 die_access_specifier(&child, access);
// Data members of the union are always added as laid out and
// non-static.
15336 var_decl_sptr dm(new var_decl(n, t, loc, m));
15337 result->add_data_member(dm, access, /*is_laid_out=*/true,
15338 /*is_static=*/false,
15340 ABG_ASSERT(has_scope(dm));
15341 ctxt.associate_die_to_decl(&child, dm, where_offset,
15342 /*associate_by_repr=*/false);
15344 // Handle member functions;
15345 else if (tag == DW_TAG_subprogram)
15348 is_decl(build_ir_node_from_die(ctxt, &child,
15350 called_from_public_decl,
15355 function_decl_sptr f = dynamic_pointer_cast<function_decl>(r);
15358 finish_member_function_reading(&child, f, result, ctxt);
15360 ctxt.associate_die_to_decl(&child, f, where_offset,
15361 /*associate_by_repr=*/false);
15363 // Handle member types
15364 else if (die_is_type(&child))
15365 decl_base_sptr td =
15366 is_decl(build_ir_node_from_die(ctxt, &child, result.get(),
15367 called_from_public_decl,
15369 } while (dwarf_siblingof(&child, &child) == 0);
15372 ctxt.scope_stack().pop();
// The union is not "work in progress" anymore: remove it from the map,
// after propagating the access specifier of the map entry if that entry
// is a member type.
15375 die_class_or_union_map_type::const_iterator i =
15376 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
15377 if (i != ctxt.die_wip_classes_map(source).end())
15379 if (is_member_type(i->second))
15380 set_member_access_specifier(result,
15381 get_member_access_specifier(i->second));
15382 ctxt.die_wip_classes_map(source).erase(i);
15389 /// Build a qualified type from a DW_TAG_const_type,
15390 /// DW_TAG_volatile_type or DW_TAG_restrict_type DIE.
15392 /// @param ctxt the read context to consider.
15394 /// @param die the input DIE to read from.
15396 /// @param called_from_public_decl true if this function was called
15397 /// from a context where either a public function or a public variable
15398 /// is being built.
15400 /// @param where_offset the offset of the DIE where we are "logically"
15401 /// positioned at, in the DIE tree.  This is useful when @p die is
15402 /// e.g, DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
15405 /// @return the resulting qualified_type_def.
15406 static type_base_sptr
15407 build_qualified_type(read_context& ctxt,
15409 bool called_from_public_decl,
15410 size_t where_offset)
15412 type_base_sptr result;
// NOTE(review): get_die_source() is called from within ABG_ASSERT; this
// relies on ABG_ASSERT always evaluating its argument -- confirm.
15417 ABG_ASSERT(ctxt.get_die_source(die, source));
15419 unsigned tag = dwarf_tag(die);
15421 if (tag != DW_TAG_const_type
15422 && tag != DW_TAG_volatile_type
15423 && tag != DW_TAG_restrict_type)
// Get the underlying type, that is, the type being qualified.
15426 Dwarf_Die underlying_type_die;
15427 decl_base_sptr utype_decl;
15428 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
15429 // So, if no DW_AT_type is present, then this means (if we are
15430 // looking at a debug info emitted by GCC) that we are looking
15431 // at a qualified void type.
15432 utype_decl = build_ir_node_for_void_type(ctxt);
15435 utype_decl = is_decl(build_ir_node_from_die(ctxt, &underlying_type_die,
15436 called_from_public_decl,
15441 // The call to build_ir_node_from_die() could have triggered the
15442 // creation of the type for this DIE.  In that case, just return it.
15443 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
15446 ctxt.associate_die_to_type(die, result, where_offset);
15450 type_base_sptr utype = is_type(utype_decl);
// Map the tag of the DIE onto the matching CV qualifier.
15453 qualified_type_def::CV qual = qualified_type_def::CV_NONE;
15454 if (tag == DW_TAG_const_type)
15455 qual |= qualified_type_def::CV_CONST;
15456 else if (tag == DW_TAG_volatile_type)
15457 qual |= qualified_type_def::CV_VOLATILE;
15458 else if (tag == DW_TAG_restrict_type)
15459 qual |= qualified_type_def::CV_RESTRICT;
15461 ABG_ASSERT_NOT_REACHED;
// The qualified type is created with an empty location.
15464 result.reset(new qualified_type_def(utype, qual, location()));
15466 ctxt.associate_die_to_type(die, result, where_offset);
15471 /// Strip qualification from a qualified type, when it makes sense.
15473 /// DWARF constructs "const reference". This is redundant because a
15474 /// reference is always const. The issue is these redundant types then
15475 /// leak into the IR and make for bad diagnostics.
15477 /// This function thus strips the const qualifier from the type in
15478 /// that case. It might contain code to strip other cases like this
///
/// The cases currently handled are:
///   - a const-qualified reference: a new qualified type is built with
///     the CV_CONST bit cleared;
///   - a const-qualified void: the underlying void type is returned;
///   - a qualified array whose element type is already qualified: the
///     qualifiers of @p t are OR-ed into the element type's qualifiers
///     and the array type is returned;
///   - a qualified array whose element type is not qualified: the
///     element type is replaced by a new qualified type and the array
///     type is returned.
15481 /// @param t the type to strip const qualification from.
15483 /// @param ctxt the @ref read_context to use.
15485 /// @return the stripped type or just return @p t.
15486 static decl_base_sptr
15487 maybe_strip_qualification(const qualified_type_def_sptr t,
15488 read_context &ctxt)
15493 decl_base_sptr result = t;
15494 type_base_sptr u = t->get_underlying_type();
15495 environment* env = t->get_environment();
15497 if (t->get_cv_quals() & qualified_type_def::CV_CONST
15498 && (is_reference_type(u)))
15500 // Let's strip only the const qualifier. To do that, the "const"
15501 // qualified is turned into a no-op "none" qualified.
15502 result.reset(new qualified_type_def
15503 (u, t->get_cv_quals() & ~qualified_type_def::CV_CONST,
15504 t->get_location()));
15506 else if (t->get_cv_quals() & qualified_type_def::CV_CONST
15507 && env->is_void_type(u))
15509 // So this type is a "const void". Let's strip the "const"
15510 // qualifier out and make this just be "void", so that a "const
15511 // void" type and a "void" type compare equal after going through
15513 result = is_decl(u);
15515 else if (is_array_of_qualified_element(u))
15517 // In C and C++, a cv qualifiers of a qualified array apply to
15518 // the array element type. So the qualifiers of the array can
15519 // be dropped and applied to the element type.
15521 // Here, the element type is qualified already. So apply the
15522 // qualifiers of the array itself to the already qualified
15523 // element type and drop the array qualifiers.
15524 array_type_def_sptr array = is_array_type(u);
15525 qualified_type_def_sptr element_type =
15526 is_qualified_type(array->get_element_type());
// Note that the existing element type is modified in place here.
15527 qualified_type_def::CV quals = element_type->get_cv_quals();
15528 quals |= t->get_cv_quals();
15529 element_type->set_cv_quals(quals);
15530 result = is_decl(u);
15531 if (u->get_canonical_type()
15532 || element_type->get_canonical_type())
15533 // We shouldn't be editing types that were already
15534 // canonicalized. For those, canonicalization should be
15535 // delayed until after all editing is done.
15536 ABG_ASSERT_NOT_REACHED;
15538 else if (is_array_type(u) && !is_array_of_qualified_element(is_array_type(u)))
15540 // In C and C++, a cv qualifiers of a qualified array apply to
15541 // the array element type. So the qualifiers of the array can
15542 // be dropped and applied to the element type.
15544 // Here, the element type is not qualified. So apply the
15545 // qualifiers of the array itself to the element type and drop
15546 // the array qualifiers.
15547 array_type_def_sptr array = is_array_type(u);
15548 type_base_sptr element_type = array->get_element_type();
15549 qualified_type_def_sptr qual_type
15550 (new qualified_type_def(element_type,
15552 t->get_location()));
// The new qualified element type is added to the scope of the
// original element type, installed on the array, and scheduled for
// late canonicalization.
15553 add_decl_to_scope(qual_type, is_decl(element_type)->get_scope());
15554 array->set_element_type(qual_type);
15555 ctxt.schedule_type_for_late_canonicalization(is_type(qual_type));
15556 result = is_decl(u);
15557 if (u->get_canonical_type())
15558 // We shouldn't be editing types that were already
15559 // canonicalized. For those, canonicalization should be
15560 // delayed until after all editing is done.
15561 ABG_ASSERT_NOT_REACHED;
15567 /// Build a pointer type from a DW_TAG_pointer_type DIE.
15569 /// @param ctxt the read context to consider.
15571 /// @param die the DIE to read information from.
15573 /// @param called_from_public_decl true if this function was called
15574 /// from a context where either a public function or a public variable
15575 /// is being built.
15577 /// @param where_offset the offset of the DIE where we are "logically"
15578 /// positioned at, in the DIE tree. This is useful when @p die is
15579 /// e.g., DW_TAG_partial_unit that can be included in several places in
15582 /// @return the resulting pointer to pointer_type_def.
15583 static pointer_type_def_sptr
15584 build_pointer_type_def(read_context& ctxt,
15586 bool called_from_public_decl,
15587 size_t where_offset)
15589 pointer_type_def_sptr result;
15595 ABG_ASSERT(ctxt.get_die_source(die, source));
15597 unsigned tag = dwarf_tag(die);
15598 if (tag != DW_TAG_pointer_type)
15601 type_or_decl_base_sptr utype_decl;
15602 Dwarf_Die underlying_type_die;
15603 bool has_underlying_type_die = false;
15604 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
15605 // If the DW_AT_type attribute is missing, that means we are
15606 // looking at a pointer to "void".
15607 utype_decl = build_ir_node_for_void_type(ctxt);
15609 has_underlying_type_die = true;
// Build the pointed-to type from its DIE only when no void type was
// picked above and a DW_AT_type attribute was actually found.
15611 if (!utype_decl && has_underlying_type_die)
15612 utype_decl = build_ir_node_from_die(ctxt, &underlying_type_die,
15613 called_from_public_decl,
15618 // The call to build_ir_node_from_die() could have triggered the
15619 // creation of the type for this DIE. In that case, just return it.
15620 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
15622 result = is_pointer_type(t);
15623 ABG_ASSERT(result);
15627 type_base_sptr utype = is_type(utype_decl);
15630 // if the DIE for the pointer type doesn't have a byte_size
15631 // attribute then we assume the size of the pointer is the address
15632 // size of the current translation unit.
15633 uint64_t size = ctxt.cur_transl_unit()->get_address_size();
15634 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
15635 // The size as expressed by DW_AT_byte_size is in byte, so let's
15636 // convert it to bits.
15639 // And the size of the pointer must be the same as the address size
15640 // of the current translation unit.
15641 ABG_ASSERT((size_t) ctxt.cur_transl_unit()->get_address_size() == size);
// Create the pointer type (with an alignment of 0 and an empty
// location) and record it as the type built for this DIE.
15643 result.reset(new pointer_type_def(utype, size, /*alignment=*/0, location()));
15644 ABG_ASSERT(result->get_pointed_to_type());
15646 ctxt.associate_die_to_type(die, result, where_offset);
15650 /// Build a reference type from either a DW_TAG_reference_type or
15651 /// DW_TAG_rvalue_reference_type DIE.
15653 /// @param ctxt the read context to consider.
15655 /// @param die the DIE to read from.
15657 /// @param called_from_public_decl true if this function was called
15658 /// from a context where either a public function or a public variable
15659 /// is being built.
15661 /// @param where_offset the offset of the DIE where we are "logically"
15662 /// positioned at, in the DIE tree. This is useful when @p die is
15663 /// e.g., DW_TAG_partial_unit that can be included in several places in
15666 /// @return a pointer to the resulting reference_type_def.
15667 static reference_type_def_sptr
15668 build_reference_type(read_context& ctxt,
15670 bool called_from_public_decl,
15671 size_t where_offset)
15673 reference_type_def_sptr result;
15679 ABG_ASSERT(ctxt.get_die_source(die, source));
15681 unsigned tag = dwarf_tag(die);
15682 if (tag != DW_TAG_reference_type
15683 && tag != DW_TAG_rvalue_reference_type)
15686 Dwarf_Die underlying_type_die;
15687 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
15690 type_or_decl_base_sptr utype_decl =
15691 build_ir_node_from_die(ctxt, &underlying_type_die,
15692 called_from_public_decl,
15697 // The call to build_ir_node_from_die() could have triggered the
15698 // creation of the type for this DIE. In that case, just return it.
15699 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
15701 result = is_reference_type(t);
15702 ABG_ASSERT(result);
15706 type_base_sptr utype = is_type(utype_decl);
15709 // if the DIE for the reference type doesn't have a byte_size
15710 // attribute then we assume the size of the reference is the address
15711 // size of the current translation unit.
15712 uint64_t size = ctxt.cur_transl_unit()->get_address_size();
15713 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
15716 // And the size of the reference must be the same as the address size
15717 // of the current translation unit.
15718 ABG_ASSERT((size_t) ctxt.cur_transl_unit()->get_address_size() == size);
// DW_TAG_reference_type yields an lvalue reference; the only other
// tag accepted above is DW_TAG_rvalue_reference_type.
15720 bool is_lvalue = (tag == DW_TAG_reference_type) ? true : false;
15722 result.reset(new reference_type_def(utype, is_lvalue, size,
// If a corpus is currently being built, look the new reference type
// up in it.
15725 if (corpus_sptr corp = ctxt.current_corpus())
15726 if (reference_type_def_sptr t = lookup_reference_type(*result, *corp))
15728 ctxt.associate_die_to_type(die, result, where_offset);
15732 /// Build a subroutine type from a DW_TAG_subroutine_type DIE.
///
/// Note that a DW_TAG_subprogram DIE is accepted as well.
15734 /// @param ctxt the read context to consider.
15736 /// @param die the DIE to read from.
15738 /// @param is_method points to a class or union declaration iff we're
15739 /// building the type for a method. This is the enclosing class or
15740 /// union of the method.
15742 /// @param where_offset the offset of the DIE where we are "logically"
15743 /// positioned at, in the DIE tree. This is useful when @p die is
15744 /// e.g., DW_TAG_partial_unit that can be included in several places in
15747 /// @return a pointer to the resulting function_type_sptr.
15748 static function_type_sptr
15749 build_function_type(read_context& ctxt,
15751 class_or_union_sptr is_method,
15752 size_t where_offset)
15754 function_type_sptr result;
15759 ABG_ASSERT(dwarf_tag(die) == DW_TAG_subroutine_type
15760 || dwarf_tag(die) == DW_TAG_subprogram);
15763 ABG_ASSERT(ctxt.get_die_source(die, source));
15765 decl_base_sptr type_decl;
15767 translation_unit_sptr tu = ctxt.cur_transl_unit();
15770 /// If, inside the current translation unit, we've already seen a
15771 /// function type with the same text representation, then reuse that
15773 if (type_base_sptr t = ctxt.lookup_fn_type_from_die_repr_per_tu(die))
15775 result = is_function_type(t);
15776 ABG_ASSERT(result);
15777 ctxt.associate_die_to_type(die, result, where_offset);
15781 bool odr_is_relevant = ctxt.odr_is_relevant(die);
15782 if (odr_is_relevant)
15784 // So we can rely on the One Definition Rule to say that if
15785 // several different function types have the same name (or
15786 // rather, representation) across the entire binary, then they
15787 // ought to designate the same function type. So let's ensure
15788 // that if we've already seen a function type with the same
15789 // representation as the function type 'die', then it's the same
15790 // type as the one denoted by 'die'.
15791 if (function_type_sptr fn_type =
15792 is_function_type(ctxt.lookup_type_artifact_from_die(die)))
15794 ctxt.associate_die_to_type(die, fn_type, where_offset);
15799 // Let's look at the DIE to detect if it's the DIE for a method
15800 // (type). If it is, we can deduce the name of its enclosing class
15801 // and if it's a static or const.
15802 bool is_const = false;
15803 bool is_static = false;
15804 Dwarf_Die object_pointer_die;
15805 Dwarf_Die class_type_die;
15806 bool has_this_parm_die =
15807 die_function_type_is_method_type(ctxt, die, where_offset,
15808 object_pointer_die,
15811 if (has_this_parm_die)
15813 // The function (type) has a "this" parameter DIE. It means it's
15814 // a member function DIE.
15816 if (die_object_pointer_is_for_const_method(&object_pointer_die))
15821 // We were initially called as if the function represented
15822 // by DIE was *NOT* a member function. But now we know it's
15823 // a member function. Let's take that into account.
15824 class_or_union_sptr klass_type =
15825 is_class_or_union_type(build_ir_node_from_die(ctxt, &class_type_die,
15826 /*called_from_pub_decl=*/true,
15828 ABG_ASSERT(klass_type);
15829 is_method = klass_type;
15833 // Let's create the type early and record it as being for the DIE
15834 // 'die'. This way, when building the sub-type triggers the
15835 // creation of a type matching the same 'die', then we'll reuse this
15838 result.reset(is_method
15839 ? new method_type(is_method, is_const,
15840 tu->get_address_size(),
15842 : new function_type(ctxt.env(), tu->get_address_size(),
// Register the not-yet-complete type: as the type of this DIE, in the
// map of function types being built, and by DIE representation for
// the current translation unit.
15844 ctxt.associate_die_to_type(die, result, where_offset);
15845 ctxt.die_wip_function_types_map(source)[dwarf_dieoffset(die)] = result;
15846 ctxt.associate_die_repr_to_fn_type_per_tu(die, result);
// The return type comes from DW_AT_type when present, and is the
// void type otherwise.
15848 type_base_sptr return_type;
15849 Dwarf_Die ret_type_die;
15850 if (die_die_attribute(die, DW_AT_type, ret_type_die))
15852 is_type(build_ir_node_from_die(ctxt, &ret_type_die,
15853 /*called_from_public_decl=*/true,
15856 return_type = is_type(build_ir_node_for_void_type(ctxt));
15857 result->set_return_type(return_type);
// Collect the parameters described by the children of the DIE.
15860 function_decl::parameters function_parms;
15862 if (dwarf_child(die, &child) == 0)
15865 int child_tag = dwarf_tag(&child);
15866 if (child_tag == DW_TAG_formal_parameter)
15868 // This is a "normal" function parameter.
15869 string name, linkage_name;
15871 die_loc_and_name(ctxt, &child, loc, name, linkage_name);
15872 if (!tools_utils::string_is_ascii_identifier(name))
15873 // Sometimes, bogus compiler emit names that are
15874 // non-ascii garbage. Let's just ditch that for now.
15876 bool is_artificial = die_is_artificial(&child);
15877 type_base_sptr parm_type;
15878 Dwarf_Die parm_type_die;
15879 if (die_die_attribute(&child, DW_AT_type, parm_type_die))
15881 is_type(build_ir_node_from_die(ctxt, &parm_type_die,
15882 /*called_from_public_decl=*/true,
15886 function_decl::parameter_sptr p
15887 (new function_decl::parameter(parm_type, name, loc,
15888 /*variadic_marker=*/false,
15890 function_parms.push_back(p);
15892 else if (child_tag == DW_TAG_unspecified_parameters)
15894 // This is a variadic function parameter.
15895 bool is_artificial = die_is_artificial(&child);
15896 ir::environment* env = ctxt.env();
15898 type_base_sptr parm_type = env->get_variadic_parameter_type();
15899 function_decl::parameter_sptr p
15900 (new function_decl::parameter(parm_type,
15903 /*variadic_marker=*/true,
15905 function_parms.push_back(p);
15906 // After a DW_TAG_unspecified_parameters tag, we shouldn't
15907 // keep reading for parameters. The
15908 // unspecified_parameters TAG should be the last parameter
15909 // that we record. For instance, if there are multiple
15910 // DW_TAG_unspecified_parameters DIEs then we should care
15911 // only for the first one.
15915 while (dwarf_siblingof(&child, &child) == 0);
15917 result->set_parameters(function_parms);
15919 tu->bind_function_type_life_time(result);
// The type has now received its return type and parameters, so drop
// it from the map of function types being built.
15922 die_function_type_map_type::const_iterator i =
15923 ctxt.die_wip_function_types_map(source).
15924 find(dwarf_dieoffset(die));
15925 if (i != ctxt.die_wip_function_types_map(source).end())
15926 ctxt.die_wip_function_types_map(source).erase(i);
15929 maybe_canonicalize_type(result, ctxt);
15933 /// Build a subrange type from a DW_TAG_subrange_type.
15935 /// @param ctxt the read context to consider.
15937 /// @param die the DIE to read from.
15939 /// @param where_offset the offset of the DIE where we are "logically"
15940 /// positioned at in the DIE tree. This is useful when @p die is
15941 /// e.g., DW_TAG_partial_unit that can be included in several places in
15944 /// @param associate_type_to_die if this is true then the resulting
15945 /// type is associated to the @p die, so that next time when the
15946 /// system looks up the type associated to it, the current resulting
15947 /// type is returned. If false, then no association is done and the
15948 /// resulting type can be destroyed right after. This can be useful
15949 /// when the sole purpose of building the @ref
15950 /// array_type_def::subrange_type is to use some of its method like,
15951 /// e.g., its name pretty printing methods.
15953 /// @return the newly built instance of @ref
15954 /// array_type_def::subrange_type, or nil if no type could be built.
15955 static array_type_def::subrange_sptr
15956 build_subrange_type(read_context& ctxt,
15957 const Dwarf_Die* die,
15958 size_t where_offset,
15959 bool associate_type_to_die)
15961 array_type_def::subrange_sptr result;
15967 ABG_ASSERT(ctxt.get_die_source(die, source));
15969 unsigned tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
15970 if (tag != DW_TAG_subrange_type)
15973 string name = die_name(die);
// The lower bound starts at the default value for the language of the
// current translation unit; a DW_AT_lower_bound attribute is read
// into it below.
15975 translation_unit::language language = ctxt.cur_transl_unit()->get_language();
15976 array_type_def::subrange_type::bound_value lower_bound =
15977 get_default_array_lower_bound(language);
15978 array_type_def::subrange_type::bound_value upper_bound;
15979 uint64_t count = 0;
15980 bool is_infinite = false;
15982 // The DWARF 4 specifications says, in [5.11 Subrange
15985 // The subrange entry may have the attributes
15986 // DW_AT_lower_bound and DW_AT_upper_bound to
15987 // specify, respectively, the lower and upper bound
15988 // values of the subrange.
15990 // So let's look for DW_AT_lower_bound first.
15991 die_constant_attribute(die, DW_AT_lower_bound, lower_bound);
15993 // Then, DW_AT_upper_bound.
15994 if (!die_constant_attribute(die, DW_AT_upper_bound, upper_bound))
15996 // The DWARF 4 spec says, in [5.11 Subrange Type
15999 // The DW_AT_upper_bound attribute may be replaced
16000 // by a DW_AT_count attribute, whose value
16001 // describes the number of elements in the
16002 // subrange rather than the value of the last
16005 // So, as DW_AT_upper_bound is not present in this
16006 // case, let's see if there is a DW_AT_count.
16007 die_unsigned_constant_attribute(die, DW_AT_count, count);
16009 // We can deduce the upper_bound from the
16010 // lower_bound and the number of elements of the
// Note that upper_bound is left untouched when lower_bound + count
// is zero.
16012 if (int64_t u = lower_bound.get_signed_value() + count)
16013 upper_bound = u - 1;
16015 if (upper_bound.get_unsigned_value() == 0 && count == 0)
16016 // No upper_bound nor count was present on the DIE, this means
16017 // the array is considered to have an infinite (or rather not
16019 is_infinite = true;
16022 if (UINT64_MAX == upper_bound.get_unsigned_value())
16024 // If the upper_bound size is the max of the integer value, then
16025 // it most certainly means infinite size.
16026 is_infinite = true;
16027 upper_bound.set_unsigned(0);
16031 (new array_type_def::subrange_type(ctxt.env(),
16036 result->is_infinite(is_infinite);
16038 // load the underlying type.
16039 Dwarf_Die underlying_type_die;
16040 type_base_sptr underlying_type;
16041 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
16043 is_type(build_ir_node_from_die(ctxt,
16044 &underlying_type_die,
16045 /*called_from_public_decl=*/true,
16048 if (underlying_type)
16049 result->set_underlying_type(underlying_type);
// Only record the DIE -> type association when the caller asked for it.
16051 if (associate_type_to_die)
16052 ctxt.associate_die_to_type(die, result, where_offset);
16057 /// Build the sub-ranges of an array type.
16059 /// This is a sub-routine of build_array_type().
16061 /// @param ctxt the context to read from.
16063 /// @param die the DIE of tag DW_TAG_array_type which contains
16064 /// children DIEs that represent the sub-ranges.
16066 /// @param subranges out parameter. This is set to the sub-ranges
16067 /// that are built from @p die.
16069 /// @param where_offset the offset of the DIE where we are "logically"
16070 /// positioned at, in the DIE tree. This is useful when @p die is
16071 /// e.g., DW_TAG_partial_unit that can be included in several places in
///
/// @param associate_type_to_die if true, each sub-range is built with
/// build_ir_node_from_die(); otherwise it is built with
/// build_subrange_type() and is not associated to its DIE.
16074 build_subranges_from_array_type_die(read_context& ctxt,
16075 const Dwarf_Die* die,
16076 array_type_def::subranges_type& subranges,
16077 size_t where_offset,
16078 bool associate_type_to_die)
16082 if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
16086 int child_tag = dwarf_tag(&child);
// Only children with the DW_TAG_subrange_type tag contribute a
// sub-range.
16087 if (child_tag == DW_TAG_subrange_type)
16089 array_type_def::subrange_sptr s;
16090 if (associate_type_to_die)
16092 // We are being called to create the type, add it to
16093 // the current type graph and associate it to the
16094 // DIE it's been created from.
16095 type_or_decl_base_sptr t =
16096 build_ir_node_from_die(ctxt, &child,
16097 /*called_from_public_decl=*/true,
16099 s = is_subrange_type(t);
16102 // We are being called to create the type but *NOT*
16103 // add it to the current type tree, *NOR* associate
16104 // it to the DIE it's been created from.
16105 s = build_subrange_type(ctxt, &child,
16107 /*associate_type_to_die=*/false);
16109 subranges.push_back(s);
16112 while (dwarf_siblingof(&child, &child) == 0);
16116 /// Build an array type from a DW_TAG_array_type DIE.
16118 /// @param ctxt the read context to consider.
16120 /// @param die the DIE to read from.
16122 /// @param called_from_public_decl true if this function was called
16123 /// from a context where either a public function or a public variable
16124 /// is being built.
16126 /// @param where_offset the offset of the DIE where we are "logically"
16127 /// positioned at, in the DIE tree. This is useful when @p die is
16128 /// e.g., DW_TAG_partial_unit that can be included in several places in
16131 /// @return a pointer to the resulting array_type_def.
16132 static array_type_def_sptr
16133 build_array_type(read_context& ctxt,
16135 bool called_from_public_decl,
16136 size_t where_offset)
16138 array_type_def_sptr result;
16144 ABG_ASSERT(ctxt.get_die_source(die, source));
16146 unsigned tag = dwarf_tag(die);
16147 if (tag != DW_TAG_array_type)
// The element type of the array is designated by the DW_AT_type
// attribute of the DIE.
16150 decl_base_sptr type_decl;
16151 Dwarf_Die type_die;
16153 if (die_die_attribute(die, DW_AT_type, type_die))
16154 type_decl = is_decl(build_ir_node_from_die(ctxt, &type_die,
16155 called_from_public_decl,
16160 // The call to build_ir_node_from_die() could have triggered the
16161 // creation of the type for this DIE. In that case, just return it.
16162 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
16164 result = is_array_type(t);
16165 ABG_ASSERT(result);
16169 type_base_sptr type = is_type(type_decl);
// Build the sub-ranges from the children of the DIE, then create the
// array type out of the element type and these sub-ranges.
16172 array_type_def::subranges_type subranges;
16174 build_subranges_from_array_type_die(ctxt, die, subranges, where_offset);
16176 result.reset(new array_type_def(type, subranges, location()));
16181 /// Create a typedef_decl from a DW_TAG_typedef DIE.
16183 /// @param ctxt the read context to consider.
16185 /// @param die the DIE to read from.
16187 /// @param called_from_public_decl true if this function was called
16188 /// from a context where either a public function or a public variable
16189 /// is being built.
16191 /// @param where_offset the offset of the DIE where we are "logically"
16192 /// positioned at, in the DIE tree. This is useful when @p die is
16193 /// e.g., DW_TAG_partial_unit that can be included in several places in
16196 /// @return the newly created typedef_decl.
16197 static typedef_decl_sptr
16198 build_typedef_type(read_context& ctxt,
16200 bool called_from_public_decl,
16201 size_t where_offset)
16203 typedef_decl_sptr result;
16209 ABG_ASSERT(ctxt.get_die_source(die, source));
16211 unsigned tag = dwarf_tag(die);
16212 if (tag != DW_TAG_typedef)
16215 string name, linkage_name;
16217 die_loc_and_name(ctxt, die, loc, name, linkage_name);
// First, try to find an already existing typedef: either by its
// location in the corpus group to reuse types from, or among the
// artifacts already associated to this DIE.
16219 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
16221 result = lookup_typedef_type_per_location(loc.expand(), *corp);
16223 if (!ctxt.odr_is_relevant(die))
16224 if (typedef_decl_sptr t = is_typedef(ctxt.lookup_artifact_from_die(die)))
16229 type_base_sptr utype;
16230 Dwarf_Die underlying_type_die;
16231 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
16232 // A typedef DIE with no underlying type means a typedef to
16234 utype = ctxt.env()->get_void_type();
16238 is_type(build_ir_node_from_die(ctxt,
16239 &underlying_type_die,
16240 called_from_public_decl,
16245 // The call to build_ir_node_from_die() could have triggered the
16246 // creation of the type for this DIE. In that case, just return
16248 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
16250 result = is_typedef(t);
16251 ABG_ASSERT(result);
16256 result.reset(new typedef_decl(name, utype, loc, linkage_name));
// An anonymous class used as the underlying type gets this typedef as
// its naming typedef.
16258 if (class_decl_sptr klass = is_class_type(utype))
16259 if (is_anonymous_type(klass))
16260 klass->set_naming_typedef(result);
16263 ctxt.associate_die_to_type(die, result, where_offset);
16268 /// Build a @ref var_decl out of a DW_TAG_variable DIE if the variable
16269 /// denoted by the DIE is not suppressed by a suppression
16270 /// specification associated to the current read context.
16272 /// Note that if a member variable declaration with the same name as
16273 /// the name of the DIE we are looking at exists, this function returns
16274 /// that existing variable declaration.
16276 /// @param ctxt the read context to use.
///
/// @param scope the scope of the variable. It is used for the
/// suppression check and, when it is a class, it is searched for an
/// existing data member named like the DIE.
16278 /// @param die the DIE representing the variable we are looking at.
16280 /// @param where_offset the offset of the DIE where we are "logically"
16281 /// positioned at, in the DIE tree. This is useful when @p die is
16282 /// e.g., DW_TAG_partial_unit that can be included in several places in
16285 /// @param result if this is set to an existing var_decl, this means
16286 /// that the function will append the new properties it sees on @p die
16287 /// to that existing var_decl. Otherwise, if this parameter is NULL, a
16288 /// new var_decl is going to be allocated and returned.
16290 /// @param is_required_decl_spec this is true iff the variable to
16291 /// build is referred to as being the specification of another
16294 /// @return a pointer to the newly created var_decl. If the var_decl
16295 /// could not be built, this function returns NULL.
16296 static var_decl_sptr
16297 build_or_get_var_decl_if_not_suppressed(read_context& ctxt,
16300 size_t where_offset,
16301 var_decl_sptr result,
16302 bool is_required_decl_spec)
16305 if (variable_is_suppressed(ctxt, scope, die, is_required_decl_spec))
// When the scope is a class, look for an existing data member that
// has the name of the DIE.
16308 if (class_decl* class_type = is_class_type(scope))
16310 string var_name = die_name(die);
16311 if (!var_name.empty())
16312 if ((var = class_type->find_data_member(var_name)))
16315 var = build_var_decl(ctxt, die, where_offset, result);
16319 /// Create a variable symbol with a given name.
///
/// The symbol is created as a defined, non-common, global object
/// symbol with default visibility, an empty version, and an index and
/// a size of zero.
16321 /// @param sym_name the name of the variable symbol.
16323 /// @param env the environment to create the default symbol in.
16325 /// @return the newly created symbol.
16326 static elf_symbol_sptr
16327 create_default_var_sym(const string& sym_name, const environment *env)
16329 elf_symbol::version ver;
16330 elf_symbol::visibility vis = elf_symbol::DEFAULT_VISIBILITY;
16331 elf_symbol_sptr result =
16332 elf_symbol::create(env,
16333 /*symbol index=*/ 0,
16334 /*symbol size=*/ 0,
16336 /*symbol type=*/ elf_symbol::OBJECT_TYPE,
16337 /*symbol binding=*/ elf_symbol::GLOBAL_BINDING,
16338 /*symbol is defined=*/ true,
16339 /*symbol is common=*/ false,
16340 /*symbol version=*/ ver,
16341 /*symbol_visibility=*/vis,
16342 /*is_linux_string_cst=*/false);
16346 /// Build a @ref var_decl out of a DW_TAG_variable or DW_TAG_member DIE.
16348 /// @param ctxt the read context to use.
16350 /// @param die the DIE representing the variable we are looking at.
16352 /// @param where_offset the offset of the DIE where we are "logically"
16353 /// positioned at, in the DIE tree. This is useful when @p die is
16354 /// e.g., DW_TAG_partial_unit that can be included in several places in
16357 /// @param result if this is set to an existing var_decl, this means
16358 /// that the function will append the new properties it sees on @p die
16359 /// to that existing var_decl. Otherwise, if this parameter is NULL, a
16360 /// new var_decl is going to be allocated and returned.
16362 /// @return a pointer to the newly created var_decl. If the var_decl
16363 /// could not be built, this function returns NULL.
16364 static var_decl_sptr
16365 build_var_decl(read_context& ctxt,
16367 size_t where_offset,
16368 var_decl_sptr result)
16373 int tag = dwarf_tag(die);
16374 ABG_ASSERT(tag == DW_TAG_variable || tag == DW_TAG_member);
16376 if (!die_is_public_decl(die))
16380 ABG_ASSERT(ctxt.get_die_source(die, source));
// The type of the variable is designated by the DW_AT_type attribute
// of the DIE.
16382 type_base_sptr type;
16383 Dwarf_Die type_die;
16384 if (die_die_attribute(die, DW_AT_type, type_die))
16386 decl_base_sptr ty =
16387 is_decl(build_ir_node_from_die(ctxt, &type_die,
16388 /*called_from_public_decl=*/true,
16392 type = is_type(ty);
16399 string name, linkage_name;
16401 die_loc_and_name(ctxt, die, loc, name, linkage_name);
16404 result.reset(new var_decl(name, type, loc, linkage_name));
16407 // We were called to append properties that might have been
16408 // missing from the first version of the variable. And usually
16409 // that missing property is the mangled name.
16410 if (!linkage_name.empty())
16411 result->set_linkage_name(linkage_name);
16414 // Check if a variable symbol with this name is exported by the elf
16415 // binary. If it is, then set the symbol of the variable, if it's
16416 // not set already.
16417 if (!result->get_symbol())
16419 elf_symbol_sptr var_sym;
16420 if (get_ignore_symbol_table(ctxt))
// The symbol table is being ignored, so create a default symbol
// named after the linkage name of the variable, or after its name if
// the linkage name is empty, and record it in the variable symbols
// map of the context.
16423 result->get_linkage_name().empty()
16424 ? result->get_name()
16425 : result->get_linkage_name();
16427 var_sym = create_default_var_sym(var_name, ctxt.env());
16428 ABG_ASSERT(var_sym);
16429 add_symbol_to_map(var_sym, ctxt.var_syms());
16433 Dwarf_Addr var_addr;
16434 if (ctxt.get_variable_address(die, var_addr))
16435 var_sym = ctxt.variable_symbol_is_exported(var_addr);
16440 result->set_symbol(var_sym);
16441 // If the linkage name is not set or is wrong, set it to
16442 // the name of the underlying symbol.
16443 string linkage_name = result->get_linkage_name();
16444 if (linkage_name.empty()
16445 || !var_sym->get_alias_from_name(linkage_name))
16446 result->set_linkage_name(var_sym->get_name());
16447 result->set_is_in_public_symbol_table(true);
16454 /// Test if a given function denoted by its DIE and its scope is
16455 /// suppressed by any of the suppression specifications associated to
16456 /// a given context of ELF/DWARF reading.
16458 /// Note that a non-member function which symbol is not exported is
16459 /// also suppressed.
16461 /// @param ctxt the ELF/DWARF reading context of interest.
16463 /// @param scope the scope of the function.
16465 /// @param function_die the DIE representing the function.
16467 /// @return true iff @p function_die is suppressed by at least one
16468 /// suppression specification attached to the @p ctxt.
16470 function_is_suppressed(const read_context& ctxt,
16471 const scope_decl* scope,
16472 Dwarf_Die *function_die)
16474 if (function_die == 0
16475 || dwarf_tag(function_die) != DW_TAG_subprogram)
// The suppression check is done on the name of the function,
// qualified with the name of its scope.
16478 string fname = die_string_attribute(function_die, DW_AT_name);
16479 string flinkage_name = die_linkage_name(function_die);
16480 string qualified_name = build_qualified_name(scope, fname);
16482 // A non-member function which symbol is not exported is suppressed.
// Note that this only concerns DIEs that are not declaration-only.
16483 if (!is_class_type(scope) && !die_is_declaration_only(function_die))
16485 Dwarf_Addr fn_addr;
16486 elf_symbol_sptr fn_sym;
16487 if (!ctxt.get_function_address(function_die, fn_addr))
16489 if (!get_ignore_symbol_table(ctxt))
16491 // We were not instructed to ignore (avoid loading) the
16492 // symbol table, so we can rely on its presence to see if
16493 // the address corresponds to the address of an exported
16494 // function symbol.
16495 if (!ctxt.function_symbol_is_exported(fn_addr))
16500 return suppr::function_is_suppressed(ctxt, qualified_name,
16502 /*require_drop_property=*/true);
16505 /// Build a @ref function_decl out of a DW_TAG_subprogram DIE if the
16506 /// function denoted by the DIE is not suppressed by a suppression
16507 /// specification associated to the current read context.
///
16509 /// Note that if a function declaration is already associated with
16510 /// the DIE we are looking at (it is looked up from @p fn_die), this
16511 /// function finishes reading that existing declaration instead.
///
16513 /// @param ctxt the read context to use.
///
16515 /// @param scope the scope of the function we are looking at.
///
16517 /// @param fn_die the DIE representing the function we are looking at.
///
16519 /// @param where_offset the offset of the DIE where we are "logically"
16520 /// positioned at, in the DIE tree.  This is useful when @p fn_die is
16521 /// e.g., in a DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
///
16524 /// @param result if this is set to an existing function_decl, this
16525 /// means that the function will append the new properties it sees on
16526 /// @p fn_die to that existing function_decl.  Otherwise, if this
16527 /// parameter is NULL, a new function_decl is going to be allocated.
///
16530 /// @return a pointer to the resulting function_decl.  If the function_decl
16531 /// could not be built, this function returns NULL.
16532 static function_decl_sptr
16533 build_or_get_fn_decl_if_not_suppressed(read_context& ctxt,
16536 size_t where_offset,
16537 function_decl_sptr result)
16539 function_decl_sptr fn;
// Ask the suppression machinery first.
16540 if (function_is_suppressed(ctxt, scope, fn_die))
// A function_decl is already known for this DIE: finish reading it
// and (re-)associate the DIE with the decl and with its type.
16544 if ((fn = is_function_decl(ctxt.lookup_artifact_from_die(fn_die))))
16546 fn = maybe_finish_function_decl_reading(ctxt, fn_die, where_offset, fn);
16547 ctxt.associate_die_to_decl(fn_die, fn, /*do_associate_by_repr=*/true);
16548 ctxt.associate_die_to_type(fn_die, fn->get_type(), where_offset);
// Build the function_decl from the DIE, passing @p result along so that
// an existing declaration can be completed.
16552 fn = build_function_decl(ctxt, fn_die, where_offset, result);
16557 /// Test if a given variable, denoted by its DIE and its scope, is
16558 /// suppressed by any of the suppression specifications associated to
16559 /// a given context of ELF/DWARF reading.
///
16561 /// @param ctxt the ELF/DWARF reading context of interest.
///
16563 /// @param scope the scope of the variable.
///
16565 /// @param variable_die the DIE representing the variable.
///
16567 /// @param is_required_decl_spec if true, means that the @p
16568 /// variable_die being considered is for a variable decl that is a
16569 /// specification for a concrete variable being built.
///
16571 /// @return true iff @p variable_die is suppressed by at least one
16572 /// suppression specification attached to the @p ctxt.
16574 variable_is_suppressed(const read_context& ctxt,
16575 const scope_decl* scope,
16576 Dwarf_Die *variable_die,
16577 bool is_required_decl_spec)
// Guard on a null DIE or on a DIE that is neither a DW_TAG_variable
// nor a DW_TAG_member.
16579 if (variable_die == 0
16580 || (dwarf_tag(variable_die) != DW_TAG_variable
16581 && dwarf_tag(variable_die) != DW_TAG_member))
// Gather the plain name, the linkage name and the scope-qualified
// name of the variable.
16584 string name = die_string_attribute(variable_die, DW_AT_name);
16585 string linkage_name = die_linkage_name(variable_die);
16586 string qualified_name = build_qualified_name(scope, name);
16588 // If a non-member variable that is a declaration (has no exported
16589 // symbol) is not the specification of another concrete variable,
16590 // then it's suppressed.  This is a size optimization; it removes
16591 // useless declaration-only variables from the IR.
//
16593 // Otherwise, if a non-member variable is the specification of
16594 // another concrete variable, then this function looks at the
16595 // suppression specifications to know if it is suppressed.
16597 if (!is_class_type(scope) && !is_required_decl_spec)
16599 Dwarf_Addr var_addr = 0;
// NOTE(review): var_sym is not referenced by any other visible line of
// this function -- possibly an unused local; confirm.
16600 elf_symbol_sptr var_sym;
16601 if (!ctxt.get_variable_address(variable_die, var_addr))
16603 if (!get_ignore_symbol_table(ctxt))
16605 // We were not instructed to ignore (avoid loading) the
16606 // symbol table, so we can rely on its presence to see if
16607 // the address corresponds to the address of an exported
16608 // variable symbol.
16609 if (!ctxt.variable_symbol_is_exported(var_addr))
// Finally, consult the suppression specifications of the context,
// requiring the "drop" property.
16614 return suppr::variable_is_suppressed(ctxt, qualified_name,
16616 /*require_drop_property=*/true);
16619 /// Test if a type (designated by a given DIE) in a given scope is
16620 /// suppressed by the suppression specifications that are associated
16621 /// to a given read context.
///
16623 /// @param ctxt the read context to consider.
///
16625 /// @param scope the scope of the type DIE to consider.
///
16627 /// @param type_die the DIE that designates the type to consider.
///
16629 /// @param type_is_private out parameter.  If this function returns
16630 /// true (the type @p type_die is suppressed) and if the type was
16631 /// suppressed because it's private, then this parameter is presumably set to
/// true.  NOTE(review): the end of the original sentence and the
/// argument lines of the final call are not visible here -- confirm.
///
16634 /// @return true iff the type designated by the DIE @p type_die, in
16635 /// the scope @p scope, is suppressed by the suppression
16636 /// specifications associated to the current read context.
16638 type_is_suppressed(const read_context& ctxt,
16639 const scope_decl* scope,
16640 Dwarf_Die *type_die,
16641 bool &type_is_private)
// The visible part of the guard tests for a DIE that is not an
// enumeration, class, structure or union type.
16644 || (dwarf_tag(type_die) != DW_TAG_enumeration_type
16645 && dwarf_tag(type_die) != DW_TAG_class_type
16646 && dwarf_tag(type_die) != DW_TAG_structure_type
16647 && dwarf_tag(type_die) != DW_TAG_union_type))
// Read the location and names of the type, then qualify its name with
// the scope.
16650 string type_name, linkage_name;
16651 location type_location;
16652 die_loc_and_name(ctxt, type_die, type_location, type_name, linkage_name);
16653 string qualified_name = build_qualified_name(scope, type_name);
// Consult the suppression specifications of the context, requiring the
// "drop" property.
16655 return suppr::type_is_suppressed(ctxt, qualified_name,
16658 /*require_drop_property=*/true);
16661 /// Test if a type (designated by a given DIE) in a given scope is
16662 /// suppressed by the suppression specifications that are associated
16663 /// to a given read context.
///
/// This overload forwards to the one taking a 'type_is_private' out
/// parameter and discards the value of that flag.
///
16665 /// @param ctxt the read context to consider.
///
16667 /// @param scope the scope of the type DIE to consider.
///
16669 /// @param type_die the DIE that designates the type to consider.
///
16671 /// @return true iff the type designated by the DIE @p type_die, in
16672 /// the scope @p scope, is suppressed by the suppression
16673 /// specifications associated to the current read context.
16675 type_is_suppressed(const read_context& ctxt,
16676 const scope_decl* scope,
16677 Dwarf_Die *type_die)
// Throw-away flag; callers of this overload never see it.
16679 bool type_is_private = false;
16680 return type_is_suppressed(ctxt, scope, type_die, type_is_private);
16683 /// Get the opaque version of a type that was suppressed because it's
16684 /// a private type.
///
16686 /// The opaque version of the type is just a declared-only
16687 /// version of the type (class or union type) denoted by @p type_die.
/// As the TODO comments in the body point out, unions are not fully
/// handled yet.
///
16689 /// @param ctxt the read context in use.
///
16691 /// @param scope the scope of the type DIE we are looking at.
///
16693 /// @param type_die the type DIE we are looking at.
///
16695 /// @param where_offset the offset of the DIE where we are "logically"
16696 /// positioned at, in the DIE tree.  This is useful when @p type_die is
16697 /// e.g., in a DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
///
16700 /// @return the opaque version of the type denoted by @p type_die or
16701 /// nil if no opaque version was found.
16702 static class_or_union_sptr
16703 get_opaque_version_of_type(read_context &ctxt,
16705 Dwarf_Die *type_die,
16706 size_t where_offset)
16708 class_or_union_sptr result;
// Guard on DIEs that are not class, structure or union types.
16713 unsigned tag = dwarf_tag(type_die);
16714 if (tag != DW_TAG_class_type
16715 && tag != DW_TAG_structure_type
16716 && tag != DW_TAG_union_type)
// Read the location and names of the type; a type without a location
// is tested for right after.
16719 string type_name, linkage_name;
16720 location type_location;
16721 die_loc_and_name(ctxt, type_die, type_location, type_name, linkage_name);
16722 if (!type_location)
16725 string qualified_name = build_qualified_name(scope, type_name);
16727 // TODO: also handle declaration-only unions.  To do that, we mostly
16728 // need to adapt add_or_update_union_type to make it schedule
16729 // declaration-only unions for resolution too.
//
// Look for declaration-only classes already recorded under the same
// qualified name, and pick the last one of them.
16730 string_classes_map::const_iterator i =
16731 ctxt.declaration_only_classes().find(qualified_name);
16732 if (i != ctxt.declaration_only_classes().end())
16733 result = i->second.back();
16737 if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
16739 // So we didn't find any pre-existing forward-declared-only
16740 // class for the class definition that we could return as an
16741 // opaque type.  So let's build one.
//
16743 // TODO: we need to be able to do this for unions too!
//
// The class is created with zero size and alignment, flagged as a
// struct when the DIE is a DW_TAG_structure_type, then marked
// declaration-only, added to @p scope, associated with the DIE and
// scheduled for declaration-only class resolution.
16744 class_decl_sptr klass(new class_decl(ctxt.env(), type_name,
16745 /*alignment=*/0, /*size=*/0,
16746 tag == DW_TAG_structure_type,
16748 decl_base::VISIBILITY_DEFAULT));
16749 klass->set_is_declaration_only(true);
16750 add_decl_to_scope(klass, scope);
16751 ctxt.associate_die_to_type(type_die, klass, where_offset);
16752 ctxt.maybe_schedule_declaration_only_class_for_resolution(klass);
16760 /// Create a function symbol with a given name.
///
/// The symbol is created with index 0, size 0, type FUNC_TYPE, global
/// binding, default visibility and a default-constructed version; it
/// is flagged as defined and not common.
///
16762 /// @param sym_name the name of the symbol to create.
///
16764 /// @param env the environment to create the symbol in.
///
16766 /// @return the newly created symbol.
16768 create_default_fn_sym(const string& sym_name, const environment *env)
// Default-constructed symbol version.
16770 elf_symbol::version ver;
16771 elf_symbol_sptr result =
16772 elf_symbol::create(env,
16773 /*symbol index=*/ 0,
16774 /*symbol size=*/ 0,
16776 /*symbol type=*/ elf_symbol::FUNC_TYPE,
16777 /*symbol binding=*/ elf_symbol::GLOBAL_BINDING,
16778 /*symbol is defined=*/ true,
16779 /*symbol is common=*/ false,
16780 /*symbol version=*/ ver,
16781 /*symbol visibility=*/elf_symbol::DEFAULT_VISIBILITY,
16782 /*symbol is linux string cst=*/false);
16786 /// Build a @ref function_decl out of a DW_TAG_subprogram DIE.
///
16788 /// @param ctxt the read context to use.
///
16790 /// @param die the DW_TAG_subprogram DIE to read from.
///
16792 /// @param where_offset the offset of the DIE where we are "logically"
16793 /// positioned at, in the DIE tree.  This is useful when @p die is
16794 /// e.g., in a DW_TAG_partial_unit that can be included in several places in
/// the DIE tree.
///
16797 /// @param fn an existing function_decl that 'result' is initialized from.
16798 /// Judging from the comments in the body, properties read from @p die are added to it -- TODO confirm.
///
/// @return presumably the function_decl that was built or updated; the
/// return statement is not visible in this extract -- TODO confirm.
16799 static function_decl_sptr
16800 build_function_decl(read_context& ctxt,
16802 size_t where_offset,
16803 function_decl_sptr fn)
16805 function_decl_sptr result = fn;
// This function only handles DW_TAG_subprogram DIEs whose source is
// known to the read context.
16808 ABG_ASSERT(dwarf_tag(die) == DW_TAG_subprogram);
16811 ABG_ASSERT(ctxt.get_die_source(die, source));
16813 if (!die_is_public_decl(die))
16816 translation_unit_sptr tu = ctxt.cur_transl_unit();
// Read the location, name and linkage name of the function.
16819 string fname, flinkage_name;
16821 die_loc_and_name(ctxt, die, floc, fname, flinkage_name);
// NOTE(review): is_inline holds a yes/no answer but is declared as
// size_t; 'bool' may be the intended type -- confirm against the
// return type of die_is_declared_inline.
16823 size_t is_inline = die_is_declared_inline(die);
// Non-null when the scope of the DIE is a class or union type.
16824 class_or_union_sptr is_method =
16825 is_class_or_union_type(get_scope_for_die(ctxt, die, true, where_offset));
16829 // Add the properties that might have been missing from the
16830 // first declaration of the function.  For now, it usually is
16831 // the mangled name that goes missing in the first declarations.
//
16833 // Also note that if 'fn' has just been cloned, the current
16834 // linkage name (of the current DIE) might be different from the
16835 // linkage name of 'fn'.  In that case, update the linkage name.
16837 if (!flinkage_name.empty()
16838 && result->get_linkage_name() != flinkage_name)
16839 result->set_linkage_name(flinkage_name);
// Only set the location if the function does not have one yet.
16841 if (!result->get_location())
16842 result->set_location(floc);
// Build the function type, then a method_decl or a plain function_decl
// depending on whether the scope is a class or union.
16846 function_type_sptr fn_type(build_function_type(ctxt, die, is_method,
16851 result.reset(is_method
16852 ? new method_decl(fname, fn_type,
16855 : new function_decl(fname, fn_type,
16860 // Set the symbol of the function.  If the linkage name is not set
16861 // or is wrong, set it to the name of the underlying symbol.
16862 if (!result->get_symbol())
16864 elf_symbol_sptr fn_sym;
16865 if (get_ignore_symbol_table(ctxt))
// The symbol table is ignored: create a default symbol named after
// the linkage name, or after the plain name if there is no linkage
// name, and record it in the function symbols map of the context.
16868 result->get_linkage_name().empty()
16869 ? result->get_name()
16870 : result->get_linkage_name();
16872 fn_sym = create_default_fn_sym(fn_name, ctxt.env());
16873 ABG_ASSERT(fn_sym);
16874 add_symbol_to_map(fn_sym, ctxt.fun_syms());
// Otherwise, get the symbol from the address of the function.
16878 Dwarf_Addr fn_addr;
16879 if (ctxt.get_function_address(die, fn_addr))
16880 fn_sym = ctxt.function_symbol_is_exported(fn_addr);
16885 result->set_symbol(fn_sym);
// Fall back to the symbol's own name when the linkage name is empty or
// is not the name of one of the aliases of the symbol.
16886 string linkage_name = result->get_linkage_name();
16887 if (linkage_name.empty()
16888 || !fn_sym->get_alias_from_name(linkage_name))
16889 result->set_linkage_name(fn_sym->get_name());
16890 result->set_is_in_public_symbol_table(true);
16894 ctxt.associate_die_to_type(die, result->get_type(), where_offset);
16896 size_t die_offset = dwarf_dieoffset(die);
16899 && is_member_function(fn)
16900 && get_member_function_is_virtual(fn)
16901 && !result->get_linkage_name().empty())
16902 // This function is a virtual member function which has its
16903 // linkage name *and* has its underlying symbol correctly set.
16904 // It thus doesn't need any fixup related to elf symbol.  So
16905 // remove it from the set of virtual member functions with linkage
16906 // names and no elf symbol that need to be fixed up.
16907 ctxt.die_function_decl_with_no_symbol_map().erase(die_offset);
16911 /// Add a set of addresses (representing function symbols) to a
16912 /// function symbol name -> symbol map.
///
16914 /// For a given symbol address, the function retrieves the name of the
16915 /// symbol as well as the symbol itself and inserts an entry {symbol
16916 /// name, symbol} into the symbol name -> symbol map.
///
16918 /// @param syms the set of symbol addresses to consider.
///
16920 /// @param map the map to populate.
///
16922 /// @param ctxt the context in which we are loading a given ELF file.
16924 add_fn_symbols_to_map(address_set_type& syms,
16925 string_elf_symbols_map_type& map,
16926 read_context& ctxt)
16928 for (address_set_type::iterator i = syms.begin(); i != syms.end(); ++i)
// Get the function symbol that lives at the current address.
16930 elf_symbol_sptr sym = ctxt.lookup_elf_fn_symbol_from_address(*i);
// The name of that symbol must already be known to the function
// symbols map of the context.
16932 string_elf_symbols_map_type::iterator it =
16933 ctxt.fun_syms().find(sym->get_name());
16934 ABG_ASSERT(it != ctxt.fun_syms().end());
16939 /// Add a symbol to a symbol map.
///
/// The map is keyed by symbol name; each entry holds the symbols
/// bearing that name.
///
16941 /// @param sym the symbol to add.
///
16943 /// @param map the symbol map to add the symbol into.
16945 add_symbol_to_map(const elf_symbol_sptr& sym,
16946 string_elf_symbols_map_type& map)
16951 string_elf_symbols_map_type::iterator it = map.find(sym->get_name());
// No entry for that name yet: create one holding just @p sym.
16952 if (it == map.end())
16955 syms.push_back(sym);
16956 map[sym->get_name()] = syms;
// An entry exists for that name: append @p sym to it.
16959 it->second.push_back(sym);
16962 /// Add a set of addresses (representing variable symbols) to a
16963 /// variable symbol name -> symbol map.
///
16965 /// For a given symbol address, the function retrieves the name of the
16966 /// symbol as well as the symbol itself and inserts an entry {symbol
16967 /// name, symbol} into the symbol name -> symbol map.
///
16969 /// @param syms the set of symbol addresses to consider.
///
16971 /// @param map the map to populate.
///
16973 /// @param ctxt the context in which we are loading a given ELF file.
16975 add_var_symbols_to_map(address_set_type& syms,
16976 string_elf_symbols_map_type& map,
16977 read_context& ctxt)
16979 for (address_set_type::iterator i = syms.begin(); i != syms.end(); ++i)
// Get the variable symbol that lives at the current address.
16981 elf_symbol_sptr sym = ctxt.lookup_elf_var_symbol_from_address(*i);
// The name of that symbol must already be known to the variable
// symbols map of the context.
16983 string_elf_symbols_map_type::iterator it =
16984 ctxt.var_syms().find(sym->get_name());
16985 ABG_ASSERT(it != ctxt.var_syms().end());
16990 /// Read all the @ref abigail::translation_unit possible from the debug info
16991 /// accessible through a DWARF Front End Library handle, and stuff
16992 /// them into a libabigail ABI Corpus.
///
/// The visible steps are: set up the corpus and its ELF-derived
/// properties, set its symbol maps, build the DIE -> parent maps,
/// build the IR of each compile unit, resolve declaration-only
/// classes, fix up functions that have no symbol, perform late type
/// canonicalization and finally sort functions and variables.
///
16994 /// @param ctxt the read context.
///
16996 /// @return a pointer to the resulting corpus, or NULL if the corpus
16997 /// could not be constructed.
16999 read_debug_info_into_corpus(read_context& ctxt)
17001 ctxt.clear_per_corpus_data();
// Create the corpus if the context does not have a current one yet.
17003 if (!ctxt.current_corpus())
17005 corpus_sptr corp (new corpus(ctxt.env(), ctxt.elf_path()));
17006 ctxt.current_corpus(corp);
17008 ctxt.env(corp->get_environment());
17011 // First set some mundane properties of the corpus gathered from the ELF file.
17013 ctxt.current_corpus()->set_path(ctxt.elf_path());
17014 if (ctxt.is_linux_kernel_binary())
17015 ctxt.current_corpus()->set_origin(corpus::LINUX_KERNEL_BINARY_ORIGIN);
17017 ctxt.current_corpus()->set_origin(corpus::DWARF_ORIGIN);
17018 ctxt.current_corpus()->set_soname(ctxt.dt_soname());
17019 ctxt.current_corpus()->set_needed(ctxt.dt_needed());
17020 ctxt.current_corpus()->set_architecture_name(ctxt.elf_architecture());
// If the context has a current corpus group, add the corpus to it.
17021 if (corpus_group_sptr group = ctxt.current_corpus_group())
17022 group->add_corpus(ctxt.current_corpus());
17024 // Set symbols information to the corpus.
17025 if (!get_ignore_symbol_table(ctxt))
17027 if (ctxt.load_in_linux_kernel_mode() && ctxt.is_linux_kernel_binary())
// Linux kernel binary loaded in kernel mode: the function and variable
// symbol maps of the corpus are built from the sets of exported and
// GPL-exported symbol addresses held by the context.
17029 string_elf_symbols_map_sptr exported_fn_symbols_map
17030 (new string_elf_symbols_map_type);
17031 add_fn_symbols_to_map(*ctxt.linux_exported_fn_syms(),
17032 *exported_fn_symbols_map,
17034 add_fn_symbols_to_map(*ctxt.linux_exported_gpl_fn_syms(),
17035 *exported_fn_symbols_map,
17037 ctxt.current_corpus()->set_fun_symbol_map(exported_fn_symbols_map);
17039 string_elf_symbols_map_sptr exported_var_symbols_map
17040 (new string_elf_symbols_map_type);
17041 add_var_symbols_to_map(*ctxt.linux_exported_var_syms(),
17042 *exported_var_symbols_map,
17044 add_var_symbols_to_map(*ctxt.linux_exported_gpl_var_syms(),
17045 *exported_var_symbols_map,
17047 ctxt.current_corpus()->set_var_symbol_map(exported_var_symbols_map);
// Use the symbol maps of the context as they are.
17051 ctxt.current_corpus()->set_fun_symbol_map(ctxt.fun_syms_sptr());
17052 ctxt.current_corpus()->set_var_symbol_map(ctxt.var_syms_sptr());
// Also record the undefined function and variable symbols.
17055 ctxt.current_corpus()->set_undefined_fun_symbol_map
17056 (ctxt.undefined_fun_syms_sptr());
17057 ctxt.current_corpus()->set_undefined_var_symbol_map
17058 (ctxt.undefined_var_syms_sptr());
// NOTE(review): the condition guarding the next two statements is not
// visible in this extract.
17062 ctxt.current_corpus()->set_fun_symbol_map(ctxt.fun_syms_sptr());
17063 ctxt.current_corpus()->set_var_symbol_map(ctxt.var_syms_sptr());
17066 // Get out now if no debug info is found.
17068 return ctxt.current_corpus();
// Filled in by dwarf_next_unit() in the loop over the units below.
17070 uint8_t address_size = 0;
17071 size_t header_size = 0;
17073 // Set the set of exported declarations that are defined.
17074 ctxt.exported_decls_builder
17075 (ctxt.current_corpus()->get_exported_decls_builder().get());
17077 // Walk all the DIEs of the debug info to build a DIE -> parent map
17078 // useful for get_die_parent() to work.
17080 tools_utils::timer t;
17083 cerr << "building die -> parent maps ...";
17087 ctxt.build_die_parent_maps();
17092 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17099 ctxt.env()->canonicalization_is_done(false);
17102 tools_utils::timer t;
17105 cerr << "building the libabigail internal representation ...";
17108 // And now walk all the DIEs again to build the libabigail IR.
17109 Dwarf_Half dwarf_version = 0;
17110 for (Dwarf_Off offset = 0, next_offset = 0;
17111 (dwarf_next_unit(ctxt.dwarf(), offset, &next_offset, &header_size,
17112 &dwarf_version, NULL, &address_size, NULL,
17114 offset = next_offset)
// The first DIE of the unit lies right after the unit header.
17116 Dwarf_Off die_offset = offset + header_size;
17118 if (!dwarf_offdie(ctxt.dwarf(), die_offset, &unit)
17119 || dwarf_tag(&unit) != DW_TAG_compile_unit)
17122 ctxt.dwarf_version(dwarf_version);
17126 // Build a translation_unit IR node from 'unit'; note that it must
17127 // be a DW_TAG_compile_unit DIE.
17128 translation_unit_sptr ir_node =
17129 build_translation_unit_and_add_to_ir(ctxt, &unit, address_size);
17130 ABG_ASSERT(ir_node);
17135 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17143 tools_utils::timer t;
17146 cerr << "resolving declaration only classes ...";
17149 ctxt.resolve_declaration_only_classes();
17153 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17161 tools_utils::timer t;
17164 cerr << "fixing up functions with linkage name but "
17165 << "no advertised underlying symbols ....";
17168 ctxt.fixup_functions_with_no_symbols();
17172 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17179 /// Now, look at the types that need to be canonicalized after the
17180 /// translation unit has been constructed (which is just now) and
17181 /// canonicalize them.
///
17183 /// These types need to be canonicalized at the end of the translation
17184 /// unit reading phase because some types are modified by some DIEs
17185 /// even after the principal DIE describing the type has been read;
17186 /// this happens for clones of virtual destructors (for instance) or
17187 /// even for some static data members.  We need to do that for types
17188 /// that are in the alternate debug info section and for types that are in
17189 /// the main debug info section.
17191 tools_utils::timer t;
17194 cerr << "perform late type canonicalizing ...\n";
17198 ctxt.perform_late_type_canonicalizing();
17202 cerr << "late type canonicalizing DONE@"
17203 << ctxt.current_corpus()->get_path()
17210 ctxt.env()->canonicalization_is_done(true);
17213 tools_utils::timer t;
17216 cerr << "sort functions and variables ...";
17219 ctxt.current_corpus()->sort_functions();
17220 ctxt.current_corpus()->sort_variables();
17224 cerr << " DONE@" << ctxt.current_corpus()->get_path()
17231 return ctxt.current_corpus();
17234 /// Canonicalize a type if it's suitable for early canonicalizing, or,
17235 /// if it's not, schedule it for late canonicalization, after the
17236 /// debug info of the current translation unit has been fully read.
///
17238 /// A (composite) type is deemed suitable for early canonicalizing iff
17239 /// all of its sub-types are canonicalized themselves.  Non composite
17240 /// types are always deemed suitable for early canonicalization.
///
17242 /// Note that this function doesn't work on *ANONYMOUS* classes,
17243 /// structs, unions or enums because it first does some
17244 /// canonicalization of the DWARF DIE @p die.  That canonicalization
17245 /// is done by looking up @p die by name; and because these are
17246 /// anonymous types, they don't have names! and so that
17247 /// canonicalization fails.  So the type artifact associated to @p
17248 /// die often ends up being *NOT* canonicalized.  This later leads to
17249 /// extreme slowness of operation, especially when comparisons are
17250 /// later performed on these anonymous types.
///
17252 /// So when you have classes, structs, unions, or enums that can be
17253 /// anonymous, please use this overload instead:
///
17256 ///     maybe_canonicalize_type(const Dwarf_Die* die,
17257 ///                             const type_base_sptr& t,
17258 ///                             read_context& ctxt);
///
17260 /// It knows how to deal with anonymous types.
///
17262 /// To recap: this function looks up the type artifact
17263 /// associated to @p die, and that lookup does not work well for
17264 /// anonymous types because they don't have names.
///
17266 /// @param die the type DIE to consider for canonicalization.  Note
17267 /// that this DIE must have been associated with its type using the
17268 /// function read_context::associate_die_to_type() prior to calling
/// this function.
///
17271 /// @param ctxt the @ref read_context to use.
17273 maybe_canonicalize_type(const Dwarf_Die *die, read_context& ctxt)
17276 ABG_ASSERT(ctxt.get_die_source(die, source));
// Get the offset of the DIE and the type associated to it.
17278 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
17279 type_base_sptr t = ctxt.lookup_type_from_die(die);
// Peel typedefs, pointers and references (but not qualifiers) off the
// type to look at what lies underneath.
17284 type_base_sptr peeled_type =
17285 peel_typedef_pointer_or_reference_type(t, /*peel_qual_types=*/false);
17286 if (is_class_type(peeled_type)
17287 || is_union_type(peeled_type)
17288 || is_function_type(peeled_type)
17289 || is_array_type(peeled_type)
17290 || is_qualified_type(peeled_type))
17291 // We delay canonicalization of classes/unions or typedef,
17292 // pointers, references and array to classes/unions.  This is
17293 // because the (underlying) class might not be finished yet and we
17294 // might not be able to detect it here (thinking about
17295 // classes that are work-in-progress, or classes that might be
17296 // later amended by some DWARF construct).  So we err on the safe
17297 // side.  We also delay canonicalization for array and qualified
17298 // types because they can be edited (in particular by
17299 // maybe_strip_qualification) after they are initially built.
// Function types are part of the condition above too.
17300 ctxt.schedule_type_for_late_canonicalization(die);
// Also delay function types whose DIE is still work-in-progress, and
// types that have a non-canonicalized sub-type.
17301 else if ((is_function_type(t)
17302 && ctxt.is_wip_function_type_die_offset(die_offset, source))
17303 || type_has_non_canonicalized_subtype(t))
17304 ctxt.schedule_type_for_late_canonicalization(die);
17309 /// Canonicalize a type if it's suitable for early canonicalizing, or,
17310 /// if it's not, schedule it for late canonicalization, after the
17311 /// debug info of the current translation unit has been fully read.
///
17313 /// A (composite) type is deemed suitable for early canonicalizing iff
17314 /// all of its sub-types are canonicalized themselves.  Non composite
17315 /// types are always deemed suitable for early canonicalization.
///
17317 /// Note that this function knows how to deal with anonymous classes,
17318 /// structs and enums, unlike this other overload:
///
17320 ///     void maybe_canonicalize_type(const Dwarf_Die *die, read_context& ctxt)
///
17322 /// The problem, though, is that this function is much slower than that
17323 /// other overload because of how the types that are meant for later
17324 /// canonicalization are stored.  So the idea is that this function
17325 /// should be used only for the smallest possible subset of types that
17326 /// are anonymous and thus cannot be handled by that other overload.
///
17328 /// @param t the type to consider for canonicalization.
///
17330 /// @param ctxt the @ref read_context to use.
17332 maybe_canonicalize_type(const type_base_sptr& t,
17333 read_context& ctxt)
// Peel typedefs, pointers and references (but not qualifiers) off the
// type to look at what lies underneath.
17338 type_base_sptr peeled_type =
17339 peel_typedef_pointer_or_reference_type(t, /*peel_qual_types=*/false);
17340 if (is_class_type(peeled_type)
17341 || is_union_type(peeled_type)
17342 || is_function_type(peeled_type)
17343 || is_array_type(peeled_type)
17344 || is_qualified_type(peeled_type))
17345 // We delay canonicalization of classes/unions or typedef,
17346 // pointers, references and array to classes/unions.  This is
17347 // because the (underlying) class might not be finished yet and we
17348 // might not be able to detect it here (thinking about
17349 // classes that are work-in-progress, or classes that might be
17350 // later amended by some DWARF construct).  So we err on the safe
17351 // side.  We also delay canonicalization for array and qualified
17352 // types because they can be edited (in particular by
17353 // maybe_strip_qualification) after they are initially built.
// Function types are part of the condition above too.
17354 ctxt.schedule_type_for_late_canonicalization(t);
// Also delay types that have a non-canonicalized sub-type.
17355 else if (type_has_non_canonicalized_subtype(t))
17356 ctxt.schedule_type_for_late_canonicalization(t);
17361 /// Canonicalize a type if it's suitable for early canonicalizing, or,
17362 /// if it's not, schedule it for late canonicalization, after the
17363 /// debug info of the current translation unit has been fully read.
///
17365 /// A (composite) type is deemed suitable for early canonicalizing iff
17366 /// all of its sub-types are canonicalized themselves.  Non composite
17367 /// types are always deemed suitable for early canonicalization.
///
17369 /// Note that this function knows how to properly use either one of
17370 /// the following two overloads:
///
17373 ///     1/ (DIE-based overload)
17374 ///     void maybe_canonicalize_type(const Dwarf_Die *die,
17375 ///                                  read_context& ctxt);
///
17378 ///     2/ void maybe_canonicalize_type(const type_base_sptr& t, read_context& ctxt);
///
17380 /// So this function uses 1/ for most types and uses 2/ for function
17381 /// types.  Using 2/ is slower and bigger than using 1/, but then 1/
17382 /// deals poorly with anonymous types because of how poorly DIE
17383 /// canonicalization works on anonymous types.  That's why this
17384 /// function uses 2/ only for the types that really need it.
///
17386 /// @param die the DIE of the type denoted by @p t.
///
17388 /// @param t the type to consider.  Its DIE is @p die.
///
17390 /// @param ctxt the read context in use.
17392 maybe_canonicalize_type(const Dwarf_Die *die,
17393 const type_base_sptr& t,
17394 read_context& ctxt)
// Function types go through the type-based overload ...
17396 if (const function_type_sptr ft = is_function_type(t))
17398 maybe_canonicalize_type(ft, ctxt);
// ... and the DIE-based overload handles the other types.
17402 maybe_canonicalize_type(die, ctxt);
17405 /// If a given decl is a member type declaration, set its access
17406 /// specifier from the DIE that represents it.
///
/// The access specifier defaults to private, or to public when the
/// scope of the declaration is a class that is a struct, before the
/// DIE is consulted.
///
17408 /// @param member_type_declaration the member type declaration to
/// consider.
///
/// @param die the DIE passed to die_access_specifier() to get the
/// access specifier from.
17411 maybe_set_member_type_access_specifier(decl_base_sptr member_type_declaration,
// Only type declarations that are members are of interest here.
17414 if (is_type(member_type_declaration)
17415 && is_member_decl(member_type_declaration))
17417 class_or_union* scope =
17418 is_class_or_union_type(member_type_declaration->get_scope());
// Default access: private, unless the scope is a struct.
17421 access_specifier access = private_access;
17422 if (class_decl* cl = is_class_type(scope))
17423 if (cl->is_struct())
17424 access = public_access;
// die_access_specifier() gets the chance to update 'access' from the
// DIE; presumably it leaves it untouched when the DIE carries no
// accessibility information -- TODO confirm.
17426 die_access_specifier(die, access);
17427 set_member_access_specifier(member_type_declaration, access);
17431 /// Build an IR node from a given DIE and add the node to the current
17432 /// IR being built and held in the read_context.  Doing that is called
17433 /// "emitting an IR node for the DIE".
17435 /// @param ctxt the read context.
17437 /// @param die the DIE to consider.
17439 /// @param scope the scope under which the resulting IR node has to be added.
17442 /// @param called_from_public_decl set to true if this function is
17443 /// called from the functions used to build a public decl (functions
17444 /// and variables).  In that case, this function accepts building IR
17445 /// nodes representing types.  Otherwise, this function only creates
17446 /// IR nodes representing public decls (functions and variables).
17447 /// This is done to avoid emitting IR nodes for types that are not
17448 /// referenced by public functions or variables.
17450 /// @param where_offset the offset of the DIE where we are "logically"
17451 /// positioned at, in the DIE tree.  This is useful when @p die is
17452 /// e.g., in a DW_TAG_partial_unit that can be included in several places in the DIE tree.
17455 /// @param is_required_decl_spec if true, it means the IR node to
17456 /// build is for a decl that is a specification for another decl that
17457 /// is concrete.  If you don't know what this is, set it to false.
17459 /// @return the resulting IR node.
17460 static type_or_decl_base_sptr
17461 build_ir_node_from_die(read_context& ctxt,
17464 bool called_from_public_decl,
17465 size_t where_offset,
17466 bool is_required_decl_spec)
17468 type_or_decl_base_sptr result;
17470 if (!die || !scope)
17473 int tag = dwarf_tag(die);
17475 if (!called_from_public_decl)
17477 if (ctxt.load_all_types() && die_is_type(die))
17478 /* We were instructed to load debug info for all types,
17479 included those that are not reachable from a public
17480 declaration. So load the debug info for this type. */;
17481 else if (tag != DW_TAG_subprogram
17482 && tag != DW_TAG_variable
17483 && tag != DW_TAG_member
17484 && tag != DW_TAG_namespace)
17488 die_source source_of_die;
17489 ABG_ASSERT(ctxt.get_die_source(die, source_of_die));
17491 if ((result = ctxt.lookup_decl_from_die_offset(dwarf_dieoffset(die),
17497 // Type DIEs we support.
17498 case DW_TAG_base_type:
17499 if (type_decl_sptr t = build_type_decl(ctxt, die, where_offset))
17502 add_decl_to_scope(t, ctxt.cur_transl_unit()->get_global_scope());
17507 case DW_TAG_typedef:
17509 typedef_decl_sptr t = build_typedef_type(ctxt, die,
17510 called_from_public_decl,
17512 result = add_decl_to_scope(t, scope);
17515 maybe_set_member_type_access_specifier(is_decl(result), die);
17516 maybe_canonicalize_type(die, ctxt);
17521 case DW_TAG_pointer_type:
17523 pointer_type_def_sptr p =
17524 build_pointer_type_def(ctxt, die,
17525 called_from_public_decl,
17530 add_decl_to_scope(p, ctxt.cur_transl_unit()->get_global_scope());
17531 ABG_ASSERT(result->get_translation_unit());
17532 maybe_canonicalize_type(die, ctxt);
17537 case DW_TAG_reference_type:
17538 case DW_TAG_rvalue_reference_type:
17540 reference_type_def_sptr r =
17541 build_reference_type(ctxt, die,
17542 called_from_public_decl,
17547 add_decl_to_scope(r, ctxt.cur_transl_unit()->get_global_scope());
17549 ctxt.associate_die_to_type(die, r, where_offset);
17550 maybe_canonicalize_type(die, ctxt);
17555 case DW_TAG_const_type:
17556 case DW_TAG_volatile_type:
17557 case DW_TAG_restrict_type:
17560 build_qualified_type(ctxt, die,
17561 called_from_public_decl,
17565 // Strip some potentially redundant type qualifiers from
17566 // the qualified type we just built.
17567 decl_base_sptr d = maybe_strip_qualification(is_qualified_type(q),
17570 d = get_type_declaration(q);
17572 type_base_sptr ty = is_type(d);
17573 // Associate the die to type ty again because 'ty'might be
17574 // different from 'q', because 'ty' is 'q' possibly
17575 // stripped from some redundant type qualifier.
17576 ctxt.associate_die_to_type(die, ty, where_offset);
17578 add_decl_to_scope(d, ctxt.cur_transl_unit()->get_global_scope());
17579 maybe_canonicalize_type(die, ctxt);
17584 case DW_TAG_enumeration_type:
17586 if (!type_is_suppressed(ctxt, scope, die))
17588 enum_type_decl_sptr e = build_enum_type(ctxt, die, scope,
17590 result = add_decl_to_scope(e, scope);
17593 maybe_set_member_type_access_specifier(is_decl(result), die);
17594 maybe_canonicalize_type(die, ctxt);
17600 case DW_TAG_class_type:
17601 case DW_TAG_structure_type:
17603 bool type_is_private = false;
17604 bool type_suppressed=
17605 type_is_suppressed(ctxt, scope, die, type_is_private);
17607 if (type_suppressed && type_is_private)
17608 // The type is suppressed because it's private. If other
17609 // non-suppressed and declaration-only instances of this
17610 // type exist in the current corpus, then it means those
17611 // non-suppressed instances are opaque versions of the
17612 // suppressed private type. Lets return one of these opaque
17614 result = get_opaque_version_of_type(ctxt, scope, die, where_offset);
17615 else if (!type_suppressed)
17617 Dwarf_Die spec_die;
17618 scope_decl_sptr scop;
17619 class_decl_sptr klass;
17620 if (die_die_attribute(die, DW_AT_specification, spec_die))
17622 scope_decl_sptr skope =
17623 get_scope_for_die(ctxt, &spec_die,
17624 called_from_public_decl,
17627 decl_base_sptr cl =
17628 is_decl(build_ir_node_from_die(ctxt, &spec_die,
17630 called_from_public_decl,
17633 klass = dynamic_pointer_cast<class_decl>(cl);
17637 add_or_update_class_type(ctxt, die,
17639 tag == DW_TAG_structure_type,
17641 called_from_public_decl,
17646 add_or_update_class_type(ctxt, die, scope,
17647 tag == DW_TAG_structure_type,
17649 called_from_public_decl,
17654 maybe_set_member_type_access_specifier(klass, die);
17655 maybe_canonicalize_type(die, klass, ctxt);
17660 case DW_TAG_union_type:
17661 if (!type_is_suppressed(ctxt, scope, die))
17663 union_decl_sptr union_type =
17664 add_or_update_union_type(ctxt, die, scope,
17666 called_from_public_decl,
17670 maybe_set_member_type_access_specifier(union_type, die);
17671 maybe_canonicalize_type(die, union_type, ctxt);
17673 result = union_type;
17676 case DW_TAG_string_type:
17678 case DW_TAG_subroutine_type:
17680 function_type_sptr f = build_function_type(ctxt, die,
17686 maybe_canonicalize_type(die, ctxt);
17690 case DW_TAG_array_type:
17692 array_type_def_sptr a = build_array_type(ctxt,
17694 called_from_public_decl,
17699 add_decl_to_scope(a, ctxt.cur_transl_unit()->get_global_scope());
17700 ctxt.associate_die_to_type(die, a, where_offset);
17701 maybe_canonicalize_type(die, ctxt);
17705 case DW_TAG_subrange_type:
17707 // If we got here, this means the subrange type is a "free
17708 // form" defined in the global namespace of the current
17709 // translation unit, like what is found in Ada.
17710 array_type_def::subrange_sptr s =
17711 build_subrange_type(ctxt, die, where_offset);
17715 add_decl_to_scope(s, ctxt.cur_transl_unit()->get_global_scope());
17716 ctxt.associate_die_to_type(die, s, where_offset);
17717 maybe_canonicalize_type(die, ctxt);
17721 case DW_TAG_packed_type:
17723 case DW_TAG_set_type:
17725 case DW_TAG_file_type:
17727 case DW_TAG_ptr_to_member_type:
17729 case DW_TAG_thrown_type:
17731 case DW_TAG_interface_type:
17733 case DW_TAG_unspecified_type:
17735 case DW_TAG_shared_type:
17738 case DW_TAG_compile_unit:
17739 // We shouldn't reach this point b/c this should be handled by
17740 // build_translation_unit.
17741 ABG_ASSERT_NOT_REACHED;
17743 case DW_TAG_namespace:
17744 case DW_TAG_module:
17745 result = build_namespace_decl_and_add_to_ir(ctxt, die, where_offset);
17748 case DW_TAG_variable:
17749 case DW_TAG_member:
17751 Dwarf_Die spec_die;
17752 bool var_is_cloned = false;
17754 if (tag == DW_TAG_member)
17755 ABG_ASSERT(!is_c_language(ctxt.cur_transl_unit()->get_language()));
17757 if (die_die_attribute(die, DW_AT_specification, spec_die,false)
17758 || (var_is_cloned = die_die_attribute(die, DW_AT_abstract_origin,
17761 scope_decl_sptr spec_scope = get_scope_for_die(ctxt, &spec_die,
17762 called_from_public_decl,
17767 is_decl(build_ir_node_from_die(ctxt, &spec_die,
17769 called_from_public_decl,
17771 /*is_required_decl_spec=*/true));
17775 dynamic_pointer_cast<var_decl>(d);
17778 m = build_var_decl(ctxt, die, where_offset, m);
17779 if (is_data_member(m))
17781 set_member_is_static(m, true);
17782 ctxt.associate_die_to_decl(die, m, where_offset,
17783 /*associate_by_repr=*/false);
17787 ABG_ASSERT(has_scope(m));
17788 ctxt.var_decls_to_re_add_to_tree().push_back(m);
17790 ABG_ASSERT(m->get_scope());
17791 ctxt.maybe_add_var_to_exported_decls(m.get());
17796 else if (var_decl_sptr v =
17797 build_or_get_var_decl_if_not_suppressed(ctxt, scope, die,
17799 /*result=*/var_decl_sptr(),
17800 is_required_decl_spec))
17802 result = add_decl_to_scope(v, scope);
17803 ABG_ASSERT(is_decl(result)->get_scope());
17804 v = dynamic_pointer_cast<var_decl>(result);
17806 ABG_ASSERT(v->get_scope());
17807 ctxt.var_decls_to_re_add_to_tree().push_back(v);
17808 ctxt.maybe_add_var_to_exported_decls(v.get());
17813 case DW_TAG_subprogram:
17815 Dwarf_Die spec_die;
17816 Dwarf_Die abstract_origin_die;
17817 Dwarf_Die *interface_die = 0, *origin_die = 0;
17818 scope_decl_sptr interface_scope;
17819 if (die_is_artificial(die))
17822 function_decl_sptr fn;
17823 bool has_spec = die_die_attribute(die, DW_AT_specification,
17825 bool has_abstract_origin =
17826 die_die_attribute(die, DW_AT_abstract_origin,
17827 abstract_origin_die, true);
17828 if (has_spec || has_abstract_origin)
17833 : &abstract_origin_die;
17835 has_abstract_origin
17836 ? &abstract_origin_die
17839 string linkage_name = die_linkage_name(die);
17840 string spec_linkage_name = die_linkage_name(interface_die);
17842 interface_scope = get_scope_for_die(ctxt, interface_die,
17843 called_from_public_decl,
17845 if (interface_scope)
17848 is_decl(build_ir_node_from_die(ctxt,
17850 interface_scope.get(),
17851 called_from_public_decl,
17855 fn = dynamic_pointer_cast<function_decl>(d);
17856 if (has_abstract_origin
17857 && (linkage_name != spec_linkage_name))
17858 // The current DIE has 'd' as abstract orign,
17859 // and has a linkage name that is different
17860 // from from the linkage name of 'd'. That
17861 // means, the current DIE represents a clone
17867 ctxt.scope_stack().push(scope);
17869 scope_decl* logical_scope =
17871 ? interface_scope.get()
17874 result = build_or_get_fn_decl_if_not_suppressed(ctxt, logical_scope,
17875 die, where_offset, fn);
17878 result = add_decl_to_scope(is_decl(result), logical_scope);
17880 fn = is_function_decl(result);
17881 if (fn && is_member_function(fn))
17883 class_decl_sptr klass(static_cast<class_decl*>(logical_scope),
17884 sptr_utils::noop_deleter());
17886 finish_member_function_reading(die, fn, klass, ctxt);
17891 ctxt.maybe_add_fn_to_exported_decls(fn.get());
17892 ctxt.associate_die_to_decl(die, fn, where_offset,
17893 /*associate_by_repr=*/false);
17894 maybe_canonicalize_type(die, ctxt);
17897 ctxt.scope_stack().pop();
17901 case DW_TAG_formal_parameter:
17902 // We should not read this case as it should have been dealt
17903 // with by build_function_decl above.
17904 ABG_ASSERT_NOT_REACHED;
17906 case DW_TAG_constant:
17908 case DW_TAG_enumerator:
17911 case DW_TAG_partial_unit:
17912 case DW_TAG_imported_unit:
17913 // For now, the DIEs under these are read lazily when they are
17914 // referenced by a public decl DIE that is under a
17915 // DW_TAG_compile_unit, so we shouldn't get here.
17916 ABG_ASSERT_NOT_REACHED;
17918 // Other declaration we don't really intend to support yet.
17919 case DW_TAG_dwarf_procedure:
17920 case DW_TAG_imported_declaration:
17921 case DW_TAG_entry_point:
17923 case DW_TAG_lexical_block:
17924 case DW_TAG_unspecified_parameters:
17925 case DW_TAG_variant:
17926 case DW_TAG_common_block:
17927 case DW_TAG_common_inclusion:
17928 case DW_TAG_inheritance:
17929 case DW_TAG_inlined_subroutine:
17930 case DW_TAG_with_stmt:
17931 case DW_TAG_access_declaration:
17932 case DW_TAG_catch_block:
17933 case DW_TAG_friend:
17934 case DW_TAG_namelist:
17935 case DW_TAG_namelist_item:
17936 case DW_TAG_template_type_parameter:
17937 case DW_TAG_template_value_parameter:
17938 case DW_TAG_try_block:
17939 case DW_TAG_variant_part:
17940 case DW_TAG_imported_module:
17941 case DW_TAG_condition:
17942 case DW_TAG_type_unit:
17943 case DW_TAG_template_alias:
17944 case DW_TAG_lo_user:
17945 case DW_TAG_MIPS_loop:
17946 case DW_TAG_format_label:
17947 case DW_TAG_function_template:
17948 case DW_TAG_class_template:
17949 case DW_TAG_GNU_BINCL:
17950 case DW_TAG_GNU_EINCL:
17951 case DW_TAG_GNU_template_template_param:
17952 case DW_TAG_GNU_template_parameter_pack:
17953 case DW_TAG_GNU_formal_parameter_pack:
17954 case DW_TAG_GNU_call_site:
17955 case DW_TAG_GNU_call_site_parameter:
17956 case DW_TAG_hi_user:
17961 if (result && tag != DW_TAG_subroutine_type)
17962 ctxt.associate_die_to_decl(die, is_decl(result), where_offset,
17963 /*associate_by_repr=*/false);
17968 /// Build the IR node for a void type.
///
/// The void type is obtained from the environment of the read
/// context. Its declaration is added to the global scope of the
/// current translation unit if it has no scope yet.
17970 /// @param ctxt the read context to use.
17972 /// @return the declaration of the void type node.
17973 static decl_base_sptr
17974 build_ir_node_for_void_type(read_context& ctxt)
17976 ir::environment* env = ctxt.env();
17978 type_base_sptr t = env->get_void_type();
17979 decl_base_sptr type_declaration = get_type_declaration(t);
// Only scope the declaration once.
17980 if (!has_scope(type_declaration))
17981 add_decl_to_scope(type_declaration,
17982 ctxt.cur_transl_unit()->get_global_scope());
17984 return type_declaration;
17987 /// Build an IR node from a given DIE and add the node to the current
17988 /// IR being built and held in the read_context. Doing that is called
17989 /// "emitting an IR node for the DIE".
///
/// This overload determines the scope itself and then delegates to
/// the overload of build_ir_node_from_die() that takes a scope: for a
/// C translation unit the global scope of the read context is used,
/// otherwise the scope is computed by get_scope_for_die().
17991 /// @param ctxt the read context.
17993 /// @param die the DIE to consider.
17995 /// @param called_from_public_decl set to true if this function is
17996 /// called from the functions used to build a public decl (functions
17997 /// and variables). In that case, this function accepts building IR
17998 /// nodes representing types. Otherwise, this function only creates
17999 /// IR nodes representing public decls (functions and variables).
18000 /// This is done to avoid emitting IR nodes for types that are not
18001 /// referenced by public functions or variables.
18003 /// @param where_offset the offset of the DIE where we are "logically"
18004 /// positioned at, in the DIE tree. This is useful when @p die is
18005 /// e.g, DW_TAG_partial_unit that can be included in several places in the DIE tree.
18008 /// @return the resulting IR node.
18009 static type_or_decl_base_sptr
18010 build_ir_node_from_die(read_context& ctxt,
18012 bool called_from_public_decl,
18013 size_t where_offset)
18016 return decl_base_sptr();
// In C, the IR node is built directly in the global scope of the
// read context.
18018 if (is_c_language(ctxt.cur_transl_unit()->get_language()))
18020 const scope_decl_sptr& scop = ctxt.global_scope();
18021 return build_ir_node_from_die(ctxt, die, scop.get(),
18022 called_from_public_decl,
// For the other languages, the scope is looked up from the DIE.
18026 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
18027 called_from_public_decl,
18029 return build_ir_node_from_die(ctxt, die, scope.get(),
18030 called_from_public_decl,
/// Bitwise OR of two @ref status values.
///
/// Both operands are converted to unsigned, OR-ed together, and the
/// result is converted back to @ref status.
///
/// @param l the left-hand side operand.
///
/// @param r the right-hand side operand.
///
/// @return the combination of the bits of @p l and @p r.
18035 operator|(status l, status r)
18037 return static_cast<status>(static_cast<unsigned>(l)
18038 | static_cast<unsigned>(r));
/// Bitwise AND of two @ref status values.
///
/// Both operands are converted to unsigned, AND-ed together, and the
/// result is converted back to @ref status.
///
/// @param l the left-hand side operand.
///
/// @param r the right-hand side operand.
///
/// @return the bits that are set in both @p l and @p r.
18042 operator&(status l, status r)
18044 return static_cast<status>(static_cast<unsigned>(l)
18045 & static_cast<unsigned>(r));
/// Compound bitwise OR assignment operator for @ref status values.
///
/// @param l the left-hand side operand, taken by reference.
///
/// @param r the right-hand side operand.
18049 operator|=(status& l, status r)
/// Compound bitwise AND assignment operator for @ref status values.
///
/// @param l the left-hand side operand, taken by reference.
///
/// @param r the right-hand side operand.
18056 operator&=(status& l, status r)
18062 /// Emit a diagnostic status with English sentences to describe the
18063 /// problems encoded in a given abigail::dwarf_reader::status, if
18064 /// there is an error.
///
/// One sentence, terminated by a newline, is appended for each error
/// bit that is set in @p s.
18066 /// @param s the status to diagnose
18068 /// @return a string containing sentences that describe the possible
18069 /// errors encoded in @p s. If there is no error to encode, then the
18070 /// empty string is returned.
18072 status_to_diagnostic_string(status s)
18076 if (s & STATUS_DEBUG_INFO_NOT_FOUND)
18077 str += "could not find debug info\n";
18079 if (s & STATUS_ALT_DEBUG_INFO_NOT_FOUND)
18080 str += "could not find alternate debug info\n";
18082 if (s & STATUS_NO_SYMBOLS_FOUND)
18083 str += "could not load ELF symbols\n";
18088 /// Create a dwarf_reader::read_context.
18090 /// @param elf_path the path to the elf file the context is to be used for.
18092 /// @param debug_info_root_paths a vector of pointers to the paths to the root
18093 /// directories under which the debug info is to be found for @p
18094 /// elf_path. Leave this empty if the debug info is not in a split file.
18097 /// @param environment the environment used by the current context.
18098 /// This environment contains resources needed by the reader and by
18099 /// the types and declarations that are to be created later. Note
18100 /// that ABI artifacts that are to be compared all need to be created
18101 /// within the same environment.
18103 /// Please also note that the life time of this environment object
18104 /// must be greater than the life time of the resulting @ref
18105 /// read_context, because the context uses resources that are allocated in the environment.
18108 /// @param load_all_types if set to false only the types that are
18109 /// reachable from publicly exported declarations (of functions and
18110 /// variables) are read. If set to true then all types found in the
18111 /// debug information are loaded.
18113 /// @param linux_kernel_mode if set to true, then consider the special
18114 /// linux kernel symbol tables when determining if a symbol is
18115 /// exported or not.
18117 /// @return a smart pointer to the resulting dwarf_reader::read_context.
18119 create_read_context(const std::string& elf_path,
18120 const vector<char**>& debug_info_root_paths,
18121 ir::environment* environment,
18122 bool load_all_types,
18123 bool linux_kernel_mode)
18125 // Build a new read_context from the arguments; it is handed out
18126 // through a read_context_sptr.
18127 read_context_sptr result(new read_context(elf_path, debug_info_root_paths,
18128 environment, load_all_types,
18129 linux_kernel_mode));
18133 /// Getter for the path to the binary this @ref read_context is for.
///
/// @param ctxt the read context to consider.
18135 /// @return the path to the binary the @ref read_context is for, as returned by read_context::elf_path().
18137 read_context_get_path(const read_context& ctxt)
18138 {return ctxt.elf_path();}
18140 /// Re-initialize a read_context so that it can be re-used to read
18141 /// another binary.
18143 /// @param ctxt the context to re-initialize.
18145 /// @param elf_path the path to the elf file the context is to be used for.
18148 /// @param debug_info_root_path a vector of pointers to the paths to the root
18149 /// directories under which the debug info is to be found for @p
18150 /// elf_path. Leave this empty if the debug info is not in a split file.
18153 /// @param environment the environment used by the current context.
18154 /// This environment contains resources needed by the reader and by
18155 /// the types and declarations that are to be created later. Note
18156 /// that ABI artifacts that are to be compared all need to be created
18157 /// within the same environment.
18159 /// Please also note that the life time of this environment object
18160 /// must be greater than the life time of the resulting @ref
18161 /// read_context, because the context uses resources that are allocated in the environment.
18164 /// @param read_all_types if set to false only the types that are
18165 /// reachable from publicly exported declarations (of functions and
18166 /// variables) are read. If set to true then all types found in the
18167 /// debug information are loaded.
18169 /// @param linux_kernel_mode if set to true, then consider the special
18170 /// linux kernel symbol tables when determining if a symbol is
18171 /// exported or not.
///
/// NOTE(review): the statement visible below only forwards to
/// read_context::initialize(); the @return line that follows looks
/// copied from create_read_context() -- confirm against the
/// function's declared return type.
18173 /// @return a smart pointer to the resulting dwarf_reader::read_context.
18175 reset_read_context(read_context_sptr &ctxt,
18176 const std::string& elf_path,
18177 const vector<char**>& debug_info_root_path,
18178 ir::environment* environment,
18179 bool read_all_types,
18180 bool linux_kernel_mode)
18183 ctxt->initialize(elf_path, debug_info_root_path, environment,
18184 read_all_types, linux_kernel_mode);
18187 /// Add suppression specifications to the set of suppressions to be
18188 /// used during the construction of the ABI internal representation
18189 /// (the ABI corpus) from ELF and DWARF.
18191 /// During the construction of the ABI corpus, ABI artifacts that
18192 /// match a given suppression specification are dropped on the
18193 /// floor; that is, they are discarded and won't be part of the final
18194 /// ABI corpus. This is a way to reduce the amount of data held by
18195 /// the final ABI corpus.
18197 /// Note that the suppression specifications provided to this function
18198 /// are only considered during the construction of the ABI corpus.
18199 /// For instance, they are not taken into account during e.g
18200 /// comparisons of two ABI corpora that might happen later. If you
18201 /// want to apply suppression specifications to the comparison (or
18202 /// reporting) of ABI corpora please refer to the documentation of the
18203 /// @ref diff_context type to learn how to set suppressions that are
18204 /// to be used in that context.
18206 /// @param ctxt the context that is going to be used by functions that
18207 /// read ELF and DWARF information to construct an ABI corpus.
18209 /// @param supprs the suppression specifications to be applied during
18210 /// the construction of the ABI corpus.
18212 add_read_context_suppressions(read_context& ctxt,
18213 const suppr::suppressions_type& supprs)
// Only the suppressions that drop artifacts from the IR are kept in
// the read context.
18215 for (suppr::suppressions_type::const_iterator i = supprs.begin();
18218 if ((*i)->get_drops_artifact_from_ir())
18219 ctxt.get_suppressions().push_back(*i);
18222 /// Set the @ref corpus_group being created on the current read context.
///
/// The group is stored in the cur_corpus_group_ member of @p ctxt.
18224 /// @param ctxt the read_context to consider.
18226 /// @param group the @ref corpus_group to set.
18228 set_read_context_corpus_group(read_context& ctxt,
18229 corpus_group_sptr& group)
18231 ctxt.cur_corpus_group_ = group;
18234 /// Read all @ref abigail::translation_unit possible from the debug info
18235 /// accessible from an elf file, stuff them into a libabigail ABI
18236 /// Corpus and return it.
18238 /// @param ctxt the context to use for reading the elf file.
18240 /// @param status output parameter. It is reset to STATUS_UNKNOWN on entry; the bits describing what happened are then OR-ed into it.
18242 /// @return an empty corpus_sptr if the binary cannot be handled; otherwise, presumably, the corpus built by read_debug_info_into_corpus().
18244 read_corpus_from_elf(read_context& ctxt, status& status)
18246 status = STATUS_UNKNOWN;
18248 // Load debug info from the elf path.
18249 if (!ctxt.load_debug_info())
18250 status |= STATUS_DEBUG_INFO_NOT_FOUND;
// The binary refers to alternate debug info that has not been
// loaded.
18253 string alt_di_path;
18254 if (refers_to_alt_debug_info(ctxt, alt_di_path) && !ctxt.alt_dwarf())
18255 status |= STATUS_ALT_DEBUG_INFO_NOT_FOUND;
18258 if (!get_ignore_symbol_table(ctxt))
18260 ctxt.load_elf_properties();
18261 // Read the symbols for publicly defined decls
18262 if (!ctxt.load_symbol_maps())
18263 status |= STATUS_NO_SYMBOLS_FOUND;
18266 if (// If no elf symbol was found ...
18267 status & STATUS_NO_SYMBOLS_FOUND
18268 // ... or if debug info was found but not the required alternate debug info ...
18270 || ((status & STATUS_ALT_DEBUG_INFO_NOT_FOUND)
18271 && !(status & STATUS_DEBUG_INFO_NOT_FOUND)))
18272 // ... then we cannot handle the binary.
18273 return corpus_sptr();
18275 // Read the variable and function descriptions from the debug info
18276 // we have, through the dwfl handle.
18277 corpus_sptr corp = read_debug_info_into_corpus(ctxt);
18279 status |= STATUS_OK;
18284 /// Read a corpus and add it to a given @ref corpus_group.
///
/// The corpus is read with read_corpus_from_elf(). When the
/// resulting status has the STATUS_OK bit set, the corpus is added to
/// @p group unless it already belongs to a group.
18286 /// @param ctxt the reading context to consider.
18288 /// @param group the @ref corpus_group to add the new corpus to.
18290 /// @param status output parameter. The status of the read. It is set
18291 /// by this function upon its completion.
18293 read_and_add_corpus_to_group_from_elf(read_context& ctxt,
18294 corpus_group& group,
18297 corpus_sptr result;
18298 corpus_sptr corp = read_corpus_from_elf(ctxt, status);
18299 if (status & STATUS_OK)
// Don't add a corpus that is already part of a group.
18301 if (!corp->get_group())
18302 group.add_corpus(corp);
18309 /// Read all @ref abigail::translation_unit possible from the debug info
18310 /// accessible from an elf file, stuff them into a libabigail ABI
18311 /// Corpus and return it.
///
/// This overload creates a read context with create_read_context()
/// and forwards to the overload that takes a @ref read_context.
18313 /// @param elf_path the path to the elf file.
18315 /// @param debug_info_root_paths a vector of pointers to root paths
18316 /// under which to look for the debug info of the elf files that are
18317 /// later handled by the Dwfl. This is for cases where the debug info is
18318 /// split into a different file from the binary we want to inspect.
18319 /// On Red Hat compatible systems, this root path is usually
18320 /// /usr/lib/debug by default. If this argument is set to NULL, then
18321 /// "./debug" and /usr/lib/debug will be searched for sub-directories
18322 /// containing the debug info file.
18324 /// @param environment the environment used by the current context.
18325 /// This environment contains resources needed by the reader and by
18326 /// the types and declarations that are to be created later. Note
18327 /// that ABI artifacts that are to be compared all need to be created
18328 /// within the same environment. Also, the lifetime of the
18329 /// environment must be greater than the lifetime of the resulting
18330 /// corpus because the corpus uses resources that are allocated in the environment.
18333 /// @param load_all_types if set to false only the types that are
18334 /// reachable from publicly exported declarations (of functions and
18335 /// variables) are read. If set to true then all types found in the
18336 /// debug information are loaded.
18338 /// @param status output parameter, forwarded to the read_context-based overload of read_corpus_from_elf().
18340 /// @return the value returned by the read_context-based overload of read_corpus_from_elf().
18342 read_corpus_from_elf(const std::string& elf_path,
18343 const vector<char**>& debug_info_root_paths,
18344 ir::environment* environment,
18345 bool load_all_types,
18348 read_context_sptr c = create_read_context(elf_path,
18349 debug_info_root_paths,
18352 read_context& ctxt = *c;
18353 return read_corpus_from_elf(ctxt, status);
18356 /// Look into the symbol tables of a given elf file and see if we find
18357 /// a given symbol.
///
/// The file is opened read-only and the lookup itself is delegated to
/// the overload of lookup_symbol_from_elf() that takes an Elf handle.
18359 /// @param env the environment we are operating from.
18361 /// @param elf_path the path to the elf file to consider.
18363 /// @param symbol_name the name of the symbol to look for.
18365 /// @param demangle if true, try to demangle the symbol name found in
18366 /// the symbol table.
18368 /// @param syms the vector of symbols found with the name @p symbol_name.
18370 /// @return true iff the symbol was found among the publicly exported
18371 /// symbols of the ELF file.
18373 lookup_symbol_from_elf(const environment* env,
18374 const string& elf_path,
18375 const string& symbol_name,
18377 vector<elf_symbol_sptr>& syms)
// elf_version() returning EV_NONE is treated as a distinct case
// before anything else is done.
18380 if (elf_version(EV_CURRENT) == EV_NONE)
18383 int fd = open(elf_path.c_str(), O_RDONLY);
18391 Elf* elf = elf_begin(fd, ELF_C_READ, 0);
18395 bool value = lookup_symbol_from_elf(env, elf, symbol_name,
18403 /// Look into the symbol tables of an elf file to see if a public
18404 /// function of a given name is found.
///
/// The file is opened read-only and the lookup itself is delegated to
/// the overload of lookup_public_function_symbol_from_elf() that
/// takes an Elf handle.
18406 /// @param env the environment we are operating from.
18408 /// @param path the path to the elf file to consider.
18410 /// @param symname the name of the function to look for.
18412 /// @param syms the vector of public function symbols found with the
18413 /// name @p symname.
18415 /// @return true iff a function with symbol name @p symname is found.
18418 lookup_public_function_symbol_from_elf(const environment* env,
18419 const string& path,
18420 const string& symname,
18421 vector<elf_symbol_sptr>& syms)
// elf_version() returning EV_NONE is treated as a distinct case
// before anything else is done.
18423 if (elf_version(EV_CURRENT) == EV_NONE)
18426 int fd = open(path.c_str(), O_RDONLY);
18434 Elf* elf = elf_begin(fd, ELF_C_READ, 0);
18438 bool value = lookup_public_function_symbol_from_elf(env, elf, symname, syms);
18445 /// Check if the underlying elf file refers to an alternate debug info
18446 /// file associated to it.
18448 /// Note that "alternate debug info sections" is a GNU extension as
18449 /// of DWARF4 and is described at
18450 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
18452 /// @param ctxt the context used to read the elf file.
18454 /// @param alt_di_path the path to the alternate debug info file. This is
18455 /// set iff the function returns true.
18457 /// @return true if the ELF file refers to an alternate debug info file.
18460 refers_to_alt_debug_info(const read_context& ctxt,
18461 string& alt_di_path)
// The decision is based on whether the read context holds a
// non-empty alternate debug info path.
18463 if (!ctxt.alt_debug_info_path().empty())
18465 alt_di_path = ctxt.alt_debug_info_path();
18471 /// Check if the underlying elf file has an alternate debug info file
18472 /// associated to it.
18474 /// Note that "alternate debug info sections" is a GNU extension as
18475 /// of DWARF4 and is described at
18476 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
18478 /// @param ctxt the read_context to use to handle the underlying elf file.
18480 /// @param has_alt_di out parameter. This is set to true upon
18481 /// successful completion of the function iff an alternate debug info
18482 /// file was found, false otherwise. Note that this parameter is set
18483 /// only if the function returns STATUS_OK.
18485 /// @param alt_debug_info_path if the function returned STATUS_OK and
18486 /// if @p has_alt_di has been set to true, then this parameter contains the path
18487 /// to the alternate debug info file found.
18489 /// @return STATUS_OK upon successful completion, STATUS_DEBUG_INFO_NOT_FOUND if the debug info could not be loaded.
18491 has_alt_debug_info(read_context& ctxt,
18493 string& alt_debug_info_path)
18495 // Load debug info from the elf path.
18496 if (!ctxt.load_debug_info())
18497 return STATUS_DEBUG_INFO_NOT_FOUND;
// When ctxt.alt_dwarf() is set, report the path of the alternate
// debug info held by the context.
18499 if (ctxt.alt_dwarf())
18502 alt_debug_info_path = ctxt.alt_debug_info_path();
18505 has_alt_di = false;
18510 /// Check if a given elf file has an alternate debug info file
18511 /// associated to it.
18513 /// Note that "alternate debug info sections" is a GNU extension as
18514 /// of DWARF4 and is described at
18515 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
18517 /// @param elf_path the path to the elf file to consider.
18519 /// @param debug_info_root_path a pointer to the root directory under which the split debug info
18520 /// file associated to elf_path is to be found. This has to be NULL
18521 /// if the debug info file is not in a split file.
18523 /// @param has_alt_di out parameter. This is set to true upon
18524 /// successful completion of the function iff an alternate debug info
18525 /// file was found, false otherwise. Note that this parameter is set
18526 /// only if the function returns STATUS_OK.
18528 /// @param alt_debug_info_path if the function returned STATUS_OK and
18529 /// if @p has_alt_di has been set to true, then this parameter contains the path
18530 /// to the alternate debug info file found.
18532 /// @return STATUS_OK upon successful completion, STATUS_DEBUG_INFO_NOT_FOUND if the debug info could not be loaded.
18534 has_alt_debug_info(const string& elf_path,
18535 char** debug_info_root_path,
18537 string& alt_debug_info_path)
// A temporary read context is created for @p elf_path, with
// @p debug_info_root_path as its only debug info root and a null
// environment.
18539 vector<char**> di_roots;
18540 di_roots.push_back(debug_info_root_path);
18541 read_context_sptr c = create_read_context(elf_path, di_roots, 0);
18542 read_context& ctxt = *c;
18544 // Load debug info from the elf path.
18545 if (!ctxt.load_debug_info())
18546 return STATUS_DEBUG_INFO_NOT_FOUND;
// When ctxt.alt_dwarf() is set, report the path of the alternate
// debug info held by the context.
18548 if (ctxt.alt_dwarf())
18551 alt_debug_info_path = ctxt.alt_debug_info_path();
18554 has_alt_di = false;
18559 /// Fetch the SONAME ELF property from an ELF binary file.
///
/// The program headers of the file are walked to find the PT_DYNAMIC
/// segment; its entries are then scanned for a DT_SONAME entry,
/// stopping at DT_NULL.
18561 /// @param path The path to the elf file to consider.
18563 /// @param soname out parameter. Set to the SONAME property of the
18564 /// binary file, if it is present in the ELF file.
18566 /// @return false if an error occurred while looking for the SONAME
18567 /// property in the binary, true otherwise.
18569 get_soname_of_elf_file(const string& path, string &soname)
18572 int fd = open(path.c_str(), O_RDONLY);
18576 elf_version (EV_CURRENT);
18577 Elf* elf = elf_begin (fd, ELF_C_READ_MMAP, NULL);
18579 GElf_Ehdr ehdr_mem;
18580 GElf_Ehdr* ehdr = gelf_getehdr (elf, &ehdr_mem);
18584 for (int i = 0; i < ehdr->e_phnum; ++i)
18586 GElf_Phdr phdr_mem;
18587 GElf_Phdr* phdr = gelf_getphdr (elf, i, &phdr_mem);
18589 if (phdr != NULL && phdr->p_type == PT_DYNAMIC)
18591 Elf_Scn* scn = gelf_offscn (elf, phdr->p_offset);
18592 GElf_Shdr shdr_mem;
18593 GElf_Shdr* shdr = gelf_getshdr (scn, &shdr_mem);
// The number of entries is derived from the section header when
// there is one; without it the scan is bounded only by INT_MAX.
// NOTE(review): sh_entsize is used as a divisor without a visible
// check against zero -- confirm.
18594 int maxcnt = (shdr != NULL
18595 ? shdr->sh_size / shdr->sh_entsize : INT_MAX);
18596 ABG_ASSERT (shdr == NULL || shdr->sh_type == SHT_DYNAMIC);
18597 Elf_Data* data = elf_getdata (scn, NULL);
18601 for (int cnt = 0; cnt < maxcnt; ++cnt)
18604 GElf_Dyn* dyn = gelf_getdyn (data, cnt, &dynmem);
// DT_NULL marks the end of the dynamic entries.
18608 if (dyn->d_tag == DT_NULL)
18611 if (dyn->d_tag != DT_SONAME)
// NOTE(review): shdr is tolerated as NULL by the maxcnt computation
// and the assertion above, but is dereferenced here; also the value
// returned by elf_strptr() is assigned to a std::string directly.
// Confirm that neither can be NULL at this point.
18614 soname = elf_strptr (elf, shdr->sh_link, dyn->d_un.d_val);
18627 /// Get the type of a given elf file.
18629 /// @param path the absolute path to the ELF file to be analyzed.
18631 /// @param type out parameter. The kind of the ELF file designated by
18633 /// @p path, as computed by elf_file_type().
18634 /// This parameter is set iff the function returns true.
18636 /// @return true iff the file could be opened and analyzed.
18638 get_type_of_elf_file(const string& path, elf_type& type)
18640 int fd = open(path.c_str(), O_RDONLY);
18644 elf_version (EV_CURRENT);
// NOTE(review): the handle returned by elf_begin() is passed to
// elf_file_type() without any check in between -- confirm that
// elf_file_type() copes with a NULL handle.
18645 Elf *elf = elf_begin (fd, ELF_C_READ_MMAP, NULL);
18646 type = elf_file_type(elf);
18653 }// end namespace dwarf_reader
18655 }// end namespace abigail