2 * @file bfd_support.cpp
3 * BFD muck we have to deal with.
5 * @remark Copyright 2005 OProfile authors
6 * @remark Read the file COPYING
11 #include "bfd_support.h"
14 #include "op_fileio.h"
15 #include "op_config.h"
16 #include "string_manip.h"
17 #include "file_manip.h"
19 #include "locate_images.h"
20 #include "op_libiberty.h"
21 #include "op_exception.h"
43 #ifndef NT_GNU_BUILD_ID
44 #define NT_GNU_BUILD_ID 3
46 static size_t build_id_size;
// Check that the BFD referenced by *ibfd matches the bfd_object format,
// using bfd_check_format_matches(). On a mismatch a message naming `file`
// is written to the cverb stream; what happens after that message is on
// lines not visible in this excerpt.
49 void check_format(string const & file, bfd ** ibfd)
51 if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
52 cverb << vbfd << "BFD format failure for " << file << endl;
// Check whether the candidate debuginfo file `name` has the expected CRC32.
//
// `name` is resolved to a real path through extra.find_image_path(); the
// result status is compared against image_ok (the handling of a mismatch is
// not visible in this excerpt). The file is then read through a fixed-size
// buffer and its CRC32 accumulated with calc_crc32().
//
// @param name   candidate debuginfo file name
// @param crc    CRC32 value the file is expected to have
// @param extra  used to locate the image path for `name`
// @return true when the CRC32 computed from the file equals `crc`
59 bool separate_debug_file_exists(string & name, unsigned long const crc,
60 extra_images const & extra)
62 unsigned long file_crc = 0;
63 // The size of 2 * 1024 elements for the buffer is arbitrary.
64 char buffer[2 * 1024];
67 string const image_path = extra.find_image_path(name, img_ok, true);
69 if (img_ok != image_ok)
74 ifstream file(image_path.c_str());
78 cverb << vbfd << "found " << name;
// Read one buffer's worth of data and fold it into the running CRC.
80 file.read(buffer, sizeof(buffer));
81 file_crc = calc_crc32(file_crc,
82 reinterpret_cast<unsigned char *>(&buffer[0]),
// The message is built in a separate stream so that the hex manipulator
// is applied to that stream rather than to cverb.
85 ostringstream message;
86 message << " with crc32 = " << hex << file_crc << endl;
87 cverb << vbfd << message.str();
88 return crc == file_crc;
// Look for a separate debuginfo file named after the binary's build-id.
//
// The candidate path is assembled in a heap buffer as
//   DEBUGDIR + "/.build-id/" + <build-id bytes as two-digit lowercase hex> + ".debug"
// and is accepted when access(R_OK) succeeds, in which case it is stored in
// debug_filename. The number of id bytes comes from the file-static
// build_id_size, which get_build_id() sets when it finds the note.
//
// @param buildid         the build-id bytes
// @param debug_filename  receives the path when a readable file is found
//
// The return statements are not visible in this excerpt; judging by the log
// messages the function reports success when the file is readable and
// failure otherwise.
91 static bool find_debuginfo_file_by_buildid(unsigned char * buildid, string & debug_filename)
// Buffer size: both fixed strings, two hex digits per id byte, a terminating
// NUL, and one further byte (presumably for a separator written on a line
// not shown here -- TODO confirm).
93 size_t build_id_fname_size = strlen (DEBUGDIR) + (sizeof "/.build-id/" - 1) + 1
94 + (2 * build_id_size) + (sizeof ".debug" - 1) + 1;
95 char * build_id_fname = (char *) xmalloc(build_id_fname_size);
96 char * sptr = build_id_fname;
97 unsigned char * bptr = buildid;
99 size_t build_id_segment_len = strlen("/.build-id/");
102 memcpy(sptr, DEBUGDIR, strlen(DEBUGDIR));
103 sptr += strlen(DEBUGDIR);
104 memcpy(sptr, "/.build-id/", build_id_segment_len);
105 sptr += build_id_segment_len;
// The first id byte is emitted on its own, the remaining
// build_id_size - 1 bytes in the loop below.
106 sptr += sprintf(sptr, "%02x", (unsigned) *bptr++);
108 for (int i = build_id_size - 1; i > 0; i--)
109 sptr += sprintf(sptr, "%02x", (unsigned) *bptr++);
// strcpy also writes the terminating NUL.
111 strcpy(sptr, ".debug");
113 if (access(build_id_fname, R_OK) == 0) {
114 debug_filename = string(build_id_fname);
116 cverb << vbfd << "Using build-id file" << endl;
118 free(build_id_fname);
120 cverb << vbfd << "build-id file not found; falling back to CRC method." << endl;
// Extract the GNU build-id of ibfd into build_id.
//
// Looks for the ".note.gnu.build-id" section, falling back to ".notes",
// loads the whole section into a heap buffer and walks its note records
// looking for one of type NT_GNU_BUILD_ID whose name is "GNU". On a match
// the descriptor bytes are copied to build_id and their count is saved in
// the file-static build_id_size.
//
// @param ibfd      BFD to inspect
// @param build_id  caller-supplied output buffer for the id bytes
// Throws op_fatal_error when the section contents cannot be read.
125 static bool get_build_id(bfd * ibfd, unsigned char * build_id)
127 Elf32_Nhdr op_note_hdr;
132 cverb << vbfd << "fetching build-id from runtime binary ...";
133 if (!(sect = bfd_get_section_by_name(ibfd, ".note.gnu.build-id"))) {
134 if (!(sect = bfd_get_section_by_name(ibfd, ".notes"))) {
135 cverb << vbfd << " No build-id section found" << endl;
140 bfd_size_type buildid_sect_size = bfd_section_size(ibfd, sect);
141 char * contents = (char *) xmalloc(buildid_sect_size);
143 if (!bfd_get_section_contents(ibfd, sect,
144 reinterpret_cast<unsigned char *>(contents),
145 static_cast<file_ptr>(0), buildid_sect_size)) {
146 string msg = "bfd_get_section_contents:get_build_id";
149 msg += strerror(errno);
151 throw op_fatal_error(msg);
// Walk the note records; `ptr` is the cursor into the section buffer.
155 while (ptr < (contents + buildid_sect_size)) {
// NOTE(review): the three header fields are read from `contents` (the start
// of the section) on every iteration rather than from the cursor `ptr`, so
// only the first note's header is ever decoded while `ptr` keeps advancing.
// Reading from ptr, ptr + 4 and ptr + 8 looks like the intent -- confirm.
156 op_note_hdr.n_namesz = bfd_get_32(ibfd,
157 reinterpret_cast<bfd_byte *>(contents));
158 op_note_hdr.n_descsz = bfd_get_32(ibfd,
159 reinterpret_cast<bfd_byte *>(contents + 4));
160 op_note_hdr.n_type = bfd_get_32(ibfd,
161 reinterpret_cast<bfd_byte *>(contents + 8));
// Step over the header; `ptr` now addresses the note name.
162 ptr += sizeof(op_note_hdr);
// sizeof("GNU") is 4: the three letters plus the terminating NUL.
163 if ((op_note_hdr.n_type == NT_GNU_BUILD_ID) &&
164 (op_note_hdr.n_namesz == sizeof("GNU")) &&
165 (strcmp("GNU", ptr ) == 0)) {
// NOTE(review): n_descsz comes from the file and is used as the memcpy
// length without a bound check; find_separate_debug_file() passes a
// 64-byte buffer.
166 build_id_size = op_note_hdr.n_descsz;
167 memcpy(build_id, ptr + op_note_hdr.n_namesz, build_id_size);
169 cverb << vbfd << "Found build-id" << endl;
// NOTE(review): this advance adds the raw name and descriptor sizes with no
// alignment padding -- confirm against the ELF note layout.
172 ptr += op_note_hdr.n_namesz + op_note_hdr.n_descsz;
175 cverb << vbfd << " No build-id found" << endl;
// Read the ".gnu_debuglink" section of ibfd.
//
// The section is loaded into a heap buffer. Its leading NUL-terminated
// string is returned through `filename`, and the 32-bit value stored after
// it (at the next 4-byte aligned offset) is returned through `crc32`.
//
// @param ibfd      BFD to inspect
// @param filename  receives the debug file name stored in the section
// @param crc32     receives the CRC stored after the file name
// Throws op_fatal_error when the section contents cannot be read. The
// handling of a missing section is not visible in this excerpt.
181 bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
185 cverb << vbfd << "fetching .gnu_debuglink section" << endl;
186 sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
191 bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);
192 char * contents = (char *) xmalloc(debuglink_size);
194 << ".gnu_debuglink section has size " << debuglink_size << endl;
196 if (!bfd_get_section_contents(ibfd, sect,
197 reinterpret_cast<unsigned char *>(contents),
198 static_cast<file_ptr>(0), debuglink_size)) {
199 string msg = "bfd_get_section_contents:get_debug";
202 msg += strerror(errno);
204 throw op_fatal_error(msg);
207 /* CRC value is stored after the filename, aligned up to 4 bytes. */
// NOTE(review): neither strlen(contents) nor crc_offset + 4 is checked
// against debuglink_size, so a section without a NUL terminator or with a
// truncated CRC would be read past the end of the buffer.
208 size_t filename_len = strlen(contents);
209 size_t crc_offset = filename_len + 1;
// Round up to the next multiple of 4.
210 crc_offset = (crc_offset + 3) & ~3;
212 crc32 = bfd_get_32(ibfd,
213 reinterpret_cast<bfd_byte *>(contents + crc_offset));
214 filename = string(contents, filename_len);
215 cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
222 * With Objective C, we'll get strings like:
224 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
226 * for the symbol name, and:
227 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
229 * for the function name, so we have to do some looser matching
230 * than for other languages (unfortunately, it's not possible
231 * to demangle Objective C symbols).
// Loose comparison of a symbol name against an Objective C method name.
//
// Method names shorter than three characters are handled up front, the
// "-[" and "+[" prefixes are distinguished, and the rest of `method` is
// scanned from its third character onward. The result is whether `sym`
// equals the `mangled` string; how `mangled` is assembled is on lines not
// visible in this excerpt.
//
// @param sym     symbol name
// @param method  function name to compare it with
233 bool objc_match(string const & sym, string const & method)
235 if (method.length() < 3)
240 if (is_prefix(method, "-[")) {
242 } else if (is_prefix(method, "+[")) {
// Skip the two-character prefix tested above.
248 string::const_iterator it = method.begin() + 2;
249 string::const_iterator const end = method.end();
251 bool found_paren = false;
253 for (; it != end; ++it) {
275 return sym == mangled;
280 * With a binary image where some objects are missing debug
281 * info, we can end up attributing to a completely different
282 * function (#484660): bfd_nearest_line() will happily move from one
283 * symbol to the nearest one it can find with debug information.
284 * To mitigate this problem, we check that the symbol name
285 * matches the returned function name.
287 * However, this check fails in some cases it shouldn't:
288 * Objective C, and C++ static inline functions (as discussed in
289 * GCC bugzilla #11774). So, we have a looser check that
290 * accepts merely a substring, plus some magic for Objective C.
292 * If even the loose check fails, then we give up.
// Decide whether `function` (as reported by the line-number lookup) plausibly
// refers to the symbol called `name`.
//
// Three checks are made in order: exact equality, objc_match(name, function),
// and finally whether `function` occurs anywhere inside `name`. The last,
// loose check prints a warning to cerr (guarded by the function-local static
// `warned`) and logs the fuzzy match to the cverb stream. The value returned
// after each check is on lines not visible in this excerpt.
294 bool is_correct_function(string const & function, string const & name)
296 if (name == function)
299 if (objc_match(name, function))
302 // warn the user if we had to use the loose check
303 if (name.find(function) != string::npos) {
304 static bool warned = false;
306 cerr << "warning: some functions compiled without "
307 << "debug information may have incorrect source "
308 << "line attributions" << endl;
311 cverb << vbfd << "is_correct_function(" << function << ", "
312 << name << ") fuzzy match." << endl;
321 * binutils 2.12 and below have a small bug where functions without a
322 * debug entry at the prologue start do not give a useful line number
323 * from bfd_find_nearest_line(). This can happen with certain gcc
324 * versions such as 2.95.
326 * We work around this problem by scanning forward for a vma with valid
327 * linenr info, if we can't get a valid line number. Problem uncovered
328 * by Norbert Kaufmann. The work-around decreases, on the tincas
329 application, the number of failures to retrieve linenr info from 835
330 * to 173. Most of the remaining are c++ inline functions mainly from
331 * the STL library. Fix #529622
// Scan forward from pc for an address that yields usable line information.
//
// Probes pc + 1 up to (but excluding) pc + max_search with
// bfd_find_nearest_line(). max_search starts at 16 and is reduced to
// section_size - pc when pc + 16 would run past the section. A probe is
// accepted when the call succeeds, the file name and function name are both
// non-null, the line number is non-zero and
// is_correct_function(function, name) holds. The accepted file name is
// handed back through *filename (and presumably the line number through
// *line, on a line not shown here).
//
// @param abfd      BFD being queried
// @param section   section containing pc
// @param syms      symbol table passed on to bfd_find_nearest_line()
// @param name      symbol name the reported function must match
// @param pc        section-relative address the search starts after
// @param filename  output: file name of the accepted probe
// @param line      output: line number of the accepted probe
333 void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
334 string const & name, bfd_vma pc,
335 char const ** filename, unsigned int * line)
337 char const * cfilename;
338 char const * function;
341 // FIXME: looking at debug info for all gcc versions shows that
342 // the same problems can -perhaps- occur for epilog code: find a
343 // samples files with samples in epilog and try opreport -l -g
344 // on it, check it also with opannotate.
346 // first restrict the search on a sensible range of vma, 16 is
347 // an intuitive value based on epilog code look
348 size_t max_search = 16;
349 size_t section_size = bfd_section_size(abfd, section);
350 if (pc + max_search > section_size)
351 max_search = section_size - pc;
353 for (size_t i = 1; i < max_search; ++i) {
354 bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
355 &cfilename, &function,
358 if (ret && cfilename && function && linenr != 0
359 && is_correct_function(function, name)) {
360 *filename = cfilename;
// Open `file` with bfd_openr() and run check_format() on the resulting BFD.
// The cverb message below reports a failed open; the condition guarding it
// and the return statements are on lines not visible in this excerpt.
371 bfd * open_bfd(string const & file)
373 /* bfd keeps its own reference to the filename char *,
374 * so it must have a lifetime longer than the ibfd */
375 bfd * ibfd = bfd_openr(file.c_str(), NULL);
377 cverb << vbfd << "bfd_openr failed for " << file << endl;
381 check_format(file, &ibfd);
// Open a BFD on the already-open descriptor `fd` with bfd_fdopenr(), using
// `file` as its name, and run check_format() on the result. The condition
// guarding the failure message and the return statements are on lines not
// visible in this excerpt.
387 bfd * fdopen_bfd(string const & file, int fd)
389 /* bfd keeps its own reference to the filename char *,
390 * so it must have a lifetime longer than the ibfd */
391 bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
// NOTE(review): the message names bfd_openr although the call above is
// bfd_fdopenr -- looks copied from open_bfd().
393 cverb << vbfd << "bfd_openr failed for " << file << endl;
397 check_format(file, &ibfd);
// Locate the separate debuginfo file belonging to the binary opened as ibfd.
//
// The build-id route is tried first: get_build_id() followed by
// find_debuginfo_file_by_buildid(). Otherwise the .gnu_debuglink name and
// CRC are fetched with get_debug_link_info() and three candidate locations
// are tested in order with separate_debug_file_exists():
//   1. <dir of binary>/.debug/<name>
//   2. DEBUGDIR + <dir of binary>/<name>
//   3. <dir of binary>/<name>
// The first candidate that passes is stored in debug_filename.
//
// @param ibfd            BFD of the binary
// @param filepath_in     path of the binary, used to derive its directory
// @param debug_filename  receives the path of the debuginfo file found
// @param extra           passed through to separate_debug_file_exists()
//
// The return statements are not visible in this excerpt.
403 bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
404 string & debug_filename, extra_images const & extra)
406 string filepath(filepath_in);
408 unsigned long crc32 = 0;
409 // The readelf program uses a char [64], so that's what we'll use.
410 // To my knowledge, the build-id should not be bigger than 20 chars.
411 unsigned char buildid[64];
413 if (get_build_id(ibfd, buildid) &&
414 find_debuginfo_file_by_buildid(buildid, debug_filename))
417 if (!get_debug_link_info(ibfd, basename, crc32))
420 /* Use old method of finding debuginfo file by comparing runtime binary's
421 * CRC with the CRC we calculate from the debuginfo file's contents.
422 * NOTE: This method breaks on systems where "MiniDebugInfo" is used
423 * since the CRC stored in the runtime binary won't match the compressed
424 * debuginfo file's CRC. But in practice, we shouldn't ever run into such
425 * a scenario since the build-id should always be available.
428 // Work out the image file's directory prefix
429 string filedir = op_dirname(filepath);
430 // Make sure it ends with /
431 if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
434 string first_try(filedir + ".debug/" + basename);
435 string second_try(DEBUGDIR + filedir + basename);
436 string third_try(filedir + basename);
438 ostringstream message;
439 message << "looking for debugging file " << basename
440 << " with crc32 = " << hex << crc32 << endl;
441 cverb << vbfd << message.str();
443 if (separate_debug_file_exists(first_try, crc32, extra))
444 debug_filename = first_try;
445 else if (separate_debug_file_exists(second_try, crc32, extra))
446 debug_filename = second_try;
447 else if (separate_debug_file_exists(third_try, crc32, extra))
448 debug_filename = third_try;
// Filter deciding whether `sym` is a symbol worth keeping.
//
// The visible checks, in order: a sanity check that throws
// op_runtime_error with a pointer to the binutils FAQ entry; the section
// must have SEC_CODE set; nameless symbols; the ARM mapping symbols "$a",
// "$t", "$d" and "$x"; names starting with ".L"; the name
// "gcc2_compiled."; and symbols flagged BSF_SECTION_SYM. The value
// returned for each case is on lines not visible in this excerpt.
456 bool interesting_symbol(asymbol * sym)
458 // #717720 some binutils are miscompiled by gcc 2.95, one of the
459 // typical symptoms can be caught here.
462 os << "Your version of binutils seems to have a bug.\n"
463 << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
464 throw op_runtime_error(os.str());
467 if (!(sym->section->flags & SEC_CODE))
470 // returning true for fix up in op_bfd_symbol()
471 if (!sym->name || sym->name[0] == '\0')
473 /* ARM assembler internal mapping symbols aren't interesting */
474 if ((strcmp("$a", sym->name) == 0) ||
475 (strcmp("$t", sym->name) == 0) ||
476 (strcmp("$d", sym->name) == 0) ||
477 (strcmp("$x", sym->name) == 0))
480 // C++ exception stuff
481 if (sym->name[0] == '.' && sym->name[1] == 'L')
484 /* This case cannot be moved to boring_symbol(),
485 * because that's only used for duplicate VMAs,
486 * and sometimes this symbol appears at an address
487 * different from all other symbols.
489 if (!strcmp("gcc2_compiled.", sym->name))
492 /* Commit ab45a0cc5d1cf522c1aef8f22ed512a9aae0dc1c removed a check for
493 * the SEC_LOAD bit. See the commit message for details why this
497 if (sym->flags & BSF_SECTION_SYM)
// Compare two symbols to pick the less useful one. According to the comment
// in interesting_symbol(), this is used for symbols with duplicate VMAs.
//
// The criteria are tested in this order, each in both directions:
//   1. the name is exactly "Letext"
//   2. the name starts with "??"
//   3. one symbol is hidden and the other is not
//   4. one name starts with '_' and the other does not
//   5. one symbol is weak and the other is not
// The value returned for each case is on lines not visible in this excerpt.
504 bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
506 if (first.name() == "Letext")
508 else if (second.name() == "Letext")
511 if (first.name().substr(0, 2) == "??")
513 else if (second.name().substr(0, 2) == "??")
516 if (first.hidden() && !second.hidden())
518 else if (!first.hidden() && second.hidden())
521 if (first.name()[0] == '_' && second.name()[0] != '_')
523 else if (first.name()[0] != '_' && second.name()[0] == '_')
526 if (first.weak() && !second.weak())
528 else if (!first.weak() && second.weak())
// Walk abfd's section list looking for a section that has the SEC_DEBUGGING
// flag set. The return statements are not visible in this excerpt;
// presumably the first such section yields true.
535 bool bfd_info::has_debug_info() const
540 for (asection const * sect = abfd->sections; sect; sect = sect->next) {
541 if (sect->flags & SEC_DEBUGGING)
549 bfd_info::~bfd_info()
556 void bfd_info::close()
562 #if SYNTHESIZE_SYMBOLS
564 * This function is intended solely for processing ppc64 debuginfo files.
565 * On ppc64 platforms where there is no symbol information in the image bfd,
566 * the debuginfo syms need to be mapped back to the sections of the image bfd
567 * when calling bfd_get_synthetic_symtab() to gather complete symbol information.
568 * That is the purpose of the translate_debuginfo_syms() function.
570 * This function is only called when processing symbols retrieved from a
571 * debuginfo file that is separate from the actual runtime binary image.
572 * Separate debuginfo files may be needed in two different cases:
573 * 1) the real image is completely stripped, where there is no symbol
575 * 2) the real image has debuginfo stripped, and the user is requesting "-g"
576 * (src file/line num info)
// Re-point debuginfo symbols at the matching sections of the image BFD.
//
// Step 1: collect the image BFD's sections in a multimap keyed by section
// name; a section whose name begins with ".comment" is treated as the end
// marker. Step 2: for every debuginfo symbol owned by abfd, whenever the
// symbol's section name differs from the previous symbol's, look up the
// image section with the same name whose vma equals the debuginfo section's
// vma plus vma_adj. The original debuginfo vma is cached in section_vma_maps
// under the image section's vma. Step 3: when a match exists, the symbol's
// section and the_bfd are switched to the image's.
//
// @param dbg_syms     symbols read from the debuginfo file
// @param nr_dbg_syms  number of entries in dbg_syms
578 void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
580 unsigned int img_sect_cnt = 0;
582 bfd * image_bfd = image_bfd_info->abfd;
583 multimap<string, bfd_section *> image_sections;
585 for (bfd_section * sect = image_bfd->sections;
586 sect && img_sect_cnt < image_bfd->section_count;
588 // A comment section marks the end of the needed sections
// strstr() returning the start of the name means the name begins with
// ".comment".
589 if (strstr(sect->name, ".comment") == sect->name)
591 image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
595 asymbol * sym = dbg_syms[0];
596 string prev_sect_name = "";
597 bfd_section * matched_section = NULL;
// Offset between the two files' start addresses, applied when comparing
// section vmas below.
598 vma_adj = image_bfd->start_address - abfd->start_address;
// NOTE(review): the increment expression loads dbg_syms[++i] before the
// condition is re-tested, so dbg_syms[nr_dbg_syms] is read after the last
// iteration -- confirm the array has that extra slot. The index is an int
// while the count is a long.
599 for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
602 if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
603 section_switch = true;
604 prev_sect_name = sym->section->name;
606 section_switch = false;
608 if (sym->section->owner && sym->section->owner == abfd) {
609 if (section_switch ) {
610 matched_section = NULL;
611 multimap<string, bfd_section *>::iterator it;
612 pair<multimap<string, bfd_section *>::iterator,
613 multimap<string, bfd_section *>::iterator> range;
615 range = image_sections.equal_range(sym->section->name);
616 for (it = range.first; it != range.second; it++) {
617 if ((*it).second->vma == sym->section->vma + vma_adj) {
618 matched_section = (*it).second;
620 section_vma_maps[(*it).second->vma] = sym->section->vma;
625 if (matched_section) {
626 sym->section = matched_section;
627 sym->the_bfd = image_bfd;
// Build the symbol array from mini symbols plus BFD synthetic symbols.
//
// Applies only when the BFD target name is "elf64-powerpc", optionally
// followed by a '-' suffix. Mini symbols are read with
// bfd_read_minisymbols(). When an image bfd_info exists and has no symbols,
// they are first passed through translate_debuginfo_syms() and the image's
// BFD is used for bfd_get_synthetic_symtab(). Afterwards `syms` holds the
// mini symbols followed by the synthetic ones, terminated by a NULL entry,
// and nr_syms is the sum of the two counts.
//
// The return statements are not visible in this excerpt.
633 bool bfd_info::get_synth_symbols()
635 const char* targname = bfd_get_target(abfd);
636 // Match elf64-powerpc and elf64-powerpc-freebsd, but not
637 // elf64-powerpcle. elf64-powerpcle is a different ABI without
638 // function descriptors, so we don't need the synthetic
639 // symbols to have function code marked by a symbol.
// 13 is the length of "elf64-powerpc"; the character after it must be the
// end of the string or a '-'.
640 bool is_elf64_powerpc_target = (!strncmp(targname, "elf64-powerpc", 13)
641 && (targname[13] == 0
642 || targname[13] == '-'));
644 if (!is_elf64_powerpc_target)
649 long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
650 if (nr_mini_syms < 1)
653 asymbol ** mini_syms = (asymbol **)buf;
657 /* For ppc64, a debuginfo file by itself does not hold enough symbol
658 * information for us to properly attribute samples to symbols. If
659 * the image file's bfd has no symbols (as in a super-stripped library),
660 * then we need to do the extra processing in translate_debuginfo_syms.
662 if (image_bfd_info && image_bfd_info->nr_syms == 0) {
663 translate_debuginfo_syms(mini_syms, nr_mini_syms);
664 synth_bfd = image_bfd_info->abfd;
668 long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
673 if (nr_synth_syms < 0) {
678 /* If we called translate_debuginfo_syms() above, then we had to map
679 * the debuginfo symbols' sections to the sections of the runtime binary.
680 * We had to twist ourselves in this knot due to the peculiar requirements
681 * of bfd_get_synthetic_symtab(). While doing this mapping, we cached
682 * the original section VMAs because we need those original values in
683 * order to properly match up sample offsets with debug data. So now that
684 * we're done with bfd_get_synthetic_symtab, we can restore these section
687 if (section_vma_maps.size()) {
688 unsigned int sect_count = 0;
689 for (bfd_section * sect = synth_bfd->sections;
690 sect && sect_count < synth_bfd->section_count;
// NOTE(review): indexing section_vma_maps with [] inserts a zero entry for
// any vma that translate_debuginfo_syms() did not record, which would set
// that section's vma to 0. Confirm every section visited here was mapped,
// or that a guard exists on a line not visible in this excerpt.
692 sect->vma = section_vma_maps[sect->vma];
698 cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
699 cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
701 nr_syms = nr_mini_syms + nr_synth_syms;
// One extra slot for the NULL terminator stored below.
702 syms.reset(new asymbol *[nr_syms + 1]);
704 for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
705 syms[i] = mini_syms[i];
// synth_syms is an array of asymbol objects, so the address of each
// element is stored.
708 for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
709 syms[nr_mini_syms + i] = synth_syms + i;
714 // bfd_canonicalize_symtab does this, so shall we
715 syms[nr_syms] = NULL;
720 bool bfd_info::get_synth_symbols()
724 #endif /* SYNTHESIZE_SYMBOLS */
// Load the symbol table of abfd into syms / nr_syms.
//
// get_synth_symbols() is tried first. Otherwise, when the file flags include
// HAS_SYMS, the value from bfd_get_symtab_upper_bound() is divided by
// sizeof(asymbol *) to size the array that bfd_canonicalize_symtab() then
// fills; nr_syms ends up as the count that call returns. Progress is logged
// to the cverb stream throughout. The branch structure between these steps
// is partly on lines not visible in this excerpt.
727 void bfd_info::get_symbols()
732 cverb << vbfd << "bfd_info::get_symbols() for "
733 << bfd_get_filename(abfd) << endl;
735 if (get_synth_symbols())
738 if (bfd_get_file_flags(abfd) & HAS_SYMS)
739 nr_syms = bfd_get_symtab_upper_bound(abfd);
741 ostringstream message;
742 message << "bfd_get_symtab_upper_bound: " << dec
743 << nr_syms << hex << endl;
744 cverb << vbfd << message.str();
// The upper bound is treated as a byte count; convert it to a number of
// pointer-sized entries.
746 nr_syms /= sizeof(asymbol *);
752 cverb << vbfd << "Debuginfo has debug data only" << endl;
754 syms.reset(new asymbol *[nr_syms]);
755 nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
756 ostringstream message;
757 message << "bfd_canonicalize_symtab: " << dec
758 << nr_syms << hex << endl;
759 cverb << vbfd << message.str();
765 find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
766 bfd_vma offset, bool anon_obj)
768 char const * function = "";
769 char const * cfilename = "";
770 unsigned int linenr = 0;
774 asection * section = NULL;
775 asymbol * empty_syms[1];
782 // take care about artificial symbol
789 // If this bfd_info object has no syms, that implies that we're
790 // using a debuginfo bfd_info object that has only debug data.
791 // This also implies that the passed sym is from the runtime binary,
792 // and thus its section is also from the runtime binary. And
793 // since section VMA can be different for a runtime binary (prelinked)
794 // and its associated debuginfo, we need to obtain the debuginfo
795 // section to pass to the libbfd functions.
796 asection * sect_candidate;
797 bfd_vma vma_adj = b.get_image_bfd_info()->abfd->start_address - abfd->start_address;
799 section = sym.symbol()->section;
800 for (sect_candidate = abfd->sections;
801 (sect_candidate != NULL) && (section == NULL);
802 sect_candidate = sect_candidate->next) {
803 if (sect_candidate->vma + vma_adj == sym.symbol()->section->vma) {
804 section = sect_candidate;
807 if (section == NULL) {
808 cerr << "ERROR: Unable to find section for symbol " << sym.symbol()->name << endl;
815 section = sym.symbol()->section;
818 pc = offset - sym.symbol()->section->vma;
820 pc = (sym.value() + offset) - sym.filepos();
822 if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
825 if (pc >= bfd_section_size(abfd, section))
828 ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
831 if (!ret || !cfilename || !function)
835 * is_correct_function does not handle the case of static inlines,
836 * but if the linenr is non-zero in the inline case, it is the correct
839 if (linenr == 0 && !is_correct_function(function, sym.name()))
843 fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
848 info.filename = cfilename;
854 // some stl lacks string::clear()
855 info.filename.erase(info.filename.begin(), info.filename.end());