2 * @file bfd_support.cpp
3 * BFD muck we have to deal with.
5 * @remark Copyright 2005 OProfile authors
6 * @remark Read the file COPYING
11 #include "bfd_support.h"
14 #include "op_fileio.h"
15 #include "op_config.h"
16 #include "string_manip.h"
17 #include "file_manip.h"
19 #include "locate_images.h"
20 #include "op_libiberty.h"
21 #include "op_exception.h"
22 #include "op_bfd_wrappers.h"
// Provide NT_GNU_BUILD_ID (value 3) only when nothing earlier has defined it.
44 #ifndef NT_GNU_BUILD_ID
45 #define NT_GNU_BUILD_ID 3
// Byte length of the build-id most recently parsed by get_build_id(); it is read
// back by find_debuginfo_file_by_buildid() to size and format the build-id path.
47 static size_t build_id_size;
// Check that *ibfd is recognised by BFD as an object file (bfd_object).
//   file - only used in the verbose diagnostic printed when the check fails
//   ibfd - address of the bfd handle to test
// NOTE(review): how *ibfd is treated after a failure is not visible here - confirm.
50 void check_format(string const & file, bfd ** ibfd)
52 if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
53 cverb << vbfd << "BFD format failure for " << file << endl;
// Decide whether a candidate separate debug file matches the expected CRC.
//   name  - candidate path, resolved through extra.find_image_path()
//   crc   - CRC32 the caller expects the file contents to produce
//   extra - image locator used to resolve name
// Returns true when the CRC computed over the file equals crc.
60 bool separate_debug_file_exists(string & name, unsigned long const crc,
61 extra_images const & extra)
63 unsigned long file_crc = 0;
64 // The size of 2 * 1024 elements for the buffer is arbitrary.
65 char buffer[2 * 1024];
68 string const image_path = extra.find_image_path(name, img_ok, true);
// Anything other than image_ok is handled separately (presumably an early exit - confirm).
70 if (img_ok != image_ok)
// NOTE(review): the stream is opened in text mode; harmless on POSIX, confirm if
// other platforms matter since the CRC is computed over raw bytes.
75 ifstream file(image_path.c_str());
79 cverb << vbfd << "found " << name;
// The file is read in sizeof(buffer) pieces, each folded into the running CRC.
81 file.read(buffer, sizeof(buffer));
82 file_crc = calc_crc32(file_crc,
83 reinterpret_cast<unsigned char *>(&buffer[0]),
// The hex manipulator is applied to a local stream rather than to cverb itself.
86 ostringstream message;
87 message << " with crc32 = " << hex << file_crc << endl;
88 cverb << vbfd << message.str();
89 return crc == file_crc;
// Look for a debuginfo file named after the build-id underneath DEBUGDIR.
//   buildid        - raw build-id bytes; their count is taken from the file-scope build_id_size
//   debug_filename - receives the candidate path when that file is readable
// The candidate path is DEBUGDIR, then /.build-id/, then the first id byte in hex,
// then the remaining id bytes in hex, then .debug.
// NOTE(review): the return statements are not visible here; presumably the result
// says whether the file was found.
92 static bool find_debuginfo_file_by_buildid(unsigned char * buildid, string & debug_filename)
// Buffer budget: directory, /.build-id/, one extra byte, two hex digits per id
// byte, .debug and the terminating NUL.
94 size_t build_id_fname_size = strlen (DEBUGDIR) + (sizeof "/.build-id/" - 1) + 1
95 + (2 * build_id_size) + (sizeof ".debug" - 1) + 1;
96 char * build_id_fname = (char *) xmalloc(build_id_fname_size);
97 char * sptr = build_id_fname;
98 unsigned char * bptr = buildid;
100 size_t build_id_segment_len = strlen("/.build-id/");
103 memcpy(sptr, DEBUGDIR, strlen(DEBUGDIR));
104 sptr += strlen(DEBUGDIR);
105 memcpy(sptr, "/.build-id/", build_id_segment_len);
106 sptr += build_id_segment_len;
// The first id byte is formatted on its own; the loop below emits the other
// build_id_size - 1 bytes.
// NOTE(review): this byte is written even when build_id_size is 0, which the
// budget above does not cover - confirm an empty id can never reach here.
107 sptr += sprintf(sptr, "%02x", (unsigned) *bptr++);
// NOTE(review): build_id_size is size_t while the counter is int; fine for realistic id lengths.
109 for (int i = build_id_size - 1; i > 0; i--)
110 sptr += sprintf(sptr, "%02x", (unsigned) *bptr++);
// strcpy also writes the terminating NUL.
112 strcpy(sptr, ".debug");
// Only a readable file counts as a hit.
114 if (access(build_id_fname, R_OK) == 0) {
115 debug_filename = string(build_id_fname);
117 cverb << vbfd << "Using build-id file" << endl;
// debug_filename holds its own copy, so the scratch buffer can be released.
119 free(build_id_fname);
121 cverb << vbfd << "build-id file not found; falling back to CRC method." << endl;
// Extract the GNU build-id note of ibfd into build_id.
//   ibfd     - bfd of the runtime binary
//   build_id - destination for the descriptor bytes of the note
// Section .note.gnu.build-id is tried first, then .notes. On a match the file-scope
// build_id_size is set to the descriptor length and the bytes are copied out.
// Throws op_fatal_error when the section contents cannot be read.
// NOTE(review): the capacity of build_id is not passed in and the copy length comes
// straight from the note (n_descsz); the caller visible in this file supplies 64
// bytes - confirm whether a bound check is needed.
126 static bool get_build_id(bfd * ibfd, unsigned char * build_id)
128 Elf32_Nhdr op_note_hdr;
133 cverb << vbfd << "fetching build-id from runtime binary ...";
134 if (!(sect = bfd_get_section_by_name(ibfd, ".note.gnu.build-id"))) {
135 if (!(sect = bfd_get_section_by_name(ibfd, ".notes"))) {
136 cverb << vbfd << " No build-id section found" << endl;
// Pull the whole note section into a heap buffer.
141 bfd_size_type buildid_sect_size = op_bfd_section_size(ibfd, sect);
142 char * contents = (char *) xmalloc(buildid_sect_size);
144 if (!bfd_get_section_contents(ibfd, sect,
145 reinterpret_cast<unsigned char *>(contents),
146 static_cast<file_ptr>(0), buildid_sect_size)) {
147 string msg = "bfd_get_section_contents:get_build_id";
150 msg += strerror(errno);
152 throw op_fatal_error(msg);
// Walk the notes: header, then name, then descriptor.
// NOTE(review): the three header fields below are always decoded from the start
// of contents, while ptr is the cursor that advances. A build-id note that is not
// the first note in the section would never be decoded - these reads likely
// should use ptr.
156 while (ptr < (contents + buildid_sect_size)) {
157 op_note_hdr.n_namesz = bfd_get_32(ibfd,
158 reinterpret_cast<bfd_byte *>(contents));
159 op_note_hdr.n_descsz = bfd_get_32(ibfd,
160 reinterpret_cast<bfd_byte *>(contents + 4));
161 op_note_hdr.n_type = bfd_get_32(ibfd,
162 reinterpret_cast<bfd_byte *>(contents + 8));
// Step over the header so ptr points at the note name.
163 ptr += sizeof(op_note_hdr);
// sizeof counts the terminating NUL, so the expected name size is 4.
164 if ((op_note_hdr.n_type == NT_GNU_BUILD_ID) &&
165 (op_note_hdr.n_namesz == sizeof("GNU")) &&
166 (strcmp("GNU", ptr ) == 0)) {
167 build_id_size = op_note_hdr.n_descsz;
// The descriptor (the id itself) follows the name.
168 memcpy(build_id, ptr + op_note_hdr.n_namesz, build_id_size);
170 cverb << vbfd << "Found build-id" << endl;
// NOTE(review): the advance does not round namesz and descsz up to a 4-byte
// boundary, which ELF notes normally use for padding - confirm.
173 ptr += op_note_hdr.n_namesz + op_note_hdr.n_descsz;
176 cverb << vbfd << " No build-id found" << endl;
// Read the .gnu_debuglink section of ibfd.
//   filename - receives the NUL-terminated debug file name stored at the start of the section
//   crc32    - receives the 32-bit value stored after the name, at the next 4-byte boundary
// Throws op_fatal_error when the section contents cannot be read.
// NOTE(review): the return statements and any release of contents are not visible here.
182 bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
186 cverb << vbfd << "fetching .gnu_debuglink section" << endl;
187 sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
// Copy the whole section into a heap buffer.
192 bfd_size_type debuglink_size = op_bfd_section_size(ibfd, sect);
193 char * contents = (char *) xmalloc(debuglink_size);
195 << ".gnu_debuglink section has size " << debuglink_size << endl;
197 if (!bfd_get_section_contents(ibfd, sect,
198 reinterpret_cast<unsigned char *>(contents),
199 static_cast<file_ptr>(0), debuglink_size)) {
200 string msg = "bfd_get_section_contents:get_debug";
203 msg += strerror(errno);
205 throw op_fatal_error(msg);
208 /* CRC value is stored after the filename, aligned up to 4 bytes. */
// NOTE(review): strlen() and the CRC read below trust the section to contain a
// terminator plus four more bytes; neither is checked against debuglink_size.
209 size_t filename_len = strlen(contents);
210 size_t crc_offset = filename_len + 1;
// Round up to the next multiple of four.
211 crc_offset = (crc_offset + 3) & ~3;
213 crc32 = bfd_get_32(ibfd,
214 reinterpret_cast<bfd_byte *>(contents + crc_offset));
215 filename = string(contents, filename_len);
216 cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
223 * With Objective C, we'll get strings like:
225 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
227 * for the symbol name, and:
228 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
230 * for the function name, so we have to do some looser matching
231 * than for other languages (unfortunately, it's not possible
232 * to demangle Objective C symbols).
// Loose comparison between a symbol name and an Objective C method name.
//   sym    - symbol name as found in the symbol table
//   method - method name; the prefixes -[ and +[ are recognised
// Returns whether sym equals the mangled string built from method.
// NOTE(review): the construction of mangled is not visible here.
234 bool objc_match(string const & sym, string const & method)
// Methods shorter than three characters get special handling (presumably rejected - confirm).
236 if (method.length() < 3)
241 if (is_prefix(method, "-[")) {
243 } else if (is_prefix(method, "+[")) {
// Scanning starts after the two-character prefix.
249 string::const_iterator it = method.begin() + 2;
250 string::const_iterator const end = method.end();
252 bool found_paren = false;
254 for (; it != end; ++it) {
276 return sym == mangled;
281 * With a binary image where some objects are missing debug
282 * info, we can end up attributing to a completely different
283 * function (#484660): bfd_nearest_line() will happily move from one
284 * symbol to the nearest one it can find with debug information.
285 * To mitigate this problem, we check that the symbol name
286 * matches the returned function name.
288 * However, this check fails in some cases it shouldn't:
289 * Objective C, and C++ static inline functions (as discussed in
290 * GCC bugzilla #11774). So, we have a looser check that
291 * accepts merely a substring, plus some magic for Objective C.
293 * If even the loose check fails, then we give up.
// Decide whether function plausibly belongs to the symbol called name.
//   function - function name to validate (fixup_linenr passes what bfd_find_nearest_line reported)
//   name     - name of the symbol being looked up
// Checks run in this order: exact equality, objc_match(), then whether function
// occurs anywhere inside name.
// NOTE(review): the value returned by each branch is not visible here.
295 bool is_correct_function(string const & function, string const & name)
297 if (name == function)
300 if (objc_match(name, function))
303 // warn the user if we had to use the loose check
304 if (name.find(function) != string::npos) {
// The static flag persists across calls (presumably so the warning prints once - confirm).
305 static bool warned = false;
307 cerr << "warning: some functions compiled without "
308 << "debug information may have incorrect source "
309 << "line attributions" << endl;
312 cverb << vbfd << "is_correct_function(" << function << ", "
313 << name << ") fuzzy match." << endl;
322 * binutils 2.12 and below have a small bug where functions without a
323 * debug entry at the prologue start do not give a useful line number
324 * from bfd_find_nearest_line(). This can happen with certain gcc
325 * versions such as 2.95.
327 * We work around this problem by scanning forward for a vma with valid
328 * linenr info, if we can't get a valid line number. Problem uncovered
329 * by Norbert Kaufmann. The work-around decreases, on the tincas
330 * application, the number of failures to retrieve linenr info from 835
331 * to 173. Most of the remaining are c++ inline functions mainly from
332 * the STL library. Fix #529622
// Probe the addresses following pc for usable line information.
//   abfd, section, syms - passed through to bfd_find_nearest_line()
//   name                - symbol name a hit must agree with (via is_correct_function)
//   pc                  - offset where the lookup starts; it is compared against the section size
//   filename, line      - out parameters; *filename is set from the accepted hit
// A hit is accepted when the lookup succeeds, yields a file name and a function
// name, has a non-zero line number and passes is_correct_function().
334 void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
335 string const & name, bfd_vma pc,
336 char const ** filename, unsigned int * line)
338 char const * cfilename;
339 char const * function;
342 // FIXME: looking at debug info for all gcc version shows that
343 // the same problems can -perhaps- occur for epilog code: find a
344 // samples files with samples in epilog and try opreport -l -g
345 // on it, check it also with opannotate.
347 // first restrict the search on a sensible range of vma, 16 is
348 // an intuitive value based on epilog code look
349 size_t max_search = 16;
350 size_t section_size = op_bfd_section_size(abfd, section);
// Clamp the probe window to the end of the section.
// NOTE(review): if pc were larger than section_size the subtraction would wrap
// (size_t); the caller visible in this file tests pc against the section size
// first - confirm.
351 if (pc + max_search > section_size)
352 max_search = section_size - pc;
// Probing starts at pc + 1; pc itself is not looked up again here.
354 for (size_t i = 1; i < max_search; ++i) {
355 bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
356 &cfilename, &function,
359 if (ret && cfilename && function && linenr != 0
360 && is_correct_function(function, name)) {
361 *filename = cfilename;
// Open file for reading through bfd_openr() and run check_format() on the handle.
// A verbose diagnostic naming the file is available for the failure case.
// NOTE(review): the return statements are not visible here.
372 bfd * open_bfd(string const & file)
374 /* bfd keeps its own reference to the filename char *,
375 * so it must have a lifetime longer than the ibfd */
376 bfd * ibfd = bfd_openr(file.c_str(), NULL);
378 cverb << vbfd << "bfd_openr failed for " << file << endl;
382 check_format(file, &ibfd);
// Open a bfd on the already-open descriptor fd through bfd_fdopenr() and run
// check_format() on the handle. file supplies the name handed to BFD and used in diagnostics.
// NOTE(review): the return statements are not visible here.
388 bfd * fdopen_bfd(string const & file, int fd)
390 /* bfd keeps its own reference to the filename char *,
391 * so it must have a lifetime longer than the ibfd */
392 bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
// NOTE(review): the diagnostic names bfd_openr although the call above is
// bfd_fdopenr - looks copied from open_bfd; consider correcting the message.
394 cverb << vbfd << "bfd_openr failed for " << file << endl;
398 check_format(file, &ibfd);
// Locate the separate debuginfo file that belongs to the image behind ibfd.
//   ibfd           - bfd of the runtime binary
//   filepath_in    - path of the runtime binary; its directory seeds the search
//   debug_filename - receives the path of the debuginfo file when one is found
//   extra          - image locator forwarded to separate_debug_file_exists()
// Strategy: first the build-id lookup; otherwise the .gnu_debuglink name and CRC
// are read and three candidate locations are checked in order.
// NOTE(review): the return statements are not visible here.
404 bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
405 string & debug_filename, extra_images const & extra)
407 string filepath(filepath_in);
409 unsigned long crc32 = 0;
410 // The readelf program uses a char [64], so that's what we'll use.
411 // To my knowledge, the build-id should not be bigger than 20 chars.
412 unsigned char buildid[64];
// The second call only runs when a build-id was actually extracted.
414 if (get_build_id(ibfd, buildid) &&
415 find_debuginfo_file_by_buildid(buildid, debug_filename))
418 if (!get_debug_link_info(ibfd, basename, crc32))
421 /* Use old method of finding debuginfo file by comparing runtime binary's
422 * CRC with the CRC we calculate from the debuginfo file's contents.
423 * NOTE: This method breaks on systems where "MiniDebugInfo" is used
424 * since the CRC stored in the runtime binary won't match the compressed
425 * debuginfo file's CRC. But in practice, we shouldn't ever run into such
426 * a scenario since the build-id should always be available.
429 // Work out the image file's directory prefix
430 string filedir = op_dirname(filepath);
431 // Make sure it ends with /
432 if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
// Candidates: a .debug directory next to the image, the same directory
// mirrored under DEBUGDIR, and the image directory itself.
435 string first_try(filedir + ".debug/" + basename);
436 string second_try(DEBUGDIR + filedir + basename);
437 string third_try(filedir + basename);
439 ostringstream message;
440 message << "looking for debugging file " << basename
441 << " with crc32 = " << hex << crc32 << endl;
442 cverb << vbfd << message.str();
// The first candidate that exists with a matching CRC wins.
444 if (separate_debug_file_exists(first_try, crc32, extra))
445 debug_filename = first_try;
446 else if (separate_debug_file_exists(second_try, crc32, extra))
447 debug_filename = second_try;
448 else if (separate_debug_file_exists(third_try, crc32, extra))
449 debug_filename = third_try;
// Filter that classifies sym by its section flags, its name and its symbol flags.
// Visible checks, in order: section lacks SEC_CODE; name missing or empty; ARM
// mapping symbols ($a, $t, $d, $x); names starting with .L; the gcc2_compiled.
// marker; section symbols (BSF_SECTION_SYM).
// Throws op_runtime_error carrying a binutils bug message.
// NOTE(review): the condition that triggers the throw and the value returned by
// each check are not visible here.
457 bool interesting_symbol(asymbol * sym)
459 // #717720 some binutils are miscompiled by gcc 2.95, one of the
460 // typical symptom can be caught here.
463 os << "Your version of binutils seems to have a bug.\n"
464 << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
465 throw op_runtime_error(os.str());
468 if (!(sym->section->flags & SEC_CODE))
471 // returning true for fix up in op_bfd_symbol()
472 if (!sym->name || sym->name[0] == '\0')
474 /* ARM assembler internal mapping symbols aren't interesting */
475 if ((strcmp("$a", sym->name) == 0) ||
476 (strcmp("$t", sym->name) == 0) ||
477 (strcmp("$d", sym->name) == 0) ||
478 (strcmp("$x", sym->name) == 0))
481 // C++ exception stuff
// The name is known to be non-empty at this point, so index 1 is at worst the terminator.
482 if (sym->name[0] == '.' && sym->name[1] == 'L')
485 /* This case cannot be moved to boring_symbol(),
486 * because that's only used for duplicate VMAs,
487 * and sometimes this symbol appears at an address
488 * different from all other symbols.
490 if (!strcmp("gcc2_compiled.", sym->name))
493 /* Commit ab45a0cc5d1cf522c1aef8f22ed512a9aae0dc1c removed a check for
494 * the SEC_LOAD bit. See the commit message for details why this
498 if (sym->flags & BSF_SECTION_SYM)
// Compare two symbols, first and second, by a fixed list of criteria.
// Criteria are tested in this order, each in both directions: name equal to
// Letext, name starting with two question marks, hidden, leading underscore, weak.
// NOTE(review): which value each branch returns, and the final fallback, are not
// visible here.
505 bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
507 if (first.name() == "Letext")
509 else if (second.name() == "Letext")
512 if (first.name().substr(0, 2) == "??")
514 else if (second.name().substr(0, 2) == "??")
// From here on a criterion only decides when the two symbols differ on it.
517 if (first.hidden() && !second.hidden())
519 else if (!first.hidden() && second.hidden())
522 if (first.name()[0] == '_' && second.name()[0] != '_')
524 else if (first.name()[0] != '_' && second.name()[0] == '_')
527 if (first.weak() && !second.weak())
529 else if (!first.weak() && second.weak())
// Scan the section list of abfd for a section flagged SEC_DEBUGGING.
// NOTE(review): the return statements and any guard on abfd are not visible here.
536 bool bfd_info::has_debug_info() const
541 for (asection const * sect = abfd->sections; sect; sect = sect->next) {
542 if (sect->flags & SEC_DEBUGGING)
550 bfd_info::~bfd_info()
557 void bfd_info::close()
563 #if SYNTHESIZE_SYMBOLS
565 * This function is intended solely for processing ppc64 debuginfo files.
566 * On ppc64 platforms where there is no symbol information in the image bfd,
567 * the debuginfo syms need to be mapped back to the sections of the image bfd
568 * when calling bfd_get_synthetic_symtab() to gather complete symbol information.
569 * That is the purpose of the translate_debuginfo_syms() function.
571 * This function is only called when processing symbols retrieved from a
572 * debuginfo file that is separate from the actual runtime binary image.
573 * Separate debuginfo files may be needed in two different cases:
574 * 1) the real image is completely stripped, where there is no symbol
576 * 2) the real image has debuginfo stripped, and the user is requesting "-g"
577 * (src file/line num info)
// Re-home symbols read from a separate debuginfo bfd onto sections of the image bfd.
//   dbg_syms    - symbols read from the debuginfo file (abfd)
//   nr_dbg_syms - number of entries to process
// Step 1 indexes the image sections by name. Step 2 walks the symbols and, each
// time the section name changes, searches for an image section of the same name
// whose vma equals the debuginfo section vma plus vma_adj. Step 3 points matched
// symbols at that image section and at image_bfd.
579 void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
581 unsigned int img_sect_cnt = 0;
583 bfd * image_bfd = image_bfd_info->abfd;
// Several sections may share a name, hence a multimap.
584 multimap<string, bfd_section *> image_sections;
586 for (bfd_section * sect = image_bfd->sections;
587 sect && img_sect_cnt < image_bfd->section_count;
589 // A comment section marks the end of the needed sections
// strstr returning the start of the string means the name begins with .comment.
590 if (strstr(sect->name, ".comment") == sect->name)
592 image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
// NOTE(review): dbg_syms[0] is read before the loop tests nr_dbg_syms; the caller
// visible in this file only gets here with at least one symbol.
596 asymbol * sym = dbg_syms[0];
597 string prev_sect_name = "";
598 bfd_section * matched_section = NULL;
599 vma_adj = image_bfd->start_address - abfd->start_address;
// NOTE(review): the increment expression loads dbg_syms[nr_dbg_syms] after the
// last iteration; that is only safe if the array has a slot past the last symbol
// - confirm.
600 for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
// Track section changes so the lookup below runs once per run of symbols
// sharing a section name.
603 if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
604 section_switch = true;
605 prev_sect_name = sym->section->name;
607 section_switch = false;
// Only symbols whose section belongs to the debuginfo bfd are translated.
609 if (sym->section->owner && sym->section->owner == abfd) {
610 if (section_switch ) {
611 matched_section = NULL;
612 multimap<string, bfd_section *>::iterator it;
613 pair<multimap<string, bfd_section *>::iterator,
614 multimap<string, bfd_section *>::iterator> range;
616 range = image_sections.equal_range(sym->section->name);
617 for (it = range.first; it != range.second; it++) {
618 if ((*it).second->vma == sym->section->vma + vma_adj) {
619 matched_section = (*it).second;
// Remember image vma to debuginfo vma; get_synth_symbols() reads this map later.
621 section_vma_maps[(*it).second->vma] = sym->section->vma;
626 if (matched_section) {
627 sym->section = matched_section;
628 sym->the_bfd = image_bfd;
// Build a combined symbol array from the mini symbols of abfd and the synthetic
// symbols reported by bfd_get_synthetic_symtab().
// Applies only to targets named elf64-powerpc or elf64-powerpc-<suffix>.
// On the path that completes, nr_syms is the sum of both counts and syms holds
// the mini symbols, then the synthetic ones, then a NULL terminator.
// NOTE(review): the return statements are not visible here.
634 bool bfd_info::get_synth_symbols()
636 const char* targname = bfd_get_target(abfd);
637 // Match elf64-powerpc and elf64-powerpc-freebsd, but not
638 // elf64-powerpcle. elf64-powerpcle is a different ABI without
639 // function descriptors, so we don't need the synthetic
640 // symbols to have function code marked by a symbol.
// 13 is the length of the literal; the character after it must be the
// terminator or a dash.
641 bool is_elf64_powerpc_target = (!strncmp(targname, "elf64-powerpc", 13)
642 && (targname[13] == 0
643 || targname[13] == '-'));
645 if (!is_elf64_powerpc_target)
650 long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
651 if (nr_mini_syms < 1)
654 asymbol ** mini_syms = (asymbol **)buf;
658 /* For ppc64, a debuginfo file by itself does not hold enough symbol
659 * information for us to properly attribute samples to symbols. If
660 * the image file's bfd has no symbols (as in a super-stripped library),
661 * then we need to do the extra processing in translate_debuginfo_syms.
663 if (image_bfd_info && image_bfd_info->nr_syms == 0) {
664 translate_debuginfo_syms(mini_syms, nr_mini_syms);
// After translation the synthetic symtab is requested from the image bfd.
665 synth_bfd = image_bfd_info->abfd;
669 long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
674 if (nr_synth_syms < 0) {
679 /* If we called translate_debuginfo_syms() above, then we had to map
680 * the debuginfo symbols' sections to the sections of the runtime binary.
681 * We had to twist ourselves in this knot due to the peculiar requirements
682 * of bfd_get_synthetic_symtab(). While doing this mapping, we cached
683 * the original section VMAs because we need those original values in
684 * order to properly match up sample offsets with debug data. So now that
685 * we're done with bfd_get_synthetic_symtab, we can restore these section
688 if (section_vma_maps.size()) {
689 unsigned int sect_count = 0;
690 for (bfd_section * sect = synth_bfd->sections;
691 sect && sect_count < synth_bfd->section_count;
// NOTE(review): if section_vma_maps is a std::map, operator[] default-inserts
// 0 for a section that was never recorded and that 0 is then stored as the
// section vma - confirm every section visited here has an entry.
693 sect->vma = section_vma_maps[sect->vma];
// Counts are printed in decimal; the stream is switched back to hex afterwards.
699 cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
700 cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
702 nr_syms = nr_mini_syms + nr_synth_syms;
// One extra slot for the NULL terminator written at the end.
703 syms.reset(new asymbol *[nr_syms + 1]);
705 for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
706 syms[i] = mini_syms[i];
// Synthetic symbols are appended by taking the address of each element of synth_syms.
709 for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
710 syms[nr_mini_syms + i] = synth_syms + i;
715 // bfd_canonicalize_symtab does this, so shall we
716 syms[nr_syms] = NULL;
721 bool bfd_info::get_synth_symbols()
725 #endif /* SYNTHESIZE_SYMBOLS */
// Load the symbol table of abfd into syms and nr_syms.
// get_synth_symbols() is tried first. Otherwise, when the bfd reports HAS_SYMS,
// the array is sized from bfd_get_symtab_upper_bound() and filled by
// bfd_canonicalize_symtab(), whose result becomes nr_syms.
728 void bfd_info::get_symbols()
733 cverb << vbfd << "bfd_info::get_symbols() for "
734 << bfd_get_filename(abfd) << endl;
736 if (get_synth_symbols())
739 if (bfd_get_file_flags(abfd) & HAS_SYMS)
// NOTE(review): any handling of a negative (error) result from this call is not visible here.
740 nr_syms = bfd_get_symtab_upper_bound(abfd);
742 ostringstream message;
743 message << "bfd_get_symtab_upper_bound: " << dec
744 << nr_syms << hex << endl;
745 cverb << vbfd << message.str();
// Convert the upper bound into a number of asymbol pointers.
747 nr_syms /= sizeof(asymbol *);
753 cverb << vbfd << "Debuginfo has debug data only" << endl;
755 syms.reset(new asymbol *[nr_syms]);
// The canonicalize call reports the actual symbol count, which replaces the upper bound.
756 nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
757 ostringstream message;
758 message << "bfd_canonicalize_symtab: " << dec
759 << nr_syms << hex << endl;
760 cverb << vbfd << message.str();
// Resolve source file and line information for offset within symbol sym.
//   b        - bfd_info supplying the bfd and symbols used for the lookup
//   sym      - symbol the sample belongs to
//   offset   - offset used to compute the lookup pc
//   anon_obj - flag for anonymous objects (presumably selects between the two pc formulas below - confirm)
// The lookup uses bfd_find_nearest_line(); when it yields line 0 with a function
// that fails is_correct_function() special handling applies, and fixup_linenr()
// can be used to probe following addresses.
// NOTE(review): the return type, the return statements and several branches are
// not visible here.
766 find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
767 bfd_vma offset, bool anon_obj)
769 char const * function = "";
770 char const * cfilename = "";
771 unsigned int linenr = 0;
775 asection * section = NULL;
776 asymbol * empty_syms[1];
783 // take care about artificial symbol
790 // If this bfd_info object has no syms, that implies that we're
791 // using a debuginfo bfd_info object that has only debug data.
792 // This also implies that the passed sym is from the runtime binary,
793 // and thus its section is also from the runtime binary. And
794 // since section VMA can be different for a runtime binary (prelinked)
795 // and its associated debuginfo, we need to obtain the debuginfo
796 // section to pass to the libbfd functions.
797 asection * sect_candidate;
// NOTE(review): get_image_bfd_info() is dereferenced without a visible null check - confirm.
798 bfd_vma vma_adj = b.get_image_bfd_info()->abfd->start_address - abfd->start_address;
800 section = sym.symbol()->section;
// Search the debuginfo sections for the one whose adjusted vma equals the vma
// of the section the symbol lives in.
801 for (sect_candidate = abfd->sections;
802 (sect_candidate != NULL) && (section == NULL);
803 sect_candidate = sect_candidate->next) {
804 if (sect_candidate->vma + vma_adj == sym.symbol()->section->vma) {
805 section = sect_candidate;
808 if (section == NULL) {
809 cerr << "ERROR: Unable to find section for symbol " << sym.symbol()->name << endl;
816 section = sym.symbol()->section;
// Two alternative ways of deriving pc; which branch selects each is not visible here.
819 pc = offset - sym.symbol()->section->vma;
821 pc = (sym.value() + offset) - sym.filepos();
// Sections without SEC_ALLOC and a pc at or past the section size are special-cased before the lookup.
823 if ((op_bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
826 if (pc >= op_bfd_section_size(abfd, section))
829 ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
832 if (!ret || !cfilename || !function)
836 * is_correct_function does not handle the case of static inlines,
837 * but if the linenr is non-zero in the inline case, it is the correct
840 if (linenr == 0 && !is_correct_function(function, sym.name()))
844 fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
849 info.filename = cfilename;
855 // some stl lacks string::clear()
// Erasing the full range empties the string.
856 info.filename.erase(info.filename.begin(), info.filename.end());