1 // Copyright (c) 2010 Google Inc.
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32 // Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.
34 // For <inttypes.h> PRI* macros, before anything else might #include it.
35 #ifndef __STDC_FORMAT_MACROS
36 #define __STDC_FORMAT_MACROS
37 #endif /* __STDC_FORMAT_MACROS */
39 #include "common/dwarf_cu_to_module.h"
42 #if !defined(__ANDROID__)
51 #include "common/dwarf_line_to_module.h"
52 #include "common/unordered.h"
54 namespace google_breakpad {
61 // Data provided by a DWARF specification DIE.
63 // In DWARF, the DIE for a definition may contain a DW_AT_specification
64 // attribute giving the offset of the corresponding declaration DIE, and
65 // the definition DIE may omit information given in the declaration. For
66 // example, it's common for a function's address range to appear only in
67 // its definition DIE, but its name to appear only in its declaration DIE.
70 // The dumper needs to be able to follow DW_AT_specification links to
71 // bring all this information together in a FUNC record. Conveniently,
72 // DIEs that are the target of such links have a DW_AT_declaration flag
73 // set, so we can identify them when we first see them, and record their
74 // contents for later reference.
76 // A Specification holds information gathered from a declaration DIE that
77 // we may need if we find a DW_AT_specification link pointing to it.
78 struct DwarfCUToModule::Specification {
79 // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name.
// ComputeQualifiedName only fills this in when it found a demangled name;
// otherwise it stays empty and the two fields below are used instead.
80 string qualified_name;
82 // The name of the enclosing scope, or the empty string if there is none.
83 string enclosing_name;
85 // The name for the specification DIE itself, without any enclosing scope.
87 string unqualified_name;
90 // An abstract origin -- base definition of an inline function.
// FuncHandler::Finish stores one of these, keyed by DIE offset, for a
// DIE that carried DW_AT_inline but covered no non-empty address range.
// A later DIE's DW_AT_abstract_origin can then borrow the stored name.
91 struct AbstractOrigin {
// Creates an origin with an empty name.
92 AbstractOrigin() : name() {}
// Creates an origin carrying NAME.
93 explicit AbstractOrigin(const string& name) : name(name) {}
// Maps the offset of a DIE to the AbstractOrigin recorded for that DIE.
98 typedef map<uint64, AbstractOrigin> AbstractOriginByOffset;
100 // Data global to the DWARF-bearing file that is private to the
101 // DWARF-to-Module process.
102 struct DwarfCUToModule::FilePrivate {
103 // A set of strings used in this CU. Before storing a string in one of
104 // our data structures, insert it into this set, and then use the string
// from the set (GenericDIEHandler::AddStringToPool does exactly this).
107 // In some STL implementations, strings are reference-counted internally,
108 // meaning that simply using strings from this set, even if passed by
109 // value, assigned, or held directly in structures and containers
110 // (map<string, ...>, for example), causes those strings to share a
111 // single instance of each distinct piece of text. GNU's libstdc++ uses
112 // reference counts, and I believe MSVC did as well, at some point.
113 // However, C++ '11 implementations are moving away from reference counting.
116 // In other implementations, string assignments copy the string's text,
117 // so this set will actually hold yet another copy of the string (although
118 // everything will still work). To improve memory consumption portably,
119 // we will probably need to use pointers to strings held in this set.
120 unordered_set<string> common_strings;
122 // A map from offsets of DIEs within the .debug_info section to
123 // Specifications describing those DIEs. Specification references can
124 // cross compilation unit boundaries.
125 SpecificationByOffset specifications;
// Abstract origins recorded so far, keyed by the offset of the DIE that
// defined them; consulted when a DIE carries DW_AT_abstract_origin.
127 AbstractOriginByOffset origins;
// Builds the per-file context: remembers FILENAME and the inter-CU
// reference policy, and allocates the file-global FilePrivate state.
// NOTE(review): the embedded numbering skips lines here, so at least one
// parameter and one member initializer are not visible in this listing.
130 DwarfCUToModule::FileContext::FileContext(const string &filename,
132 bool handle_inter_cu_refs)
133 : filename_(filename),
135 handle_inter_cu_refs_(handle_inter_cu_refs),
136 file_private_(new FilePrivate()) {
// Destructor. NOTE(review): the body is not visible in this listing, so
// how file_private_ (allocated with `new` in the constructor) is released
// cannot be confirmed from here.
139 DwarfCUToModule::FileContext::~FileContext() {
// Registers the section called NAME, recording the (CONTENTS, LENGTH)
// pair in section_map_. The entry is assigned through operator[], so a
// section already registered under the same name is replaced.
142 void DwarfCUToModule::FileContext::AddSectionToSectionMap(
143 const string& name, const char* contents, uint64 length) {
144 section_map_[name] = std::make_pair(contents, length);
// Removes every entry from section_map_. As the name says, this is meant
// for test code.
147 void DwarfCUToModule::FileContext::ClearSectionMapForTest() {
148 section_map_.clear();
// Read-only accessor for the file's section map.
// NOTE(review): the body is not visible in this listing; presumably it
// returns section_map_ -- confirm.
151 const dwarf2reader::SectionMap&
152 DwarfCUToModule::FileContext::section_map() const {
// Discards all recorded Specifications, but only when inter-CU reference
// handling is off. When it is on, the table is kept, because a
// specification reference may point into a different compilation unit.
156 void DwarfCUToModule::FileContext::ClearSpecifications() {
157 if (!handle_inter_cu_refs_)
158 file_private_->specifications.clear();
// Tells whether a reference to OFFSET, made from the compilation unit
// that starts at COMPILATION_UNIT_START, is an inter-CU reference that
// will not be followed.
// NOTE(review): the statement controlled by the `if` is not visible in
// this listing; presumably it returns false when inter-CU handling is
// enabled -- confirm.
// The final comparison only flags targets that lie before the start of
// the current compilation unit.
161 bool DwarfCUToModule::FileContext::IsUnhandledInterCUReference(
162 uint64 offset, uint64 compilation_unit_start) const {
163 if (handle_inter_cu_refs_)
165 return offset < compilation_unit_start;
168 // Information global to the particular compilation unit we're
169 // parsing. This is for data shared across the CU's entire DIE tree,
170 // and parameters from the code invoking the CU parser.
171 struct DwarfCUToModule::CUContext {
// Stores the two borrowed pointers and defaults the language to C++
// until a DW_AT_language attribute says otherwise.
172 CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg)
173 : file_context(file_context_arg),
174 reporter(reporter_arg),
175 language(Language::CPlusPlus) {}
// NOTE(review): the loop below walks every entry of `functions`; the
// enclosing function header and the loop body are not visible in this
// listing. Per the comment on `functions`, this is presumably the
// destructor deleting each Function -- confirm.
178 for (vector<Module::Function *>::iterator it = functions.begin();
179 it != functions.end(); ++it) {
184 // The DWARF-bearing file into which this CU was incorporated.
185 FileContext *file_context;
187 // For printing error messages.
188 WarningReporter *reporter;
190 // The source language of this compilation unit.
191 const Language *language;
193 // The functions defined in this compilation unit. We accumulate
194 // them here during parsing. Then, in DwarfCUToModule::Finish, we
195 // assign them lines and add them to file_context->module.
197 // Destroying this destroys all the functions this vector points to.
198 vector<Module::Function *> functions;
201 // Information about the context of a particular DIE. This is for
202 // information that changes as we descend the tree towards the leaves:
203 // the containing classes/namespaces, etc.
204 struct DwarfCUToModule::DIEContext {
205 // The fully-qualified name of the context. For example, for a
// tree of nested DIEs such as:
208 // DW_TAG_namespace Foo
// (a scope DIE named Bar nested inside Foo; its line is missing from
// this listing)
210 // DW_TAG_subprogram Baz
212 // in a C++ compilation unit, the DIEContext's name for the
213 // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
214 // name for the DW_TAG_namespace DIE would be "".
// NOTE(review): the member declaration itself (read elsewhere as
// `parent_context_->name` and `child_context_.name`) is not visible in
// this listing.
218 // An abstract base class for all the dumper's DIE handlers.
// It collects the attributes needed to name a DIE (DW_AT_name,
// DW_AT_MIPS_linkage_name, DW_AT_declaration, DW_AT_specification) and
// turns them into a qualified name in ComputeQualifiedName.
219 class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler {
221 // Create a handler for the DIE at OFFSET whose compilation unit is
222 // described by CU_CONTEXT, and whose immediate context is described
223 // by PARENT_CONTEXT.
// NOTE(review): the OFFSET parameter line and some member initializers
// are not visible in this listing.
224 GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context,
226 : cu_context_(cu_context),
227 parent_context_(parent_context),
230 specification_(NULL) { }
232 // Derived classes' ProcessAttributeUnsigned can defer to this to
233 // handle DW_AT_declaration, or simply not override it.
234 void ProcessAttributeUnsigned(enum DwarfAttribute attr,
238 // Derived classes' ProcessAttributeReference can defer to this to
239 // handle DW_AT_specification, or simply not override it.
240 void ProcessAttributeReference(enum DwarfAttribute attr,
244 // Derived classes' ProcessAttributeString can defer to this to
245 // handle DW_AT_name and DW_AT_MIPS_linkage_name, or simply not override it.
246 void ProcessAttributeString(enum DwarfAttribute attr,
251 // Compute and return the fully-qualified name of the DIE. If this
252 // DIE is a declaration DIE, to be cited by other DIEs'
253 // DW_AT_specification attributes, record its enclosing name and
254 // unqualified name in the specification table.
256 // Use this from EndAttributes member functions, not ProcessAttribute*
257 // functions; only the former can be sure that all the DIE's attributes
// have been seen.
259 string ComputeQualifiedName();
// The compilation-unit context and the enclosing DIE's context, as
// passed to the constructor. Neither is allocated by this class.
261 CUContext *cu_context_;
262 DIEContext *parent_context_;
265 // Place the name in the global set of strings. Even though this looks
266 // like a copy, std::string implementations that use reference counting
267 // internally let all the data structures share one copy of each string;
268 // other implementations make a real copy, which is still correct.
269 // FIXME: Should this return something like a string_ref to avoid the
270 // assumption about how strings are implemented?
271 string AddStringToPool(const string &str);
273 // If this DIE has a DW_AT_declaration attribute, this is its value.
274 // It is false on DIEs with no DW_AT_declaration attribute.
// NOTE(review): the member declaration this comment describes (used
// elsewhere as `declaration_`) is not visible in this listing.
277 // If this DIE has a DW_AT_specification attribute, this is the
278 // Specification structure for the DIE the attribute refers to.
279 // Otherwise, this is NULL.
280 Specification *specification_;
282 // The value of the DW_AT_name attribute, or the empty string if the
283 // DIE has no such attribute.
284 string name_attribute_;
286 // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty
287 // string if the DIE has no such attribute or its content could not be
// demangled.
289 string demangled_name_;
// Handles unsigned-valued attributes common to all DIE handlers.
// For DW_AT_declaration, any nonzero DATA marks this DIE as a
// declaration.
// NOTE(review): the remaining parameters and the enclosing switch are
// not visible in this listing.
292 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned(
293 enum DwarfAttribute attr,
297 case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break;
// Handles reference-valued attributes common to all DIE handlers.
// For DW_AT_specification, DATA is the offset of the declaration DIE:
// the reference is first checked against the inter-CU policy, then
// looked up in the file-wide specification table. A hit is remembered
// in specification_; a miss is reported as an unknown specification.
// NOTE(review): the remaining parameters, the enclosing switch and the
// statements that end each branch are not visible in this listing.
302 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference(
303 enum DwarfAttribute attr,
307 case dwarf2reader::DW_AT_specification: {
308 FileContext *file_context = cu_context_->file_context;
309 if (file_context->IsUnhandledInterCUReference(
310 data, cu_context_->reporter->cu_offset())) {
311 cu_context_->reporter->UnhandledInterCUReference(offset_, data);
314 // Find the Specification to which this attribute refers, and
315 // set specification_ appropriately. We could do more processing
316 // here, but it's better to leave the real work to our
317 // EndAttribute member function, at which point we know we have
318 // seen all the DIE's attributes.
319 SpecificationByOffset *specifications =
320 &file_context->file_private_->specifications;
321 SpecificationByOffset::iterator spec = specifications->find(data);
322 if (spec != specifications->end()) {
323 specification_ = &spec->second;
325 // Technically, there's no reason a DW_AT_specification
326 // couldn't be a forward reference, but supporting that would
327 // be a lot of work (changing to a two-pass structure), and I
328 // don't think any producers we care about ever emit such
// references.
330 cu_context_->reporter->UnknownSpecification(offset_, data);
// Interns STR in the file-wide common_strings set and returns the pooled
// string by value. result.first refers to the element holding STR's
// text whether or not the insertion actually added a new element.
338 string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string &str) {
339 pair<unordered_set<string>::iterator, bool> result =
340 cu_context_->file_context->file_private_->common_strings.insert(str);
341 return *result.first;
// Handles string-valued attributes common to all DIE handlers.
// DW_AT_name is pooled and stored in name_attribute_.
// DW_AT_MIPS_linkage_name is demangled (the call is compiled out on
// Android) and the result is pooled into demangled_name_.
// NOTE(review): lines between the demangle call and the assignment are
// not visible in this listing; `demangled` stays NULL when the call is
// compiled out, so confirm a NULL check guards AddStringToPool.
344 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
345 enum DwarfAttribute attr,
347 const string &data) {
349 case dwarf2reader::DW_AT_name:
350 name_attribute_ = AddStringToPool(data);
352 case dwarf2reader::DW_AT_MIPS_linkage_name: {
353 char* demangled = NULL;
354 #if !defined(__ANDROID__)
355 demangled = abi::__cxa_demangle(data.c_str(), NULL, NULL, NULL);
358 demangled_name_ = AddStringToPool(demangled);
// The demangled buffer is released with free() once its text has been
// copied into the pool.
359 free(reinterpret_cast<void*>(demangled));
// Returns this DIE's fully-qualified name. A demangled name is used if
// one is available, taken from this DIE first and from its
// specification second. Otherwise the name is assembled from an
// enclosing name and an unqualified name via the CU language's
// MakeQualifiedName. For a declaration DIE, the names found are also
// stored in the file-wide specification table under this DIE's offset.
// NOTE(review): several `else`, `if` and brace lines are not visible in
// this listing; the comments below describe only the visible statements.
367 string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
368 // Use the demangled name, if one is available. Demangled names are
369 // preferable to those inferred from the DWARF structure because they
370 // include argument types.
371 const string *qualified_name = NULL;
372 if (!demangled_name_.empty()) {
373 // Found it in this DIE.
374 qualified_name = &demangled_name_;
375 } else if (specification_ && !specification_->qualified_name.empty()) {
376 // Found it on the specification.
377 qualified_name = &specification_->qualified_name;
// These two are only assigned on the path where no demangled name was
// found.
380 const string *unqualified_name;
381 const string *enclosing_name;
382 if (!qualified_name) {
383 // Find our unqualified name. If the DIE has its own DW_AT_name
384 // attribute, then use that; otherwise, check our specification.
385 if (name_attribute_.empty() && specification_)
386 unqualified_name = &specification_->unqualified_name;
388 unqualified_name = &name_attribute_;
390 // Find the name of our enclosing context. If we have a
391 // specification, it's the specification's enclosing context that
392 // counts; otherwise, use this DIE's context.
394 enclosing_name = &specification_->enclosing_name;
396 enclosing_name = &parent_context_->name;
399 // If this DIE was marked as a declaration, record its names in the
400 // specification table.
403 if (qualified_name) {
404 spec.qualified_name = *qualified_name;
406 spec.enclosing_name = *enclosing_name;
407 spec.unqualified_name = *unqualified_name;
409 cu_context_->file_context->file_private_->specifications[offset_] = spec;
413 return *qualified_name;
415 // Combine the enclosing name and unqualified name to produce our
416 // own fully-qualified name.
417 return cu_context_->language->MakeQualifiedName(*enclosing_name,
421 // A handler class for DW_TAG_subprogram DIEs.
// It gathers the function's address range, inline flag and abstract
// origin; its Finish member either appends a Module::Function to the
// CU's list or records an AbstractOrigin.
422 class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
// Starts with an empty address range, no abstract origin, not inline,
// and assumes DW_AT_high_pc is an address until told otherwise.
424 FuncHandler(CUContext *cu_context, DIEContext *parent_context,
426 : GenericDIEHandler(cu_context, parent_context, offset),
427 low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr),
428 abstract_origin_(NULL), inline_(false) { }
429 void ProcessAttributeUnsigned(enum DwarfAttribute attr,
432 void ProcessAttributeSigned(enum DwarfAttribute attr,
435 void ProcessAttributeReference(enum DwarfAttribute attr,
439 bool EndAttributes();
443 // The fully-qualified name, as derived from name_attribute_,
444 // specification_, parent_context_. Computed in EndAttributes.
// NOTE(review): the member declaration this comment describes (used
// elsewhere as `name_`) is not visible in this listing.
446 uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
447 DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
// The origin named by DW_AT_abstract_origin, or NULL if there is none
// or it was not found. Points into the file-wide origins map.
448 const AbstractOrigin* abstract_origin_;
// Records the unsigned-valued attributes a function DIE needs:
// DW_AT_inline, DW_AT_low_pc and DW_AT_high_pc (whose form is kept so
// that Finish can tell a length from an address). Anything else is
// passed to GenericDIEHandler.
// NOTE(review): the remaining parameters, the switch header and the
// high_pc value assignment are not visible in this listing.
452 void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned(
453 enum DwarfAttribute attr,
457 // If this attribute is present at all --- even if its value is
458 // DW_INL_not_inlined --- then GCC may cite it as someone else's
459 // DW_AT_abstract_origin attribute.
460 case dwarf2reader::DW_AT_inline: inline_ = true; break;
462 case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break;
463 case dwarf2reader::DW_AT_high_pc:
464 high_pc_form_ = form;
469 GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
// Signed-valued counterpart of ProcessAttributeUnsigned: the only
// attribute visible here is DW_AT_inline, whose mere presence sets
// inline_ regardless of its value.
474 void DwarfCUToModule::FuncHandler::ProcessAttributeSigned(
475 enum DwarfAttribute attr,
479 // If this attribute is present at all --- even if its value is
480 // DW_INL_not_inlined --- then GCC may cite it as someone else's
481 // DW_AT_abstract_origin attribute.
482 case dwarf2reader::DW_AT_inline: inline_ = true; break;
// Handles DW_AT_abstract_origin by looking DATA up in the file-wide
// origins map: a hit is remembered in abstract_origin_, a miss is
// reported to the warning reporter. Other reference attributes go to
// GenericDIEHandler.
// NOTE(review): the remaining parameters, the switch header and the
// `else`/`default` lines are not visible in this listing.
489 void DwarfCUToModule::FuncHandler::ProcessAttributeReference(
490 enum DwarfAttribute attr,
494 case dwarf2reader::DW_AT_abstract_origin: {
495 const AbstractOriginByOffset& origins =
496 cu_context_->file_context->file_private_->origins;
497 AbstractOriginByOffset::const_iterator origin = origins.find(data);
498 if (origin != origins.end()) {
499 abstract_origin_ = &(origin->second);
501 cu_context_->reporter->UnknownAbstractOrigin(offset_, data);
506 GenericDIEHandler::ProcessAttributeReference(attr, form, data);
// Called once all attributes have been seen: settles the function's
// name, falling back to the abstract origin's name when the DIE itself
// yields an empty one.
// NOTE(review): the return statement is not visible in this listing.
511 bool DwarfCUToModule::FuncHandler::EndAttributes() {
512 // Compute our name, and record a specification, if appropriate.
513 name_ = ComputeQualifiedName();
514 if (name_.empty() && abstract_origin_) {
515 name_ = abstract_origin_->name;
// Turns the gathered attributes into a result. A DIE covering a
// non-empty address range becomes a Module::Function appended to
// cu_context_->functions. A DIE without such a range but carrying
// DW_AT_inline is recorded as an AbstractOrigin under its own offset.
// NOTE(review): several lines (the high_pc adjustment, the name
// assignment, `else`, and any guard around push_back) are not visible
// in this listing.
520 void DwarfCUToModule::FuncHandler::Finish() {
521 // Make high_pc_ an address, if it isn't already.
522 if (high_pc_form_ != dwarf2reader::DW_FORM_addr) {
526 // Did we collect the information we need? Not all DWARF function
527 // entries have low and high addresses (for example, inlined
528 // functions that were never used), but all the ones we're
529 // interested in cover a non-empty range of bytes.
530 if (low_pc_ < high_pc_) {
531 // Create a Module::Function based on the data we've gathered, and
532 // add it to cu_context_->functions.
// The scoped_ptr owns the Function until release() hands it to the
// vector.
533 scoped_ptr<Module::Function> func(new Module::Function);
534 // Malformed DWARF may omit the name, but all Module::Functions must
// have names.
536 if (!name_.empty()) {
539 cu_context_->reporter->UnnamedFunction(offset_);
540 func->name = "<name omitted>";
542 func->address = low_pc_;
543 func->size = high_pc_ - low_pc_;
544 func->parameter_size = 0;
546 // If the function address is zero this is a sign that this function
547 // description is just empty debug data and should just be discarded.
// NOTE(review): the condition that implements this discard is not
// visible in this listing -- confirm push_back is guarded by it.
548 cu_context_->functions.push_back(func.release());
550 } else if (inline_) {
551 AbstractOrigin origin(name_);
552 cu_context_->file_context->file_private_->origins[offset_] = origin;
556 // A handler for DIEs that contain functions and contribute a
557 // component to their names: namespaces, classes, etc.
558 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler {
// Forwards all arguments to GenericDIEHandler; child_context_ is
// default-constructed and filled in by EndAttributes.
560 NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context,
562 : GenericDIEHandler(cu_context, parent_context, offset) { }
563 bool EndAttributes();
564 DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag);
567 DIEContext child_context_; // A context for our children.
// Once all attributes are in, this scope's qualified name becomes the
// enclosing name its children will see.
// NOTE(review): the return statement is not visible in this listing.
570 bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
571 child_context_.name = ComputeQualifiedName();
// Picks a handler for a child DIE based on its tag: subprograms get a
// FuncHandler; namespaces, classes, structs and unions get a nested
// NamedScopeHandler. Both are created with `new` and receive this
// scope's child_context_ as their parent context.
// NOTE(review): the parameter list, the switch header and the default
// case are not visible in this listing.
575 dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler(
579 case dwarf2reader::DW_TAG_subprogram:
580 return new FuncHandler(cu_context_, &child_context_, offset);
581 case dwarf2reader::DW_TAG_namespace:
582 case dwarf2reader::DW_TAG_class_type:
583 case dwarf2reader::DW_TAG_structure_type:
584 case dwarf2reader::DW_TAG_union_type:
585 return new NamedScopeHandler(cu_context_, &child_context_, offset);
// Prints, to stderr, the heading that identifies the current
// compilation unit by file name, CU name and CU offset, and records in
// printed_cu_header_ that it has been printed.
// NOTE(review): the statement controlled by the `if` is not visible in
// this listing; presumably an early return so the heading is printed
// only once -- confirm.
591 void DwarfCUToModule::WarningReporter::CUHeading() {
592 if (printed_cu_header_)
// NOTE(review): "%llx" expects unsigned long long; confirm that matches
// the declared type of cu_offset_.
594 fprintf(stderr, "%s: in compilation unit '%s' (offset 0x%llx):\n",
595 filename_.c_str(), cu_name_.c_str(), cu_offset_);
596 printed_cu_header_ = true;
// Warns on stderr that the DIE at OFFSET has a DW_AT_specification
// whose target DIE was never recorded as a declaration.
// NOTE(review): the second parameter (used below as `target`) and any
// statement before the fprintf are not visible in this listing.
599 void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset,
602 fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_specification"
603 " attribute referring to the die at offset 0x%llx, which either"
604 " was not marked as a declaration, or comes later in the file\n",
605 filename_.c_str(), offset, target);
// Warns on stderr that the DIE at OFFSET has a DW_AT_abstract_origin
// whose target DIE was never recorded as an inline definition.
// NOTE(review): the second parameter (used below as `target`) and any
// statement before the fprintf are not visible in this listing.
608 void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset,
611 fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_abstract_origin"
612 " attribute referring to the die at offset 0x%llx, which either"
613 " was not marked as an inline, or comes later in the file\n",
614 filename_.c_str(), offset, target);
// Warns on stderr that the DWARF section called NAME could not be found
// in the file being processed.
617 void DwarfCUToModule::WarningReporter::MissingSection(const string &name) {
619 fprintf(stderr, "%s: warning: couldn't find DWARF '%s' section\n",
620 filename_.c_str(), name.c_str());
// Warns on stderr that a compilation unit's line-number data offset
// lies beyond the end of the '.debug_line' section.
// NOTE(review): the fprintf argument list is cut off in this listing,
// so whether OFFSET itself is printed cannot be seen from here.
623 void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) {
625 fprintf(stderr, "%s: warning: line number data offset beyond end"
626 " of '.debug_line' section\n",
// Prints the "skipping unpaired lines/functions" heading on stderr and
// records in printed_unpaired_header_ that it has been printed.
// NOTE(review): the statement controlled by the `if` is not visible in
// this listing; presumably an early return so the heading is printed
// only once -- confirm.
630 void DwarfCUToModule::WarningReporter::UncoveredHeading() {
631 if (printed_unpaired_header_)
634 fprintf(stderr, "%s: warning: skipping unpaired lines/functions:\n",
636 printed_unpaired_header_ = true;
// Reports, on stderr, a function that has no line data covering it,
// tagging zero-length functions as such.
// NOTE(review): the statement controlled by the `if` is not visible in
// this listing; presumably an early return when uncovered warnings are
// disabled -- confirm.
639 void DwarfCUToModule::WarningReporter::UncoveredFunction(
640 const Module::Function &function) {
641 if (!uncovered_warnings_enabled_)
644 fprintf(stderr, " function%s: %s\n",
645 function.size == 0 ? " (zero-length)" : "",
646 function.name.c_str());
// Reports, on stderr, a source line that no function covers: its file
// name, line number and address, tagging zero-length lines as such.
// NOTE(review): the statement controlled by the `if` is not visible in
// this listing; presumably an early return when uncovered warnings are
// disabled -- confirm.
649 void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) {
650 if (!uncovered_warnings_enabled_)
// PRIx64 is the <inttypes.h> format macro for a 64-bit hexadecimal
// value.
653 fprintf(stderr, " line%s: %s:%d at 0x%" PRIx64 "\n",
654 (line.size == 0 ? " (zero-length)" : ""),
655 line.file->name.c_str(), line.number, line.address);
// Warns on stderr that the function DIE at OFFSET has no name.
658 void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) {
660 fprintf(stderr, "%s: warning: function at offset 0x%llx has no name\n",
661 filename_.c_str(), offset);
// Warns on stderr that the DIE at OFFSET refers to TARGET in another
// compilation unit while inter-CU reference handling is turned off.
664 void DwarfCUToModule::WarningReporter::UnhandledInterCUReference(
665 uint64 offset, uint64 target) {
667 fprintf(stderr, "%s: warning: the DIE at offset 0x%llx has a "
668 "DW_FORM_ref_addr attribute with an inter-CU reference to "
669 "0x%llx, but inter-CU reference handling is turned off.\n",
670 filename_.c_str(), offset, target);
// Creates the root handler for one compilation unit. LINE_READER is
// stored as given; FILE_CONTEXT and REPORTER are wrapped in a freshly
// allocated CUContext, and a fresh DIEContext is allocated for
// top-level DIEs. No line-number offset is known yet.
673 DwarfCUToModule::DwarfCUToModule(FileContext *file_context,
674 LineToModuleHandler *line_reader,
675 WarningReporter *reporter)
676 : line_reader_(line_reader),
677 cu_context_(new CUContext(file_context, reporter)),
678 child_context_(new DIEContext()),
679 has_source_line_info_(false) {
// Destructor. NOTE(review): the body is not visible in this listing.
// cu_context_ and child_context_ are accessed through .get() elsewhere
// in this file, so they are presumably smart pointers that release
// their objects themselves -- confirm.
682 DwarfCUToModule::~DwarfCUToModule() {
// Handles signed-valued attributes of the compilation-unit DIE. The
// only one visible here is DW_AT_language, which selects the CU's
// source language via SetLanguage.
// NOTE(review): the remaining parameters and the switch header are not
// visible in this listing.
685 void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr,
689 case dwarf2reader::DW_AT_language: // source language of this CU
690 SetLanguage(static_cast<DwarfLanguage>(data));
// Handles unsigned-valued attributes of the compilation-unit DIE:
// DW_AT_stmt_list records where this CU's line-number data starts, and
// DW_AT_language selects the source language via SetLanguage.
// NOTE(review): the remaining parameters and the switch header are not
// visible in this listing.
697 void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
701 case dwarf2reader::DW_AT_stmt_list: // Line number information.
702 has_source_line_info_ = true;
703 source_line_offset_ = data;
705 case dwarf2reader::DW_AT_language: // source language of this CU
706 SetLanguage(static_cast<DwarfLanguage>(data));
// Handles string-valued attributes of the compilation-unit DIE:
// DW_AT_name is given to the warning reporter as the CU's name, and
// DW_AT_comp_dir is passed to the line reader's StartCompilationUnit.
// NOTE(review): one parameter line and the switch header are not
// visible in this listing.
713 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr,
715 const string &data) {
717 case dwarf2reader::DW_AT_name:
718 cu_context_->reporter->SetCUName(data);
720 case dwarf2reader::DW_AT_comp_dir:
721 line_reader_->StartCompilationUnit(data);
// Called after all attributes of the compilation-unit DIE have been
// delivered. NOTE(review): the body is not visible in this listing.
728 bool DwarfCUToModule::EndAttributes() {
// Picks a handler for a top-level DIE based on its tag: subprograms get
// a FuncHandler; namespaces, classes, structs and unions get a
// NamedScopeHandler. Both are created with `new` and share this CU's
// context and the top-level child context.
// NOTE(review): the parameter list, the switch header, the end of the
// NamedScopeHandler call and the default case are not visible in this
// listing.
732 dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler(
736 case dwarf2reader::DW_TAG_subprogram:
737 return new FuncHandler(cu_context_.get(), child_context_.get(), offset);
738 case dwarf2reader::DW_TAG_namespace:
739 case dwarf2reader::DW_TAG_class_type:
740 case dwarf2reader::DW_TAG_structure_type:
741 case dwarf2reader::DW_TAG_union_type:
742 return new NamedScopeHandler(cu_context_.get(), child_context_.get(),
// Maps the DWARF language code LANGUAGE onto one of the Language
// objects and stores it in the CU context: Java and MIPS assembler get
// their own, and the C-family codes listed at the end get C++.
// NOTE(review): the switch header, the `break` lines and any `default`
// label are not visible in this listing.
749 void DwarfCUToModule::SetLanguage(DwarfLanguage language) {
751 case dwarf2reader::DW_LANG_Java:
752 cu_context_->language = Language::Java;
755 // DWARF has no generic language code for assembly language; this is
756 // what the GNU toolchain uses.
757 case dwarf2reader::DW_LANG_Mips_Assembler:
758 cu_context_->language = Language::Assembler;
761 // C++ covers so many cases that it probably has some way to cope
762 // with whatever the other languages throw at us. So make it the
// default.
765 // Objective C and Objective C++ seem to create entries for
766 // methods whose DW_AT_name values are already fully-qualified:
767 // "-[Classname method:]". These appear at the top level.
769 // DWARF data for C should never include namespaces or functions
770 // nested in struct types, but if it ever does, then C++'s
771 // notation is probably not a bad choice for that.
773 case dwarf2reader::DW_LANG_ObjC:
774 case dwarf2reader::DW_LANG_ObjC_plus_plus:
775 case dwarf2reader::DW_LANG_C:
776 case dwarf2reader::DW_LANG_C89:
777 case dwarf2reader::DW_LANG_C99:
778 case dwarf2reader::DW_LANG_C_plus_plus:
779 cu_context_->language = Language::CPlusPlus;
// Finds the line-number section in the file's section map and hands the
// part that starts at OFFSET to line_reader_, which fills lines_.
// The section is looked up as ".debug_line" first and "__debug_line"
// second. A missing section or an OFFSET at or past the section's end is
// reported through the warning reporter.
// NOTE(review): the lines that follow each reporter call are not visible
// in this listing; confirm both error paths return before map_entry is
// dereferenced or the reader is invoked.
784 void DwarfCUToModule::ReadSourceLines(uint64 offset) {
// Bind a reference to the section map; the declarator here had been
// mis-encoded as "§ion_map" and is restored to "&section_map".
785 const dwarf2reader::SectionMap &section_map
786 = cu_context_->file_context->section_map();
787 dwarf2reader::SectionMap::const_iterator map_entry
788 = section_map.find(".debug_line");
789 // Mac OS X puts DWARF data in sections whose names begin with "__"
// instead of ".".
791 if (map_entry == section_map.end())
792 map_entry = section_map.find("__debug_line");
793 if (map_entry == section_map.end()) {
794 cu_context_->reporter->MissingSection(".debug_line");
// Each map entry holds a (start pointer, length) pair for the section.
797 const char *section_start = map_entry->second.first;
798 uint64 section_length = map_entry->second.second;
799 if (offset >= section_length) {
800 cu_context_->reporter->BadLineInfoOffset(offset);
// Only the bytes from OFFSET to the end of the section are passed on.
803 line_reader_->ReadProgram(section_start + offset, section_length - offset,
804 cu_context_->file_context->module_, &lines_);
808 // Return true if ADDRESS falls within the range of ITEM.
// ITEM must provide `address` and `size` members. An ITEM whose size is
// zero contains no address at all.
// NOTE(review): the declaration of T (presumably a template header) is
// not visible in this listing.
810 inline bool within(const T &item, Module::Address address) {
811 // Because Module::Address is unsigned, and unsigned arithmetic
812 // wraps around, this will be false if ADDRESS falls before the
813 // start of ITEM, or if it falls after ITEM's end.
814 return address - item.address < item.size;
// Distributes the CU's line records among its functions. Both sequences
// are sorted by address and then swept once, from low to high addresses.
// Each stretch covered by both a function and a line adds a (possibly
// trimmed) copy of the line to that function's `lines`. Stretches
// covered by only one of the two are reported through the warning
// reporter.
// NOTE(review): a number of `else`, brace, assignment and increment
// lines are not visible in this listing; the comments below describe
// only the visible statements.
818 void DwarfCUToModule::AssignLinesToFunctions() {
819 vector<Module::Function *> *functions = &cu_context_->functions;
820 WarningReporter *reporter = cu_context_->reporter;
822 // This would be simpler if we assumed that source line entries
823 // don't cross function boundaries. However, there's no real reason
824 // to assume that (say) a series of function definitions on the same
825 // line wouldn't get coalesced into one line number entry. The
826 // DWARF spec certainly makes no such promises.
828 // So treat the functions and lines as peers, and take the trouble
829 // to compute their ranges' intersections precisely. In any case,
830 // the hair here is a constant factor for performance; the
831 // complexity from here on out is linear.
833 // Put both our functions and lines in order by address.
834 std::sort(functions->begin(), functions->end(),
835 Module::Function::CompareByAddress);
836 std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress);
838 // The last line that we used any piece of. We use this only for
839 // generating warnings.
840 const Module::Line *last_line_used = NULL;
842 // The last function and line we warned about --- so we can avoid
843 // doing so more than once.
844 const Module::Function *last_function_cited = NULL;
845 const Module::Line *last_line_cited = NULL;
847 // Make a single pass through both vectors from lower to higher
848 // addresses, populating each Function's lines vector with lines
849 // from our lines_ vector that fall within the function's address
// range.
851 vector<Module::Function *>::iterator func_it = functions->begin();
852 vector<Module::Line>::const_iterator line_it = lines_.begin();
// The address the sweep has reached so far.
854 Module::Address current;
856 // Pointers to the referents of func_it and line_it, or NULL if the
857 // iterator is at the end of the sequence.
858 Module::Function *func;
859 const Module::Line *line;
861 // Start current at the beginning of the first line or function,
862 // whichever is earlier.
863 if (func_it != functions->end() && line_it != lines_.end()) {
866 current = std::min(func->address, line->address);
867 } else if (line_it != lines_.end()) {
870 current = line->address;
871 } else if (func_it != functions->end()) {
874 current = (*func_it)->address;
879 while (func || line) {
880 // This loop has two invariants that hold at the top.
882 // First, at least one of the iterators is not at the end of its
883 // sequence, and those that are not refer to the earliest
884 // function or line that contains or starts after CURRENT.
886 // Note that every byte is in one of four states: it is covered
887 // or not covered by a function, and, independently, it is
888 // covered or not covered by a line.
890 // The second invariant is that CURRENT refers to a byte whose
891 // state is different from its predecessor, or it refers to the
892 // first byte in the address space. In other words, CURRENT is
893 // always the address of a transition.
895 // Note that, although each iteration advances CURRENT from one
896 // transition address to the next, it might
897 // not advance the iterators. Suppose we have a function that
898 // starts with a line, has a gap, and then a second line, and
899 // suppose that we enter an iteration with CURRENT at the end of
900 // the first line. The next transition address is the start of
901 // the second line, after the gap, so the iteration should
902 // advance CURRENT to that point. At the head of that iteration,
903 // the invariants require that the line iterator be pointing at
904 // the second line. But this is also true at the head of the
905 // next. And clearly, the iteration must not change the function
906 // iterator. So neither iterator moves.
908 // Assert the first invariant (see above).
909 assert(!func || current < func->address || within(*func, current));
910 assert(!line || current < line->address || within(*line, current));
912 // The next transition after CURRENT.
913 Module::Address next_transition;
915 // Figure out which state we're in, add lines or warn, and compute
916 // the next transition address.
917 if (func && current >= func->address) {
918 if (line && current >= line->address) {
919 // Covered by both a line and a function.
// How many bytes of the function and of the line remain at or after
// CURRENT.
920 Module::Address func_left = func->size - (current - func->address);
921 Module::Address line_left = line->size - (current - line->address);
922 // This may overflow, but things work out.
923 next_transition = current + std::min(func_left, line_left);
// Give the function a copy of the line whose size is cut down to the
// overlapping stretch.
924 Module::Line l = *line;
926 l.size = next_transition - current;
927 func->lines.push_back(l);
928 last_line_used = line;
930 // Covered by a function, but no line.
931 if (func != last_function_cited) {
932 reporter->UncoveredFunction(*func);
933 last_function_cited = func;
935 if (line && within(*func, line->address))
936 next_transition = line->address;
938 // If this overflows, we'll catch it below.
939 next_transition = func->address + func->size;
942 if (line && current >= line->address) {
943 // Covered by a line, but no function.
945 // If GCC emits padding after one function to align the start
946 // of the next, then it will attribute the padding
947 // instructions to the last source line of function (to reduce
948 // the size of the line number info), but omit it from the
949 // DW_AT_{low,high}_pc range given in .debug_info (since it
950 // costs nothing to be precise there). If we did use at least
951 // some of the line we're about to skip, and it ends at the
952 // start of the next function, then assume this is what
953 // happened, and don't warn.
954 if (line != last_line_cited
956 && line == last_line_used
957 && func->address - line->address == line->size)) {
958 reporter->UncoveredLine(*line);
959 last_line_cited = line;
961 if (func && within(*line, func->address))
962 next_transition = func->address;
964 // If this overflows, we'll catch it below.
965 next_transition = line->address + line->size;
967 // Covered by neither a function nor a line. By the invariant,
968 // both func and line begin after CURRENT. The next transition
969 // is the start of the next function or next line, whichever
// is earlier.
971 assert(func || line);
973 next_transition = std::min(func->address, line->address);
975 next_transition = func->address;
977 next_transition = line->address;
981 // If a function or line abuts the end of the address space, then
982 // next_transition may end up being zero, in which case we've completed
983 // our pass. Handle that here, instead of trying to deal with it in
984 // each place we compute next_transition.
985 if (!next_transition)
988 // Advance iterators as needed. If lines overlap or functions overlap,
989 // then we could go around more than once. We don't worry too much
990 // about what result we produce in that case, just as long as we don't
// stop making progress (asserted at the bottom of the loop).
992 while (func_it != functions->end()
993 && next_transition >= (*func_it)->address
994 && !within(**func_it, next_transition))
996 func = (func_it != functions->end()) ? *func_it : NULL;
997 while (line_it != lines_.end()
998 && next_transition >= line_it->address
999 && !within(*line_it, next_transition))
1001 line = (line_it != lines_.end()) ? &*line_it : NULL;
1003 // We must make progress.
1004 assert(next_transition > current);
1005 current = next_transition;
// Wraps up one compilation unit: reads its line-number program if it
// has one, assigns the lines to the functions gathered while parsing,
// hands the functions to the module, and finally lets the file context
// drop its specifications.
// NOTE(review): the statement controlled by the first `if`, the end of
// the AddFunctions call and the lines that follow it are not visible in
// this listing.
1009 void DwarfCUToModule::Finish() {
1010 // Assembly language files have no function data, and that gives us
1011 // no place to store our line numbers (even though the GNU toolchain
1012 // will happily produce source line info for assembly language
1013 // files). To avoid spurious warnings about lines we can't assign
1014 // to functions, skip CUs in languages that lack functions.
1015 if (!cu_context_->language->HasFunctions())
1018 // Read source line info, if we have any.
1019 if (has_source_line_info_)
1020 ReadSourceLines(source_line_offset_);
1022 vector<Module::Function *> *functions = &cu_context_->functions;
1024 // Dole out lines to the appropriate functions.
1025 AssignLinesToFunctions();
1027 // Add our functions, which now have source lines assigned to them,
// to the module.
1029 cu_context_->file_context->module_->AddFunctions(functions->begin(),
1032 // Ownership of the function objects has shifted from cu_context to
// the module.
// NOTE(review): the statement that makes cu_context_ give up the
// pointers is not visible in this listing -- confirm it exists, since
// destroying the CU context destroys the functions it still lists.
1036 cu_context_->file_context->ClearSpecifications();
// Accepts a compilation unit for processing only when its DWARF version
// is 2 or later; none of the other arguments influence the result.
// NOTE(review): the parameters between OFFSET and DWARF_VERSION are not
// visible in this listing.
1039 bool DwarfCUToModule::StartCompilationUnit(uint64 offset,
1043 uint8 dwarf_version) {
1044 return dwarf_version >= 2;
// Accepts the root DIE only when TAG is DW_TAG_compile_unit; OFFSET is
// not consulted.
1047 bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag) {
1048 // We don't deal with partial compilation units (the only other tag
1049 // likely to be used for root DIE).
1050 return tag == dwarf2reader::DW_TAG_compile_unit;
1053 } // namespace google_breakpad