From: Jonas Devlieghere Date: Fri, 2 Jul 2021 23:17:25 +0000 (-0700) Subject: Revert "Create synthetic symbol names on demand to improve memory consumption and... X-Git-Tag: llvmorg-14-init~2315 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6b0d266036f73f5ee9556d211a7d0946091ff3b2;p=platform%2Fupstream%2Fllvm.git Revert "Create synthetic symbol names on demand to improve memory consumption and startup times." This reverts commit c8164d0276b97679e80db01adc860271ab4a5d11 and 43f6dad2344247976d5777f56a1fc29e39c6c717 because it breaks TestDyldTrieSymbols.py on GreenDragon. --- diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index 74e069b..bf1417d4 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -712,6 +712,8 @@ protected: /// false otherwise. bool SetModulesArchitecture(const ArchSpec &new_arch); + ConstString GetNextSyntheticSymbolName(); + static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset); diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h index be3e8ab..3abe311 100644 --- a/lldb/include/lldb/Symbol/Symbol.h +++ b/lldb/include/lldb/Symbol/Symbol.h @@ -113,20 +113,14 @@ public: lldb::LanguageType GetLanguage() const { // TODO: See if there is a way to determine the language for a symbol // somehow, for now just return our best guess - return GetMangled().GuessLanguage(); + return m_mangled.GuessLanguage(); } void SetID(uint32_t uid) { m_uid = uid; } - Mangled &GetMangled() { - SynthesizeNameIfNeeded(); - return m_mangled; - } + Mangled &GetMangled() { return m_mangled; } - const Mangled &GetMangled() const { - SynthesizeNameIfNeeded(); - return m_mangled; - } + const Mangled &GetMangled() const { return m_mangled; } ConstString GetReExportedSymbolName() const; @@ -172,9 +166,9 @@ public: bool IsTrampoline() const; bool IsIndirect() const; - + bool IsWeak() const { return m_is_weak; } - + void SetIsWeak (bool b) { m_is_weak = b; } bool GetByteSizeIsValid() const { return m_size_is_valid; } @@ -229,10 +223,6 @@ public: bool ContainsFileAddress(lldb::addr_t file_addr) const; - static llvm::StringRef GetSyntheticSymbolPrefix() { - return "___lldb_unnamed_symbol"; - } - protected: // This is the internal guts of ResolveReExportedSymbol, it assumes // reexport_name is not null, and that module_spec is valid. We track the @@ -243,8 +233,6 @@ protected: lldb_private::ModuleSpec &module_spec, lldb_private::ModuleList &seen_modules) const; - void SynthesizeNameIfNeeded() const; - uint32_t m_uid = UINT32_MAX; // User ID (usually the original symbol table index) uint16_t m_type_data = 0; // data specific to m_type @@ -270,7 +258,7 @@ protected: // doing name lookups m_is_weak : 1, m_type : 6; // Values from the lldb::SymbolType enum. - mutable Mangled m_mangled; // uniqued symbol name/mangled name pair + Mangled m_mangled; // uniqued symbol name/mangled name pair AddressRange m_addr_range; // Contains the value, or the section offset // address when the value is an address in a // section, and the size (if any) diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h index e1ad0df..fbfa3a5 100644 --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -219,26 +219,6 @@ private: return false; } - /// A helper function that looks up full function names. - /// - /// We generate unique names for synthetic symbols so that users can look - /// them up by name when needed. But because doing so is uncommon in normal - /// debugger use, we trade off some performance at lookup time for faster - /// symbol table building by detecting these symbols and generating their - /// names lazily, rather than adding them to the normal symbol indexes. This - /// function does the job of first consulting the name indexes, and if that - /// fails it extracts the information it needs from the synthetic name and - /// locates the symbol. - /// - /// @param[in] symbol_name The symbol name to search for. - /// - /// @param[out] indexes The vector if symbol indexes to update with results. - /// - /// @returns The number of indexes added to the index vector. Zero if no - /// matches were found. - uint32_t GetNameIndexes(ConstString symbol_name, - std::vector &indexes); - void SymbolIndicesToSymbolContextList(std::vector &symbol_indexes, SymbolContextList &sc_list); diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index a5e86f0..be73d38 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1880,7 +1880,7 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) { unified_section_list.AddSection(symtab_section_sp); } } - } + } } std::shared_ptr ObjectFileELF::GetGnuDebugDataObjectFile() { @@ -2813,37 +2813,31 @@ Symtab *ObjectFileELF::GetSymtab() { if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress( entry_point_file_addr)) { uint64_t symbol_id = m_symtab_up->GetNumSymbols(); - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. - SectionSP section_sp = entry_point_addr.GetSection(); - Symbol symbol( - /*symID=*/symbol_id, - /*name=*/llvm::StringRef(), // Name will be auto generated. - /*type=*/eSymbolTypeCode, - /*external=*/true, - /*is_debug=*/false, - /*is_trampoline=*/false, - /*is_artificial=*/true, - /*section_sp=*/section_sp, - /*offset=*/0, - /*size=*/0, // FDE can span multiple symbols so don't use its size. - /*size_is_valid=*/false, - /*contains_linker_annotations=*/false, - /*flags=*/0); + Symbol symbol(symbol_id, + GetNextSyntheticSymbolName().GetCString(), // Symbol name. + eSymbolTypeCode, // Type of this symbol. + true, // Is this globally visible? + false, // Is this symbol debug info? + false, // Is this symbol a trampoline? + true, // Is this symbol artificial? + entry_point_addr.GetSection(), // Section where this + // symbol is defined. + 0, // Offset in section or symbol value. + 0, // Size. + false, // Size is valid. + false, // Contains linker annotations? + 0); // Symbol flags. + m_symtab_up->AddSymbol(symbol); // When the entry point is arm thumb we need to explicitly set its // class address to reflect that. This is important because expression // evaluation relies on correctly setting a breakpoint at this // address. if (arch.GetMachine() == llvm::Triple::arm && - (entry_point_file_addr & 1)) { - symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1); + (entry_point_file_addr & 1)) m_address_class_map[entry_point_file_addr ^ 1] = AddressClass::eCodeAlternateISA; - } else { + else m_address_class_map[entry_point_file_addr] = AddressClass::eCode; - } - m_symtab_up->AddSymbol(symbol); } } @@ -2923,24 +2917,22 @@ void ObjectFileELF::ParseUnwindSymbols(Symtab *symbol_table, section_list->FindSectionContainingFileAddress(file_addr); if (section_sp) { addr_t offset = file_addr - section_sp->GetFileAddress(); + const char *symbol_name = GetNextSyntheticSymbolName().GetCString(); uint64_t symbol_id = ++last_symbol_id; - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. Symbol eh_symbol( - /*symID=*/symbol_id, - /*name=*/llvm::StringRef(), // Name will be auto generated. - /*type=*/eSymbolTypeCode, - /*external=*/true, - /*is_debug=*/false, - /*is_trampoline=*/false, - /*is_artificial=*/true, - /*section_sp=*/section_sp, - /*offset=*/offset, - /*size=*/0, // FDE can span multiple symbols so don't use its size. - /*size_is_valid=*/false, - /*contains_linker_annotations=*/false, - /*flags=*/0); + symbol_id, // Symbol table index. + symbol_name, // Symbol name. + eSymbolTypeCode, // Type of this symbol. + true, // Is this globally visible? + false, // Is this symbol debug info? + false, // Is this symbol a trampoline? + true, // Is this symbol artificial? + section_sp, // Section in which this symbol is defined or null. + offset, // Offset in section or symbol value. + 0, // Size: Don't specify the size as an FDE can + false, // Size is valid: cover multiple symbols. + false, // Contains linker annotations? + 0); // Symbol flags. new_symbols.push_back(eh_symbol); } } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 72389e9..e7652cf 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4696,10 +4696,8 @@ size_t ObjectFileMachO::ParseSymtab() { symbol_byte_size = section_end_file_addr - symbol_file_addr; } sym[sym_idx].SetID(synthetic_sym_id++); - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. - sym[sym_idx].GetMangled().SetDemangledName(ConstString()); + sym[sym_idx].GetMangled().SetDemangledName( + GetNextSyntheticSymbolName()); sym[sym_idx].SetType(eSymbolTypeCode); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = symbol_addr; diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 101af01..b0fdd50 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -616,6 +616,16 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name, return symbol_type_hint; } +ConstString ObjectFile::GetNextSyntheticSymbolName() { + llvm::SmallString<256> name; + llvm::raw_svector_ostream os(name); + ConstString file_name = GetModule()->GetFileSpec().GetFilename(); + ++m_synthetic_symbol_idx; + os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$" + << file_name.GetStringRef(); + return ConstString(os.str()); +} + std::vector ObjectFile::GetLoadableData(Target &target) { std::vector loadables; diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index b243727..a25911d1 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -56,8 +56,8 @@ Symbol::Symbol(uint32_t symID, const Mangled &mangled, SymbolType type, m_size_is_synthesized(false), m_size_is_valid(size_is_valid || range.GetByteSize() > 0), m_demangled_is_synthesized(false), - m_contains_linker_annotations(contains_linker_annotations), - m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), + m_contains_linker_annotations(contains_linker_annotations), + m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), m_flags(flags) {} Symbol::Symbol(const Symbol &rhs) @@ -119,7 +119,7 @@ bool Symbol::ValueIsAddress() const { } ConstString Symbol::GetDisplayName() const { - return GetMangled().GetDisplayDemangledName(); + return m_mangled.GetDisplayDemangledName(); } ConstString Symbol::GetReExportedSymbolName() const { @@ -202,7 +202,7 @@ void Symbol::GetDescription(Stream *s, lldb::DescriptionLevel level, s->Printf(", value = 0x%16.16" PRIx64, m_addr_range.GetBaseAddress().GetOffset()); } - ConstString demangled = GetMangled().GetDemangledName(); + ConstString demangled = m_mangled.GetDemangledName(); if (demangled) s->Printf(", name=\"%s\"", demangled.AsCString()); if (m_mangled.GetMangledName()) @@ -218,7 +218,7 @@ void Symbol::Dump(Stream *s, Target *target, uint32_t index, // Make sure the size of the symbol is up to date before dumping GetByteSize(); - ConstString name = GetMangled().GetName(name_preference); + ConstString name = m_mangled.GetName(name_preference); if (ValueIsAddress()) { if (!m_addr_range.GetBaseAddress().Dump(s, nullptr, Address::DumpStyleFileAddress)) @@ -330,11 +330,9 @@ uint32_t Symbol::GetPrologueByteSize() { } bool Symbol::Compare(ConstString name, SymbolType type) const { - if (type == eSymbolTypeAny || m_type == type) { - const Mangled &mangled = GetMangled(); - return mangled.GetMangledName() == name || - mangled.GetDemangledName() == name; - } + if (type == eSymbolTypeAny || m_type == type) + return m_mangled.GetMangledName() == name || + m_mangled.GetDemangledName() == name; return false; } @@ -497,10 +495,10 @@ lldb::addr_t Symbol::GetLoadAddress(Target *target) const { return LLDB_INVALID_ADDRESS; } -ConstString Symbol::GetName() const { return GetMangled().GetName(); } +ConstString Symbol::GetName() const { return m_mangled.GetName(); } ConstString Symbol::GetNameNoArguments() const { - return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments); + return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments); } lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const { @@ -567,21 +565,3 @@ bool Symbol::GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor, bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const { return m_addr_range.ContainsFileAddress(file_addr); } - -void Symbol::SynthesizeNameIfNeeded() const { - if (m_is_synthetic && !m_mangled) { - // Synthetic symbol names don't mean anything, but they do uniquely - // identify individual symbols so we give them a unique name. The name - // starts with the synthetic symbol prefix, followed by a unique number. - // Typically the UserID of a real symbol is the symbol table index of the - // symbol in the object file's symbol table(s), so it will be the same - // every time you read in the object file. We want the same persistence for - // synthetic symbols so that users can identify them across multiple debug - // sessions, to understand crashes in those symbols and to reliably set - // breakpoints on them. - llvm::SmallString<256> name; - llvm::raw_svector_ostream os(name); - os << GetSyntheticSymbolPrefix() << GetID(); - m_mangled.SetDemangledName(ConstString(os.str())); - } -} diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 8b4f1d9..85ece8d 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -301,7 +301,7 @@ void Symtab::InitNameIndexes() { // the trampoline symbols to be searchable by name we can remove this and // then possibly add a new bool to any of the Symtab functions that // lookup symbols by name to indicate if they want trampolines. - if (symbol->IsTrampoline() || symbol->IsSynthetic()) + if (symbol->IsTrampoline()) continue; // If the symbol's name string matched a Mangled::ManglingScheme, it is @@ -628,36 +628,6 @@ void Symtab::SortSymbolIndexesByValue(std::vector &indexes, } } -uint32_t Symtab::GetNameIndexes(ConstString symbol_name, - std::vector &indexes) { - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); - const uint32_t count = name_to_index.GetValues(symbol_name, indexes); - if (count) - return count; - // Synthetic symbol names are not added to the name indexes, but they start - // with a prefix and end with a the symbol UserID. This allows users to find - // these symbols without having to add them to the name indexes. These - // queries will not happen very often since the names don't mean anything, so - // performance is not paramount in this case. - llvm::StringRef name = symbol_name.GetStringRef(); - // String the synthetic prefix if the name starts with it. - if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix())) - return 0; // Not a synthetic symbol name - - // Extract the user ID from the symbol name - unsigned long long uid = 0; - if (getAsUnsignedInteger(name, /*Radix=*/10, uid)) - return 0; // Failed to extract the user ID as an integer - Symbol *symbol = FindSymbolByID(uid); - if (symbol == nullptr) - return 0; - const uint32_t symbol_idx = GetIndexForSymbol(symbol); - if (symbol_idx == UINT32_MAX) - return 0; - indexes.push_back(symbol_idx); - return 1; -} - uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); @@ -667,7 +637,8 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); - return GetNameIndexes(symbol_name, indexes); + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); + return name_to_index.GetValues(symbol_name, indexes); } return 0; } @@ -684,9 +655,10 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); std::vector all_name_indexes; const size_t name_match_count = - GetNameIndexes(symbol_name, all_name_indexes); + name_to_index.GetValues(symbol_name, all_name_indexes); for (size_t i = 0; i < name_match_count; ++i) { if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type, symbol_visibility)) diff --git a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml index 0dcc9fb..6178a45 100644 --- a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml @@ -3,8 +3,8 @@ # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name # CHECK: [ 0] 1 SourceFile 0x0000000000000000 0x0000000000000000 0x00000004 - -# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} -# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol1$${{.*}} +# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol2$${{.*}} --- !ELF FileHeader: diff --git a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test index 788dafe..1eb03fa 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test @@ -5,7 +5,7 @@ # CHECK-LABEL: (lldb) image dump symtab symtab.out # CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name -# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}}$$symtab.out # CHECK: [ 1] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func # CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only # CHECK: [ 3] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2