From e1f0e22fcd991f40827a34de4ed063c7c4c36eea Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 13 Mar 2018 14:27:15 +0000 Subject: [PATCH] [dsymutil] Perform analyzeContextInfo and CloneDIEs in parallel This patch makes dsymutil perform analyzeContextInfo and CloneDIEs in parallel. For the same object file, there is a dependency between the two. However, we can do analyzeContextInfo for the next object file while cloning DIEs for the current. This is exactly the approach taken in this patch. For WebCore, this leads to a performance improvement of 29% and for clang we see similar results with at 32% improvement. A big thanks to Pete Cooper who came up with the original idea and the PoC. Differential revision: https://reviews.llvm.org/D43945 llvm-svn: 327399 --- llvm/test/tools/dsymutil/basic-linking.test | 43 +++---- llvm/tools/dsymutil/DwarfLinker.cpp | 168 +++++++++++++++++++--------- 2 files changed, 141 insertions(+), 70 deletions(-) diff --git a/llvm/test/tools/dsymutil/basic-linking.test b/llvm/test/tools/dsymutil/basic-linking.test index 5be9010..a145e47 100644 --- a/llvm/test/tools/dsymutil/basic-linking.test +++ b/llvm/test/tools/dsymutil/basic-linking.test @@ -1,7 +1,7 @@ RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE -RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 %p/Inputs/basic-lto.macho.x86_64 %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LTO --check-prefix=CHECK-ARCHIVE +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 %p/Inputs/basic-lto.macho.x86_64 %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LTO --check-prefix=CHECK-ARCHIVE This test check the basic Dwarf linking process through the debug dumps. @@ -11,17 +11,24 @@ CHECK: Input compilation unit: CHECK-NEXT: TAG_compile_unit CHECK-NOT: TAG CHECK: AT_name {{.*}}basic1.c -CHECK-NOT: Found valid debug map entry -CHECK: Found valid debug map entry: _main 0000000000000000 => 0000000100000ea0 -CHECK-NEXT: DW_TAG_subprogram -CHECK-NEXT: DW_AT_name{{.*}}"main" CHECK: DEBUG MAP OBJECT: {{.*}}basic2.macho.x86_64.o CHECK: Input compilation unit: CHECK-NEXT: TAG_compile_unit CHECK-NOT: TAG CHECK: AT_name {{.*}}basic2.c + +CHECK: DEBUG MAP OBJECT: {{.*}}basic3.macho.x86_64.o +CHECK: Input compilation unit: +CHECK-NEXT: TAG_compile_unit +CHECK-NOT: TAG +CHECK: AT_name {{.*}}basic3.c + CHECK-NOT: Found valid debug map entry +CHECK: Found valid debug map entry: _main 0000000000000000 => 0000000100000ea0 +CHECK-NEXT: DW_TAG_subprogram +CHECK-NEXT: DW_AT_name{{.*}}"main" + CHECK: Found valid debug map entry: _private_int 0000000000000560 => 0000000100001008 CHECK-NEXT: DW_TAG_variable CHECK-NEXT: DW_AT_name {{.*}}"private_int" @@ -38,11 +45,6 @@ CHECK: Found valid debug map entry: _inc 0000000000000070 => 0000000100000f20 CHECK-NEXT: DW_TAG_subprogram CHECK-NEXT: DW_AT_name {{.*}}"inc" -CHECK: DEBUG MAP OBJECT: {{.*}}basic3.macho.x86_64.o -CHECK: Input compilation unit: -CHECK-NEXT: TAG_compile_unit -CHECK-NOT: TAG -CHECK: AT_name {{.*}}basic3.c CHECK-NOT: Found valid debug map entry CHECK: Found valid debug map entry: _val ffffffffffffffff => 0000000100001004 CHECK-NEXT: DW_TAG_variable @@ -104,16 +106,24 @@ CHECK-ARCHIVE: Input compilation unit: CHECK-ARCHIVE-NEXT: TAG_compile_unit CHECK-ARCHIVE-NOT: TAG CHECK-ARCHIVE: AT_name {{.*}}basic1.c -CHECK-ARCHIVE-NOT: Found valid debug map entry -CHECK-ARCHIVE: Found valid debug map entry: _main 0000000000000000 => 0000000100000ea0 -CHECK-ARCHIVE-NEXT: DW_TAG_subprogram -CHECK-ARCHIVE-NEXT: DW_AT_name{{.*}}"main" CHECK-ARCHIVE: DEBUG MAP OBJECT: {{.*}}libbasic.a(basic2.macho.x86_64.o) CHECK-ARCHIVE: Input compilation unit: CHECK-ARCHIVE-NEXT: TAG_compile_unit CHECK-ARCHIVE-NOT: TAG CHECK-ARCHIVE: AT_name {{.*}}basic2.c + +CHECK-ARCHIVE: DEBUG MAP OBJECT: {{.*}}libbasic.a(basic3.macho.x86_64.o) +CHECK-ARCHIVE: Input compilation unit: +CHECK-ARCHIVE-NEXT: TAG_compile_unit +CHECK-ARCHIVE-NOT: TAG +CHECK-ARCHIVE: AT_name {{.*}}basic3.c + +CHECK-ARCHIVE-NOT: Found valid debug map entry +CHECK-ARCHIVE: Found valid debug map entry: _main 0000000000000000 => 0000000100000ea0 +CHECK-ARCHIVE-NEXT: DW_TAG_subprogram +CHECK-ARCHIVE-NEXT: DW_AT_name{{.*}}"main" + CHECK-ARCHIVE-NOT: Found valid debug map entry CHECK-ARCHIVE: Found valid debug map entry: _private_int 0000000000000560 => 0000000100001004 CHECK-ARCHIVE-NEXT: DW_TAG_variable @@ -131,11 +141,6 @@ CHECK-ARCHIVE: Found valid debug map entry: _inc 0000000000000070 => 0000000100 CHECK-ARCHIVE-NEXT: DW_TAG_subprogram CHECK-ARCHIVE-NEXT: DW_AT_name {{.*}}"inc" -CHECK-ARCHIVE: DEBUG MAP OBJECT: {{.*}}libbasic.a(basic3.macho.x86_64.o) -CHECK-ARCHIVE: Input compilation unit: -CHECK-ARCHIVE-NEXT: TAG_compile_unit -CHECK-ARCHIVE-NOT: TAG -CHECK-ARCHIVE: AT_name {{.*}}basic3.c CHECK-ARCHIVE-NOT: Found valid debug map entry CHECK-ARCHIVE: Found valid debug map entry: _val ffffffffffffffff => 0000000100001008 CHECK-ARCHIVE-NEXT: DW_TAG_variable diff --git a/llvm/tools/dsymutil/DwarfLinker.cpp b/llvm/tools/dsymutil/DwarfLinker.cpp index 6c46937..305768da 100644 --- a/llvm/tools/dsymutil/DwarfLinker.cpp +++ b/llvm/tools/dsymutil/DwarfLinker.cpp @@ -13,6 +13,7 @@ #include "NonRelocatableStringpool.h" #include "dsymutil.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" @@ -75,6 +76,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ThreadPool.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -4138,6 +4140,14 @@ bool DwarfLinker::link(const DebugMap &Map) { unsigned UnitID = 0; DebugMap ModuleMap(Map.getTriple(), Map.getBinaryPath()); + // First populate the data structure we need for each iteration of the + // parallel loop. + unsigned NumObjects = Map.getNumberOfObjects(); + std::vector ObjectContexts; + ObjectContexts.reserve(NumObjects); + for (const auto &Obj : Map.objects()) + ObjectContexts.emplace_back(Map, *this, *Obj.get(), Options.Verbose); + // This Dwarf string pool which is only used for uniquing. This one should // never be used for offsets as its not thread-safe or predictable. UniquingStringPool UniquingStringPool; @@ -4150,11 +4160,10 @@ bool DwarfLinker::link(const DebugMap &Map) { // ODR Contexts for the link. DeclContextTree ODRContexts; - for (const auto &Obj : Map.objects()) { - LinkContext LinkContext(Map, *this, *Obj.get(), Options.Verbose); - + for (LinkContext &LinkContext : ObjectContexts) { if (Options.Verbose) - outs() << "DEBUG MAP OBJECT: " << Obj->getObjectFilename() << "\n"; + outs() << "DEBUG MAP OBJECT: " << LinkContext.DMO.getObjectFilename() + << "\n"; // N_AST objects (swiftmodule files) should get dumped directly into the // appropriate DWARF section. @@ -4188,14 +4197,20 @@ bool DwarfLinker::link(const DebugMap &Map) { } if (!LinkContext.ObjectFile) + continue; // Look for relocations that correspond to debug map entries. + if (LLVM_LIKELY(!Options.Update) && !LinkContext.RelocMgr.findValidRelocsInDebugInfo( *LinkContext.ObjectFile, LinkContext.DMO)) { if (Options.Verbose) - warn("No valid relocations found: skipping\n"); + outs() << "No valid relocations found. Skipping.\n"; + + // Clear this ObjFile entry as a signal to other loops that we should not + // process this iteration. + LinkContext.ObjectFile = nullptr; continue; } @@ -4203,6 +4218,8 @@ bool DwarfLinker::link(const DebugMap &Map) { if (!LinkContext.DwarfContext) continue; + startDebugObject(LinkContext); + // In a first phase, just read in the debug info and load all clang modules. LinkContext.CompileUnits.reserve( LinkContext.DwarfContext->getNumCompileUnits()); @@ -4225,58 +4242,107 @@ bool DwarfLinker::link(const DebugMap &Map) { maybeUpdateMaxDwarfVersion(CU->getVersion()); } } + } - // Now build the DIE parent links that we will use during the next phase. - for (auto &CurrentUnit : LinkContext.CompileUnits) - analyzeContextInfo(CurrentUnit->getOrigUnit().getUnitDIE(), 0, - *CurrentUnit, &ODRContexts.getRoot(), - UniquingStringPool, ODRContexts); + ThreadPool pool(2); - // Then mark all the DIEs that need to be present in the linked - // output and collect some information about them. Note that this - // loop can not be merged with the previous one because cross-CU - // references require the ParentIdx to be setup for every CU in - // the object file before calling this. - if (LLVM_UNLIKELY(Options.Update)) { - for (auto &CurrentUnit : LinkContext.CompileUnits) - CurrentUnit->markEverythingAsKept(); - Streamer->copyInvariantDebugSection(*LinkContext.ObjectFile, Options); - } else { + // These variables manage the list of processed object files. + // The mutex and condition variable are to ensure that this is thread safe. + std::mutex ProcessedFilesMutex; + std::condition_variable ProcessedFilesConditionVariable; + BitVector ProcessedFiles(NumObjects, false); + + // Now do analyzeContextInfo in parallel as it is particularly expensive. + pool.async([&]() { + for (unsigned i = 0, e = NumObjects; i != e; ++i) { + auto &LinkContext = ObjectContexts[i]; + + if (!LinkContext.ObjectFile) { + std::unique_lock LockGuard(ProcessedFilesMutex); + ProcessedFiles.set(i); + ProcessedFilesConditionVariable.notify_one(); + continue; + } + + // Now build the DIE parent links that we will use during the next phase. for (auto &CurrentUnit : LinkContext.CompileUnits) - lookForDIEsToKeep(LinkContext.RelocMgr, LinkContext.Ranges, - LinkContext.CompileUnits, - CurrentUnit->getOrigUnit().getUnitDIE(), - LinkContext.DMO, *CurrentUnit, 0); + analyzeContextInfo(CurrentUnit->getOrigUnit().getUnitDIE(), 0, + *CurrentUnit, &ODRContexts.getRoot(), + UniquingStringPool, ODRContexts); + + std::unique_lock LockGuard(ProcessedFilesMutex); + ProcessedFiles.set(i); + ProcessedFilesConditionVariable.notify_one(); } + }); + + // And then the remaining work in serial again. + // Note, although this loop runs in serial, it can run in parallel with + // the analyzeContextInfo loop so long as we process files with indices >= + // than those processed by analyzeContextInfo. + pool.async([&]() { + for (unsigned i = 0, e = NumObjects; i != e; ++i) { + { + std::unique_lock LockGuard(ProcessedFilesMutex); + if (!ProcessedFiles[i]) { + ProcessedFilesConditionVariable.wait( + LockGuard, [&]() { return ProcessedFiles[i]; }); + } + } - // The calls to applyValidRelocs inside cloneDIE will walk the - // reloc array again (in the same way findValidRelocsInDebugInfo() - // did). We need to reset the NextValidReloc index to the beginning. - LinkContext.RelocMgr.resetValidRelocs(); - if (LinkContext.RelocMgr.hasValidRelocs() || LLVM_UNLIKELY(Options.Update)) - DIECloner(*this, LinkContext.RelocMgr, DIEAlloc, LinkContext.CompileUnits, - Options) - .cloneAllCompileUnits(*LinkContext.DwarfContext, LinkContext.DMO, - LinkContext.Ranges, OffsetsStringPool); - if (!Options.NoOutput && !LinkContext.CompileUnits.empty() && - LLVM_LIKELY(!Options.Update)) - patchFrameInfoForObject( - LinkContext.DMO, LinkContext.Ranges, *LinkContext.DwarfContext, - LinkContext.CompileUnits[0]->getOrigUnit().getAddressByteSize()); - - // Clean-up before starting working on the next object. - endDebugObject(LinkContext); - } + auto &LinkContext = ObjectContexts[i]; + if (!LinkContext.ObjectFile) + continue; - // Emit everything that's global. - if (!Options.NoOutput) { - Streamer->emitAbbrevs(Abbreviations, MaxDwarfVersion); - Streamer->emitStrings(OffsetsStringPool); - Streamer->emitAppleNames(AppleNames); - Streamer->emitAppleNamespaces(AppleNamespaces); - Streamer->emitAppleTypes(AppleTypes); - Streamer->emitAppleObjc(AppleObjc); - } + // Then mark all the DIEs that need to be present in the linked output + // and collect some information about them. + // Note that this loop can not be merged with the previous one because + // cross-cu references require the ParentIdx to be setup for every CU in + // the object file before calling this. + if (LLVM_UNLIKELY(Options.Update)) { + for (auto &CurrentUnit : LinkContext.CompileUnits) + CurrentUnit->markEverythingAsKept(); + Streamer->copyInvariantDebugSection(*LinkContext.ObjectFile, Options); + } else { + for (auto &CurrentUnit : LinkContext.CompileUnits) + lookForDIEsToKeep(LinkContext.RelocMgr, LinkContext.Ranges, + LinkContext.CompileUnits, + CurrentUnit->getOrigUnit().getUnitDIE(), + LinkContext.DMO, *CurrentUnit, 0); + } + + // The calls to applyValidRelocs inside cloneDIE will walk the reloc + // array again (in the same way findValidRelocsInDebugInfo() did). We + // need to reset the NextValidReloc index to the beginning. + LinkContext.RelocMgr.resetValidRelocs(); + if (LinkContext.RelocMgr.hasValidRelocs() || + LLVM_UNLIKELY(Options.Update)) + DIECloner(*this, LinkContext.RelocMgr, DIEAlloc, + LinkContext.CompileUnits, Options) + .cloneAllCompileUnits(*LinkContext.DwarfContext, LinkContext.DMO, + LinkContext.Ranges, OffsetsStringPool); + if (!Options.NoOutput && !LinkContext.CompileUnits.empty() && + LLVM_LIKELY(!Options.Update)) + patchFrameInfoForObject( + LinkContext.DMO, LinkContext.Ranges, *LinkContext.DwarfContext, + LinkContext.CompileUnits[0]->getOrigUnit().getAddressByteSize()); + + // Clean-up before starting working on the next object. + endDebugObject(LinkContext); + } + + // Emit everything that's global. + if (!Options.NoOutput) { + Streamer->emitAbbrevs(Abbreviations, MaxDwarfVersion); + Streamer->emitStrings(OffsetsStringPool); + Streamer->emitAppleNames(AppleNames); + Streamer->emitAppleNamespaces(AppleNamespaces); + Streamer->emitAppleTypes(AppleTypes); + Streamer->emitAppleObjc(AppleObjc); + } + }); + + pool.wait(); return Options.NoOutput ? true : Streamer->finish(Map); } -- 2.7.4