From: Rui Ueyama Date: Fri, 11 Mar 2016 04:23:12 +0000 (+0000) Subject: ELF: Add --thread option and partially parallelize writeTo(). X-Git-Tag: llvmorg-3.9.0-rc1~12018 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e98095026f2ca2466e24df8727fb56677ecab649;p=platform%2Fupstream%2Fllvm.git ELF: Add --thread option and partially parallelize writeTo(). This patch adds the --threads option and uses parallel_for_each to write sections in regular OutputSections. This is the first patch to use more than one thread. Note that --threads is off by default because it is experimental. At this moment I still want to focus on single-thread performance because multi-threading is not a magic wand to fix performance problems after all. It is generally very hard to make a slow program faster by adding threads. Therefore, I want to make the linker as efficient as possible first and then look for opportunities to make it even faster using more than one core. Here are some numbers for linking programs with and without --threads, and with GNU gold for comparison. Numbers are in seconds. Clang w/o --threads 0.697 w --threads 0.528 gold 1.643 Scylla w/o --threads 5.032 w --threads 4.935 gold 6.791 GNU gold w/o --threads 0.550 w --threads 0.551 gold 0.737 I limited the number of cores these processes can use to 4 using the perf command, so although my machine has 20 physical cores, the performance gain I observed should be reproducible with a machine which is not as beefy as mine. 
llvm-svn: 263190 --- diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 273b756..aeaa0f9 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -74,6 +74,7 @@ struct Configuration { bool Static = false; bool StripAll; bool SysvHash = true; + bool Threads; bool Verbose; bool ZExecStack; bool ZNodelete; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 8fff516..064938a 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -239,6 +239,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { Config->SaveTemps = Args.hasArg(OPT_save_temps); Config->Shared = Args.hasArg(OPT_shared); Config->StripAll = Args.hasArg(OPT_strip_all); + Config->Threads = Args.hasArg(OPT_threads); Config->Verbose = Args.hasArg(OPT_verbose); Config->DynamicLinker = getString(Args, OPT_dynamic_linker); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 24e9841..c11cb94 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -118,6 +118,8 @@ def strip_all : Flag<["--"], "strip-all">, def sysroot : Joined<["--"], "sysroot=">, HelpText<"Set the system root">; +def threads : Joined<["--"], "threads">; + def undefined : Joined<["--"], "undefined=">, HelpText<"Force undefined symbol during linking">; diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 7970566..d9c0d16 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -12,6 +12,7 @@ #include "LinkerScript.h" #include "SymbolTable.h" #include "Target.h" +#include "lld/Core/Parallel.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/MathExtras.h" #include @@ -929,8 +930,13 @@ template void OutputSection::writeTo(uint8_t *Buf) { ArrayRef Filler = Script->getFiller(this->Name); if (!Filler.empty()) fill(Buf, this->getSize(), Filler); - for (InputSection *C : Sections) - C->writeTo(Buf); + if (Config->Threads) { + parallel_for_each(Sections.begin(), Sections.end(), + [=](InputSection *C) { C->writeTo(Buf); }); + } else { + for (InputSection *C : Sections) + 
C->writeTo(Buf); + } } template