From: Fangrui Song Date: Thu, 4 Aug 2022 18:47:52 +0000 (-0700) Subject: [ELF] Parallelize input section initialization X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2515cb80cd3ba3bcead6ec2f1f30437cc2d325d5;p=platform%2Fupstream%2Fllvm.git [ELF] Parallelize input section initialization This implements the last step of https://discourse.llvm.org/t/parallel-input-file-parsing/60164 for the ELF port. For an ELF object file, we previously did: parse, (parallel) initializeLocalSymbols, (parallel) postParseObjectFile. Now we do: parse, (parallel) initSectionsAndLocalSyms, (parallel) postParseObjectFile. initSectionsAndLocalSyms does most of input section initialization. The sequential `parse` does SHT_ARM_ATTRIBUTES/SHT_RISCV_ATTRIBUTES/SHT_GROUP initialization for now. Performance linking some programs with --threads=8 (glibc 2.33 malloc and mimalloc): * clang: 1.05x as fast with glibc malloc, 1.03x as fast with mimalloc * chrome: 1.04x as fast with glibc malloc, 1.03x as fast with mimalloc * internal search program: 1.08x as fast with glibc malloc, 1.05x as fast with mimalloc Reviewed By: peter.smith Differential Revision: https://reviews.llvm.org/D130810 --- diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index e000784..1f7c56d 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2393,19 +2393,19 @@ static uint32_t getAndFeatures() { return ret; } -static void initializeLocalSymbols(ELFFileBase *file) { +static void initSectionsAndLocalSyms(ELFFileBase *file, bool ignoreComdats) { switch (config->ekind) { case ELF32LEKind: - cast>(file)->initializeLocalSymbols(); + cast>(file)->initSectionsAndLocalSyms(ignoreComdats); break; case ELF32BEKind: - cast>(file)->initializeLocalSymbols(); + cast>(file)->initSectionsAndLocalSyms(ignoreComdats); break; case ELF64LEKind: - cast>(file)->initializeLocalSymbols(); + cast>(file)->initSectionsAndLocalSyms(ignoreComdats); break; case ELF64BEKind: - cast>(file)->initializeLocalSymbols(); + cast>(file)->initSectionsAndLocalSyms(ignoreComdats); break; default: llvm_unreachable(""); @@ -2556,7 +2556,9 @@ void LinkerDriver::link(opt::InputArgList &args) { // No more lazy bitcode can be extracted at this point. Do post parse work // like checking duplicate symbols. - parallelForEach(ctx->objectFiles, initializeLocalSymbols); + parallelForEach(ctx->objectFiles, [](ELFFileBase *file) { + initSectionsAndLocalSyms(file, /*ignoreComdats=*/false); + }); parallelForEach(ctx->objectFiles, postParseObjectFile); parallelForEach(ctx->bitcodeFiles, [](BitcodeFile *file) { file->postParse(); }); @@ -2640,7 +2642,9 @@ void LinkerDriver::link(opt::InputArgList &args) { // compileBitcodeFiles may have produced lto.tmp object files. After this, no // more file will be added. auto newObjectFiles = makeArrayRef(ctx->objectFiles).slice(numObjsBeforeLTO); - parallelForEach(newObjectFiles, initializeLocalSymbols); + parallelForEach(newObjectFiles, [](ELFFileBase *file) { + initSectionsAndLocalSyms(file, /*ignoreComdats=*/true); + }); parallelForEach(newObjectFiles, postParseObjectFile); for (const DuplicateSymbol &d : ctx->duplicates) reportDuplicate(*d.sym, d.file, d.section, d.value); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 560e24f..3650065 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -516,10 +516,121 @@ uint32_t ObjFile::getSectionIndex(const Elf_Sym &sym) const { template void ObjFile::parse(bool ignoreComdats) { object::ELFFile obj = this->getObj(); // Read a section table. justSymbols is usually false. - if (this->justSymbols) + if (this->justSymbols) { initializeJustSymbols(); - else - initializeSections(ignoreComdats, obj); + initializeSymbols(obj); + return; + } + + // Handle dependent libraries and selection of section groups as these are not + // done in parallel. + ArrayRef objSections = getELFShdrs(); + StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); + uint64_t size = objSections.size(); + sections.resize(size); + for (size_t i = 0; i != size; ++i) { + const Elf_Shdr &sec = objSections[i]; + if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) { + StringRef name = check(obj.getSectionName(sec, shstrtab)); + ArrayRef data = CHECK( + this->getObj().template getSectionContentsAsArray(sec), this); + if (!data.empty() && data.back() != '\0') { + error( + toString(this) + + ": corrupted dependent libraries section (unterminated string): " + + name); + } else { + for (const char *d = data.begin(), *e = data.end(); d < e;) { + StringRef s(d); + addDependentLibrary(s, this); + d += s.size() + 1; + } + } + this->sections[i] = &InputSection::discarded; + continue; + } + + if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) { + ARMAttributeParser attributes; + ArrayRef contents = + check(this->getObj().getSectionContents(sec)); + StringRef name = check(obj.getSectionName(sec, shstrtab)); + this->sections[i] = &InputSection::discarded; + if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind + ? support::little + : support::big)) { + InputSection isec(*this, sec, name); + warn(toString(&isec) + ": " + llvm::toString(std::move(e))); + } else { + updateSupportedARMFeatures(attributes); + updateARMVFPArgs(attributes, this); + + // FIXME: Retain the first attribute section we see. The eglibc ARM + // dynamic loaders require the presence of an attribute section for + // dlopen to work. In a full implementation we would merge all attribute + // sections. + if (in.attributes == nullptr) { + in.attributes = std::make_unique(*this, sec, name); + this->sections[i] = in.attributes.get(); + } + } + } + + if (sec.sh_type == SHT_RISCV_ATTRIBUTES && config->emachine == EM_RISCV) { + RISCVAttributeParser attributes; + ArrayRef contents = + check(this->getObj().getSectionContents(sec)); + StringRef name = check(obj.getSectionName(sec, shstrtab)); + this->sections[i] = &InputSection::discarded; + if (Error e = attributes.parse(contents, support::little)) { + InputSection isec(*this, sec, name); + warn(toString(&isec) + ": " + llvm::toString(std::move(e))); + } else { + // FIXME: Validate arch tag contains C if and only if EF_RISCV_RVC is + // present. + + // FIXME: Retain the first attribute section we see. Tools such as + // llvm-objdump make use of the attribute section to determine which + // standard extensions to enable. In a full implementation we would + // merge all attribute sections. + if (in.attributes == nullptr) { + in.attributes = std::make_unique(*this, sec, name); + this->sections[i] = in.attributes.get(); + } + } + } + + if (sec.sh_type != SHT_GROUP) + continue; + StringRef signature = getShtGroupSignature(objSections, sec); + ArrayRef entries = + CHECK(obj.template getSectionContentsAsArray(sec), this); + if (entries.empty()) + fatal(toString(this) + ": empty SHT_GROUP"); + + Elf_Word flag = entries[0]; + if (flag && flag != GRP_COMDAT) + fatal(toString(this) + ": unsupported SHT_GROUP format"); + + bool keepGroup = + (flag & GRP_COMDAT) == 0 || ignoreComdats || + symtab->comdatGroups.try_emplace(CachedHashStringRef(signature), this) + .second; + if (keepGroup) { + if (config->relocatable) + this->sections[i] = createInputSection( + i, sec, check(obj.getSectionName(sec, shstrtab))); + continue; + } + + // Otherwise, discard group members. + for (uint32_t secIndex : entries.slice(1)) { + if (secIndex >= size) + fatal(toString(this) + + ": invalid section index in group: " + Twine(secIndex)); + this->sections[secIndex] = &InputSection::discarded; + } + } // Read a symbol table. initializeSymbols(obj); @@ -601,10 +712,7 @@ void ObjFile::initializeSections(bool ignoreComdats, ArrayRef objSections = getELFShdrs(); StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); uint64_t size = objSections.size(); - this->sections.resize(size); - - std::vector> selectedGroups; - + SmallVector, 0> selectedGroups; for (size_t i = 0; i != size; ++i) { if (this->sections[i] == &InputSection::discarded) continue; @@ -637,38 +745,16 @@ void ObjFile::initializeSections(bool ignoreComdats, switch (sec.sh_type) { case SHT_GROUP: { - // De-duplicate section groups by their signatures. - StringRef signature = getShtGroupSignature(objSections, sec); - this->sections[i] = &InputSection::discarded; - + if (!config->relocatable) + sections[i] = &InputSection::discarded; + StringRef signature = + cantFail(this->getELFSyms()[sec.sh_info].getName(stringTable)); ArrayRef entries = - CHECK(obj.template getSectionContentsAsArray(sec), this); - if (entries.empty()) - fatal(toString(this) + ": empty SHT_GROUP"); - - Elf_Word flag = entries[0]; - if (flag && flag != GRP_COMDAT) - fatal(toString(this) + ": unsupported SHT_GROUP format"); - - bool keepGroup = - (flag & GRP_COMDAT) == 0 || ignoreComdats || - symtab->comdatGroups.try_emplace(CachedHashStringRef(signature), this) - .second; - if (keepGroup) { - if (config->relocatable) - this->sections[i] = createInputSection( - i, sec, check(obj.getSectionName(sec, shstrtab))); + cantFail(obj.template getSectionContentsAsArray(sec)); + if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats || + symtab->comdatGroups.find(CachedHashStringRef(signature))->second == + this) selectedGroups.push_back(entries); - continue; - } - - // Otherwise, discard group members. - for (uint32_t secIndex : entries.slice(1)) { - if (secIndex >= size) - fatal(toString(this) + - ": invalid section index in group: " + Twine(secIndex)); - this->sections[secIndex] = &InputSection::discarded; - } break; } case SHT_SYMTAB_SHNDX: @@ -718,8 +804,8 @@ void ObjFile::initializeSections(bool ignoreComdats, // simply handle such sections as non-mergeable ones. Degrading like this // is acceptable because section merging is optional. if (auto *ms = dyn_cast(s)) { - s = make(ms->file, ms->flags, ms->type, ms->alignment, - ms->data(), ms->name); + s = makeThreadLocal(ms->file, ms->flags, ms->type, + ms->alignment, ms->data(), ms->name); sections[info] = s; } @@ -734,7 +820,7 @@ void ObjFile::initializeSections(bool ignoreComdats, // specified, we need to copy them to the output. (Some post link analysis // tools specify --emit-relocs to obtain the information.) if (config->copyRelocs) { - auto *isec = make( + auto *isec = makeThreadLocal( *this, sec, check(obj.getSectionName(sec, shstrtab))); // If the relocated section is discarded (due to /DISCARD/ or // --gc-sections), the relocation section should be discarded as well. @@ -861,73 +947,12 @@ InputSectionBase *ObjFile::getRelocTarget(uint32_t idx, return nullptr; } +// The function may be called concurrently for different input files. For +// allocation, prefer makeThreadLocal which does not require holding a lock. template InputSectionBase *ObjFile::createInputSection(uint32_t idx, const Elf_Shdr &sec, StringRef name) { - if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) { - ARMAttributeParser attributes; - ArrayRef contents = check(this->getObj().getSectionContents(sec)); - if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind - ? support::little - : support::big)) { - InputSection isec(*this, sec, name); - warn(toString(&isec) + ": " + llvm::toString(std::move(e))); - } else { - updateSupportedARMFeatures(attributes); - updateARMVFPArgs(attributes, this); - - // FIXME: Retain the first attribute section we see. The eglibc ARM - // dynamic loaders require the presence of an attribute section for dlopen - // to work. In a full implementation we would merge all attribute - // sections. - if (in.attributes == nullptr) { - in.attributes = std::make_unique(*this, sec, name); - return in.attributes.get(); - } - return &InputSection::discarded; - } - } - - if (sec.sh_type == SHT_RISCV_ATTRIBUTES && config->emachine == EM_RISCV) { - RISCVAttributeParser attributes; - ArrayRef contents = check(this->getObj().getSectionContents(sec)); - if (Error e = attributes.parse(contents, support::little)) { - InputSection isec(*this, sec, name); - warn(toString(&isec) + ": " + llvm::toString(std::move(e))); - } else { - // FIXME: Validate arch tag contains C if and only if EF_RISCV_RVC is - // present. - - // FIXME: Retain the first attribute section we see. Tools such as - // llvm-objdump make use of the attribute section to determine which - // standard extensions to enable. In a full implementation we would merge - // all attribute sections. - if (in.attributes == nullptr) { - in.attributes = std::make_unique(*this, sec, name); - return in.attributes.get(); - } - return &InputSection::discarded; - } - } - - if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) { - ArrayRef data = - CHECK(this->getObj().template getSectionContentsAsArray(sec), this); - if (!data.empty() && data.back() != '\0') { - error(toString(this) + - ": corrupted dependent libraries section (unterminated string): " + - name); - return &InputSection::discarded; - } - for (const char *d = data.begin(), *e = data.end(); d < e;) { - StringRef s(d); - addDependentLibrary(s, this); - d += s.size() + 1; - } - return &InputSection::discarded; - } - if (name.startswith(".n")) { // The GNU linker uses .note.GNU-stack section as a marker indicating // that the code in the object file does not expect that the stack is @@ -993,11 +1018,11 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // .eh_frame_hdr section for runtime. So we handle them with a special // class. For relocatable outputs, they are just passed through. if (name == ".eh_frame" && !config->relocatable) - return make(*this, sec, name); + return makeThreadLocal(*this, sec, name); if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name)) - return make(*this, sec, name); - return make(*this, sec, name); + return makeThreadLocal(*this, sec, name); + return makeThreadLocal(*this, sec, name); } // Initialize this->Symbols. this->Symbols is a parallel array as @@ -1063,7 +1088,11 @@ void ObjFile::initializeSymbols(const object::ELFFile &obj) { } } -template void ObjFile::initializeLocalSymbols() { +template +void ObjFile::initSectionsAndLocalSyms(bool ignoreComdats) { + if (!justSymbols) + initializeSections(ignoreComdats, getObj()); + if (!firstGlobal) return; SymbolUnion *locals = makeThreadLocalN(firstGlobal); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index dd9f732..0cb6a67 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -274,7 +274,7 @@ public: // Get cached DWARF information. DWARFCache *getDwarf(); - void initializeLocalSymbols(); + void initSectionsAndLocalSyms(bool ignoreComdats); void postParse(); private: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 57bb485..7181031 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -283,7 +283,8 @@ std::string InputSectionBase::getObjMsg(uint64_t off) { archive = (" in archive " + file->archiveName).str(); // Find a symbol that encloses a given location. getObjMsg may be called - // before ObjFile::initializeLocalSymbols where local symbols are initialized. + // before ObjFile::initSectionsAndLocalSyms where local symbols are + // initialized. for (Symbol *b : file->getSymbols()) if (auto *d = dyn_cast_or_null(b)) if (d->section == this && d->value <= off && off < d->value + d->size)