From: Joseph Huber Date: Tue, 24 Jan 2023 17:04:47 +0000 (-0600) Subject: [LinkerWrapper] Only import static libraries with needed symbols X-Git-Tag: upstream/17.0.6~19747 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1964c334782e7c5961772fbdcdcc0029cba3a7fa;p=platform%2Fupstream%2Fllvm.git [LinkerWrapper] Only import static libraries with needed symbols Currently, we pull in every single static archive member as long as we have an offloading architecture that requires it. This goes against the standard sematnics of static libraries that only pull in symbols that define currently undefined symbols. In order to support this we roll some custom symbol resolution logic to check if a static library is needed. Because of offloading semantics, this requires an extra check for externally visibile symbols. E.g. if a static member defines a kernel we should import it. The main benefit to this is that we can now link against the `libomptarget.devicertl.a` library unconditionally. This removes the requirement for users to specify LTO on the link command. This will also allow us to stop using the `amdgcn` bitcode versions of the libraries. ``` clang foo.c -fopenmp --offload-arch=gfx1030 -foffload-lto -c clang foo.o -fopenmp --offload-arch=gfx1030 -foffload-lto ``` Reviewed By: tra Differential Revision: https://reviews.llvm.org/D142484 --- diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c new file mode 100644 index 0000000..3d5a76a --- /dev/null +++ b/clang/test/Driver/linker-wrapper-libs.c @@ -0,0 +1,128 @@ +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o + +#if defined(RESOLVES) +int __attribute__((visibility("hidden"))) sym; +#elif defined(GLOBAL) +int __attribute__((visibility("protected"))) global; +#elif defined(WEAK) +int __attribute__((visibility("hidden"))) weak; +#elif defined(HIDDEN) +int __attribute__((visibility("hidden"))) hidden; +#else +extern int sym; + +extern int __attribute__((weak)) weak; + +int foo() { return sym; } +int bar() { return weak; } +#endif + +// +// Check that we extract a static library defining an undefined symbol. +// +// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DRESOLVES -o %t.nvptx.resolves.bc +// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DRESOLVES -o %t.amdgpu.resolves.bc +// RUN: clang-offload-packager -o %t-lib.out \ +// RUN: --image=file=%t.nvptx.resolves.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.amdgpu.resolves.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out +// RUN: llvm-ar rcs %t.a %t.o +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES + +// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o +// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o + +// +// Check that we extract a static library that defines a global visibile to the +// host. +// +// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc +// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc +// RUN: clang-offload-packager -o %t-lib.out \ +// RUN: --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out +// RUN: llvm-ar rcs %t.a %t.o +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL + +// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o +// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o + +// +// Check that we do not extract an external weak symbol. +// +// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DWEAK -o %t.nvptx.weak.bc +// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DWEAK -o %t.amdgpu.weak.bc +// RUN: clang-offload-packager -o %t-lib.out \ +// RUN: --image=file=%t.nvptx.weak.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.amdgpu.weak.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out +// RUN: llvm-ar rcs %t.a %t.o +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=LIBRARY-WEAK + +// LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 +// LIBRARY-WEAK-NOT: {{.*}}.o {{.*}}.o +// LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 + +// +// Check that we do not extract an unneeded hidden symbol. +// +// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DHIDDEN -o %t.nvptx.hidden.bc +// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DHIDDEN -o %t.amdgpu.hidden.bc +// RUN: clang-offload-packager -o %t-lib.out \ +// RUN: --image=file=%t.nvptx.hidden.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.amdgpu.hidden.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out +// RUN: llvm-ar rcs %t.a %t.o +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=LIBRARY-HIDDEN + +// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 +// LIBRARY-HIDDEN-NOT: {{.*}}.o {{.*}}.o +// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 + +// +// Check that we do not extract a static library that defines a global visibile +// to the host that is already defined. +// +// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc +// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc +// RUN: clang-offload-packager -o %t-lib.out \ +// RUN: --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out +// RUN: llvm-ar rcs %t.a %t.o +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld -- %t.o %t.a %t.a -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED + +// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o +// LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}.o +// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 57603cc..587e7f3 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1136,9 +1136,125 @@ std::optional searchLibrary(StringRef Input, StringRef Root, return searchLibraryBaseName(Input, Root, SearchPaths); } -/// Search the input files and libraries for embedded device offloading code and -/// add it to the list of files to be linked. Files coming from static libraries -/// are only added to the input if they are used by an existing input file. +/// Common redeclaration of needed symbol flags. +enum Symbol : uint32_t { + Sym_None = 0, + Sym_Undefined = 1U << 1, + Sym_Weak = 1U << 2, +}; + +/// Scan the symbols from a BitcodeFile \p Buffer and record if we need to +/// extract any symbols from it. +Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, StringSaver &Saver, + DenseMap &Syms) { + Expected IRSymtabOrErr = readIRSymtab(Buffer); + if (!IRSymtabOrErr) + return IRSymtabOrErr.takeError(); + + bool ShouldExtract = false; + for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { + for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) { + if (Sym.isFormatSpecific() || !Sym.isGlobal()) + continue; + + bool NewSymbol = Syms.count(Sym.getName()) == 0; + auto &OldSym = Syms[Saver.save(Sym.getName())]; + + // We will extract if it defines a currenlty undefined non-weak symbol. + bool ResolvesStrongReference = + ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) && + !Sym.isUndefined()); + // We will extract if it defines a new global symbol visible to the host. + bool NewGlobalSymbol = + ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() && + !Sym.canBeOmittedFromSymbolTable() && + (Sym.getVisibility() != GlobalValue::HiddenVisibility)); + ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol; + + // Update this symbol in the "table" with the new information. + if (OldSym & Sym_Undefined && !Sym.isUndefined()) + OldSym = static_cast(OldSym & ~Sym_Undefined); + if (Sym.isUndefined() && NewSymbol) + OldSym = static_cast(OldSym | Sym_Undefined); + if (Sym.isWeak()) + OldSym = static_cast(OldSym | Sym_Weak); + } + } + + return ShouldExtract; +} + +/// Scan the symbols from an ObjectFile \p Obj and record if we need to extract +/// any symbols from it. +Expected getSymbolsFromObject(const ObjectFile &Obj, StringSaver &Saver, + DenseMap &Syms) { + bool ShouldExtract = false; + for (SymbolRef Sym : Obj.symbols()) { + auto FlagsOrErr = Sym.getFlags(); + if (!FlagsOrErr) + return FlagsOrErr.takeError(); + + if (!(*FlagsOrErr & SymbolRef::SF_Global) || + (*FlagsOrErr & SymbolRef::SF_FormatSpecific)) + continue; + + auto NameOrErr = Sym.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + + bool NewSymbol = Syms.count(*NameOrErr) == 0; + auto &OldSym = Syms[Saver.save(*NameOrErr)]; + + // We will extract if it defines a currenlty undefined non-weak symbol. + bool ResolvesStrongReference = (OldSym & Sym_Undefined) && + !(OldSym & Sym_Weak) && + !(*FlagsOrErr & SymbolRef::SF_Undefined); + + // We will extract if it defines a new global symbol visible to the host. + bool NewGlobalSymbol = ((NewSymbol || (OldSym & Sym_Undefined)) && + !(*FlagsOrErr & SymbolRef::SF_Undefined) && + !(*FlagsOrErr & SymbolRef::SF_Hidden)); + ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol; + + // Update this symbol in the "table" with the new information. + if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined)) + OldSym = static_cast(OldSym & ~Sym_Undefined); + if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol) + OldSym = static_cast(OldSym | Sym_Undefined); + if (*FlagsOrErr & SymbolRef::SF_Weak) + OldSym = static_cast(OldSym | Sym_Weak); + } + return ShouldExtract; +} + +/// Attempt to 'resolve' symbols found in input files. We use this to +/// determine if an archive member needs to be extracted. An archive member +/// will be extracted if any of the following is true. +/// 1) It defines an undefined symbol in a regular object filie. +/// 2) It defines a global symbol without hidden visibility that has not +/// yet been defined. +Expected getSymbols(StringRef Image, StringSaver &Saver, + DenseMap &Syms) { + MemoryBufferRef Buffer = MemoryBufferRef(Image, ""); + switch (identify_magic(Image)) { + case file_magic::bitcode: + return getSymbolsFromBitcode(Buffer, Saver, Syms); + case file_magic::elf_relocatable: { + Expected> ObjFile = + ObjectFile::createObjectFile(Buffer); + if (!ObjFile) + return ObjFile.takeError(); + return getSymbolsFromObject(**ObjFile, Saver, Syms); + } + default: + return false; + } +} + +/// Search the input files and libraries for embedded device offloading code +/// and add it to the list of files to be linked. Files coming from static +/// libraries are only added to the input if they are used by an existing +/// input file. Expected> getDeviceInput(const ArgList &Args) { llvm::TimeTraceScope TimeScope("ExtractDeviceCode"); @@ -1147,47 +1263,68 @@ Expected> getDeviceInput(const ArgList &Args) { for (const opt::Arg *Arg : Args.filtered(OPT_library_path)) LibraryPaths.push_back(Arg->getValue()); + BumpPtrAllocator Alloc; + StringSaver Saver(Alloc); + // Try to extract device code from the linker input files. SmallVector InputFiles; - SmallVector LazyInputFiles; - for (const opt::Arg *Arg : Args.filtered(OPT_INPUT)) { - StringRef Filename = Arg->getValue(); - if (!sys::fs::exists(Filename) || sys::fs::is_directory(Filename)) + DenseMap> Syms; + for (const opt::Arg *Arg : Args.filtered(OPT_INPUT, OPT_library)) { + std::optional Filename = + Arg->getOption().matches(OPT_library) + ? searchLibrary(Arg->getValue(), Root, LibraryPaths) + : std::string(Arg->getValue()); + + if (!Filename && Arg->getOption().matches(OPT_library)) + reportError(createStringError(inconvertibleErrorCode(), + "unable to find library -l%s", + Arg->getValue())); + + if (!Filename || !sys::fs::exists(*Filename) || + sys::fs::is_directory(*Filename)) continue; ErrorOr> BufferOrErr = - MemoryBuffer::getFileOrSTDIN(Filename); + MemoryBuffer::getFileOrSTDIN(*Filename); if (std::error_code EC = BufferOrErr.getError()) - return createFileError(Filename, EC); + return createFileError(*Filename, EC); - if (identify_magic((*BufferOrErr)->getBuffer()) == - file_magic::elf_shared_object) + MemoryBufferRef Buffer = **BufferOrErr; + if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object) continue; - bool IsLazy = - identify_magic((*BufferOrErr)->getBuffer()) == file_magic::archive; - if (Error Err = extractOffloadBinaries( - **BufferOrErr, IsLazy ? LazyInputFiles : InputFiles)) + SmallVector Binaries; + if (Error Err = extractOffloadBinaries(Buffer, Binaries)) return std::move(Err); - } - // Try to extract input from input archive libraries. - for (const opt::Arg *Arg : Args.filtered(OPT_library)) { - if (auto Library = searchLibrary(Arg->getValue(), Root, LibraryPaths)) { - ErrorOr> BufferOrErr = - MemoryBuffer::getFileOrSTDIN(*Library); - if (std::error_code EC = BufferOrErr.getError()) - reportError(createFileError(*Library, EC)); - - if (identify_magic((*BufferOrErr)->getBuffer()) != file_magic::archive) - continue; - - if (Error Err = extractOffloadBinaries(**BufferOrErr, LazyInputFiles)) - return std::move(Err); - } else { - reportError(createStringError(inconvertibleErrorCode(), - "unable to find library -l%s", - Arg->getValue())); + // We only extract archive members that are needed. + bool IsArchive = identify_magic(Buffer.getBuffer()) == file_magic::archive; + bool Extracted = true; + while (Extracted) { + Extracted = false; + for (OffloadFile &Binary : Binaries) { + if (!Binary.getBinary()) + continue; + + // If we don't have an object file for this architecture do not + // extract. + if (IsArchive && !Syms.count(Binary)) + continue; + + Expected ExtractOrErr = + getSymbols(Binary.getBinary()->getImage(), Saver, Syms[Binary]); + if (!ExtractOrErr) + return ExtractOrErr.takeError(); + + Extracted = IsArchive && *ExtractOrErr; + + if (!IsArchive || Extracted) + InputFiles.emplace_back(std::move(Binary)); + + // If we extracted any files we need to check all the symbols again. + if (Extracted) + break; + } } } @@ -1198,16 +1335,6 @@ Expected> getDeviceInput(const ArgList &Args) { InputFiles.push_back(std::move(*FileOrErr)); } - DenseSet IsTargetUsed; - for (const auto &File : InputFiles) - IsTargetUsed.insert(File); - - // We should only include input files that are used. - // TODO: Only load a library if it defined undefined symbols in the input. - for (auto &LazyFile : LazyInputFiles) - if (IsTargetUsed.contains(LazyFile)) - InputFiles.emplace_back(std::move(LazyFile)); - return std::move(InputFiles); }