From 3adb89bb9f8e73c82787babb2f877fece7394770 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 23 Feb 2021 15:50:45 -0800 Subject: [PATCH] [ThinLTO] Make cloneUsedGlobalVariables deterministic Iterating on `SmallPtrSet` with more than 8 elements is not deterministic. Use a SmallVector instead because `Used` is guaranteed to contain unique elements. While here, decrease inline element counts from 8 to 4. The number of `llvm.used`/`llvm.compiler.used` elements is usually 0 or 1. For full LTO/hybrid LTO, the number may be large, so we need to be careful. According to tejohnson's analysis https://reviews.llvm.org/D97128#2582399 , 4 is good for a large project with WholeProgramDevirt, when available_externally vtables are placed in the llvm.compiler.used set. Differential Revision: https://reviews.llvm.org/D97128 --- llvm/include/llvm/IR/Module.h | 6 ++++++ llvm/lib/IR/Module.cpp | 15 +++++++++++++++ llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 11 ++++------- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 3ac57b7..63d66c5 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -893,6 +893,12 @@ public: GlobalVariable *collectUsedGlobalVariables(const Module &M, SmallPtrSetImpl<GlobalValue *> &Set, bool CompilerUsed); +/// Given "llvm.used" or "llvm.compiler.used" as a global name, collect the +/// initializer elements of that global in a SmallVector and return the global +/// itself. +GlobalVariable *collectUsedGlobalVariables(const Module &M, + SmallVectorImpl<GlobalValue *> &Vec, + bool CompilerUsed); /// An raw_ostream inserter for modules. 
inline raw_ostream &operator<<(raw_ostream &O, const Module &M) { diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 9395b2b..4c24461 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -659,6 +659,21 @@ VersionTuple Module::getSDKVersion() const { } GlobalVariable *llvm::collectUsedGlobalVariables( + const Module &M, SmallVectorImpl<GlobalValue *> &Vec, bool CompilerUsed) { + const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used"; + GlobalVariable *GV = M.getGlobalVariable(Name); + if (!GV || !GV->hasInitializer()) + return GV; + + const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); + for (Value *Op : Init->operands()) { + GlobalValue *G = cast<GlobalValue>(Op->stripPointerCasts()); + Vec.push_back(G); + } + return GV; +} + +GlobalVariable *llvm::collectUsedGlobalVariables( const Module &M, SmallPtrSetImpl<GlobalValue *> &Set, bool CompilerUsed) { const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used"; GlobalVariable *GV = M.getGlobalVariable(Name); diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 2ab9dcd..797416f 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -199,23 +199,20 @@ void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { // values whose defs were cloned into that module. static void cloneUsedGlobalVariables(const Module &SrcM, Module &DestM, bool CompilerUsed) { - SmallPtrSet<GlobalValue *, 8> Used; - SmallPtrSet<GlobalValue *, 8> NewUsed; + SmallVector<GlobalValue *, 4> Used, NewUsed; // First collect those in the llvm[.compiler].used set. collectUsedGlobalVariables(SrcM, Used, CompilerUsed); // Next build a set of the equivalent values defined in DestM. for (auto *V : Used) { auto *GV = DestM.getNamedValue(V->getName()); if (GV && !GV->isDeclaration()) - NewUsed.insert(GV); + NewUsed.push_back(GV); } // Finally, add them to a llvm[.compiler].used variable in DestM. 
if (CompilerUsed) - appendToCompilerUsed( - DestM, std::vector<GlobalValue *>(NewUsed.begin(), NewUsed.end())); + appendToCompilerUsed(DestM, NewUsed); else - appendToUsed(DestM, - std::vector<GlobalValue *>(NewUsed.begin(), NewUsed.end())); + appendToUsed(DestM, NewUsed); } // If it's possible to split M into regular and thin LTO parts, do so and write -- 2.7.4