--- /dev/null
+//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass prepares a module containing type metadata for ThinLTO by splitting
+// it into regular and thin LTO parts if possible, and writing both parts to
+// a multi-module bitcode file. Modules that do not contain type metadata are
+// written unmodified as a single module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+using namespace llvm;
+
+namespace {
+
+// Produce a unique identifier for this module by taking the MD5 sum of the
+// names of the module's strong external symbols. This identifier is
+// normally guaranteed to be unique, or the program would fail to link due to
+// multiply defined symbols.
+//
+// If the module has no strong external symbols (such a module may still have a
+// semantic effect if it performs global initialization), we cannot produce a
+// unique identifier for this module, so we return the empty string, which
+// causes the entire module to be written as a regular LTO module.
+std::string getModuleId(Module *M) {
+ MD5 Md5;
+ bool ExportsSymbols = false;
+ auto AddGlobal = [&](GlobalValue &GV) {
+ if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+ !GV.hasExternalLinkage())
+ return;
+ ExportsSymbols = true;
+ Md5.update(GV.getName());
+ Md5.update(ArrayRef<uint8_t>{0});
+ };
+
+ for (auto &F : *M)
+ AddGlobal(F);
+ for (auto &GV : M->globals())
+ AddGlobal(GV);
+ for (auto &GA : M->aliases())
+ AddGlobal(GA);
+ for (auto &IF : M->ifuncs())
+ AddGlobal(IF);
+
+ if (!ExportsSymbols)
+ return "";
+
+ MD5::MD5Result R;
+ Md5.final(R);
+
+ SmallString<32> Str;
+ MD5::stringifyResult(R, Str);
+ return ("$" + Str).str();
+}
+
+// Promote each local-linkage entity defined by ExportM and used by ImportM by
+// changing visibility and appending the given ModuleId.
+void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
+ auto PromoteInternal = [&](GlobalValue &ExportGV) {
+ if (!ExportGV.hasLocalLinkage())
+ return;
+
+ GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
+ if (!ImportGV || ImportGV->use_empty())
+ return;
+
+ std::string NewName = (ExportGV.getName() + ModuleId).str();
+
+ ExportGV.setName(NewName);
+ ExportGV.setLinkage(GlobalValue::ExternalLinkage);
+ ExportGV.setVisibility(GlobalValue::HiddenVisibility);
+
+ ImportGV->setName(NewName);
+ ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+ };
+
+ for (auto &F : ExportM)
+ PromoteInternal(F);
+ for (auto &GV : ExportM.globals())
+ PromoteInternal(GV);
+ for (auto &GA : ExportM.aliases())
+ PromoteInternal(GA);
+ for (auto &IF : ExportM.ifuncs())
+ PromoteInternal(IF);
+}
+
+// Promote all internal (i.e. distinct) type ids used by the module by replacing
+// them with external type ids formed using the module id.
+//
+// Note that this needs to be done before we clone the module because each clone
+// will receive its own set of distinct metadata nodes.
+void promoteTypeIds(Module &M, StringRef ModuleId) {
+ DenseMap<Metadata *, Metadata *> LocalToGlobal;
+ auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
+ Metadata *MD =
+ cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
+
+ if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
+ Metadata *&GlobalMD = LocalToGlobal[MD];
+ if (!GlobalMD) {
+ std::string NewName =
+ (to_string(LocalToGlobal.size()) + ModuleId).str();
+ GlobalMD = MDString::get(M.getContext(), NewName);
+ }
+
+ CI->setArgOperand(ArgNo,
+ MetadataAsValue::get(M.getContext(), GlobalMD));
+ }
+ };
+
+ if (Function *TypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
+ for (const Use &U : TypeTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+ ExternalizeTypeId(CI, 1);
+ }
+ }
+
+ if (Function *TypeCheckedLoadFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
+ for (const Use &U : TypeCheckedLoadFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+ ExternalizeTypeId(CI, 2);
+ }
+ }
+
+ for (GlobalObject &GO : M.global_objects()) {
+ SmallVector<MDNode *, 1> MDs;
+ GO.getMetadata(LLVMContext::MD_type, MDs);
+
+ GO.eraseMetadata(LLVMContext::MD_type);
+ for (auto MD : MDs) {
+ auto I = LocalToGlobal.find(MD->getOperand(1));
+ if (I == LocalToGlobal.end()) {
+ GO.addMetadata(LLVMContext::MD_type, *MD);
+ continue;
+ }
+ GO.addMetadata(
+ LLVMContext::MD_type,
+ *MDNode::get(M.getContext(),
+ ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
+ }
+ }
+}
+
+// Drop unused globals, and drop type information from function declarations.
+// FIXME: If we made functions typeless then there would be no need to do this.
+void simplifyExternals(Module &M) {
+ FunctionType *EmptyFT =
+ FunctionType::get(Type::getVoidTy(M.getContext()), false);
+
+ for (auto I = M.begin(), E = M.end(); I != E;) {
+ Function &F = *I++;
+ if (F.isDeclaration() && F.use_empty()) {
+ F.eraseFromParent();
+ continue;
+ }
+
+ if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
+ continue;
+
+ Function *NewF =
+ Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
+ NewF->setVisibility(F.getVisibility());
+ NewF->takeName(&F);
+ F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
+ F.eraseFromParent();
+ }
+
+ for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
+ GlobalVariable &GV = *I++;
+ if (GV.isDeclaration() && GV.use_empty()) {
+ GV.eraseFromParent();
+ continue;
+ }
+ }
+}
+
+void filterModule(
+ Module *M, std::function<bool(const GlobalValue *)> ShouldKeepDefinition) {
+ for (Function &F : *M) {
+ if (ShouldKeepDefinition(&F))
+ continue;
+
+ F.deleteBody();
+ F.clearMetadata();
+ }
+
+ for (GlobalVariable &GV : M->globals()) {
+ if (ShouldKeepDefinition(&GV))
+ continue;
+
+ GV.setInitializer(nullptr);
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ GV.clearMetadata();
+ }
+
+ for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E;) {
+ GlobalAlias *GA = &*I++;
+ if (ShouldKeepDefinition(GA))
+ continue;
+
+ GlobalObject *GO;
+ if (I->getValueType()->isFunctionTy())
+ GO = Function::Create(cast<FunctionType>(GA->getValueType()),
+ GlobalValue::ExternalLinkage, "", M);
+ else
+ GO = new GlobalVariable(
+ *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
+ (Constant *)nullptr, "", (GlobalVariable *)nullptr,
+ GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
+ GO->takeName(GA);
+ GA->replaceAllUsesWith(GO);
+ GA->eraseFromParent();
+ }
+}
+
+// If it's possible to split M into regular and thin LTO parts, do so and write
+// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
+// regular LTO bitcode file to OS.
+void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
+ std::string ModuleId = getModuleId(&M);
+ if (ModuleId.empty()) {
+ // We couldn't generate a module ID for this module, just write it out as a
+ // regular LTO module.
+ WriteBitcodeToFile(&M, OS);
+ return;
+ }
+
+ promoteTypeIds(M, ModuleId);
+
+ auto IsInMergedM = [&](const GlobalValue *GV) {
+ auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
+ if (!GVar)
+ return false;
+
+ SmallVector<MDNode *, 1> MDs;
+ GVar->getMetadata(LLVMContext::MD_type, MDs);
+ return !MDs.empty();
+ };
+
+ ValueToValueMapTy VMap;
+ std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
+
+ filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
+
+ promoteInternals(*MergedM, M, ModuleId);
+ promoteInternals(M, *MergedM, ModuleId);
+
+ simplifyExternals(*MergedM);
+
+ SmallVector<char, 0> Buffer;
+ BitcodeWriter W(Buffer);
+
+ // FIXME: Try to re-use BSI and PFI from the original module here.
+ ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
+ W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*GenerateHash=*/true);
+
+ W.writeModule(MergedM.get());
+
+ OS << Buffer;
+}
+
+// Returns whether this module needs to be split because it uses type metadata.
+bool requiresSplit(Module &M) {
+ SmallVector<MDNode *, 1> MDs;
+ for (auto &GO : M.global_objects()) {
+ GO.getMetadata(LLVMContext::MD_type, MDs);
+ if (!MDs.empty())
+ return true;
+ }
+
+ return false;
+}
+
+void writeThinLTOBitcode(raw_ostream &OS, Module &M,
+ const ModuleSummaryIndex *Index) {
+ // See if this module has any type metadata. If so, we need to split it.
+ if (requiresSplit(M))
+ return splitAndWriteThinLTOBitcode(OS, M);
+
+ // Otherwise we can just write it out as a regular module.
+ WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
+ /*GenerateHash=*/true);
+}
+
+class WriteThinLTOBitcode : public ModulePass {
+ raw_ostream &OS; // raw_ostream to print on
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
+ initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit WriteThinLTOBitcode(raw_ostream &o)
+ : ModulePass(ID), OS(o) {
+ initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
+
+ bool runOnModule(Module &M) override {
+ const ModuleSummaryIndex *Index =
+ &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
+ writeThinLTOBitcode(OS, M, Index);
+ return true;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<ModuleSummaryIndexWrapperPass>();
+ }
+};
+} // anonymous namespace
+
+char WriteThinLTOBitcode::ID = 0;
+INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
+ "Write ThinLTO Bitcode", false, true)
+INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
+ "Write ThinLTO Bitcode", false, true)
+
+ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
+ return new WriteThinLTOBitcode(Str);
+}