From bacb45eb9a776c032f69d55a561fa450ba545c57 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Mon, 22 May 2023 15:16:20 -0700 Subject: [PATCH] [ThinLTO] Make the cache key independent of the module identifier paths Otherwise there are cache misses just from changing the name of a path, even though the input modules did not change. rdar://109672225 Differential Revision: https://reviews.llvm.org/D151165 --- llvm/include/llvm/IR/ModuleSummaryIndex.h | 7 ++++ llvm/lib/LTO/LTO.cpp | 39 ++++++++++++++++------ .../ThinLTO/X86/cache-decoupled-from-filenames.ll | 27 +++++++++++++++ 3 files changed, 62 insertions(+), 11 deletions(-) create mode 100644 llvm/test/ThinLTO/X86/cache-decoupled-from-filenames.ll diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 6c6ed17..64cb2b5 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1728,6 +1728,13 @@ public: return &*It; } + /// Return module entry for module with the given \p ModPath. + const ModuleInfo *getModule(StringRef ModPath) const { + auto It = ModulePathStringTable.find(ModPath); + assert(It != ModulePathStringTable.end() && "Module not registered"); + return &*It; + } + /// Check if the given Module has any functions available for exporting /// in the index. We consider any module present in the ModulePathStringTable /// to have exported functions. diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index de9b8f1..2f52a20 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -174,22 +174,38 @@ void llvm::computeLTOCacheKey( // imported symbols for each module may affect code generation and is // sensitive to link order, so include that as well. using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator; - std::vector ImportModulesVector; + struct ImportModule { + ImportMapIteratorTy ModIt; + const ModuleSummaryIndex::ModuleInfo *ModInfo; + + StringRef getIdentifier() const { return ModIt->getKey(); } + const FunctionImporter::FunctionsToImportTy &getFunctions() const { + return ModIt->second; + } + + const ModuleHash &getHash() const { return ModInfo->second.second; } + uint64_t getId() const { return ModInfo->second.first; } + }; + + std::vector ImportModulesVector; ImportModulesVector.reserve(ImportList.size()); for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end(); ++It) { - ImportModulesVector.push_back(It); + ImportModulesVector.push_back({It, Index.getModule(It->getKey())}); } + // Order using moduleId integer which is based on the order the module was + // added. llvm::sort(ImportModulesVector, - [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs) - -> bool { return Lhs->getKey() < Rhs->getKey(); }); - for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) { - auto ModHash = Index.getModuleHash(EntryIt->first()); + [](const ImportModule &Lhs, const ImportModule &Rhs) -> bool { + return Lhs.getId() < Rhs.getId(); + }); + for (const ImportModule &Entry : ImportModulesVector) { + auto ModHash = Entry.getHash(); Hasher.update(ArrayRef((uint8_t *)&ModHash[0], sizeof(ModHash))); - AddUint64(EntryIt->second.size()); - for (auto &Fn : EntryIt->second) + AddUint64(Entry.getFunctions().size()); + for (auto &Fn : Entry.getFunctions()) AddUint64(Fn); } @@ -259,9 +275,10 @@ void llvm::computeLTOCacheKey( // Imported functions may introduce new uses of type identifier resolutions, // so we need to collect their used resolutions as well. - for (auto &ImpM : ImportList) - for (auto &ImpF : ImpM.second) { - GlobalValueSummary *S = Index.findSummaryInModule(ImpF, ImpM.first()); + for (const ImportModule &ImpM : ImportModulesVector) + for (auto &ImpF : ImpM.getFunctions()) { + GlobalValueSummary *S = + Index.findSummaryInModule(ImpF, ImpM.getIdentifier()); AddUsedThings(S); // If this is an alias, we also care about any types/etc. that the aliasee // may reference. diff --git a/llvm/test/ThinLTO/X86/cache-decoupled-from-filenames.ll b/llvm/test/ThinLTO/X86/cache-decoupled-from-filenames.ll new file mode 100644 index 0000000..3b5b75e --- /dev/null +++ b/llvm/test/ThinLTO/X86/cache-decoupled-from-filenames.ll @@ -0,0 +1,27 @@ +; RUN: rm -rf %t && mkdir -p %t/1 %t/2 %t/3 %t/4 +; RUN: opt -module-hash -module-summary %s -o %t/t.bc +; RUN: opt -module-hash -module-summary %S/Inputs/cache-import-lists1.ll -o %t/1/a.bc +; RUN: opt -module-hash -module-summary %S/Inputs/cache-import-lists2.ll -o %t/2/b.bc + +; Tests that the hash for t is insensitive to the bitcode module filenames. + +; RUN: rm -rf %t/cache +; RUN: llvm-lto2 run -cache-dir %t/cache -o %t.o %t/t.bc %t/1/a.bc %t/2/b.bc -r=%t/t.bc,main,plx -r=%t/t.bc,f1,lx -r=%t/t.bc,f2,lx -r=%t/1/a.bc,f1,plx -r=%t/1/a.bc,linkonce_odr,plx -r=%t/2/b.bc,f2,plx -r=%t/2/b.bc,linkonce_odr,lx +; RUN: ls %t/cache | count 3 + +; RUN: cp %t/1/a.bc %t/4/d.bc +; RUN: cp %t/2/b.bc %t/3/k.bc +; RUN: llvm-lto2 run -cache-dir %t/cache -o %t.o %t/t.bc %t/4/d.bc %t/3/k.bc -r=%t/t.bc,main,plx -r=%t/t.bc,f1,lx -r=%t/t.bc,f2,lx -r=%t/4/d.bc,f1,plx -r=%t/4/d.bc,linkonce_odr,plx -r=%t/3/k.bc,f2,plx -r=%t/3/k.bc,linkonce_odr,lx +; RUN: ls %t/cache | count 3 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @main() { + call void @f1() + call void @f2() + ret void +} + +declare void @f1() +declare void @f2() -- 2.7.4