From 21711039e3ab3fd9ed296cc2c07a011c09dd170b Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 12 Aug 2022 18:33:29 -0500 Subject: [PATCH] [OpenMP] Allow the Attributor to look at functions we also internalized This is important as we have accesses to globals in those which we need to categorize. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 11 ++++++----- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 16 ++++++++++++--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 23 +++++++++++----------- .../test/Transforms/OpenMP/remove_globalization.ll | 2 -- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 794e85f..81459ed 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1156,7 +1156,7 @@ struct InformationCache { /// Check whether \p F is part of module slice. bool isInModuleSlice(const Function &F) { - return ModuleSlice.count(const_cast<Function *>(&F)); + return ModuleSlice.empty() || ModuleSlice.count(const_cast<Function *>(&F)); } /// Return true if the stack (llvm::Alloca) can be accessed by other threads. @@ -1438,8 +1438,8 @@ struct Attributor { // We update only AAs associated with functions in the Functions set or // call sites of them. 
- if ((AnchorFn && !Functions.count(const_cast<Function *>(AnchorFn))) && - !Functions.count(IRP.getAssociatedFunction())) { + if ((AnchorFn && !isRunOn(const_cast<Function *>(AnchorFn))) && + !isRunOn(IRP.getAssociatedFunction())) { AA.getState().indicatePessimisticFixpoint(); return AA; } @@ -1554,8 +1554,9 @@ struct Attributor { bool isModulePass() const { return Configuration.IsModulePass; } /// Return true if we derive attributes for \p Fn - bool isRunOn(Function &Fn) const { - return Functions.empty() || Functions.count(&Fn); + bool isRunOn(Function &Fn) const { return isRunOn(&Fn); } + bool isRunOn(Function *Fn) const { + return Functions.empty() || Functions.count(Fn); } /// Determine opportunities to derive 'default' attributes in \p F and create diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index d94a651..bc19afb 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -3501,11 +3501,21 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { bool UsedAssumedInformation = false; SmallSetVector<Value *, 4> PotentialCopies; if (!AA::getPotentialCopiesOfStoredValue(A, SI, PotentialCopies, *this, - UsedAssumedInformation)) + UsedAssumedInformation)) { + LLVM_DEBUG( + dbgs() + << "[AAIsDead] Could not determine potential copies of store!\n"); return false; + } + LLVM_DEBUG(dbgs() << "[AAIsDead] Store has " << PotentialCopies.size() + << " potential copies.\n"); return llvm::all_of(PotentialCopies, [&](Value *V) { - return A.isAssumedDead(IRPosition::value(*V), this, nullptr, - UsedAssumedInformation); + if (A.isAssumedDead(IRPosition::value(*V), this, nullptr, + UsedAssumedInformation)) + return true; + LLVM_DEBUG(dbgs() << "[AAIsDead] Potential copy " << *V + << " is assumed live!\n"); + return false; }); } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 66484c1..17714fb 100644 --- 
a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -182,13 +182,13 @@ struct AAICVTracker; /// Attributor runs. struct OMPInformationCache : public InformationCache { OMPInformationCache(Module &M, AnalysisGetter &AG, - BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, + BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC, KernelSet &Kernels) - : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), + : InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M), Kernels(Kernels) { OMPBuilder.initialize(); - initializeRuntimeFunctions(); + initializeRuntimeFunctions(M); initializeInternalControlVars(); } @@ -412,7 +412,7 @@ struct OMPInformationCache : public InformationCache { // TODO: We directly convert uses into proper calls and unknown uses. for (Use &U : RFI.Declaration->uses()) { if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { - if (ModuleSlice.count(UserI->getFunction())) { + if (ModuleSlice.empty() || ModuleSlice.count(UserI->getFunction())) { RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); ++NumUses; } @@ -445,8 +445,7 @@ struct OMPInformationCache : public InformationCache { /// Helper to initialize all runtime function information for those defined /// in OpenMPKinds.def. - void initializeRuntimeFunctions() { - Module &M = *((*ModuleSlice.begin())->getParent()); + void initializeRuntimeFunctions(Module &M) { // Helper macros for handling __VA_ARGS__ in OMP_RTL #define OMP_TYPE(VarName, ...) 
\ @@ -855,7 +854,7 @@ struct OpenMPOpt { InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel, ICV_proc_bind}; - for (Function *F : OMPInfoCache.ModuleSlice) { + for (Function *F : SCC) { for (auto ICV : ICVs) { auto ICVInfo = OMPInfoCache.ICVs[ICV]; auto Remark = [&](OptimizationRemarkAnalysis ORA) { @@ -2148,7 +2147,7 @@ private: }; Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { - if (!OMPInfoCache.ModuleSlice.count(&F)) + if (!OMPInfoCache.ModuleSlice.empty() && !OMPInfoCache.ModuleSlice.count(&F)) return nullptr; // Use a scope to keep the lifetime of the CachedKernel short. @@ -5050,8 +5049,7 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { BumpPtrAllocator Allocator; CallGraphUpdater CGUpdater; - SetVector<Function *> Functions(SCC.begin(), SCC.end()); - OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); + OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels); unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; @@ -5063,6 +5061,7 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { AC.OREGetter = OREGetter; AC.PassName = DEBUG_TYPE; + SetVector<Function *> Functions; Attributor A(Functions, InfoCache, AC); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); @@ -5125,7 +5124,7 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, SetVector<Function *> Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ Functions, Kernels); + /*CGSCC*/ &Functions, Kernels); unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 
SetFixpointIterations : 32; @@ -5204,7 +5203,7 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { BumpPtrAllocator Allocator; OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ Functions, Kernels); + /*CGSCC*/ &Functions, Kernels); unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 6bb3681..5d1cf6e 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s ; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS -; RUN: opt -passes=openmp-opt -pass-remarks-missed=openmp-opt -openmp-opt-max-iterations=1 -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-FIXPOINT ; RUN: opt -openmp-opt-disable-deglobalization -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" @@ -11,7 +10,6 @@ target triple = "nvptx64" ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. -; CHECK-FIXPOINT: Attributor did not reach a fixpoint after 1 iterations. ; UTC_ARGS: --enable @S = external local_unnamed_addr global i8* -- 2.7.4