#define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <memory>
class Use;
class Value;
-class LegacyDivergenceAnalysis : public FunctionPass {
+class LegacyDivergenceAnalysisImpl {
public:
- static char ID;
-
- LegacyDivergenceAnalysis();
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- bool runOnFunction(Function &F) override;
-
- // Print all divergent branches in the function.
- void print(raw_ostream &OS, const Module *) const override;
-
// Returns true if V is divergent at its definition.
bool isDivergent(const Value *V) const;
// Keep the analysis results uptodate by removing an erased value.
void removeValue(const Value *V) { DivergentValues.erase(V); }
-private:
+ // Print all divergent branches in the function.
+ void print(raw_ostream &OS, const Module *) const;
+
// Whether analysis should be performed by GPUDivergenceAnalysis.
bool shouldUseGPUDivergenceAnalysis(const Function &F,
- const TargetTransformInfo &TTI) const;
+ const TargetTransformInfo &TTI,
+ const LoopInfo &LI);
+
+ void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+ PostDominatorTree &PDT, const LoopInfo &LI);
+
+protected:
+ // (optional) handle to new DivergenceAnalysis
+ std::unique_ptr<DivergenceInfo> gpuDA;
+
+ // Stores all divergent values.
+ DenseSet<const Value *> DivergentValues;
+
+ // Stores divergent uses of possibly uniform values.
+ DenseSet<const Use *> DivergentUses;
+};
+
+class LegacyDivergenceAnalysis : public FunctionPass,
+ public LegacyDivergenceAnalysisImpl {
+public:
+ static char ID;
+ LegacyDivergenceAnalysis();
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+
+class LegacyDivergenceAnalysisPass
+ : public PassInfoMixin<LegacyDivergenceAnalysisPass>,
+ public LegacyDivergenceAnalysisImpl {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
// (optional) handle to new DivergenceAnalysis
std::unique_ptr<DivergenceInfo> gpuDA;
// Stores divergent uses of possibly uniform values.
DenseSet<const Use *> DivergentUses;
};
-} // End llvm namespace
+
+} // end namespace llvm
#endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
return new LegacyDivergenceAnalysis();
}
-void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
- AU.addRequiredTransitive<LoopInfoWrapperPass>();
- AU.setPreservesAll();
-}
-
-bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
- const Function &F, const TargetTransformInfo &TTI) const {
+bool LegacyDivergenceAnalysisImpl::shouldUseGPUDivergenceAnalysis(
+ const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI) {
if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis()))
return false;
// GPUDivergenceAnalysis requires a reducible CFG.
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
using RPOTraversal = ReversePostOrderTraversal<const Function *>;
RPOTraversal FuncRPOT(&F);
return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
const LoopInfo>(FuncRPOT, LI);
}
// NOTE(review): diff hunk — runOnFunction's body moves into the
// pass-manager-agnostic LegacyDivergenceAnalysisImpl::run(); the analyses
// (TTI/DT/PDT/LI) are now supplied by the caller instead of fetched via
// getAnalysis<>.
-bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
-  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
-  if (TTIWP == nullptr)
-    return false;
-
-  TargetTransformInfo &TTI = TTIWP->getTTI(F);
-  // Fast path: if the target does not have branch divergence, we do not mark
-  // any branch as divergent.
-  if (!TTI.hasBranchDivergence())
-    return false;
-
-  DivergentValues.clear();
-  DivergentUses.clear();
-  gpuDA = nullptr;
-
-  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
-
-  if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
+void LegacyDivergenceAnalysisImpl::run(Function &F,
+                                       llvm::TargetTransformInfo &TTI,
+                                       llvm::DominatorTree &DT,
+                                       llvm::PostDominatorTree &PDT,
+                                       const llvm::LoopInfo &LI) {
+  if (shouldUseGPUDivergenceAnalysis(F, TTI, LI)) {
    // run the new GPU divergence analysis
-    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
                                             /* KnownReducible = */ true);
// NOTE(review): `DP` is not declared anywhere in this hunk. These two calls
// belong to a DivergencePropagator local in the legacy (non-GPU) `else`
// branch, which appears to have been elided from this excerpt — confirm the
// `} else { DivergencePropagator DP(...); ... }` context survives in the
// applied file; as shown, this would not compile.
    DP.populateWithSourcesOfDivergence();
    DP.propagate();
  }
-
-  LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
-             << ":\n";
-  print(dbgs(), F.getParent()));
-
-  return false;
}
-bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
+bool LegacyDivergenceAnalysisImpl::isDivergent(const Value *V) const {
if (gpuDA) {
return gpuDA->isDivergent(*V);
}
return DivergentValues.count(V);
}
-bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const {
+bool LegacyDivergenceAnalysisImpl::isDivergentUse(const Use *U) const {
if (gpuDA) {
return gpuDA->isDivergentUse(*U);
}
return DivergentValues.count(U->get()) || DivergentUses.count(U);
}
// NOTE(review): diff hunk — print() moves from the FunctionPass to the Impl
// class. The braces below do not balance on their own: the code that prints
// the individual divergent values appears to have been elided from this
// excerpt between the early return and the trailing `OS << "\n";`. Verify
// against the full file before applying.
-void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+void LegacyDivergenceAnalysisImpl::print(raw_ostream &OS,
+                                         const Module *) const {
  // Nothing to print if no divergence was recorded by either analysis.
  if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
    return;
}
  OS << "\n";
}
+
+void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ DivergentUses.clear();
+ gpuDA = nullptr;
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
+ LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
+ << ":\n";
+ LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
+
+ return false;
+}
+
+PreservedAnalyses
+LegacyDivergenceAnalysisPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!TTI.hasBranchDivergence())
+ return PreservedAnalyses::all();
+
+ DivergentValues.clear();
+ DivergentUses.clear();
+ gpuDA = nullptr;
+
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
+ LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
+ << ":\n";
+ LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
+ return PreservedAnalyses::all();
+}
+; RUN: opt -mtriple amdgcn-amdhsa -mcpu=gfx90a -passes=legacy-divergence-analysis < %s -S 2>&1 | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.readfirstlane(i32)
+; OPT-LABEL: define amdgpu_kernel void @readfirstlane_uniform(
+; OPT-NEXT: %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+; OPT-NEXT: %scalar = tail call i32 @llvm.amdgcn.readfirstlane(i32 %tid)
+; OPT-NEXT: %idx = zext i32 %scalar to i64
+; OPT-NEXT: %gep0 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %idx
+; OPT-NEXT: %val = load float, ptr addrspace(1) %gep0, align 4
+; OPT-NEXT: %gep1 = getelementptr inbounds float, ptr addrspace(1) %1, i64 10
+; OPT-NEXT: store float %val, ptr addrspace(1) %gep1, align 4
+; OPT-NEXT: ret void
+;
; GCN-LABEL: readfirstlane_uniform
; GCN: s_load_dwordx4 s[[[IN_ADDR:[0-9]+]]:3], s[4:5], 0x0
; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0