Adds & uses a new `isDivergentUse` API in UA.
UniformityAnalysis now requires CycleInfo as well, since the new temporal divergence API can query it.
-----
Original patch that adds `isDivergentUse` by @sameerds
The user of a temporally divergent value is marked as divergent in the
uniformity analysis. But the same user may also have been marked divergent for
other reasons, thus losing this information about temporal divergence. But some
clients need to specifically check for temporal divergence. This change restores
such an API, which already existed in DivergenceAnalysis.
Reviewed By: sameerds, foad
Differential Revision: https://reviews.llvm.org/D146018
// indicated by the compiler.
using FunctionT = typename _FunctionT::invalidTemplateInstanceError;
+ // A UseT represents a data-edge from the defining instruction to the using
+ // instruction.
+ //
+ // using UseT = ...
+
// Initialize the SSA context with information about the FunctionT being
// processed.
//
using FunctionT = typename ContextT::FunctionT;
using ValueRefT = typename ContextT::ValueRefT;
using ConstValueRefT = typename ContextT::ConstValueRefT;
+ using UseT = typename ContextT::UseT;
using InstructionT = typename ContextT::InstructionT;
using DominatorTreeT = typename ContextT::DominatorTreeT;
/// \brief Whether \p Val is divergent at its definition.
bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
+ /// \brief Whether the use \p U is divergent. Note that a use of a value
+ /// that is uniform at its definition can itself still be divergent
+ /// (temporal divergence).
+ bool isDivergentUse(const UseT &U) const;
+
bool hasDivergentTerminator(const BlockT &B) const {
return DivergentTermBlocks.contains(&B);
}
bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
- /// \brief Whether \p Val is divergent when read in \p ObservingBlock.
+ /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
bool isTemporalDivergent(const BlockT &ObservingBlock,
- ConstValueRefT Val) const;
+ const InstructionT &Def) const;
};
template <typename ImplT>
}
template <typename ContextT>
+// Returns true if the value defined by \p Def is temporally divergent when
+// observed in \p ObservingBlock: some cycle contains the definition but not
+// the observing block and is recorded in DivergentExitCycles.
+bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
+ const BlockT &ObservingBlock, const InstructionT &Def) const {
+ const BlockT *DefBlock = Def.getParent();
+ // Walk the cycle hierarchy outwards, starting at the innermost cycle
+ // containing the definition and stopping at the first cycle (if any) that
+ // also contains the observing block.
+ for (const CycleT *Cycle = CI.getCycle(DefBlock);
+ Cycle && !Cycle->contains(&ObservingBlock);
+ Cycle = Cycle->getParentCycle()) {
+ if (DivergentExitCycles.contains(Cycle)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
const InstructionT &Term) {
const auto *DivTermBlock = Term.getParent();
}
template <typename ContextT>
+// Thin public wrapper: forwards the use-divergence query to the analysis
+// implementation.
+bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const {
+ return DA->isDivergentUse(U);
+}
+
+template <typename ContextT>
bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) {
return DA->hasDivergentTerminator(B);
}
using FunctionT = typename ContextT::FunctionT;
using ValueRefT = typename ContextT::ValueRefT;
using ConstValueRefT = typename ContextT::ConstValueRefT;
+ using UseT = typename ContextT::UseT;
using InstructionT = typename ContextT::InstructionT;
using DominatorTreeT = typename ContextT::DominatorTreeT;
using ThisT = GenericUniformityInfo<ContextT>;
bool isUniform(const InstructionT *I) const { return !isDivergent(I); };
bool isDivergent(const InstructionT *I) const;
+ /// \brief Whether \p U is divergent. Uses of a uniform value can be
+ /// divergent.
+ bool isDivergentUse(const UseT &U) const;
+
bool hasDivergentTerminator(const BlockT &B);
void print(raw_ostream &Out) const;
using ValueRefT = Register;
using ConstValueRefT = Register;
static const Register ValueRefNull;
+ using UseT = MachineOperand;
using DominatorTreeT = DominatorTreeBase<BlockT, false>;
void setFunction(MachineFunction &Fn);
using ValueRefT = Value *;
using ConstValueRefT = const Value *;
static Value *ValueRefNull;
+ using UseT = Use;
using DominatorTreeT = DominatorTreeBase<BlockT, false>;
void setFunction(Function &Fn);
return false;
}
+// LLVM-IR specialization of isDivergentUse: a use is divergent if the used
+// value is itself divergent, or if its defining instruction is temporally
+// divergent with respect to the block containing the user.
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
+ const Use &U) const {
+ const auto *V = U.get();
+ if (isDivergent(V))
+ return true;
+ // Only instructions have a defining block; arguments and constants cannot
+ // be temporally divergent.
+ if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
+ const auto *UseInstr = cast<Instruction>(U.getUser());
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+ }
+ return false;
+}
+
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<SSAContext>;
INITIALIZE_PASS_BEGIN(UniformityInfoWrapperPass, "uniformity",
"Uniformity Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+// New dependency: the temporal-divergence queries walk the cycle hierarchy,
+// so CycleInfo is required (transitively, see getAnalysisUsage below).
+INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(UniformityInfoWrapperPass, "uniformity",
"Uniformity Analysis", true, true)
void UniformityInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<CycleInfoWrapperPass>();
+ // Required *transitively*: the analysis result keeps a reference to
+ // CycleInfo and queries it lazily (isDivergentUse -> isTemporalDivergent),
+ // so CycleInfo must stay alive as long as this pass's result is in use.
+ AU.addRequiredTransitive<CycleInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
return false;
}
+// Machine-IR specialization of isDivergentUse.
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
+ const MachineOperand &U) const {
+ // Only register operands can carry divergence.
+ if (!U.isReg())
+ return false;
+
+ auto Reg = U.getReg();
+ if (isDivergent(Reg))
+ return true;
+
+ const auto &RegInfo = F.getRegInfo();
+ auto *Def = RegInfo.getOneDef(Reg);
+ // Without a unique definition we cannot locate the defining block for the
+ // temporal-divergence walk; conservatively report the use as divergent.
+ if (!Def)
+ return true;
+
+ auto *DefInstr = Def->getParent();
+ auto *UseInstr = U.getParent();
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+}
+
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<MachineSSAContext>;
#include "AMDGPU.h"
#include "GCNSubtarget.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
public InstVisitor<AMDGPUAtomicOptimizer> {
private:
SmallVector<ReplacementInfo, 8> ToReplace;
- const LegacyDivergenceAnalysis *DA;
+ const UniformityInfo *UA;
const DataLayout *DL;
DominatorTree *DT;
const GCNSubtarget *ST;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LegacyDivergenceAnalysis>();
+ // Divergence information now comes from UniformityAnalysis instead of the
+ // legacy divergence analysis.
+ AU.addRequired<UniformityInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
}
return false;
}
- DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
DL = &F.getParent()->getDataLayout();
DominatorTreeWrapperPass *const DTW =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
// If the pointer operand is divergent, then each lane is doing an atomic
// operation on a different address, and we cannot optimize that.
- if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) {
+ if (UA->isDivergentUse(I.getOperandUse(PtrIdx))) {
return;
}
- const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
+ const bool ValDivergent = UA->isDivergentUse(I.getOperandUse(ValIdx));
// If the value operand is divergent, each lane is contributing a different
// value to the atomic calculation. We can only optimize divergent values if
const unsigned ValIdx = 0;
- const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
+ const bool ValDivergent = UA->isDivergentUse(I.getOperandUse(ValIdx));
// If the value operand is divergent, each lane is contributing a different
// value to the atomic calculation. We can only optimize divergent values if
// If any of the other arguments to the intrinsic are divergent, we can't
// optimize the operation.
for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
- if (DA->isDivergentUse(&I.getOperandUse(Idx))) {
+ if (UA->isDivergentUse(I.getOperandUse(Idx))) {
return;
}
}
INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
"AMDGPU atomic optimizations", false, false)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPUAtomicOptimizer, DEBUG_TYPE,
"AMDGPU atomic optimizations", false, false)
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s -use-gpu-divergence-analysis | FileCheck %s
@local = addrspace(3) global i32 undef
ret void
}
+; Intent (from the function name): %gep is defined inside the nested cycle
+; (innerloop) and used in %exit, outside the cycles, so the atomic optimizer
+; must treat that use as temporally divergent and must not rewrite the
+; atomicrmw into a DPP-based scan.
+define amdgpu_kernel void @def_in_nested_cycle() {
+; CHECK-LABEL: def_in_nested_cycle:
+; CHECK-NOT: dpp
+entry:
+  %x = call i32 @llvm.amdgcn.workitem.id.x()
+  br label %loop
+loop:
+  %i = phi i32 [ 0, %entry ], [ 0, %innerloop ], [ %i1, %loop ]
+  %cond = icmp ult i32 %i, %x
+  %i1 = add i32 %i, 1
+  br i1 %cond, label %innerloop, label %loop
+innerloop:
+  %i.inner = phi i32 [ 0, %loop ], [ %i1.inner, %innerloop ]
+  %gep = getelementptr i32, ptr addrspace(3) @local, i32 %i
+  %i1.inner = add i32 %i, 1
+  %cond.inner = icmp ult i32 %i, %x
+; NOTE(review): this terminator tests %cond (not %cond.inner) and never
+; targets %exit, leaving %exit unreachable (legal IR, but the atomicrmw may
+; then be trivially dead). If that is not intentional -- e.g. this was
+; reduced from a larger case -- confirm against the original reproducer;
+; re-targeting the branch would also require updating the phi predecessor
+; lists in %loop and %innerloop.
+  br i1 %cond, label %innerloop, label %loop
+exit:
+  %old = atomicrmw add ptr addrspace(3) %gep, i32 %x acq_rel
+  ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x()