static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
DominatorTree *DT);
- /// Returns true if value \p V is uniform across \p VF lanes, when \p VF is
- /// provided, and otherwise if \p V is invariant across all loop iterations.
- bool isUniform(Value *V, std::optional<ElementCount> VF = std::nullopt) const;
+ /// Returns true if value \p V is loop invariant.
+ bool isInvariant(Value *V) const;
uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
unsigned getNumStores() const { return NumStores; }
- /// Returns true if value V is uniform across \p VF lanes, when \p VF is
- /// provided, and otherwise if \p V is invariant across all loop iterations.
- bool isUniform(Value *V, std::optional<ElementCount> VF = std::nullopt) const;
+ /// Returns true if value \p V is loop invariant.
+ bool isInvariant(Value *V) const;
+
+ /// Returns true if value \p V is uniform across \p VF lanes; loop-invariant
+ /// values are trivially uniform.
+ bool isUniform(Value *V, ElementCount VF) const;
/// A uniform memory op is a load or store which accesses the same memory
- /// location on all \p VF lanes, if \p VF is provided and otherwise if the
- /// memory location is invariant.
- bool isUniformMemOp(Instruction &I,
- std::optional<ElementCount> VF = std::nullopt) const;
+ /// location on all \p VF lanes.
+ bool isUniformMemOp(Instruction &I, ElementCount VF) const;
/// Returns the information that we collected about runtime memory check.
const RuntimePointerChecking *getRuntimePointerChecking() const {
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
- if (isUniform(Ptr)) {
+ if (isInvariant(Ptr)) {
// Record store instructions to loop invariant addresses
StoresToInvariantAddresses.push_back(ST);
HasDependenceInvolvingLoopInvariantAddress |=
return *Report;
}
-namespace {
-/// A rewriter to build the SCEVs for each of the VF lanes in the expected
-/// vectorized loop, which can then be compared to detect their uniformity. This
-/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
-/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
-/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
-/// uniformity.
-class SCEVAddRecForUniformityRewriter
- : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
- /// Multiplier to be applied to the step of AddRecs in TheLoop.
- unsigned StepMultiplier;
-
- /// Offset to be added to the AddRecs in TheLoop.
- unsigned Offset;
-
- /// Loop for which to rewrite AddRecsFor.
- Loop *TheLoop;
-
- /// Is any sub-expressions not analyzable w.r.t. uniformity?
- bool CannotAnalyze = false;
-
- bool canAnalyze() const { return !CannotAnalyze; }
-
-public:
- SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
- unsigned Offset, Loop *TheLoop)
- : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
- TheLoop(TheLoop) {}
-
- const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
- assert(Expr->getLoop() == TheLoop &&
- "addrec outside of TheLoop must be invariant and should have been "
- "handled earlier");
- // Build a new AddRec by multiplying the step by StepMultiplier and
- // incrementing the start by Offset * step.
- Type *Ty = Expr->getType();
- auto *Step = Expr->getStepRecurrence(SE);
- if (!SE.isLoopInvariant(Step, TheLoop)) {
- CannotAnalyze = true;
- return Expr;
- }
- auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
- auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
- auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
- return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
- }
-
- const SCEV *visit(const SCEV *S) {
- if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
- return S;
- return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
- }
-
- const SCEV *visitUnknown(const SCEVUnknown *S) {
- if (SE.isLoopInvariant(S, TheLoop))
- return S;
- // The value could vary across iterations.
- CannotAnalyze = true;
- return S;
- }
-
- const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
- // Could not analyze the expression.
- CannotAnalyze = true;
- return S;
- }
-
- static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
- unsigned StepMultiplier, unsigned Offset,
- Loop *TheLoop) {
- /// Bail out if the expression does not contain an UDiv expression.
- /// Uniform values which are not loop invariant require operations to strip
- /// out the lowest bits. For now just look for UDivs and use it to avoid
- /// re-writing UDIV-free expressions for other lanes to limit compile time.
- if (!SCEVExprContains(S,
- [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
- return SE.getCouldNotCompute();
-
- SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
- TheLoop);
- const SCEV *Result = Rewriter.visit(S);
-
- if (Rewriter.canAnalyze())
- return Result;
- return SE.getCouldNotCompute();
- }
-};
-
-} // namespace
-
-bool LoopAccessInfo::isUniform(Value *V, std::optional<ElementCount> VF) const {
+bool LoopAccessInfo::isInvariant(Value *V) const {
auto *SE = PSE->getSE();
- // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
- // never considered uniform.
// TODO: Is this really what we want? Even without FP SCEV, we may want some
- // trivially loop-invariant FP values to be considered uniform.
+ // trivially loop-invariant FP values to be considered invariant.
if (!SE->isSCEVable(V->getType()))
return false;
const SCEV *S = SE->getSCEV(V);
- if (SE->isLoopInvariant(S, TheLoop))
- return true;
- if (!VF || VF->isScalable())
- return false;
- if (VF->isScalar())
- return true;
-
- // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
- // lane 0 matches the expressions for all other lanes.
- unsigned FixedVF = VF->getKnownMinValue();
- const SCEV *FirstLaneExpr =
- SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
- if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
- return false;
-
- // Make sure the expressions for lanes FixedVF-1..1 match the expression for
- // lane 0. We check lanes in reverse order for compile-time, as frequently
- // checking the last lane is sufficient to rule out uniformity.
- return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
- const SCEV *IthLaneExpr =
- SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
- return FirstLaneExpr == IthLaneExpr;
- });
+ return SE->isLoopInvariant(S, TheLoop);
}
/// Find the operand of the GEP that should be checked for consecutive
return 0;
}
-bool LoopVectorizationLegality::isUniform(
- Value *V, std::optional<ElementCount> VF) const {
- return LAI->isUniform(V, VF);
+bool LoopVectorizationLegality::isInvariant(Value *V) const {
+ return LAI->isInvariant(V);
}
-bool LoopVectorizationLegality::isUniformMemOp(
- Instruction &I, std::optional<ElementCount> VF) const {
+namespace {
+/// A rewriter to build the SCEVs for each of the VF lanes in the expected
+/// vectorized loop, which can then be compared to detect their uniformity. This
+/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
+/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
+/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
+/// uniformity.
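+/// For example (illustrative only), with StepMultiplier == 4 and Offset == 1,
+/// an AddRec {S,+,X}<TheLoop> is rewritten to {S + X,+,4 * X}<TheLoop>, i.e.
+/// the per-iteration expression of lane 1 in a VF == 4 vectorized loop.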
+class SCEVAddRecForUniformityRewriter
+ : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
+ /// Multiplier to be applied to the step of AddRecs in TheLoop.
+ unsigned StepMultiplier;
+
+ /// Offset to be added to the AddRecs in TheLoop.
+ unsigned Offset;
+
+ /// Loop for which to rewrite AddRecs.
+ Loop *TheLoop;
+
+ /// Are any sub-expressions not analyzable w.r.t. uniformity?
+ bool CannotAnalyze = false;
+
+ bool canAnalyze() const { return !CannotAnalyze; }
+
+public:
+ SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
+ unsigned Offset, Loop *TheLoop)
+ : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
+ TheLoop(TheLoop) {}
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ assert(Expr->getLoop() == TheLoop &&
+ "addrec outside of TheLoop must be invariant and should have been "
+ "handled earlier");
+ // Build a new AddRec by multiplying the step by StepMultiplier and
+ // incrementing the start by Offset * step.
+ Type *Ty = Expr->getType();
+ auto *Step = Expr->getStepRecurrence(SE);
+ if (!SE.isLoopInvariant(Step, TheLoop)) {
+ CannotAnalyze = true;
+ return Expr;
+ }
+ auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
+ auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
+ auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
+ return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
+ }
+
+ const SCEV *visit(const SCEV *S) {
+ if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
+ return S;
+ return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *S) {
+ if (SE.isLoopInvariant(S, TheLoop))
+ return S;
+ // The value could vary across iterations.
+ CannotAnalyze = true;
+ return S;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
+ // Could not analyze the expression.
+ CannotAnalyze = true;
+ return S;
+ }
+
+ static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
+ unsigned StepMultiplier, unsigned Offset,
+ Loop *TheLoop) {
+ /// Bail out if the expression does not contain a UDiv expression.
+ /// Uniform values which are not loop invariant require operations to strip
+ /// out the lowest bits. For now just look for UDivs, and use this check to
+ /// avoid rewriting UDiv-free expressions for other lanes, limiting compile time.
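+ /// E.g. (illustrative) an expression like ({0,+,1}<%loop> /u 2) contains a
+ /// UDiv and is rewritten per lane; a UDiv-free expression is skipped here and
+ /// is not treated as uniform beyond plain loop invariance.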
+ if (!SCEVExprContains(S,
+ [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
+ return SE.getCouldNotCompute();
+
+ SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
+ TheLoop);
+ const SCEV *Result = Rewriter.visit(S);
+
+ if (Rewriter.canAnalyze())
+ return Result;
+ return SE.getCouldNotCompute();
+ }
+};
+
+} // namespace
+
+bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
+ if (isInvariant(V))
+ return true;
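+ // Only fixed VFs are analyzed lane by lane below; a scalar VF trivially has
+ // a single lane, and scalable VFs are not handled.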
+ if (VF.isScalable())
+ return false;
+ if (VF.isScalar())
+ return true;
+
+ // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
+ // never considered uniform.
+ auto *SE = PSE.getSE();
+ if (!SE->isSCEVable(V->getType()))
+ return false;
+ const SCEV *S = SE->getSCEV(V);
+
+ // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
+ // lane 0 matches the expressions for all other lanes.
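+ // E.g. with VF == 4, an AddRec sub-expression {0,+,1}<TheLoop> becomes
+ // {0,+,4} for lane 0 and {I,+,4} for lane I; V is only uniform if the
+ // rewritten expressions for all lanes are the same SCEV.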
+ unsigned FixedVF = VF.getKnownMinValue();
+ const SCEV *FirstLaneExpr =
+ SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
+ if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
+ return false;
+
+ // Make sure the expressions for lanes FixedVF-1..1 match the expression for
+ // lane 0. We check lanes in reverse order for compile-time, as frequently
+ // checking the last lane is sufficient to rule out uniformity.
+ return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
+ const SCEV *IthLaneExpr =
+ SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
+ return FirstLaneExpr == IthLaneExpr;
+ });
+}
+
+bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
+ ElementCount VF) const {
Value *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
return false;
// both speculation safety (which follows from the same argument as loads),
// but also must prove the value being stored is correct. The easiest
// form of the latter is to require that all values stored are the same.
- if (Legal->isUniformMemOp(*I) &&
- (isa<LoadInst>(I) ||
- (isa<StoreInst>(I) &&
- TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
+ if (Legal->isInvariant(getLoadStorePointerOperand(I)) &&
+ (isa<LoadInst>(I) ||
+ (isa<StoreInst>(I) &&
+ TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
!Legal->blockNeedsPredication(I->getParent()))
return false;
return true;
// second vector operand. One example of this is shifts on x86.
Value *Op2 = I->getOperand(1);
auto Op2Info = TTI.getOperandInfo(Op2);
- if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
+ if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+ Legal->isInvariant(Op2))
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());
if (isa<StoreInst>(I) && I->getOperand(0) == Ptr)
return false;
return getLoadStorePointerOperand(I) == Ptr &&
- (isUniformDecision(I, VF) || Legal->isUniform(Ptr));
+ (isUniformDecision(I, VF) || Legal->isInvariant(Ptr));
};
// Holds a list of values which are known to have at least one uniform use.
}
StoreInst *SI = cast<StoreInst>(I);
- bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
+ bool isLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +
// second vector operand. One example of this is shifts on x86.
Value *Op2 = I->getOperand(1);
auto Op2Info = TTI.getOperandInfo(Op2);
- if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
+ if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+ Legal->isInvariant(Op2))
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());