static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
DominatorTree *DT);
- /// Returns true if value \p V is uniform across \p VF lanes, when \p VF is
- /// provided, and otherwise if \p V is invariant across all loop iterations.
- bool isUniform(Value *V, std::optional<ElementCount> VF = std::nullopt) const;
+ /// Returns true if value \p V is loop invariant.
+ bool isInvariant(Value *V) const;
uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
unsigned getNumStores() const { return NumStores; }
- /// Returns true if value V is uniform across \p VF lanes, when \p VF is
- /// provided, and otherwise if \p V is invariant across all loop iterations.
- bool isUniform(Value *V, std::optional<ElementCount> VF = std::nullopt) const;
+ /// Returns true if value \p V is loop invariant.
+ bool isInvariant(Value *V) const;
+
+ /// Returns true if value \p V is uniform across \p VF lanes; loop-invariant
+ /// values are trivially uniform.
+ bool isUniform(Value *V, ElementCount VF) const;
/// A uniform memory op is a load or store which accesses the same memory
- /// location on all \p VF lanes, if \p VF is provided and otherwise if the
- /// memory location is invariant.
- bool isUniformMemOp(Instruction &I,
- std::optional<ElementCount> VF = std::nullopt) const;
+ /// location on all \p VF lanes.
+ bool isUniformMemOp(Instruction &I, ElementCount VF) const;
/// Returns the information that we collected about runtime memory check.
const RuntimePointerChecking *getRuntimePointerChecking() const {
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
- if (isUniform(Ptr)) {
+ if (isInvariant(Ptr)) {
// Record store instructions to loop invariant addresses
StoresToInvariantAddresses.push_back(ST);
HasDependenceInvolvingLoopInvariantAddress |=
return *Report;
}
-namespace {
-/// A rewriter to build the SCEVs for each of the VF lanes in the expected
-/// vectorized loop, which can then be compared to detect their uniformity. This
-/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
-/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
-/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
-/// uniformity.
-class SCEVAddRecForUniformityRewriter
- : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
- /// Multiplier to be applied to the step of AddRecs in TheLoop.
- unsigned StepMultiplier;
-
- /// Offset to be added to the AddRecs in TheLoop.
- unsigned Offset;
-
- /// Loop for which to rewrite AddRecsFor.
- Loop *TheLoop;
-
- /// Is any sub-expressions not analyzable w.r.t. uniformity?
- bool CannotAnalyze = false;
-
- bool canAnalyze() const { return !CannotAnalyze; }
-
-public:
- SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
- unsigned Offset, Loop *TheLoop)
- : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
- TheLoop(TheLoop) {}
-
- const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
- assert(Expr->getLoop() == TheLoop &&
- "addrec outside of TheLoop must be invariant and should have been "
- "handled earlier");
- // Build a new AddRec by multiplying the step by StepMultiplier and
- // incrementing the start by Offset * step.
- Type *Ty = Expr->getType();
- auto *Step = Expr->getStepRecurrence(SE);
- if (!SE.isLoopInvariant(Step, TheLoop)) {
- CannotAnalyze = true;
- return Expr;
- }
- auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
- auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
- auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
- return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
- }
-
- const SCEV *visit(const SCEV *S) {
- if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
- return S;
- return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
- }
-
- const SCEV *visitUnknown(const SCEVUnknown *S) {
- if (SE.isLoopInvariant(S, TheLoop))
- return S;
- // The value could vary across iterations.
- CannotAnalyze = true;
- return S;
- }
-
- const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
- // Could not analyze the expression.
- CannotAnalyze = true;
- return S;
- }
-
- static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
- unsigned StepMultiplier, unsigned Offset,
- Loop *TheLoop) {
- /// Bail out if the expression does not contain an UDiv expression.
- /// Uniform values which are not loop invariant require operations to strip
- /// out the lowest bits. For now just look for UDivs and use it to avoid
- /// re-writing UDIV-free expressions for other lanes to limit compile time.
- if (!SCEVExprContains(S,
- [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
- return SE.getCouldNotCompute();
-
- SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
- TheLoop);
- const SCEV *Result = Rewriter.visit(S);
-
- if (Rewriter.canAnalyze())
- return Result;
- return SE.getCouldNotCompute();
- }
-};
-
-} // namespace
-
-bool LoopAccessInfo::isUniform(Value *V, std::optional<ElementCount> VF) const {
+bool LoopAccessInfo::isInvariant(Value *V) const {
auto *SE = PSE->getSE();
- // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
- // never considered uniform.
// TODO: Is this really what we want? Even without FP SCEV, we may want some
- // trivially loop-invariant FP values to be considered uniform.
+ // trivially loop-invariant FP values to be considered invariant.
if (!SE->isSCEVable(V->getType()))
return false;
const SCEV *S = SE->getSCEV(V);
- if (SE->isLoopInvariant(S, TheLoop))
- return true;
- if (!VF || VF->isScalable())
- return false;
- if (VF->isScalar())
- return true;
-
- // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
- // lane 0 matches the expressions for all other lanes.
- unsigned FixedVF = VF->getKnownMinValue();
- const SCEV *FirstLaneExpr =
- SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
- if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
- return false;
-
- // Make sure the expressions for lanes FixedVF-1..1 match the expression for
- // lane 0. We check lanes in reverse order for compile-time, as frequently
- // checking the last lane is sufficient to rule out uniformity.
- return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
- const SCEV *IthLaneExpr =
- SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
- return FirstLaneExpr == IthLaneExpr;
- });
+ return SE->isLoopInvariant(S, TheLoop);
}
/// Find the operand of the GEP that should be checked for consecutive
return 0;
}
-bool LoopVectorizationLegality::isUniform(
- Value *V, std::optional<ElementCount> VF) const {
- return LAI->isUniform(V, VF);
+bool LoopVectorizationLegality::isInvariant(Value *V) const {
+ return LAI->isInvariant(V);
}
-bool LoopVectorizationLegality::isUniformMemOp(
- Instruction &I, std::optional<ElementCount> VF) const {
+namespace {
+/// A rewriter to build the SCEVs for each of the VF lanes in the expected
+/// vectorized loop, which can then be compared to detect their uniformity. This
+/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
+/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
+/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
+/// uniformity.
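+/// For example (illustrative only), with StepMultiplier == 4 and Offset == 1,
+/// an AddRec {S,+,X}<TheLoop> is rewritten to {S + X,+,4 * X}<TheLoop>, i.e.
+/// the per-iteration expression of lane 1 in a VF == 4 vectorized loop.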
+class SCEVAddRecForUniformityRewriter
+ : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
+ /// Multiplier to be applied to the step of AddRecs in TheLoop.
+ unsigned StepMultiplier;
+
+ /// Offset to be added to the AddRecs in TheLoop.
+ unsigned Offset;
+
+ /// Loop for which to rewrite AddRecs.
+ Loop *TheLoop;
+
+ /// Are any sub-expressions not analyzable w.r.t. uniformity?
+ bool CannotAnalyze = false;
+
+ bool canAnalyze() const { return !CannotAnalyze; }
+
+public:
+ SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
+ unsigned Offset, Loop *TheLoop)
+ : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
+ TheLoop(TheLoop) {}
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ assert(Expr->getLoop() == TheLoop &&
+ "addrec outside of TheLoop must be invariant and should have been "
+ "handled earlier");
+ // Build a new AddRec by multiplying the step by StepMultiplier and
+ // incrementing the start by Offset * step.
+ Type *Ty = Expr->getType();
+ auto *Step = Expr->getStepRecurrence(SE);
+ if (!SE.isLoopInvariant(Step, TheLoop)) {
+ CannotAnalyze = true;
+ return Expr;
+ }
+ auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
+ auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
+ auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
+ return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
+ }
+
+ const SCEV *visit(const SCEV *S) {
+ if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
+ return S;
+ return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *S) {
+ if (SE.isLoopInvariant(S, TheLoop))
+ return S;
+ // The value could vary across iterations.
+ CannotAnalyze = true;
+ return S;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
+ // Could not analyze the expression.
+ CannotAnalyze = true;
+ return S;
+ }
+
+ static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
+ unsigned StepMultiplier, unsigned Offset,
+ Loop *TheLoop) {
+ /// Bail out if the expression does not contain a UDiv expression.
+ /// Uniform values which are not loop invariant require operations to strip
+ /// out the lowest bits. For now just look for UDivs, and use this check to
+ /// avoid rewriting UDiv-free expressions for other lanes, limiting compile time.
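+ /// E.g. (illustrative) an expression like ({0,+,1}<%loop> /u 2) contains a
+ /// UDiv and is rewritten per lane; a UDiv-free expression is skipped here and
+ /// is not treated as uniform beyond plain loop invariance.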
+ if (!SCEVExprContains(S,
+ [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
+ return SE.getCouldNotCompute();
+
+ SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
+ TheLoop);
+ const SCEV *Result = Rewriter.visit(S);
+
+ if (Rewriter.canAnalyze())
+ return Result;
+ return SE.getCouldNotCompute();
+ }
+};
+
+} // namespace
+
+bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
+ if (isInvariant(V))
+ return true;
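+ // Only fixed VFs are analyzed lane by lane below; a scalar VF trivially has
+ // a single lane, and scalable VFs are not handled.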
+ if (VF.isScalable())
+ return false;
+ if (VF.isScalar())
+ return true;
+
+ // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
+ // never considered uniform.
+ auto *SE = PSE.getSE();
+ if (!SE->isSCEVable(V->getType()))
+ return false;
+ const SCEV *S = SE->getSCEV(V);
+
+ // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
+ // lane 0 matches the expressions for all other lanes.
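+ // E.g. with VF == 4, an AddRec sub-expression {0,+,1}<TheLoop> becomes
+ // {0,+,4} for lane 0 and {I,+,4} for lane I; V is only uniform if the
+ // rewritten expressions for all lanes are the same SCEV.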
+ unsigned FixedVF = VF.getKnownMinValue();
+ const SCEV *FirstLaneExpr =
+ SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
+ if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
+ return false;
+
+ // Make sure the expressions for lanes FixedVF-1..1 match the expression for
+ // lane 0. We check lanes in reverse order for compile-time, as frequently
+ // checking the last lane is sufficient to rule out uniformity.
+ return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
+ const SCEV *IthLaneExpr =
+ SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
+ return FirstLaneExpr == IthLaneExpr;
+ });
+}
+
+bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
+ ElementCount VF) const {
Value *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
return false;
// both speculation safety (which follows from the same argument as loads),
// but also must prove the value being stored is correct. The easiest
// form of the latter is to require that all values stored are the same.
- if (Legal->isUniformMemOp(*I) &&
- (isa<LoadInst>(I) ||
- (isa<StoreInst>(I) &&
- TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
+ if (Legal->isInvariant(getLoadStorePointerOperand(I)) &&
+ (isa<LoadInst>(I) ||
+ (isa<StoreInst>(I) &&
+ TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
!Legal->blockNeedsPredication(I->getParent()))
return false;
return true;
// second vector operand. One example of this is shifts on x86.
Value *Op2 = I->getOperand(1);
auto Op2Info = TTI.getOperandInfo(Op2);
- if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
+ if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+ Legal->isInvariant(Op2))
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());
if (isa<StoreInst>(I) && I->getOperand(0) == Ptr)
return false;
return getLoadStorePointerOperand(I) == Ptr &&
- (isUniformDecision(I, VF) || Legal->isUniform(Ptr));
+ (isUniformDecision(I, VF) || Legal->isInvariant(Ptr));
};
// Holds a list of values which are known to have at least one uniform use.
}
StoreInst *SI = cast<StoreInst>(I);
- bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
+ bool isLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +
// second vector operand. One example of this is shifts on x86.
Value *Op2 = I->getOperand(1);
auto Op2Info = TTI.getOperandInfo(Op2);
- if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
+ if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+ Legal->isInvariant(Op2))
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());