[LoopVectorize] use IR fast-math-flags exclusively (not FP function attributes)

author Sanjay Patel <spatel@rotateright.com>

Wed, 27 Jan 2021 19:11:22 +0000 (14:11 -0500)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 27 Jan 2021 19:17:11 +0000 (14:17 -0500)
author Sanjay Patel <spatel@rotateright.com>
Wed, 27 Jan 2021 19:11:22 +0000 (14:11 -0500)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 27 Jan 2021 19:17:11 +0000 (14:17 -0500)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

index 2f80b43..c6ed43b 100644 (file)
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -344,9 +344,6 @@ public:
    unsigned getNumStores() const { return LAI->getNumStores(); }
    unsigned getNumLoads() const { return LAI->getNumLoads(); }
  
-  // Returns true if the NoNaN attribute is set on the function.
-  bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
-
    /// Returns all assume calls in predicated blocks. They need to be dropped
    /// when flattening the CFG.
    const SmallPtrSetImpl<Instruction *> &getConditionalAssumes() const {
@@ -495,9 +492,6 @@ private:
    /// outside the loop.
    SmallPtrSet<Value *, 4> AllowedExit;
  
-  /// Can we assume the absence of NaNs.
-  bool HasFunNoNaNAttr = false;
-
    /// Vectorization requirements that will go through late-evaluation.
    LoopVectorizationRequirements *Requirements;
  
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

index 2ab0848..2e35dfa 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -605,11 +605,6 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  bool LoopVectorizationLegality::canVectorizeInstrs() {
    BasicBlock *Header = TheLoop->getHeader();
  
-  // Look for the attribute signaling the absence of NaNs.
-  Function &F = *Header->getParent();
-  HasFunNoNaNAttr =
-      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
-
    // For each block in the loop.
    for (BasicBlock *BB : TheLoop->blocks()) {
      // Scan the instructions in the block and look for hazards.
@@ -673,7 +668,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
          InductionDescriptor ID;
          if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
            addInductionPhi(Phi, ID, AllowedExit);
-          if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
+          if (ID.hasUnsafeAlgebra())
              Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
            continue;
          }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index c2a9ae1..b1da80b 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8908,7 +8908,7 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
                           ? RecipeBuilder.createBlockInMask(R->getParent(), Plan)
                           : nullptr;
        VPReductionRecipe *RedRecipe = new VPReductionRecipe(
-          &RdxDesc, R, ChainOp, VecOp, CondOp, Legal->hasFunNoNaNAttr(), TTI);
+          &RdxDesc, R, ChainOp, VecOp, CondOp, TTI);
        WidenRecipe->getVPValue()->replaceAllUsesWith(RedRecipe);
        Plan->removeVPValueFor(R);
        Plan->addVPValue(R, RedRecipe);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h

index 2cce127..199e121 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1130,17 +1130,15 @@ public:
  class VPReductionRecipe : public VPRecipeBase, public VPUser, public VPValue {
    /// The recurrence decriptor for the reduction in question.
    RecurrenceDescriptor *RdxDesc;
-  /// Fast math flags to use for the resulting reduction operation.
-  bool NoNaN;
    /// Pointer to the TTI, needed to create the target reduction
    const TargetTransformInfo *TTI;
  
  public:
    VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
-                    VPValue *VecOp, VPValue *CondOp, bool NoNaN,
+                    VPValue *VecOp, VPValue *CondOp,
                      const TargetTransformInfo *TTI)
        : VPRecipeBase(VPRecipeBase::VPReductionSC), VPUser({ChainOp, VecOp}),
-        VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), NoNaN(NoNaN),
+        VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R),
          TTI(TTI) {
      if (CondOp)
        addOperand(CondOp);
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll

index 4947f84..017bce6 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -4,7 +4,7 @@
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  
  ; This test checks auto-vectorization with FP induction variable.
-; The FP operation is not "fast" and requires "fast-math" function attribute.
+; FMF is required on the IR instructions.
  
  ;void fp_iv_loop1(float * __restrict__ A, int N) {
  ;  float x = 1.0;
@@ -149,7 +149,7 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
  ; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
  ; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
  ; AUTO_VEC-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
-; AUTO_VEC-NEXT:    [[CONV1]] = fadd float [[X_06]], 5.000000e-01
+; AUTO_VEC-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
  ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
  ; AUTO_VEC-NEXT:    [[TMP45:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
  ; AUTO_VEC-NEXT:    br i1 [[TMP45]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]]
@@ -168,7 +168,7 @@ for.body:                                         ; preds = %for.body.preheader,
    %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
    %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
    store float %x.06, float* %arrayidx, align 4
-  %conv1 = fadd float %x.06, 5.000000e-01
+  %conv1 = fadd fast float %x.06, 5.000000e-01
    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    %exitcond = icmp eq i32 %lftr.wideiv, %N
@@ -181,7 +181,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
    ret void
  }
  
-; The same as the previous, FP operation is not fast, different function attribute
+; The same as the previous, but FP operation has no FMF.
  ; Vectorization should be rejected.
  ;void fp_iv_loop2(float * __restrict__ A, int N) {
  ;  float x = 1.0;
@@ -191,7 +191,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
  ;  }
  ;}
  
-define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) #1 {
+define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
  ; AUTO_VEC-LABEL: @fp_iv_loop2(
  ; AUTO_VEC-NEXT:  entry:
  ; AUTO_VEC-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -538,6 +538,3 @@ for.end:
    %t1 = phi double [ %j, %for.body ]
    ret double %t1
  }
-
-attributes #0 = { "no-nans-fp-math"="true" }
-attributes #1 = { "no-nans-fp-math"="false" }
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp

index 7b452f6..f57b4ff 100644 (file)
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -643,7 +643,7 @@ TEST(VPRecipeTest, CastVPReductionRecipeToVPUser) {
    VPValue ChainOp;
    VPValue VecOp;
    VPValue CondOp;
-  VPReductionRecipe Recipe(nullptr, nullptr, &ChainOp, &CondOp, &VecOp, false,
+  VPReductionRecipe Recipe(nullptr, nullptr, &ChainOp, &CondOp, &VecOp,
                             nullptr);
    EXPECT_TRUE(isa<VPUser>(&Recipe));
    VPRecipeBase *BaseR = &Recipe;
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 27 Jan 2021 19:11:22 +0000 (14:11 -0500)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 27 Jan 2021 19:17:11 +0000 (14:17 -0500)
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h		patch \| blob \| history
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp		patch \| blob \| history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
llvm/lib/Transforms/Vectorize/VPlan.h		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll		patch \| blob \| history
llvm/unittests/Transforms/Vectorize/VPlanTest.cpp		patch \| blob \| history