From ab93c18c125f0ee51959ef225fa8f09f4dc29e35 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 27 Jan 2021 14:11:22 -0500 Subject: [PATCH] [LoopVectorize] use IR fast-math-flags exclusively (not FP function attributes) I am trying to untangle the fast-math-flags propagation logic in the vectorizers (see a6f022127 for SLP). The loop vectorizer has a mix of checking FP function attributes, IR-level FMF, and just wrong assumptions. I am trying to avoid regressions while fixing this, and I think the IR-level logic is good enough for that, but it's hard to say for sure. This would be the 1st step in the clean-up. The existing test that I changed to include 'fast' actually shows a miscompile: the function only had the equivalent of nnan, but we created new instructions that had fast (all FMF set). This is similar to the example in https://llvm.org/PR35538 Differential Revision: https://reviews.llvm.org/D95452 --- .../llvm/Transforms/Vectorize/LoopVectorizationLegality.h | 6 ------ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 7 +------ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++---- .../Transforms/LoopVectorize/X86/float-induction-x86.ll | 13 +++++-------- llvm/unittests/Transforms/Vectorize/VPlanTest.cpp | 2 +- 6 files changed, 10 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 2f80b43..c6ed43b 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -344,9 +344,6 @@ public: unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } - // Returns true if the NoNaN attribute is set on the function. - bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } - /// Returns all assume calls in predicated blocks. They need to be dropped /// when flattening the CFG. const SmallPtrSetImpl &getConditionalAssumes() const { @@ -495,9 +492,6 @@ private: /// outside the loop. SmallPtrSet AllowedExit; - /// Can we assume the absence of NaNs. - bool HasFunNoNaNAttr = false; - /// Vectorization requirements that will go through late-evaluation. LoopVectorizationRequirements *Requirements; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 2ab0848..2e35dfa 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -605,11 +605,6 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *Header = TheLoop->getHeader(); - // Look for the attribute signaling the absence of NaNs. - Function &F = *Header->getParent(); - HasFunNoNaNAttr = - F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; - // For each block in the loop. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the instructions in the block and look for hazards. @@ -673,7 +668,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { addInductionPhi(Phi, ID, AllowedExit); - if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) + if (ID.hasUnsafeAlgebra()) Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); continue; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c2a9ae1..b1da80b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8908,7 +8908,7 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions( ? RecipeBuilder.createBlockInMask(R->getParent(), Plan) : nullptr; VPReductionRecipe *RedRecipe = new VPReductionRecipe( - &RdxDesc, R, ChainOp, VecOp, CondOp, Legal->hasFunNoNaNAttr(), TTI); + &RdxDesc, R, ChainOp, VecOp, CondOp, TTI); WidenRecipe->getVPValue()->replaceAllUsesWith(RedRecipe); Plan->removeVPValueFor(R); Plan->addVPValue(R, RedRecipe); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 2cce127..199e121 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1130,17 +1130,15 @@ public: class VPReductionRecipe : public VPRecipeBase, public VPUser, public VPValue { /// The recurrence decriptor for the reduction in question. RecurrenceDescriptor *RdxDesc; - /// Fast math flags to use for the resulting reduction operation. - bool NoNaN; /// Pointer to the TTI, needed to create the target reduction const TargetTransformInfo *TTI; public: VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp, - VPValue *VecOp, VPValue *CondOp, bool NoNaN, + VPValue *VecOp, VPValue *CondOp, const TargetTransformInfo *TTI) : VPRecipeBase(VPRecipeBase::VPReductionSC), VPUser({ChainOp, VecOp}), - VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), NoNaN(NoNaN), + VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), TTI(TTI) { if (CondOp) addOperand(CondOp); diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 4947f84..017bce6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ; This test checks auto-vectorization with FP induction variable. -; The FP operation is not "fast" and requires "fast-math" function attribute. +; FMF is required on the IR instructions. ;void fp_iv_loop1(float * __restrict__ A, int N) { ; float x = 1.0; @@ -149,7 +149,7 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; AUTO_VEC-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 -; AUTO_VEC-NEXT: [[CONV1]] = fadd float [[X_06]], 5.000000e-01 +; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AUTO_VEC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]] ; AUTO_VEC-NEXT: br i1 [[TMP45]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]] @@ -168,7 +168,7 @@ for.body: ; preds = %for.body.preheader, %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ] %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv store float %x.06, float* %arrayidx, align 4 - %conv1 = fadd float %x.06, 5.000000e-01 + %conv1 = fadd fast float %x.06, 5.000000e-01 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %N @@ -181,7 +181,7 @@ for.end: ; preds = %for.end.loopexit, % ret void } -; The same as the previous, FP operation is not fast, different function attribute +; The same as the previous, but FP operation has no FMF. ; Vectorization should be rejected. ;void fp_iv_loop2(float * __restrict__ A, int N) { ; float x = 1.0; @@ -191,7 +191,7 @@ for.end: ; preds = %for.end.loopexit, % ; } ;} -define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) #1 { +define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) { ; AUTO_VEC-LABEL: @fp_iv_loop2( ; AUTO_VEC-NEXT: entry: ; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 @@ -538,6 +538,3 @@ for.end: %t1 = phi double [ %j, %for.body ] ret double %t1 } - -attributes #0 = { "no-nans-fp-math"="true" } -attributes #1 = { "no-nans-fp-math"="false" } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 7b452f6..f57b4ff 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -643,7 +643,7 @@ TEST(VPRecipeTest, CastVPReductionRecipeToVPUser) { VPValue ChainOp; VPValue VecOp; VPValue CondOp; - VPReductionRecipe Recipe(nullptr, nullptr, &ChainOp, &CondOp, &VecOp, false, + VPReductionRecipe Recipe(nullptr, nullptr, &ChainOp, &CondOp, &VecOp, nullptr); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; -- 2.7.4