From 523a526a024fb2835c998d1c054698cc16da87f4 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 19 Jul 2022 11:42:55 -0700 Subject: [PATCH] [LV] Fix miscompile due to srem/sdiv speculation safety condition An srem or sdiv has two cases which can cause undefined behavior, not just one. The existing code did not account for this, and as a result, we miscompiled when we encountered e.g. a srem i64 %v, -1 in a conditional block. Instead of hand rolling the logic, just use the utility function which exists exactly for this purpose. Differential Revision: https://reviews.llvm.org/D130106 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 23 ++------- .../LoopVectorize/RISCV/scalable-divrem.ll | 57 ++-------------------- 2 files changed, 7 insertions(+), 73 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 51cc6ea..e7bf4d1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -92,6 +92,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -4177,24 +4178,6 @@ bool InnerLoopVectorizer::useOrderedReductions( return Cost->useOrderedReductions(RdxDesc); } -/// A helper function for checking whether an integer division-related -/// instruction may divide by zero (in which case it must be predicated if -/// executed conditionally in the scalar code). -/// TODO: It may be worthwhile to generalize and check isKnownNonZero(). -/// Non-zero divisors that are non compile-time constants will not be -/// converted into multiplication, so we will still end up scalarizing -/// the division, but can do so w/o predication. -static bool mayDivideByZero(Instruction &I) { - assert((I.getOpcode() == Instruction::UDiv || - I.getOpcode() == Instruction::SDiv || - I.getOpcode() == Instruction::URem || - I.getOpcode() == Instruction::SRem) && - "Unexpected instruction"); - Value *Divisor = I.getOperand(1); - auto *CInt = dyn_cast(Divisor); - return !CInt || CInt->isZero(); -} - void InnerLoopVectorizer::widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, VPTransformState &State) { @@ -4478,7 +4461,9 @@ bool LoopVectorizationCostModel::isScalarWithPredication( case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: - return mayDivideByZero(*I); + // TODO: We can use the loop-preheader as context point here and get + // context sensitive reasoning + return !isSafeToSpeculativelyExecute(I); } return false; } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-divrem.ll index 26c87a3..16c7ad2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-divrem.ll @@ -482,64 +482,13 @@ for.end: ret void } -; FIXME: The current code here is wrong. sdiv/srem INT_MIN, -1 is UB. -; We can not speculate such a case. define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-LABEL: @predicated_sdiv_by_minus_one( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP14]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, i8 -128, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP18:%.*]] = sdiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = sdiv [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, i8 -1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = xor [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP16]], [[TMP18]], [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI2:%.*]] = select [[TMP17]], [[TMP19]], [[WIDE_LOAD1]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP23]] -; CHECK-NEXT: store [[PREDPHI2]], ptr [[TMP24]], align 1 -; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 16 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]] -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[IV]] ; CHECK-NEXT: [[ELEM:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[ELEM]], -128 ; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] @@ -551,7 +500,7 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: store i8 [[PHI]], ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; -- 2.7.4