From: Roman Lebedev
Date: Sat, 18 Jul 2020 10:23:26 +0000 (+0300)
Subject: [CVP] Soften SDiv into a UDiv as long as we know domains of both of the operands.
X-Git-Tag: llvmorg-13-init~17514
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8d487668d09fb0e4e54f36207f07c1480ffabbfd;p=platform%2Fupstream%2Fllvm.git

[CVP] Soften SDiv into a UDiv as long as we know domains of both of the operands.

Yes, if the operands are non-positive this comes at the cost of two extra
negations. But a. division is already just ridiculously costly, two more
subtractions can't hurt much :) and b. we have better/more analyses/folds for
an unsigned division; we could end up narrowing its bitwidth, converting it to
lshr, etc.

This is essentially take two on 0fdcca07ad2c0bdc2cdd40ba638109926f4f513b,
which didn't fix the potential regression I was seeing, because
ValueTracking's computeKnownBits() doesn't make use of dominating conditions
in its analysis. While I could teach it that, this seems like the more general
fix. This big hammer actually does catch said potential regression.

Over vanilla test-suite + RawSpeed + darktable (10M IR instrs, 1M IR BBs,
1M X86 ASM instrs), this fires/converts 5 more (+2%) SDivs. The total
instruction count at the end of the middle-end pipeline is only +6, so out of
the +10 extra negations, roughly half are folded away; the asm instruction
count is only +1, so practically speaking all the extra negations are folded
away and are therefore free. Sadly, all the new UDivs remained, none folded
away. But there are two fewer basic blocks.

https://rise4fun.com/Alive/VS6

Name: v0
Pre: C0 >= 0 && C1 >= 0
  %r = sdiv i8 C0, C1
=>
  %r = udiv i8 C0, C1

Name: v1
Pre: C0 <= 0 && C1 >= 0
  %r = sdiv i8 C0, C1
=>
  %t0 = udiv i8 -C0, C1
  %r = sub i8 0, %t0

Name: v2
Pre: C0 >= 0 && C1 <= 0
  %r = sdiv i8 C0, C1
=>
  %t0 = udiv i8 C0, -C1
  %r = sub i8 0, %t0

Name: v3
Pre: C0 <= 0 && C1 <= 0
  %r = sdiv i8 C0, C1
=>
  %r = udiv i8 -C0, -C1
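To make the case split concrete, here is a minimal standalone C++ sketch
(illustration only, not part of this patch; the helper name is made up) that
exhaustively re-checks the identity behind the four Alive proofs above for
every i8 operand pair: negate each negative operand, divide unsigned, and
negate the result iff exactly one operand was negated.

#include <cassert>
#include <cstdint>

// Compute sdiv the way the transform does: make both operands non-negative,
// divide unsigned, and negate the result iff the two sign domains differed.
static int8_t sdivViaUdiv(int8_t X, int8_t Y) {
  const bool XNonNeg = X >= 0, YNonNeg = Y >= 0;
  const uint8_t UX = XNonNeg ? uint8_t(X) : uint8_t(-uint8_t(X));
  const uint8_t UY = YNonNeg ? uint8_t(Y) : uint8_t(-uint8_t(Y));
  const uint8_t UDiv = uint8_t(UX / UY);
  return XNonNeg == YNonNeg ? int8_t(UDiv) : int8_t(-uint8_t(UDiv));
}

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int Y = -128; Y <= 127; ++Y) {
      // sdiv i8 is immediate UB for these inputs, so the transform need not
      // (and does not) preserve them; skip.
      if (Y == 0 || (X == -128 && Y == -1))
        continue;
      assert(sdivViaUdiv(int8_t(X), int8_t(Y)) == int8_t(X / Y));
    }
}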
---

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 284312e..fb7a005 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -607,6 +607,12 @@ static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
   return Result == LazyValueInfo::True;
 }
 
+static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+  Constant *Zero = ConstantInt::get(V->getType(), 0);
+  auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI);
+  return Result == LazyValueInfo::True;
+}
+
 static bool allOperandsAreNonNegative(BinaryOperator *SDI, LazyValueInfo *LVI) {
   return all_of(SDI->operands(),
                 [&](Value *Op) { return isNonNegative(Op, LVI, SDI); });
@@ -672,24 +678,65 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
 }
 
 /// See if LazyValueInfo's ability to exploit edge conditions or range
-/// information is sufficient to prove the both operands of this SDiv are
-/// positive. If this is the case, replace the SDiv with a UDiv. Even for local
+/// information is sufficient to prove the signs of both operands of this SDiv.
+/// If this is the case, replace the SDiv with a UDiv. Even for local
 /// conditions, this can sometimes prove conditions instcombine can't by
 /// exploiting range information.
 static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
-  if (SDI->getType()->isVectorTy() || !allOperandsAreNonNegative(SDI, LVI))
+  if (SDI->getType()->isVectorTy())
     return false;
 
+  enum class Domain { NonNegative, NonPositive, Unknown };
+  auto getDomain = [&](Value *V) {
+    if (isNonNegative(V, LVI, SDI))
+      return Domain::NonNegative;
+    if (isNonPositive(V, LVI, SDI))
+      return Domain::NonPositive;
+    return Domain::Unknown;
+  };
+
+  struct Operand {
+    Value *V;
+    Domain Domain;
+  };
+  std::array<Operand, 2> Ops;
+  for (const auto &I : zip(Ops, SDI->operands())) {
+    Operand &Op = std::get<0>(I);
+    Op.V = std::get<1>(I);
+    Op.Domain = getDomain(Op.V);
+    if (Op.Domain == Domain::Unknown)
+      return false;
+  }
+
+  // We know domains of both of the operands!
   ++NumSDivs;
-  auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1),
-                                        SDI->getName(), SDI);
-  BO->setDebugLoc(SDI->getDebugLoc());
-  BO->setIsExact(SDI->isExact());
-  SDI->replaceAllUsesWith(BO);
+
+  // We need operands to be non-negative, so negate each one that isn't.
+  for (Operand &Op : Ops) {
+    if (Op.Domain == Domain::NonNegative)
+      continue;
+    auto *BO =
+        BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+    BO->setDebugLoc(SDI->getDebugLoc());
+    Op.V = BO;
+  }
+
+  auto *UDiv =
+      BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+  UDiv->setDebugLoc(SDI->getDebugLoc());
+  UDiv->setIsExact(SDI->isExact());
+
+  Value *Res = UDiv;
+
+  // If the operands had two different domains, we need to negate the result.
+  if (Ops[0].Domain != Ops[1].Domain)
+    Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+
+  SDI->replaceAllUsesWith(Res);
   SDI->eraseFromParent();
 
   // Try to simplify our new udiv.
-  processUDivOrURem(BO, LVI);
+  processUDivOrURem(UDiv, LVI);
 
   return true;
 }
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll
index ec5de00..8dfa09d4 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll
@@ -177,8 +177,10 @@ define i32 @test7_pos_neg(i32 %x, i32 %y) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C0]])
 ; CHECK-NEXT:    [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[X]], [[Y]]
-; CHECK-NEXT:    ret i32 [[DIV]]
+; CHECK-NEXT:    [[Y_NONNEG:%.*]] = sub i32 0, [[Y]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv i32 [[X]], [[Y_NONNEG]]
+; CHECK-NEXT:    [[DIV1_NEG:%.*]] = sub i32 0, [[DIV1]]
+; CHECK-NEXT:    ret i32 [[DIV1_NEG]]
 ;
   %c0 = icmp sge i32 %x, 0
   call void @llvm.assume(i1 %c0)
@@ -194,8 +196,10 @@ define i32 @test8_neg_pos(i32 %x, i32 %y) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C0]])
 ; CHECK-NEXT:    [[C1:%.*]] = icmp sge i32 [[Y:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[X]], [[Y]]
-; CHECK-NEXT:    ret i32 [[DIV]]
+; CHECK-NEXT:    [[X_NONNEG:%.*]] = sub i32 0, [[X]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv i32 [[X_NONNEG]], [[Y]]
+; CHECK-NEXT:    [[DIV1_NEG:%.*]] = sub i32 0, [[DIV1]]
+; CHECK-NEXT:    ret i32 [[DIV1_NEG]]
 ;
   %c0 = icmp sle i32 %x, 0
   call void @llvm.assume(i1 %c0)
@@ -211,8 +215,10 @@ define i32 @test9_neg_neg(i32 %x, i32 %y) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C0]])
 ; CHECK-NEXT:    [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[X]], [[Y]]
-; CHECK-NEXT:    ret i32 [[DIV]]
+; CHECK-NEXT:    [[X_NONNEG:%.*]] = sub i32 0, [[X]]
+; CHECK-NEXT:    [[Y_NONNEG:%.*]] = sub i32 0, [[Y]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv i32 [[X_NONNEG]], [[Y_NONNEG]]
+; CHECK-NEXT:    ret i32 [[DIV1]]
 ;
   %c0 = icmp sle i32 %x, 0
   call void @llvm.assume(i1 %c0)
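For completeness, here is what the updated test7_pos_neg expectations look
like when built by hand at the C++ API level: a standalone sketch (again not
part of this patch; the module and function names are invented) that emits the
same udiv-plus-negations shape with IRBuilder, assuming the X >= 0 / Y <= 0
facts have already been proven by some analysis such as LVI.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(I32, {I32, I32}, false),
                                 Function::ExternalLinkage, "pos_neg_div", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  Value *X = F->getArg(0), *Y = F->getArg(1);
  // Pretend an analysis already proved X >= 0 and Y <= 0 (the test7_pos_neg
  // case): negate the non-positive operand, divide unsigned, negate the
  // result because the two sign domains differ.
  Value *YNonNeg = B.CreateNeg(Y, "y.nonneg");  // sub i32 0, %1
  Value *Div = B.CreateUDiv(X, YNonNeg, "div"); // udiv i32 %0, %y.nonneg
  B.CreateRet(B.CreateNeg(Div, "div.neg"));     // sub i32 0, %div
  if (verifyFunction(*F, &errs()))
    return 1; // Malformed IR; the verifier has printed the reason.
  M.print(outs(), nullptr);
  return 0;
}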