From 038cbc5c13e33052c1b7dad1112c2a062e7c565e Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 21 Mar 2018 14:08:21 +0000 Subject: [PATCH] Re-re-land: Teach CorrelatedValuePropagation to reduce the width of udiv/urem instructions. Summary: If the operands of a udiv/urem can be proved to fit within a smaller power-of-two-sized type, reduce the width of the udiv/urem. Backed out for causing performance regressions. Re-landing because we've determined that these regressions were noise. Original Differential Revision: https://reviews.llvm.org/D44102 llvm-svn: 328096 --- .../Scalar/CorrelatedValuePropagation.cpp | 54 +++++++++++ .../Transforms/CorrelatedValuePropagation/udiv.ll | 95 +++++++++++++++++++ .../Transforms/CorrelatedValuePropagation/urem.ll | 101 +++++++++++++++++++++ 3 files changed, 250 insertions(+) create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/udiv.ll create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/urem.ll diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 5cab6d6..efbe2aa 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -58,6 +58,7 @@ STATISTIC(NumCmps, "Number of comparisons propagated"); STATISTIC(NumReturns, "Number of return values propagated"); STATISTIC(NumDeadCases, "Number of switch cases removed"); STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); +STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumOverflows, "Number of overflow checks removed"); @@ -432,6 +433,48 @@ static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) { return true; } +/// Try to shrink a udiv/urem's width down to the smallest power of two that's +/// sufficient to contain its operands. +static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { + assert(Instr->getOpcode() == Instruction::UDiv || + Instr->getOpcode() == Instruction::URem); + if (Instr->getType()->isVectorTy()) + return false; + + // Find the smallest power of two bitwidth that's sufficient to hold Instr's + // operands. + auto OrigWidth = Instr->getType()->getIntegerBitWidth(); + ConstantRange OperandRange(OrigWidth, /*isFullset=*/false); + for (Value *Operand : Instr->operands()) { + OperandRange = OperandRange.unionWith( + LVI->getConstantRange(Operand, Instr->getParent())); + } + // Don't shrink below 8 bits wide. + unsigned NewWidth = std::max( + PowerOf2Ceil(OperandRange.getUnsignedMax().getActiveBits()), 8); + // NewWidth might be greater than OrigWidth if OrigWidth is not a power of + // two. + if (NewWidth >= OrigWidth) + return false; + + ++NumUDivs; + auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth); + auto *LHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(0), TruncTy, + Instr->getName() + ".lhs.trunc", Instr); + auto *RHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(1), TruncTy, + Instr->getName() + ".rhs.trunc", Instr); + auto *BO = + BinaryOperator::Create(Instr->getOpcode(), LHS, RHS, Instr->getName(), Instr); + auto *Zext = CastInst::Create(Instruction::ZExt, BO, Instr->getType(), + Instr->getName() + ".zext", Instr); + if (BO->getOpcode() == Instruction::UDiv) + BO->setIsExact(Instr->isExact()); + + Instr->replaceAllUsesWith(Zext); + Instr->eraseFromParent(); + return true; +} + static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) return false; @@ -441,6 +484,10 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { SDI->getName(), SDI); SDI->replaceAllUsesWith(BO); SDI->eraseFromParent(); + + // Try to process our new urem. + processUDivOrURem(BO, LVI); + return true; } @@ -460,6 +507,9 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { SDI->replaceAllUsesWith(BO); SDI->eraseFromParent(); + // Try to simplify our new udiv. + processUDivOrURem(BO, LVI); + return true; } @@ -595,6 +645,10 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) { case Instruction::SDiv: BBChanged |= processSDiv(cast(II), LVI); break; + case Instruction::UDiv: + case Instruction::URem: + BBChanged |= processUDivOrURem(cast(II), LVI); + break; case Instruction::AShr: BBChanged |= processAShr(cast(II), LVI); break; diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/udiv.ll b/llvm/test/Transforms/CorrelatedValuePropagation/udiv.ll new file mode 100644 index 0000000..1907813 --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/udiv.ll @@ -0,0 +1,95 @@ +; RUN: opt < %s -correlated-propagation -S | FileCheck %s + +; CHECK-LABEL: @test_nop +define void @test_nop(i32 %n) { +; CHECK udiv i32 %n, 100 + %div = udiv i32 %n, 100 + ret void +} + +; CHECK-LABEL: @test1( +define void @test1(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 65535 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i16 + %div = udiv i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test2( +define void @test2(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 65536 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i32 %n, 100 + %div = udiv i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test3( +define void @test3(i32 %m, i32 %n) { +entry: + %cmp1 = icmp ult i32 %m, 65535 + %cmp2 = icmp ult i32 %n, 65535 + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i16 + %div = udiv i32 %m, %n + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test4( +define void @test4(i32 %m, i32 %n) { +entry: + %cmp1 = icmp ult i32 %m, 65535 + %cmp2 = icmp ule i32 %n, 65536 + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i32 %m, %n + %div = udiv i32 %m, %n + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test5 +define void @test5(i32 %n) { + %trunc = and i32 %n, 65535 + ; CHECK: udiv i16 + %div = udiv i32 %trunc, 42 + ret void +} + +; CHECK-LABEL: @test6 +define void @test6(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 255 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i8 + %div = sdiv i32 %n, 100 + br label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/urem.ll b/llvm/test/Transforms/CorrelatedValuePropagation/urem.ll new file mode 100644 index 0000000..b01e9c5 --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/urem.ll @@ -0,0 +1,101 @@ +; RUN: opt < %s -correlated-propagation -S | FileCheck %s + +; CHECK-LABEL: @test_nop +define void @test_nop(i32 %n) { +; CHECK udiv i32 %n, 100 + %div = udiv i32 %n, 100 + ret void +} + +; CHECK-LABEL: @test1( +define void @test1(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 65535 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i16 + %div = urem i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test2( +define void @test2(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 65536 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i32 %n, 100 + %div = urem i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test3( +define void @test3(i32 %m, i32 %n) { +entry: + %cmp1 = icmp ult i32 %m, 65535 + %cmp2 = icmp ult i32 %n, 65535 + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i16 + %div = urem i32 %m, %n + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test4( +define void @test4(i32 %m, i32 %n) { +entry: + %cmp1 = icmp ult i32 %m, 65535 + %cmp2 = icmp ule i32 %n, 65536 + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i32 %m, %n + %div = urem i32 %m, %n + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test5 +define void @test5(i32 %n) { + %trunc = and i32 %n, 63 + ; CHECK: urem i8 + %div = urem i32 %trunc, 42 + ret void +} + +; CHECK-LABEL: @test6 +define void @test6(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 255 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i8 + %div = srem i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @non_power_of_2 +define void @non_power_of_2(i24 %n) { + %div = urem i24 %n, 42 + ret void +} -- 2.7.4