From 0d2fc1a501c7f4e3be014eadc0761941ac2995ff Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 9 May 2018 22:27:34 +0000 Subject: [PATCH] [InstCombine] Teach SimplifyDemandedBits that udiv doesn't demand low dividend bits that are zero in the divisor This is safe as long as the udiv is not exact. The pattern is not common in C++ code, but comes up all the time in code generated by XLA's GPU backend. Differential Revision: https://reviews.llvm.org/D46647 llvm-svn: 331933 --- .../InstCombine/InstCombineSimplifyDemanded.cpp | 16 ++++++++++++++++ llvm/test/Transforms/InstCombine/udiv-simplify.ll | 21 +++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 0c03cc3..abd3e39 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -545,6 +545,22 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } break; } + case Instruction::UDiv: { + // UDiv doesn't demand low bits that are zero in the divisor. + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { + // If the shift is exact, then it does demand the low bits. + if (cast(I)->isExact()) + break; + + // FIXME: Take the demanded mask of the result into account. + APInt DemandedMaskIn = + APInt::getHighBitsSet(BitWidth, BitWidth - SA->countTrailingZeros()); + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) + return I; + } + break; + } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce diff --git a/llvm/test/Transforms/InstCombine/udiv-simplify.ll b/llvm/test/Transforms/InstCombine/udiv-simplify.ll index 1794e26..8fd604b 100644 --- a/llvm/test/Transforms/InstCombine/udiv-simplify.ll +++ b/llvm/test/Transforms/InstCombine/udiv-simplify.ll @@ -83,3 +83,24 @@ define i177 @ossfuzz_4857(i177 %X, i177 %Y) { store i1 %C9, i1* undef ret i177 %B1 } + +define i32 @udiv_demanded(i32 %a) { +; CHECK-LABEL: @udiv_demanded( +; CHECK-NEXT: [[U:%.*]] = udiv i32 [[A:%.*]], 12 +; CHECK-NEXT: ret i32 [[U]] +; + %o = or i32 %a, 3 + %u = udiv i32 %o, 12 + ret i32 %u +} + +define i32 @udiv_exact_demanded(i32 %a) { +; CHECK-LABEL: @udiv_exact_demanded( +; CHECK-NEXT: [[O:%.*]] = and i32 [[A:%.*]], -3 +; CHECK-NEXT: [[U:%.*]] = udiv exact i32 [[O]], 12 +; CHECK-NEXT: ret i32 [[U]] +; + %o = and i32 %a, -3 + %u = udiv exact i32 %o, 12 + ret i32 %u +} -- 2.7.4