From 5150612012beab0065080fce0507755a7e325099 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 25 May 2017 14:13:57 +0000 Subject: [PATCH] [InstCombine] make icmp-mul fold more efficient There's probably a lot more like this (see also comments in D33338 about responsibility), but I suspect we don't usually get a visible manifestation. Given the recent interest in improving InstCombine efficiency, another potential micro-opt that could be repeated several times in this function: morph the existing icmp pred/operands instead of creating a new instruction. llvm-svn: 303860 --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 12 +++++++----- llvm/test/Transforms/InstCombine/icmp.ll | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index a855a0d..30c5abb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3057,19 +3057,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { break; const APInt *C; - if (match(BO0->getOperand(1), m_APInt(C))) { + if (match(BO0->getOperand(1), m_APInt(C)) && *C != 0 && *C != 1) { // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask) // Mask = -1 >> count-trailing-zeros(C). - if (*C != 0 && *C != 1) { - // FIXME: If trailing zeros is 0, don't bother creating Mask. + if (unsigned TZs = C->countTrailingZeros()) { Constant *Mask = ConstantInt::get( BO0->getType(), - APInt::getLowBitsSet(C->getBitWidth(), - C->getBitWidth() - C->countTrailingZeros())); + APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } + // If there are no trailing zeros in the multiplier, just eliminate + // the multiplies (no masking is needed): + // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } break; } diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 39df422..ed570da 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -2920,7 +2920,7 @@ define i1 @eq_mul_constants(i32 %x, i32 %y) { define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @eq_mul_constants_splat( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> %y, %x +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> %x, %y ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = mul <2 x i32> %x, -- 2.7.4