From 80bea345d11912c6473797fcea0866a7f0ca9cca Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 11 Sep 2019 12:04:26 +0000 Subject: [PATCH] [InstCombine] fold sign-bit compares of srem (srem X, pow2C) sgt/slt 0 can be reduced using bit hacks by masking off the sign bit and the module (low) bits: https://rise4fun.com/Alive/jSO A '2' divisor allows slightly more folding: https://rise4fun.com/Alive/tDBM Any chance to remove an 'srem' use is probably worthwhile, but this is limited to the one-use improvement case because doing more may expose other missing folds. That means it does nothing for PR21929 yet: https://bugs.llvm.org/show_bug.cgi?id=21929 Differential Revision: https://reviews.llvm.org/D67334 llvm-svn: 371610 --- .../Transforms/InstCombine/InstCombineCompares.cpp | 42 ++++++++++++++++++++++ .../Transforms/InstCombine/InstCombineInternal.h | 2 ++ .../Transforms/InstCombine/icmp-div-constant.ll | 28 ++++++++++----- 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7dd21ed..e23e85b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2249,6 +2249,44 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, return nullptr; } +Instruction *InstCombiner::foldICmpSRemConstant(ICmpInst &Cmp, + BinaryOperator *SRem, + const APInt &C) { + // Match an 'is positive' or 'is negative' comparison of remainder by a + // constant power-of-2 value: + // (X % pow2C) sgt/slt 0 + const ICmpInst::Predicate Pred = Cmp.getPredicate(); + if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT) + return nullptr; + + // TODO: The one-use check is standard because we do not typically want to + // create longer instruction sequences, but this might be a special-case + // because srem is not good for analysis or codegen. + if (!SRem->hasOneUse()) + return nullptr; + + const APInt *DivisorC; + if (!C.isNullValue() || !match(SRem->getOperand(1), m_Power2(DivisorC))) + return nullptr; + + // Mask off the sign bit and the modulo bits (low-bits). + Type *Ty = SRem->getType(); + APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits()); + Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1)); + Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC); + + // For 'is positive?' check that the sign-bit is clear and at least 1 masked + // bit is set. Example: + // (i8 X % 32) s> 0 --> (X & 159) s> 0 + if (Pred == ICmpInst::ICMP_SGT) + return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty)); + + // For 'is negative?' check that the sign-bit is set and at least 1 masked + // bit is set. Example: + // (i16 X % 4) s< 0 --> (X & 32771) u> 32768 + return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask)); +} + /// Fold icmp (udiv X, Y), C. Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, @@ -2806,6 +2844,10 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C)) return I; break; + case Instruction::SRem: + if (Instruction *I = foldICmpSRemConstant(Cmp, BO, *C)) + return I; + break; case Instruction::UDiv: if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C)) return I; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 91f5228..5e4a56b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -891,6 +891,8 @@ private: const APInt &C); Instruction *foldICmpShrConstant(ICmpInst &Cmp, BinaryOperator *Shr, const APInt &C); + Instruction *foldICmpSRemConstant(ICmpInst &Cmp, BinaryOperator *UDiv, + const APInt &C); Instruction *foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, const APInt &C); Instruction *foldICmpDivConstant(ICmpInst &Cmp, BinaryOperator *Div, diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll index a3c2409..8028dd6 100644 --- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -3,8 +3,8 @@ define i1 @is_rem2_neg_i8(i8 %x) { ; CHECK-LABEL: @is_rem2_neg_i8( -; CHECK-NEXT: [[S:%.*]] = srem i8 [[X:%.*]], 2 -; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[S]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -127 +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], -127 ; CHECK-NEXT: ret i1 [[R]] ; %s = srem i8 %x, 2 @@ -14,8 +14,8 @@ define i1 @is_rem2_neg_i8(i8 %x) { define <2 x i1> @is_rem2_pos_v2i8(<2 x i8> %x) { ; CHECK-LABEL: @is_rem2_pos_v2i8( -; CHECK-NEXT: [[S:%.*]] = srem <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[S]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[R]] ; %s = srem <2 x i8> %x, @@ -23,10 +23,12 @@ define <2 x i1> @is_rem2_pos_v2i8(<2 x i8> %x) { ret <2 x i1> %r } +; i8 -97 == 159 == 0b10011111 + define i1 @is_rem32_pos_i8(i8 %x) { ; CHECK-LABEL: @is_rem32_pos_i8( -; CHECK-NEXT: [[S:%.*]] = srem i8 [[X:%.*]], 32 -; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[S]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -97 +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %s = srem i8 %x, 32 @@ -34,10 +36,12 @@ define i1 @is_rem32_pos_i8(i8 %x) { ret i1 %r } +; i16 -32765 == 32771 == 0b1000000000000011 + define i1 @is_rem4_neg_i16(i16 %x) { ; CHECK-LABEL: @is_rem4_neg_i16( -; CHECK-NEXT: [[S:%.*]] = srem i16 [[X:%.*]], 4 -; CHECK-NEXT: [[R:%.*]] = icmp slt i16 [[S]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], -32765 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i16 [[TMP1]], -32768 ; CHECK-NEXT: ret i1 [[R]] ; %s = srem i16 %x, 4 @@ -47,6 +51,8 @@ define i1 @is_rem4_neg_i16(i16 %x) { declare void @use(i32) +; TODO: This is still worth folding because srem is difficult? + define i1 @is_rem32_neg_i32_extra_use(i32 %x) { ; CHECK-LABEL: @is_rem32_neg_i32_extra_use( ; CHECK-NEXT: [[S:%.*]] = srem i32 [[X:%.*]], 32 @@ -60,6 +66,8 @@ define i1 @is_rem32_neg_i32_extra_use(i32 %x) { ret i1 %r } +; Negative test - wrong compare constant + define i1 @is_rem8_nonneg_i16(i16 %x) { ; CHECK-LABEL: @is_rem8_nonneg_i16( ; CHECK-NEXT: [[S:%.*]] = srem i16 [[X:%.*]], 8 @@ -71,6 +79,8 @@ define i1 @is_rem8_nonneg_i16(i16 %x) { ret i1 %r } +; Negative test - wrong remainder constant + define i1 @is_rem3_neg_i8(i8 %x) { ; CHECK-LABEL: @is_rem3_neg_i8( ; CHECK-NEXT: [[S:%.*]] = srem i8 [[X:%.*]], 3 @@ -82,6 +92,8 @@ define i1 @is_rem3_neg_i8(i8 %x) { ret i1 %r } +; Negative test - wrong compare constant + define i1 @is_rem16_something_i8(i8 %x) { ; CHECK-LABEL: @is_rem16_something_i8( ; CHECK-NEXT: [[S:%.*]] = srem i8 [[X:%.*]], 16 -- 2.7.4