From 0abde4bc9273374bbef68b2d24b0158f8fb338c2 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 12 Nov 2020 19:59:56 -0500 Subject: [PATCH] [InstCombine] fold sub of low-bit masked value from offset of same value There might be some demanded/known bits way to generalize this, but I'm not seeing it right now. This came up as a regression when I was looking at a different demanded bits improvement. https://rise4fun.com/Alive/5fl Name: general Pre: ((-1 << countTrailingZeros(C1)) & C2) == 0 %a1 = add i8 %x, C1 %a2 = and i8 %x, C2 %r = sub i8 %a1, %a2 => %r = and i8 %a1, ~C2 Name: test 1 %a1 = add i8 %x, 192 %a2 = and i8 %x, 10 %r = sub i8 %a1, %a2 => %r = and i8 %a1, -11 Name: test 2 %a1 = add i8 %x, -108 %a2 = and i8 %x, 3 %r = sub i8 %a1, %a2 => %r = and i8 %a1, -4 --- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 14 ++++++++++++++ llvm/test/Transforms/InstCombine/sub.ll | 7 ++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index e78df30..abeddf4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2051,6 +2051,20 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return SelectInst::Create(Cmp, Neg, A); } + // If we are subtracting a low-bit masked subset of some value from an add + // of that same value with no low bits changed, that is clearing some low bits + // of the sum: + // sub (X + AddC), (X & AndC) --> and (X + AddC), ~AndC + const APInt *AddC, *AndC; + if (match(Op0, m_Add(m_Value(X), m_APInt(AddC))) && + match(Op1, m_And(m_Specific(X), m_APInt(AndC)))) { + unsigned BitWidth = Ty->getScalarSizeInBits(); + unsigned Cttz = AddC->countTrailingZeros(); + APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz)); + if ((HighMask & *AndC).isNullValue()) + return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, 
~(*AndC))); + } + if (Instruction *V = canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) return V; diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index d703004..3e9c066 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -1464,8 +1464,7 @@ define i8 @sub_add_sub_reassoc_use2(i8 %w, i8 %x, i8 %y, i8 %z) { define i8 @sub_mask_lowbits(i8 %x) { ; CHECK-LABEL: @sub_mask_lowbits( ; CHECK-NEXT: [[A1:%.*]] = add i8 [[X:%.*]], -108 -; CHECK-NEXT: [[A2:%.*]] = and i8 [[X]], 3 -; CHECK-NEXT: [[R:%.*]] = sub i8 [[A1]], [[A2]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[A1]], -4 ; CHECK-NEXT: ret i8 [[R]] ; %a1 = add i8 %x, 148 ; 0x94 @@ -1474,6 +1473,8 @@ define i8 @sub_mask_lowbits(i8 %x) { ret i8 %r } +; Negative test - low-bit mask must not overlap with offset + define i8 @sub_not_mask_lowbits(i8 %x) { ; CHECK-LABEL: @sub_not_mask_lowbits( ; CHECK-NEXT: [[A1:%.*]] = add i8 [[X:%.*]], 4 @@ -1492,7 +1493,7 @@ define <2 x i8> @sub_mask_lowbits_splat_extra_use(<2 x i8> %x, <2 x i8>* %p) { ; CHECK-NEXT: [[A1:%.*]] = add <2 x i8> [[X:%.*]], <i8 -64, i8 -64> ; CHECK-NEXT: [[A2:%.*]] = and <2 x i8> [[X]], <i8 10, i8 10> ; CHECK-NEXT: store <2 x i8> [[A2]], <2 x i8>* [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> [[A1]], [[A2]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[A1]], <i8 -11, i8 -11> ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a1 = add <2 x i8> %x, <i8 192, i8 192> ; 0xc0 -- 2.7.4