From: Roman Lebedev
Date: Wed, 18 Jul 2018 10:55:17 +0000 (+0000)
Subject: [InstCombine] Re-commit: Fold 'check for [no] signed truncation' pattern
X-Git-Tag: llvmorg-7.0.0-rc1~1154
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3cb87e905c033d3bc02599b3ed1728007abbe28a;p=platform%2Fupstream%2Fllvm.git

[InstCombine] Re-commit: Fold 'check for [no] signed truncation' pattern

Summary:
[[ https://bugs.llvm.org/show_bug.cgi?id=38149 | PR38149 ]]

As discussed in https://reviews.llvm.org/D49179#1158957 and later, the IR
for the 'check for [no] signed truncation' pattern can be improved:
https://rise4fun.com/Alive/gBf
^ that pattern will be produced by the Implicit Integer Truncation sanitizer
(https://reviews.llvm.org/D48958, https://bugs.llvm.org/show_bug.cgi?id=21530)
in the signed case, so it is probably a good idea to improve it.

The DAGCombine will reverse this transform; see https://reviews.llvm.org/D49266

This transform is surprisingly frustrating. It does not deal with non-splat
shift amounts, or with undef shift amounts. I've outlined what I think the
solution should be:
```
// Potential handling of non-splats: for each element:
//  * if both are undef, replace with constant 0.
//    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
//  * if both are not undef, and are different, bailout.
//  * else, only one is undef, then pick the non-undef one.
```

This is a re-commit: the original patch, committed in rL337190, was reverted
in rL337344 because it broke the Chromium build
(https://bugs.llvm.org/show_bug.cgi?id=38204 and https://crbug.com/864832).

Proofs that the fixed folds are ok: https://rise4fun.com/Alive/VYM

Differential Revision: https://reviews.llvm.org/D49320

llvm-svn: 337376
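For a concrete illustration of the fold (this simply mirrors the @p0 tests in
the diff below): with i8 and MaskedBits = 5, hence KeptBits = 3,

```
; Before: does sign-extending the low 3 bits of %x give back %x?
  %tmp0 = shl i8 %x, 5
  %tmp1 = ashr i8 %tmp0, 5
  %tmp2 = icmp eq i8 %tmp1, %x
; After: AddCst = 1 << (KeptBits-1) = 4, ICmpCst = 1 << KeptBits = 8.
  %tmp1 = add i8 %x, 4
  %tmp2 = icmp ult i8 %tmp1, 8
```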
---

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index f91eb9d..6de92a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2945,6 +2945,81 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
   return Builder.CreateICmp(DstPred, X, M);
 }
 
+/// Some comparisons can be simplified.
+/// In this case, we are looking for comparisons that look like
+/// a check for a lossy signed truncation.
+/// Folds:   (MaskedBits is a constant.)
+///   ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
+/// Into:
+///   (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
+/// Where KeptBits = bitwidth(%x) - MaskedBits
+static Value *
+foldICmpWithTruncSignExtendedVal(ICmpInst &I,
+                                 InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate SrcPred;
+  Value *X;
+  const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
+  // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
+  if (!match(&I, m_c_ICmp(SrcPred,
+                          m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
+                                          m_APInt(C1))),
+                          m_Deferred(X))))
+    return nullptr;
+
+  // Potential handling of non-splats: for each element:
+  //  * if both are undef, replace with constant 0.
+  //    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
+  //  * if both are not undef, and are different, bailout.
+  //  * else, only one is undef, then pick the non-undef one.
+
+  // The shift amount must be equal.
+  if (*C0 != *C1)
+    return nullptr;
+  const APInt &MaskedBits = *C0;
+  assert(MaskedBits != 0 && "shift by zero should be folded away already.");
+
+  ICmpInst::Predicate DstPred;
+  switch (SrcPred) {
+  case ICmpInst::Predicate::ICMP_EQ:
+    // ((%x << MaskedBits) a>> MaskedBits) == %x
+    //   =>
+    // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
+    DstPred = ICmpInst::Predicate::ICMP_ULT;
+    break;
+  case ICmpInst::Predicate::ICMP_NE:
+    // ((%x << MaskedBits) a>> MaskedBits) != %x
+    //   =>
+    // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
+    DstPred = ICmpInst::Predicate::ICMP_UGE;
+    break;
+  // FIXME: are more folds possible?
+  default:
+    return nullptr;
+  }
+
+  auto *XType = X->getType();
+  const unsigned XBitWidth = XType->getScalarSizeInBits();
+  const APInt BitWidth = APInt(XBitWidth, XBitWidth);
+  assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
+
+  // KeptBits = bitwidth(%x) - MaskedBits
+  const APInt KeptBits = BitWidth - MaskedBits;
+  assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
+  // ICmpCst = (1 << KeptBits)
+  const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
+  assert(ICmpCst.isPowerOf2());
+  // AddCst = (1 << (KeptBits-1))
+  const APInt AddCst = ICmpCst.lshr(1);
+  assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
+
+  // T0 = add %x, AddCst
+  Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
+  // T1 = T0 DstPred ICmpCst
+  Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
+
+  return T1;
+}
+
 /// Try to fold icmp (binop), X or icmp X, (binop).
 /// TODO: A large part of this logic is duplicated in InstSimplify's
 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3285,6 +3360,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
   if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
+    return replaceInstUsesWith(I, V);
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
index 2e75ae6..60aa4d4 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
@@ -15,9 +15,8 @@
 
 define i1 @p0(i8 %x) {
 ; CHECK-LABEL: @p0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
@@ -29,9 +28,8 @@ define i1 @p0(i8 %x) {
 ; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
 define i1 @pb(i65 %x) {
 ; CHECK-LABEL: @pb(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i65 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i65 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i65 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i65 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i65 %x, 1
@@ -46,9 +44,8 @@ define i1 @pb(i65 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 8, i8 8>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
@@ -118,9 +115,8 @@ declare i8 @gen8()
 define i1 @c0() {
 ; CHECK-LABEL: @c0(
 ; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %x = call i8 @gen8()
@@ -140,8 +136,8 @@ define i1 @n_oneuse0(i8 %x) {
 ; CHECK-LABEL: @n_oneuse0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
 ; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
index 6cd32db..90d19be 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
@@ -15,9 +15,8 @@
 
 define i1 @p0(i8 %x) {
 ; CHECK-LABEL: @p0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
@@ -29,9 +28,8 @@ define i1 @p0(i8 %x) {
 ; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
 define i1 @pb(i65 %x) {
 ; CHECK-LABEL: @pb(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i65 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i65 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i65 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i65 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i65 %x, 1
@@ -46,9 +44,8 @@ define i1 @pb(i65 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i8> [[TMP1]], <i8 7, i8 7>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
@@ -118,9 +115,8 @@ declare i8 @gen8()
 define i1 @c0() {
 ; CHECK-LABEL: @c0(
 ; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %x = call i8 @gen8()
@@ -140,8 +136,8 @@ define i1 @n_oneuse0(i8 %x) {
 ; CHECK-LABEL: @n_oneuse0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
 ; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
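Appendix (editorial note, not part of the commit): a minimal standalone C++
sketch that exhaustively verifies the scalar i8 instance of the fold exercised
by the @p0 tests above, i.e. that ((x << 5) a>> 5) == x holds iff (x + 4) u< 8.
It assumes >> on a negative signed value is an arithmetic shift (true on
mainstream compilers; guaranteed since C++20); the file name is arbitrary,
e.g. c++ verify.cpp && ./a.out.

```
#include <cstdint>
#include <cstdio>

int main() {
  for (int i = 0; i < 256; ++i) {
    int8_t x = (int8_t)i;
    // Original pattern: ((%x << 5) a>> 5) == %x.
    int8_t shifted = (int8_t)((uint8_t)x << 5); // shl i8 %x, 5 (wraps mod 256)
    int8_t back = (int8_t)(shifted >> 5);       // ashr i8 %tmp0, 5
    bool original = (back == x);
    // Folded pattern: (add i8 %x, 4) u< 8; the uint8_t cast gives the
    // unsigned (modular) view of the i8 addition.
    bool folded = (uint8_t)(x + 4) < 8;
    if (original != folded) {
      printf("mismatch at x = %d\n", (int)x);
      return 1;
    }
  }
  printf("fold verified for all i8 values (MaskedBits = 5)\n");
  return 0;
}
```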