From e5ee0b06d694fe7749b56706f1bf67e22eaef628 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 16 Oct 2022 10:01:10 -0400 Subject: [PATCH] [InstCombine] try to determine "exact" for sdiv If the divisor is a power-of-2 or negative-power-of-2 and the dividend is known to have >= trailing zeros than the divisor, the division is exact: https://alive2.llvm.org/ce/z/UGBksM (general proof) https://alive2.llvm.org/ce/z/D4yPS- (examples based on regression tests) This isn't the most direct optimization (we could create ashr in these examples instead of relying on existing folds for exact divides), but it's possible that there's a more general constraint than just a pow2 divisor, so this might be extended in the future. This should solve issue #58348. Differential Revision: https://reviews.llvm.org/D135970 --- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 10 +++++++++- .../InstCombine/sdiv-exact-by-negative-power-of-two.ll | 16 +++++++++++----- .../Transforms/InstCombine/sdiv-exact-by-power-of-two.ll | 15 +++++++++------ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index b98e673..c2c35c6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1332,7 +1332,15 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { ConstantInt::getAllOnesValue(Ty)); } - if (isKnownNonNegative(Op0, DL, 0, &AC, &I, &DT)) { + KnownBits KnownDividend = computeKnownBits(Op0, 0, &I); + if (!I.isExact() && + (match(Op1, m_Power2(Op1C)) || match(Op1, m_NegatedPower2(Op1C))) && + KnownDividend.countMinTrailingZeros() >= Op1C->countTrailingZeros()) { + I.setIsExact(); + return &I; + } + + if (KnownDividend.isNonNegative()) { // If both operands are unsigned, turn this into a udiv. if (isKnownNonNegative(Op1, DL, 0, &AC, &I, &DT)) { auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll index de74b31..81f22bb 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll @@ -64,8 +64,9 @@ define <2 x i8> @n4_vec_undef(<2 x i8> %x) { define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) { ; CHECK-LABEL: @prove_exact_with_high_mask( -; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], -32 -; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[A]], -4 +; CHECK-NEXT: [[A:%.*]] = ashr i8 [[X:%.*]], 2 +; CHECK-NEXT: [[D_NEG:%.*]] = and i8 [[A]], -8 +; CHECK-NEXT: [[D:%.*]] = sub nsw i8 0, [[D_NEG]] ; CHECK-NEXT: ret i8 [[D]] ; %a = and i8 %x, -32 @@ -75,8 +76,8 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) { define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_limit( -; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], -32 -; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[A]], -32 +; CHECK-NEXT: [[A:%.*]] = ashr i8 [[X:%.*]], 5 +; CHECK-NEXT: [[D:%.*]] = sub nsw i8 0, [[A]] ; CHECK-NEXT: ret i8 [[D]] ; %a = and i8 %x, -32 @@ -84,6 +85,8 @@ define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) { ret i8 %d } +; negative test - not enough low zeros in dividend + define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) { ; CHECK-LABEL: @not_prove_exact_with_high_mask( ; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], -32 @@ -98,7 +101,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) { define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec( ; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[A]], +; CHECK-NEXT: [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], +; CHECK-NEXT: [[D:%.*]] = sub nsw <2 x i8> zeroinitializer, [[D_NEG]] ; CHECK-NEXT: ret <2 x i8> [[D]] ; %a = shl <2 x i8> %x, @@ -106,6 +110,8 @@ define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) ret <2 x i8> %d } +; TODO: Needs knownbits to handle arbitrary vector constants. + define <2 x i8> @prove_exact_with_high_mask_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_vec( ; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll index 0cdc52b..36af868 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll @@ -110,8 +110,8 @@ define i8 @shl1_nsw_not_exact(i8 %x, i8 %y) { define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) { ; CHECK-LABEL: @prove_exact_with_high_mask( -; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], -8 -; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[A]], 4 +; CHECK-NEXT: [[A:%.*]] = ashr i8 [[X:%.*]], 2 +; CHECK-NEXT: [[D:%.*]] = and i8 [[A]], -2 ; CHECK-NEXT: ret i8 [[D]] ; %a = and i8 %x, -8 @@ -121,15 +121,16 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) { define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_limit( -; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], -8 -; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[A]], 8 -; CHECK-NEXT: ret i8 [[D]] +; CHECK-NEXT: [[A:%.*]] = ashr i8 [[X:%.*]], 3 +; CHECK-NEXT: ret i8 [[A]] ; %a = and i8 %x, -8 %d = sdiv i8 %a, 8 ret i8 %d } +; negative test - not enough low zeros in dividend + define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) { ; CHECK-LABEL: @not_prove_exact_with_high_mask( ; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], -8 @@ -144,7 +145,7 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) { define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec( ; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[A]], +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i8> [[A]], ; CHECK-NEXT: ret <2 x i8> [[D]] ; %a = shl <2 x i8> %x, @@ -152,6 +153,8 @@ define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) ret <2 x i8> %d } +; TODO: Needs knownbits to handle arbitrary vector constants. + define <2 x i8> @prove_exact_with_high_mask_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_vec( ; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], -- 2.7.4