From e5ee0b06d694fe7749b56706f1bf67e22eaef628 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sun, 16 Oct 2022 10:01:10 -0400
Subject: [PATCH] [InstCombine] try to determine "exact" for sdiv

If the divisor is a power-of-2 or negative-power-of-2 and the dividend
is known to have >= trailing zeros than the divisor, the division is exact:
https://alive2.llvm.org/ce/z/UGBksM (general proof)
https://alive2.llvm.org/ce/z/D4yPS- (examples based on regression tests)

This isn't the most direct optimization (we could create ashr in these
examples instead of relying on existing folds for exact divides), but
it's possible that there's a more general constraint than just a pow2
divisor, so this might be extended in the future.

This should solve issue #58348.

Differential Revision: https://reviews.llvm.org/D135970
---
 llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 10 +++++++++-
 .../InstCombine/sdiv-exact-by-negative-power-of-two.ll   | 16 +++++++++++-----
 .../Transforms/InstCombine/sdiv-exact-by-power-of-two.ll | 15 +++++++++------
 3 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b98e673..c2c35c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1332,7 +1332,15 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
                               ConstantInt::getAllOnesValue(Ty));
   }
 
-  if (isKnownNonNegative(Op0, DL, 0, &AC, &I, &DT)) {
+  KnownBits KnownDividend = computeKnownBits(Op0, 0, &I);
+  if (!I.isExact() &&
+      (match(Op1, m_Power2(Op1C)) || match(Op1, m_NegatedPower2(Op1C))) &&
+      KnownDividend.countMinTrailingZeros() >= Op1C->countTrailingZeros()) {
+    I.setIsExact();
+    return &I;
+  }
+
+  if (KnownDividend.isNonNegative()) {
     // If both operands are unsigned, turn this into a udiv.
     if (isKnownNonNegative(Op1, DL, 0, &AC, &I, &DT)) {
       auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
index de74b31..81f22bb 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
@@ -64,8 +64,9 @@ define <2 x i8> @n4_vec_undef(<2 x i8> %x) {
 
 define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], -4
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[D_NEG:%.*]] = and i8 [[A]], -8
+; CHECK-NEXT:    [[D:%.*]] = sub nsw i8 0, [[D_NEG]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %a = and i8 %x, -32
@@ -75,8 +76,8 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 
 define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_limit(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], -32
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[D:%.*]] = sub nsw i8 0, [[A]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %a = and i8 %x, -32
@@ -84,6 +85,8 @@ define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
   ret i8 %d
 }
 
+; negative test - not enough low zeros in dividend
+
 define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @not_prove_exact_with_high_mask(
 ; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
@@ -98,7 +101,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i8> [[A]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], <i8 4, i8 4>
+; CHECK-NEXT:    [[D:%.*]] = sub nsw <2 x i8> zeroinitializer, [[D_NEG]]
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
   %a = shl <2 x i8> %x, <i8 5, i8 5>
@@ -106,6 +110,8 @@ define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y)
   ret <2 x i8> %d
 }
 
+; TODO: Needs knownbits to handle arbitrary vector constants.
+
 define <2 x i8> @prove_exact_with_high_mask_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 2>
diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
index 0cdc52b..36af868 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
@@ -110,8 +110,8 @@ define i8 @shl1_nsw_not_exact(i8 %x, i8 %y) {
 
 define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], 4
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[D:%.*]] = and i8 [[A]], -2
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %a = and i8 %x, -8
@@ -121,15 +121,16 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 
 define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_limit(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], 8
-; CHECK-NEXT:    ret i8 [[D]]
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 3
+; CHECK-NEXT:    ret i8 [[A]]
 ;
   %a = and i8 %x, -8
   %d = sdiv i8 %a, 8
   ret i8 %d
 }
 
+; negative test - not enough low zeros in dividend
+
 define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @not_prove_exact_with_high_mask(
 ; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
@@ -144,7 +145,7 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i8> [[A]], <i8 8, i8 8>
+; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i8> [[A]], <i8 3, i8 3>
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
   %a = shl <2 x i8> %x, <i8 3, i8 3>
@@ -152,6 +153,8 @@ define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y)
   ret <2 x i8> %d
 }
 
+; TODO: Needs knownbits to handle arbitrary vector constants.
+
 define <2 x i8> @prove_exact_with_high_mask_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 2>
-- 
2.7.4