return nullptr;
}
+/// Try to fold a floor-division pattern into an arithmetic shift right:
+///   (X /s DivC) + sext ((X & (SMin | (DivC - 1))) >u SMin) --> X >>s log2(DivC)
+/// The sext(icmp) operand is -1 exactly when X is negative and the division
+/// is inexact; adding it converts the truncating sdiv into floor division,
+/// which is what ashr computes for a positive power-of-2 divisor.
+static Instruction *foldAddToAshr(BinaryOperator &Add) {
+ // Division must be by power-of-2, but not the minimum signed value.
+ // (isNegative() also rejects INT_MIN, whose bit pattern is a power of 2
+ // but for which the shift equivalence does not hold.)
+ Value *X;
+ const APInt *DivC;
+ if (!match(Add.getOperand(0), m_SDiv(m_Value(X), m_Power2(DivC))) ||
+ DivC->isNegative())
+ return nullptr;
+
+ // Rounding is done by adding -1 if the dividend (X) is negative and has any
+ // low bits set. The canonical pattern for that is an "ugt" compare with SMIN:
+ // sext (icmp ugt (X & (SMin | (DivC - 1))), SMIN)
+ // Only the shape is matched here; the exact mask value is verified below.
+ const APInt *MaskC;
+ ICmpInst::Predicate Pred;
+ if (!match(Add.getOperand(1),
+ m_SExt(m_ICmp(Pred, m_And(m_Specific(X), m_APInt(MaskC)),
+ m_SignMask()))) ||
+ Pred != ICmpInst::ICMP_UGT)
+ return nullptr;
+
+ // The mask must keep the sign bit plus exactly the low bits discarded by the
+ // division: (X & Mask) >u SMin holds iff X is negative and X % DivC != 0.
+ APInt SMin = APInt::getSignedMinValue(Add.getType()->getScalarSizeInBits());
+ if (*MaskC != (SMin | (*DivC - 1)))
+ return nullptr;
+
+ // (X / DivC) + sext ((X & (SMin | (DivC - 1))) >u SMin) --> X >>s log2(DivC)
+ return BinaryOperator::CreateAShr(
+ X, ConstantInt::get(Add.getType(), DivC->exactLogBase2()));
+}
+
Instruction *InstCombinerImpl::
canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
BinaryOperator &I) {
return BinaryOperator::CreateSub(B, Shl);
}
+ if (Instruction *Ashr = foldAddToAshr(I))
+ return Ashr;
+
// TODO(jingyue): Consider willNotOverflowSignedAdd and
// willNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
define i32 @floor_sdiv(i32 %x) {
; CHECK-LABEL: @floor_sdiv(
-; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
-; CHECK-NEXT: [[A:%.*]] = and i32 [[X]], -2147483645
-; CHECK-NEXT: [[I:%.*]] = icmp ugt i32 [[A]], -2147483648
-; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i32
-; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[D]], [[S]]
+; CHECK-NEXT: [[R:%.*]] = ashr i32 [[X:%.*]], 2
; CHECK-NEXT: ret i32 [[R]]
;
%d = sdiv i32 %x, 4
ret i32 %r
}
+; vectors work too and commute is handled by complexity-based canonicalization
+
define <2 x i32> @floor_sdiv_vec_commute(<2 x i32> %x) {
; CHECK-LABEL: @floor_sdiv_vec_commute(
-; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i32> [[X:%.*]], <i32 4, i32 4>
-; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[X]], <i32 -2147483645, i32 -2147483645>
-; CHECK-NEXT: [[I:%.*]] = icmp ugt <2 x i32> [[A]], <i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT: [[S:%.*]] = sext <2 x i1> [[I]] to <2 x i32>
-; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[D]], [[S]]
+; CHECK-NEXT: [[R:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 2, i32 2>
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%d = sdiv <2 x i32> %x, <i32 4, i32 4>
ret <2 x i32> %r
}
+; extra uses are ok
+
define i8 @floor_sdiv_uses(i8 %x) {
; CHECK-LABEL: @floor_sdiv_uses(
; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], 16
; CHECK-NEXT: [[I:%.*]] = icmp ugt i8 [[A]], -128
; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i8
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[D]], [[S]]
+; CHECK-NEXT: [[R:%.*]] = ashr i8 [[X]], 4
; CHECK-NEXT: ret i8 [[R]]
;
%d = sdiv i8 %x, 16
ret i8 %r
}
+; negative test
+
define i32 @floor_sdiv_wrong_div(i32 %x) {
; CHECK-LABEL: @floor_sdiv_wrong_div(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 8
ret i32 %r
}
+; negative test
+
define i32 @floor_sdiv_wrong_mask(i32 %x) {
; CHECK-LABEL: @floor_sdiv_wrong_mask(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
ret i32 %r
}
+; negative test
+
define i32 @floor_sdiv_wrong_cmp(i32 %x) {
; CHECK-LABEL: @floor_sdiv_wrong_cmp(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
ret i32 %r
}
+; negative test
+
define i32 @floor_sdiv_wrong_ext(i32 %x) {
; CHECK-LABEL: @floor_sdiv_wrong_ext(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4
ret i32 %r
}
+; negative test
+
define i32 @floor_sdiv_wrong_op(i32 %x, i32 %y) {
; CHECK-LABEL: @floor_sdiv_wrong_op(
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 4