From bfde8619355a06d9f1a199e9cb3f8d41aef5c05b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 17 Jun 2022 10:41:00 -0400 Subject: [PATCH] [InstCombine] convert mask and shift of power-of-2 to cmp+select When the mask is a power-of-2 constant and op0 is a shifted-power-of-2 constant, test if the shift amount equals the offset bit index: (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0 (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0 This is an alternate to D127610 with a more general pattern. We match only shift+and instead of the trailing xor, so we see a few more tests diffs. I think we discussed this initially in D126617. Here are proofs for shifts in both directions: https://alive2.llvm.org/ce/z/CFrLs4 The test diffs look equal or better for IR, and this makes the patterns more uniform in IR. The backend can partially invert this in both cases if that is profitable. It is not trivially reversible, however, so if we find perf regressions that are not easy to undo, then we may want to revert this. Differential Revision: https://reviews.llvm.org/D127801 --- .../Transforms/InstCombine/InstCombineAndOrXor.cpp | 17 +++++++++++ llvm/test/Transforms/InstCombine/and.ll | 26 +++++++---------- llvm/test/Transforms/InstCombine/icmp-and-shift.ll | 33 ++++++++++------------ .../InstCombine/lshr-and-signbit-icmpeq-zero.ll | 5 ++-- 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index af0efe1..cc06e2f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1914,6 +1914,23 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { } } + // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2 + // constant, test if the shift amount equals the offset bit index: + // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? 
C : 0 + // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0 + if (C->isPowerOf2() && + match(Op0, m_OneUse(m_LogicalShift(m_Power2(ShiftC), m_Value(X))))) { + int Log2ShiftC = ShiftC->exactLogBase2(); + int Log2C = C->exactLogBase2(); + bool IsShiftLeft = + cast<BinaryOperator>(Op0)->getOpcode() == Instruction::Shl; + int BitNum = IsShiftLeft ? Log2C - Log2ShiftC : Log2ShiftC - Log2C; + assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask"); + Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, BitNum)); + return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C), + ConstantInt::getNullValue(Ty)); + } + Constant *C1, *C2; const APInt *C3 = C; Value *X; diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index fcf1db2..6143e61 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -1876,8 +1876,8 @@ define <3 x i16> @shl_lshr_pow2_const_case1_undef3_vec(<3 x i16> %x) { define i16 @shl_lshr_pow2_const_case2(i16 %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case2( -; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0 ; CHECK-NEXT: ret i16 [[R]] ; %shl = shl i16 16, %x @@ -1886,13 +1886,10 @@ define i16 @shl_lshr_pow2_const_case2(i16 %x) { ret i16 %r } -; TODO: this pattern can be transform to icmp+select - define i16 @shl_lshr_pow2_not_const_case2(i16 %x) { ; CHECK-LABEL: @shl_lshr_pow2_not_const_case2( -; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i16 [[TMP1]], 8 -; CHECK-NEXT: [[R:%.*]] = xor i16 [[AND]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 0, i16 8 ; CHECK-NEXT: ret i16 [[R]] ; %shl = shl i16 16, %x @@ -1969,8 +1966,8 @@ define i16 @shl_lshr_pow2_const_negative_nopow2_2(i16 %x) { define i16 
@lshr_lshr_pow2_const(i16 %x) { ; CHECK-LABEL: @lshr_lshr_pow2_const( -; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 32, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 3 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 4, i16 0 ; CHECK-NEXT: ret i16 [[R]] ; %lshr1 = lshr i16 2048, %x @@ -2032,8 +2029,8 @@ define i16 @lshr_lshr_pow2_const_negative_overflow(i16 %x) { define i16 @lshr_shl_pow2_const_case1(i16 %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 1024, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0 ; CHECK-NEXT: ret i16 [[R]] ; %lshr1 = lshr i16 256, %x @@ -2042,13 +2039,10 @@ define i16 @lshr_shl_pow2_const_case1(i16 %x) { ret i16 %r } -; TODO: this pattern can be transform to icmp+select - define i16 @lshr_shl_pow2_const_xor(i16 %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_xor( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 1024, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i16 [[TMP1]], 8 -; CHECK-NEXT: [[R:%.*]] = xor i16 [[AND]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 0, i16 8 ; CHECK-NEXT: ret i16 [[R]] ; %lshr1 = lshr i16 256, %x diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll index 87b5942..a67017f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll +++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll @@ -329,9 +329,9 @@ define i32 @icmp_ne_and1_lshr_pow2(i32 %0) { define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and1_lshr_pow2_vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], -; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; 
CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] ; %lshr = lshr <2 x i32> <i32 8, i32 8>, %0 %and = and <2 x i32> %lshr, <i32 1, i32 1> @@ -342,10 +342,9 @@ define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { define i32 @icmp_eq_and_pow2_lshr_pow2(i32 %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2( -; CHECK-NEXT: [[AND:%.*]] = lshr i32 2, [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] ; %lshr = lshr i32 8, %0 %and = and i32 %lshr, 4 @@ -367,10 +366,9 @@ define i32 @icmp_eq_and_pow2_lshr_pow2_case2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> <i32 2, i32 2>, [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], <i32 1, i32 1> -; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] ; %lshr = lshr <2 x i32> <i32 8, i32 8>, %0 %and = and <2 x i32> %lshr, <i32 4, i32 4> @@ -381,10 +379,9 @@ define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { define i32 @icmp_ne_and_pow2_lshr_pow2(i32 %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2( -; CHECK-NEXT: [[AND:%.*]] = lshr i32 2, [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] ; %lshr = lshr i32 8, %0 %and = and i32 %lshr, 4 @@ -406,9 +403,9 @@ define i32 @icmp_ne_and_pow2_lshr_pow2_case2(i32 %0) { define 
<2 x i32> @icmp_ne_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> <i32 2, i32 2>, [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1> -; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] ; %lshr = lshr <2 x i32> <i32 8, i32 8>, %0 %and = and <2 x i32> %lshr, <i32 4, i32 4> diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll index 1388e5a..6686098 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll @@ -193,9 +193,8 @@ define i1 @scalar_i32_lshr_and_signbit_eq_X_is_constant1(i32 %y) { define i1 @scalar_i32_lshr_and_negC_eq_X_is_constant2(i32 %y) { ; CHECK-LABEL: @scalar_i32_lshr_and_negC_eq_X_is_constant2( -; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -2147483648, [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[LSHR]], -1 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[Y:%.*]], 0 +; CHECK-NEXT: ret i1 [[TMP1]] ; %lshr = lshr i32 2147483648, %y %and = and i32 %lshr, 2147483648 -- 2.7.4