From f400daae902875b488c92b13322e39f41eb20934 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 9 Jan 2023 16:29:24 -0500
Subject: [PATCH] [InstCombine] limit zext-of-icmp folds to bit-hacks

In the changed tests, this avoids creating extra instructions,
and there are no obvious regressions, at least in the IR tests.

Codegen should be able to create the shift+mask form if that
is profitable.

This is a more general fix for issue #59897 than 0eedc9e56712.
---
 llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp |  4 +++-
 llvm/test/Transforms/InstCombine/zext.ll             | 16 ++++++----------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 7b17a8b..d9f89962 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1024,7 +1024,9 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
     // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
     // zext (X != 0) to i32 --> X        iff X has only the low bit set.
     // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
-    if (Op1CV->isZero() && Cmp->isEquality()) {
+    if (Op1CV->isZero() && Cmp->isEquality() &&
+        (Cmp->getOperand(0)->getType() == Zext.getType() ||
+         Cmp->getPredicate() == ICmpInst::ICMP_NE)) {
       // If Op1C some other power of two, convert:
       KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
 
diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll
index 938978a..c0275e9 100644
--- a/llvm/test/Transforms/InstCombine/zext.ll
+++ b/llvm/test/Transforms/InstCombine/zext.ll
@@ -645,10 +645,9 @@ define i64 @and_trunc_extra_use1_wider_src(i65 %x, i32 %y) {
 
 define i16 @zext_icmp_eq0_pow2(i32 %x) {
 ; CHECK-LABEL: @zext_icmp_eq0_pow2(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP1]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 1
-; CHECK-NEXT:    [[Z:%.*]] = xor i16 [[TMP3]], 1
+; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 4
+; CHECK-NEXT:    [[I:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[I]] to i16
 ; CHECK-NEXT:    ret i16 [[Z]]
 ;
   %m = and i32 %x, 4
@@ -661,9 +660,8 @@ define i16 @zext_icmp_eq0_pow2_use1(i32 %x) {
 ; CHECK-LABEL: @zext_icmp_eq0_pow2_use1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 4
 ; CHECK-NEXT:    call void @use32(i32 [[M]])
-; CHECK-NEXT:    [[M_LOBIT:%.*]] = lshr exact i32 [[M]], 2
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[M_LOBIT]] to i16
-; CHECK-NEXT:    [[Z:%.*]] = xor i16 [[TMP1]], 1
+; CHECK-NEXT:    [[I:%.*]] = icmp eq i32 [[M]], 0
+; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[I]] to i16
 ; CHECK-NEXT:    ret i16 [[Z]]
 ;
   %m = and i32 %x, 4
@@ -678,9 +676,7 @@ define i16 @zext_icmp_eq0_pow2_use2(i32 %x) {
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 4
 ; CHECK-NEXT:    [[I:%.*]] = icmp eq i32 [[M]], 0
 ; CHECK-NEXT:    call void @use1(i1 [[I]])
-; CHECK-NEXT:    [[M_LOBIT:%.*]] = lshr exact i32 [[M]], 2
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[M_LOBIT]] to i16
-; CHECK-NEXT:    [[Z:%.*]] = xor i16 [[TMP1]], 1
+; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[I]] to i16
 ; CHECK-NEXT:    ret i16 [[Z]]
 ;
   %m = and i32 %x, 4
-- 
2.7.4