From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 22 Mar 2018 14:08:16 +0000 (+0000)
Subject: [InstCombine] add folds for xor-of-icmp signbit tests (PR36682)
X-Git-Tag: llvmorg-7.0.0-rc1~9950
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=94c91b78e7f6a7fca6f569eb8264bc94a6e21511;p=platform%2Fupstream%2Fllvm.git

[InstCombine] add folds for xor-of-icmp signbit tests (PR36682)

This is a retry of r328119, which was reverted at r328145 because
it could crash when trying to combine icmps with different operand
types. This version adds a check for that and additional tests.

Original commit message:

This is part of solving:
https://bugs.llvm.org/show_bug.cgi?id=36682

There's also a leftover improvement (test1 in compare-signs.ll now folds) from the long-ago-closed:
https://bugs.llvm.org/show_bug.cgi?id=5438

https://rise4fun.com/Alive/dC1

llvm-svn: 328197
---

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index af116db..16e8c4f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2363,6 +2363,34 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
     }
   }
 
+  // TODO: This can be generalized to compares of non-signbits using
+  // decomposeBitTestICmp(). It could be enhanced more by using (something like)
+  // foldLogOpOfMaskedICmps().
+  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
+  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
+  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
+  if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
+      LHS0->getType() == RHS0->getType()) {
+    // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
+    // (X <  0) ^ (Y <  0) --> (X ^ Y) < 0
+    if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
+         PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) ||
+        (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
+         PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) {
+      Value *Zero = ConstantInt::getNullValue(LHS0->getType());
+      return Builder.CreateICmpSLT(Builder.CreateXor(LHS0, RHS0), Zero);
+    }
+    // (X > -1) ^ (Y <  0) --> (X ^ Y) > -1
+    // (X <  0) ^ (Y > -1) --> (X ^ Y) > -1
+    if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
+         PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) ||
+        (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
+         PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) {
+      Value *MinusOne = ConstantInt::getAllOnesValue(LHS0->getType());
+      return Builder.CreateICmpSGT(Builder.CreateXor(LHS0, RHS0), MinusOne);
+    }
+  }
+
   // Instead of trying to imitate the folds for and/or, decompose this 'xor'
   // into those logic ops. That is, try to turn this into an and-of-icmps
   // because we have many folds for that pattern.
diff --git a/llvm/test/Transforms/InstCombine/compare-signs.ll b/llvm/test/Transforms/InstCombine/compare-signs.ll
index 5d69d35..c6c56f2 100644
--- a/llvm/test/Transforms/InstCombine/compare-signs.ll
+++ b/llvm/test/Transforms/InstCombine/compare-signs.ll
@@ -1,15 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -instcombine -S < %s | FileCheck %s
+
 ; PR5438
 
-; TODO: This should also optimize down.
 define i32 @test1(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[T0:%.*]] = icmp sgt i32 [[A:%.*]], -1
-; CHECK-NEXT:    [[T1:%.*]] = icmp slt i32 [[B:%.*]], 0
-; CHECK-NEXT:    [[T2:%.*]] = xor i1 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = zext i1 [[T2]] to i32
-; CHECK-NEXT:    ret i32 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[DOTLOBIT:%.*]] = lshr i32 [[TMP1]], 31
+; CHECK-NEXT:    [[DOTLOBIT_NOT:%.*]] = xor i32 [[DOTLOBIT]], 1
+; CHECK-NEXT:    ret i32 [[DOTLOBIT_NOT]]
 ;
   %t0 = icmp sgt i32 %a, -1
   %t1 = icmp slt i32 %b, 0
@@ -36,7 +35,7 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone {
 
 define i32 @test3(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[T2_UNSHIFTED:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[T2_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[T2_UNSHIFTED_LOBIT:%.*]] = lshr i32 [[T2_UNSHIFTED]], 31
 ; CHECK-NEXT:    [[T2_UNSHIFTED_LOBIT_NOT:%.*]] = xor i32 [[T2_UNSHIFTED_LOBIT]], 1
 ; CHECK-NEXT:    ret i32 [[T2_UNSHIFTED_LOBIT_NOT]]
@@ -68,7 +67,7 @@ define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
 ; is one, not zero.
 define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: @test3i(
-; CHECK-NEXT:    [[T01:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[T01:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[T01]], 31
 ; CHECK-NEXT:    [[T4:%.*]] = xor i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i32 [[T4]]
@@ -84,7 +83,7 @@ define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
 
 define i1 @test4a(i32 %a) {
 ; CHECK-LABEL: @test4a(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 %a, 1
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 [[A:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %l = ashr i32 %a, 31
@@ -97,7 +96,7 @@ define i1 @test4a(i32 %a) {
 
 define <2 x i1> @test4a_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test4a_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> %a, <i32 1, i32 1>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %l = ashr <2 x i32> %a, <i32 31, i32 31>
@@ -110,7 +109,7 @@ define <2 x i1> @test4a_vec(<2 x i32> %a) {
 
 define i1 @test4b(i64 %a) {
 ; CHECK-LABEL: @test4b(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i64 %a, 1
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i64 [[A:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %l = ashr i64 %a, 63
@@ -123,7 +122,7 @@ define i1 @test4b(i64 %a) {
 
 define i1 @test4c(i64 %a) {
 ; CHECK-LABEL: @test4c(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i64 %a, 1
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i64 [[A:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %l = ashr i64 %a, 63
@@ -137,7 +136,7 @@ define i1 @test4c(i64 %a) {
 
 define <2 x i1> @test4c_vec(<2 x i64> %a) {
 ; CHECK-LABEL: @test4c_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i64> %a, <i64 1, i64 1>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i64> [[A:%.*]], <i64 1, i64 1>
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %l = ashr <2 x i64> %a, <i64 63, i64 63>
diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll
index 834e913..e19a061 100644
--- a/llvm/test/Transforms/InstCombine/xor-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll
@@ -42,14 +42,34 @@ define i1 @eq_ne_zero(i4 %x, i4 %y) {
 
 define i1 @slt_zero(i4 %x, i4 %y) {
 ; CHECK-LABEL: @slt_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i4 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %i0 = icmp slt i4 %x, 0
+  %i1 = icmp slt i4 %y, 0
+  %r = xor i1 %i0, %i1
+  ret i1 %r
+}
+
+; Don't fold when both icmps have extra uses; that would increase the instruction count.
+
+declare void @use(i1)
+
+define i1 @slt_zero_extra_uses(i4 %x, i4 %y) {
+; CHECK-LABEL: @slt_zero_extra_uses(
 ; CHECK-NEXT:    [[I0:%.*]] = icmp slt i4 [[X:%.*]], 0
 ; CHECK-NEXT:    [[I1:%.*]] = icmp slt i4 [[Y:%.*]], 0
 ; CHECK-NEXT:    [[R:%.*]] = xor i1 [[I0]], [[I1]]
+; CHECK-NEXT:    call void @use(i1 [[I0]])
+; CHECK-NEXT:    call void @use(i1 [[I1]])
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %i0 = icmp slt i4 %x, 0
   %i1 = icmp slt i4 %y, 0
   %r = xor i1 %i0, %i1
+  call void @use(i1 %i0)
+  call void @use(i1 %i1)
   ret i1 %r
 }
 
@@ -68,10 +88,9 @@ define i1 @sgt_zero(i4 %x, i4 %y) {
 
 define i1 @sgt_minus1(i4 %x, i4 %y) {
 ; CHECK-LABEL: @sgt_minus1(
-; CHECK-NEXT:    [[I0:%.*]] = icmp sgt i4 [[X:%.*]], -1
-; CHECK-NEXT:    [[I1:%.*]] = icmp sgt i4 [[Y:%.*]], -1
-; CHECK-NEXT:    [[R:%.*]] = xor i1 [[I0]], [[I1]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i4 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %i0 = icmp sgt i4 %x, -1
   %i1 = icmp sgt i4 %y, -1
@@ -81,10 +100,9 @@ define i1 @sgt_minus1(i4 %x, i4 %y) {
 
 define i1 @slt_zero_sgt_minus1(i4 %x, i4 %y) {
 ; CHECK-LABEL: @slt_zero_sgt_minus1(
-; CHECK-NEXT:    [[I0:%.*]] = icmp slt i4 [[X:%.*]], 0
-; CHECK-NEXT:    [[I1:%.*]] = icmp sgt i4 [[Y:%.*]], -1
-; CHECK-NEXT:    [[R:%.*]] = xor i1 [[I0]], [[I1]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i4 [[TMP1]], -1
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %i0 = icmp slt i4 %x, 0
   %i1 = icmp sgt i4 %y, -1
@@ -94,10 +112,9 @@ define i1 @slt_zero_sgt_minus1(i4 %x, i4 %y) {
 
 define <2 x i1> @sgt_minus1_slt_zero_sgt(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @sgt_minus1_slt_zero_sgt(
-; CHECK-NEXT:    [[I1:%.*]] = icmp sgt <2 x i4> [[X:%.*]], <i4 -1, i4 -1>
-; CHECK-NEXT:    [[I0:%.*]] = icmp slt <2 x i4> [[Y:%.*]], zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = xor <2 x i1> [[I0]], [[I1]]
-; CHECK-NEXT:    ret <2 x i1> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i4> [[TMP1]], <i4 -1, i4 -1>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %i1 = icmp sgt <2 x i4> %x, <i4 -1, i4 -1>
   %i0 = icmp slt <2 x i4> %y, zeroinitializer
@@ -105,6 +122,21 @@ define <2 x i1> @sgt_minus1_slt_zero_sgt(<2 x i4> %x, <2 x i4> %y) {
   ret <2 x i1> %r
 }
 
+; Don't try to fold (and crash) if the icmp operand types don't match.
+
+define i1 @different_type_cmp_ops(i32 %x, i64 %y) {
+; CHECK-LABEL: @different_type_cmp_ops(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[R:%.*]] = xor i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %cmp1 = icmp slt i32 %x, 0
+  %cmp2 = icmp slt i64 %y, 0
+  %r = xor i1 %cmp1, %cmp2
+  ret i1 %r
+}
+
 define i1 @test13(i8 %A, i8 %B) {
 ; CHECK-LABEL: @test13(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[A:%.*]], [[B:%.*]]