From 053014f8f92c4c10847ab441392caeb6a6d0c783 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 1 Oct 2019 17:50:20 +0000
Subject: [PATCH] [InstCombine] Deal with -(trunc(X >>u 63)) -> trunc(X >>s 63)

Identical to it's trunc-less variant, just pretent-to hoist
trunc, and everything else still holds:
https://rise4fun.com/Alive/JRU

llvm-svn: 373364
---
 .../Transforms/InstCombine/InstCombineAddSub.cpp   | 37 +++++++++++++++-------
 .../InstCombine/high-bit-signmask-with-trunc.ll    | 29 +++++++----------
 2 files changed, 37 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 7e3d405..40cc188 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1666,29 +1666,42 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
 
   const APInt *Op0C;
   if (match(Op0, m_APInt(Op0C))) {
-    unsigned BitWidth = I.getType()->getScalarSizeInBits();
 
-    // -(X >>u 31) -> (X >>s 31)
-    // -(X >>s 31) -> (X >>u 31)
     if (Op0C->isNullValue()) {
+      Value *Op1Wide;
+      match(Op1, m_TruncOrSelf(m_Value(Op1Wide)));
+      bool HadTrunc = Op1Wide != Op1;
+      bool NoTruncOrTruncIsOneUse = !HadTrunc || Op1->hasOneUse();
+      unsigned BitWidth = Op1Wide->getType()->getScalarSizeInBits();
+
       Value *X;
       const APInt *ShAmt;
-      if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
+      // -(X >>u 31) -> (X >>s 31)
+      if (NoTruncOrTruncIsOneUse &&
+          match(Op1Wide, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
           *ShAmt == BitWidth - 1) {
-        Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1);
+        Value *ShAmtOp = cast<Instruction>(Op1Wide)->getOperand(1);
         Instruction *NewShift = BinaryOperator::CreateAShr(X, ShAmtOp);
-        NewShift->copyIRFlags(Op1);
-        return NewShift;
+        NewShift->copyIRFlags(Op1Wide);
+        if (!HadTrunc)
+          return NewShift;
+        Builder.Insert(NewShift);
+        return TruncInst::CreateTruncOrBitCast(NewShift, Op1->getType());
       }
-      if (match(Op1, m_AShr(m_Value(X), m_APInt(ShAmt))) &&
+      // -(X >>s 31) -> (X >>u 31)
+      if (NoTruncOrTruncIsOneUse &&
+          match(Op1Wide, m_AShr(m_Value(X), m_APInt(ShAmt))) &&
           *ShAmt == BitWidth - 1) {
-        Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1);
+        Value *ShAmtOp = cast<Instruction>(Op1Wide)->getOperand(1);
         Instruction *NewShift = BinaryOperator::CreateLShr(X, ShAmtOp);
-        NewShift->copyIRFlags(Op1);
-        return NewShift;
+        NewShift->copyIRFlags(Op1Wide);
+        if (!HadTrunc)
+          return NewShift;
+        Builder.Insert(NewShift);
+        return TruncInst::CreateTruncOrBitCast(NewShift, Op1->getType());
       }
 
-      if (Op1->hasOneUse()) {
+      if (!HadTrunc && Op1->hasOneUse()) {
         Value *LHS, *RHS;
         SelectPatternFlavor SPF = matchSelectPattern(Op1, LHS, RHS).Flavor;
         if (SPF == SPF_ABS || SPF == SPF_NABS) {
diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
index 5abd596..034c285 100644
--- a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
@@ -3,9 +3,8 @@
 
 define i32 @t0(i64 %x) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i64 [[X:%.*]], 63
-; CHECK-NEXT:    [[T1:%.*]] = trunc i64 [[T0]] to i32
-; CHECK-NEXT:    [[R:%.*]] = sub nsw i32 0, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i64 [[X:%.*]], 63
+; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %t0 = lshr i64 %x, 63
@@ -15,9 +14,8 @@ define i32 @t0(i64 %x) {
 }
 define i32 @t1_exact(i64 %x) {
 ; CHECK-LABEL: @t1_exact(
-; CHECK-NEXT:    [[T0:%.*]] = lshr exact i64 [[X:%.*]], 63
-; CHECK-NEXT:    [[T1:%.*]] = trunc i64 [[T0]] to i32
-; CHECK-NEXT:    [[R:%.*]] = sub nsw i32 0, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i64 [[X:%.*]], 63
+; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %t0 = lshr exact i64 %x, 63
@@ -27,9 +25,8 @@ define i32 @t1_exact(i64 %x) {
 }
 define i32 @t2(i64 %x) {
 ; CHECK-LABEL: @t2(
-; CHECK-NEXT:    [[T0:%.*]] = ashr i64 [[X:%.*]], 63
-; CHECK-NEXT:    [[T1:%.*]] = trunc i64 [[T0]] to i32
-; CHECK-NEXT:    [[R:%.*]] = sub i32 0, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[X:%.*]], 63
+; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %t0 = ashr i64 %x, 63
@@ -39,9 +36,8 @@ define i32 @t2(i64 %x) {
 }
 define i32 @t3_exact(i64 %x) {
 ; CHECK-LABEL: @t3_exact(
-; CHECK-NEXT:    [[T0:%.*]] = ashr exact i64 [[X:%.*]], 63
-; CHECK-NEXT:    [[T1:%.*]] = trunc i64 [[T0]] to i32
-; CHECK-NEXT:    [[R:%.*]] = sub i32 0, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i64 [[X:%.*]], 63
+; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %t0 = ashr exact i64 %x, 63
@@ -52,9 +48,8 @@ define i32 @t3_exact(i64 %x) {
 
 define <2 x i32> @t4(<2 x i64> %x) {
 ; CHECK-LABEL: @t4(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 63, i64 63>
-; CHECK-NEXT:    [[T1:%.*]] = trunc <2 x i64> [[T0]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i32> zeroinitializer, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %t0 = lshr <2 x i64> %x, <i64 63, i64 63>
@@ -83,8 +78,8 @@ define i32 @t6(i64 %x) {
 ; CHECK-LABEL: @t6(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i64 [[X:%.*]], 63
 ; CHECK-NEXT:    call void @use64(i64 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = trunc i64 [[T0]] to i32
-; CHECK-NEXT:    [[R:%.*]] = sub nsw i32 0, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i64 [[X]], 63
+; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %t0 = lshr i64 %x, 63
-- 
2.7.4