From 23a116c8c446f82ec5e2d2337c3253d0dc9c75b5 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 3 Jun 2021 16:13:16 -0400
Subject: [PATCH] [InstCombine] convert lshr to ashr to eliminate cast op

This is similar to b865eead7657 ( D103617 ) and fixes:
https://llvm.org/PR50575

41b71f718b94c6f12b did this and more (noted with TODO comments in the
tests), but it didn't handle the case where the destination is narrower
than the source, so it got reverted.

This is a simple match-and-replace. If there's evidence that the TODO
cases are useful, we can revisit/extend.
---
 .../Transforms/InstCombine/InstCombineCasts.cpp    | 12 +++++
 .../InstCombine/lshr-trunc-sext-to-ashr-sext.ll    | 53 ++++++++++++----------
 2 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f658dc3..433d90a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1540,6 +1540,18 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
       Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
       return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
     }
+
+    // If we are replacing shifted-in high zero bits with sign bits, convert
+    // the logical shift to an arithmetic shift and eliminate the cast to
+    // the intermediate type:
+    // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
+    Value *Y;
+    if (Src->hasOneUse() &&
+        match(X, m_LShr(m_Value(Y),
+                        m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
+      Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
+      return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
+    }
   }
 
   if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
diff --git a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
index 1333833..1af708e 100644
--- a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
@@ -13,9 +13,8 @@ declare void @usevec4(<2 x i4>)
 
 define i16 @t0(i8 %x) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[TMP1]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 4
@@ -26,9 +25,8 @@ define i16 @t0(i8 %x) {
 
 define i16 @t1(i8 %x) {
 ; CHECK-LABEL: @t1(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i3
-; CHECK-NEXT:    [[C:%.*]] = sext i3 [[B]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[TMP1]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 5
@@ -39,9 +37,8 @@ define i16 @t1(i8 %x) {
 
 define i16 @t2(i7 %x) {
 ; CHECK-LABEL: @t2(
-; CHECK-NEXT:    [[A:%.*]] = lshr i7 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = trunc i7 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i7 [[X:%.*]], 3
+; CHECK-NEXT:    [[C:%.*]] = sext i7 [[TMP1]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i7 %x, 3
@@ -65,9 +62,8 @@ define i16 @n3(i8 %x) {
 
 define <2 x i16> @t4_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @t4_vec_splat(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 4>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 4>
@@ -78,9 +74,8 @@ define <2 x i16> @t4_vec_splat(<2 x i8> %x) {
 
 define <2 x i16> @t5_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_vec_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 undef>
@@ -89,6 +84,8 @@ define <2 x i16> @t5_vec_undef(<2 x i8> %x) {
   ret <2 x i16> %c
 }
 
+; TODO: We could convert both uses of %a to ashr.
+
 define i16 @t6_extrause0(i8 %x) {
 ; CHECK-LABEL: @t6_extrause0(
 ; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
@@ -103,6 +100,9 @@ define i16 @t6_extrause0(i8 %x) {
   %c = sext i4 %b to i16
   ret i16 %c
 }
+
+; TODO: We could convert both uses of %a to ashr.
+
 define <2 x i16> @t7_extrause0_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t7_extrause0_vec_undef(
 ; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
@@ -117,12 +117,15 @@ define <2 x i16> @t7_extrause0_vec_undef(<2 x i8> %x) {
   %c = sext <2 x i4> %b to <2 x i16>
   ret <2 x i16> %c
 }
+
+; TODO: We could convert both uses of %a to ashr.
+
 define i16 @t8_extrause1(i8 %x) {
 ; CHECK-LABEL: @t8_extrause1(
 ; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
 ; CHECK-NEXT:    call void @use8(i8 [[A]])
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i8 [[X]], 4
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[TMP1]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 4 ; has extra use, but we can deal with that
@@ -131,12 +134,15 @@ define i16 @t8_extrause1(i8 %x) {
   %c = sext i4 %b to i16
   ret i16 %c
 }
+
+; TODO: We could convert both uses of %a to ashr.
+
 define <2 x i16> @t9_extrause1_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t9_extrause1_vec_undef(
 ; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
 ; CHECK-NEXT:    call void @usevec8(<2 x i8> [[A]])
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 undef>
@@ -180,9 +186,8 @@ define <2 x i16> @t11_extrause2_vec_undef(<2 x i8> %x) {
 
 define <2 x i10> @wide_source_shifted_signbit(<2 x i32> %x) {
 ; CHECK-LABEL: @wide_source_shifted_signbit(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 24, i32 24>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i32> [[A]] to <2 x i8>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[B]] to <2 x i10>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 24, i32 24>
+; CHECK-NEXT:    [[C:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i10>
 ; CHECK-NEXT:    ret <2 x i10> [[C]]
 ;
   %a = lshr <2 x i32> %x, <i32 24, i32 24>
@@ -195,8 +200,8 @@ define i10 @wide_source_shifted_signbit_use1(i32 %x) {
 ; CHECK-LABEL: @wide_source_shifted_signbit_use1(
 ; CHECK-NEXT:    [[A:%.*]] = lshr i32 [[X:%.*]], 24
 ; CHECK-NEXT:    call void @use32(i32 [[A]])
-; CHECK-NEXT:    [[B:%.*]] = trunc i32 [[A]] to i8
-; CHECK-NEXT:    [[C:%.*]] = sext i8 [[B]] to i10
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X]], 24
+; CHECK-NEXT:    [[C:%.*]] = trunc i32 [[TMP1]] to i10
 ; CHECK-NEXT:    ret i10 [[C]]
 ;
   %a = lshr i32 %x, 24
-- 
2.7.4
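A note on why the fold is sound (this note and the program below are not part of the patch): the matcher only fires when the shift amount equals the number of bits removed by the trunc (XBitSize - SrcBitSize), so the zero bits that lshr shifts in are exactly the bits the trunc discards, and ashr instead shifts in copies of what becomes the sign bit of the narrow type. The following standalone C++ check is a minimal sketch for the i8 -> i4 -> i16 shape used in @t0; variable names mirror the test's %a/%b/%c, and it assumes an arithmetic right shift on signed integers (implementation-defined before C++20, but what every mainstream compiler does).

#include <cassert>
#include <cstdint>

int main() {
  for (int V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);

    // Original pattern: %a = lshr i8 X, 4
    //                   %b = trunc i8 %a to i4
    //                   %c = sext i4 %b to i16
    uint8_t A = X >> 4;                           // logical shift right
    int8_t B = static_cast<int8_t>(A << 4) >> 4;  // sign value of the low 4 bits
    int16_t Before = B;                           // sext to 16 bits

    // Folded pattern:   %t = ashr i8 X, 4
    //                   %c = sext i8 %t to i16
    int16_t After = static_cast<int8_t>(X) >> 4;  // arithmetic shift, then sext

    assert(Before == After && "lshr+trunc+sext must match ashr+sext");
  }
  return 0;
}

For the wide-source tests (wide_source_shifted_signbit*), CastInst::CreateIntegerCast emits a trunc instead of a sext because DestTy is narrower than the shifted type; the same bit-level argument applies, since the low DestBitSize bits of the ashr result are the shifted-down field plus copies of its sign bit, i.e. exactly the sign-extension the trunc+sext pair would have produced.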