From 323d08e50a7bb80786dc00a8ade6ae49e1358393 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 30 Sep 2020 15:51:54 +0100
Subject: [PATCH] [InstCombine] Fix bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
 vector support

Use getScalarSizeInBits not getPrimitiveSizeInBits to determine the shift value at the element level.
---
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 4 ++--
 llvm/test/Transforms/InstCombine/bswap-fold.ll       | 8 ++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 465191b..c069657 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -828,8 +828,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
 
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
     if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
-      unsigned C = X->getType()->getPrimitiveSizeInBits() -
-        IIOperand->getType()->getPrimitiveSizeInBits();
+      unsigned C = X->getType()->getScalarSizeInBits() -
+                   IIOperand->getType()->getScalarSizeInBits();
       Value *CV = ConstantInt::get(X->getType(), C);
       Value *V = Builder.CreateLShr(X, CV);
       return new TruncInst(V, IIOperand->getType());
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index c90b880..da7380e 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -39,7 +39,9 @@ define i16 @test7(i32 %A) {
 
 define <2 x i16> @test7_vector(<2 x i32> %A) {
 ; CHECK-LABEL: @test7_vector(
-; CHECK-NEXT:    ret <2 x i16> undef
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[D:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[D]]
 ;
   %B = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %A) nounwind
   %C = trunc <2 x i32> %B to <2 x i16>
@@ -61,7 +63,9 @@ define i16 @test8(i64 %A) {
 
 define <2 x i16> @test8_vector(<2 x i64> %A) {
 ; CHECK-LABEL: @test8_vector(
-; CHECK-NEXT:    ret <2 x i16> undef
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 48, i64 48>
+; CHECK-NEXT:    [[D:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[D]]
 ;
   %B = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %A) nounwind
   %C = trunc <2 x i64> %B to <2 x i16>
-- 
2.7.4