From 87f0d55304a27ce0f6178eed65d8dad49b5dcfd9 Mon Sep 17 00:00:00 2001
From: "chenglin.bi"
Date: Wed, 6 Apr 2022 21:17:42 +0800
Subject: [PATCH] [AArch64] Fold lsr+bfi in tryBitfieldInsertOpFromOr

In tryBitfieldInsertOpFromOr, if the source of the newly created LSR node
is itself an LSR with an immediate shift, fold the two shifts into one.

Fixes https://github.com/llvm/llvm-project/issues/54696

Reviewed By: efriedma, benshi001

Differential Revision: https://reviews.llvm.org/D122915
---
 llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 10 ++-
 llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll    | 84 +++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 01dc160..c367d2d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2714,8 +2714,16 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
   // shift the needed bits into place.
   SDLoc DL(N);
   unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
+  uint64_t LsrImm = LSB;
+  if (Src->hasOneUse() &&
+      isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
+      (LsrImm + LSB) < BitWidth) {
+    Src = Src->getOperand(0);
+    LsrImm += LSB;
+  }
+
   SDNode *LSR = CurDAG->getMachineNode(
-      ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
+      ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
       CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
 
   // BFXIL is an alias of BFM, so translate to BFM operands.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll b/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll
new file mode 100644
index 0000000..9584ab8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s
+
+define i32 @lsr_bfi(i32 %a) {
+; CHECK-LABEL: lsr_bfi:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr w8, w0, #20
+; CHECK-NEXT:    bfi w0, w8, #4, #4
+; CHECK-NEXT:    ret
+  %and1 = and i32 %a, -241
+  %1 = lshr i32 %a, 16
+  %shl = and i32 %1, 240
+  %or = or i32 %shl, %and1
+  ret i32 %or
+}
+
+define i32 @negative_lsr_bfi0(i32 %a) {
+; CHECK-LABEL: negative_lsr_bfi0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w0, w0, #0xffffff0f
+; CHECK-NEXT:    ret
+  %and1 = and i32 %a, -241
+  %1 = lshr i32 %a, 28
+  %shl = and i32 %1, 240
+  %or = or i32 %shl, %and1
+  ret i32 %or
+}
+
+define i32 @negative_lsr_bfi1(i32 %a) {
+; CHECK-LABEL: negative_lsr_bfi1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr w8, w0, #16
+; CHECK-NEXT:    lsr w9, w8, #4
+; CHECK-NEXT:    bfi w0, w9, #4, #4
+; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    ret
+  %and1 = and i32 %a, -241
+  %1 = lshr i32 %a, 16
+  %shl = and i32 %1, 240
+  %or = or i32 %shl, %and1
+  %add = add i32 %or, %1
+  ret i32 %add
+}
+
+define i64 @lsr_bfix(i64 %a) {
+; CHECK-LABEL: lsr_bfix:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #20
+; CHECK-NEXT:    bfi x0, x8, #4, #4
+; CHECK-NEXT:    ret
+  %and1 = and i64 %a, -241
+  %1 = lshr i64 %a, 16
+  %shl = and i64 %1, 240
+  %or = or i64 %shl, %and1
+  ret i64 %or
+}
+
+define i64 @negative_lsr_bfix0(i64 %a) {
+; CHECK-LABEL: negative_lsr_bfix0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0xffffffffffffff0f
+; CHECK-NEXT:    ret
+  %and1 = and i64 %a, -241
+  %1 = lshr i64 %a, 60
+  %shl = and i64 %1, 240
+  %or = or i64 %shl, %and1
+  ret i64 %or
+}
+
+define i64 @negative_lsr_bfix1(i64 %a) {
+; CHECK-LABEL: negative_lsr_bfix1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #16
+; CHECK-NEXT:    lsr x9, x8, #4
+; CHECK-NEXT:    bfi x0, x9, #4, #4
+; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    ret
+  %and1 = and i64 %a, -241
+  %1 = lshr i64 %a, 16
+  %shl = and i64 %1, 240
+  %or = or i64 %shl, %and1
+  %add = add i64 %or, %1
+  ret i64 %add
+}
-- 
2.7.4
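
For readers who want the arithmetic behind the fold: it relies on the identity
(a >> n) >> m == a >> (n + m) whenever n + m is still smaller than the bit
width, which is what the (LsrImm + LSB) < BitWidth guard checks. The sketch
below is not part of the patch; it is a plain C++ model of the lsr_bfi test
case, with illustrative names only.

    #include <cstdint>

    // Model of lsr_bfi: insert bits [23:20] of A into bits [7:4] of A.
    // Before this change the selected code used lsr #16 followed by a second
    // lsr #4; with the fold a single lsr #20 feeds the bfi.
    static uint32_t lsr_bfi_model(uint32_t A) {
      // Combined shift: (A >> 16) >> 4 == A >> 20, valid because 16 + 4 < 32,
      // mirroring the (LsrImm + LSB) < BitWidth guard above.
      uint32_t Field = (A >> 20) & 0xFu;   // lsr w8, w0, #20 (4-bit field)
      return (A & ~0xF0u) | (Field << 4);  // bfi w0, w8, #4, #4
    }

The negative_lsr_bfi1/negative_lsr_bfix1 tests keep an extra use of the inner
lshr, so Src->hasOneUse() fails and two lsr instructions remain in the output.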