From 87f0d55304a27ce0f6178eed65d8dad49b5dcfd9 Mon Sep 17 00:00:00 2001
From: "chenglin.bi"
Date: Wed, 6 Apr 2022 21:17:42 +0800
Subject: [PATCH] [AArch64] Fold lsr+bfi in tryBitfieldInsertOpFromOr

In tryBitfieldInsertOpFromOr, if the source of the newly created LSR node
is itself an LSR with an immediate shift, fold the two shifts into one.

Fixes https://github.com/llvm/llvm-project/issues/54696

Reviewed By: efriedma, benshi001

Differential Revision: https://reviews.llvm.org/D122915
---
 llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 10 ++-
 llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll    | 84 +++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 01dc160..c367d2d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2714,8 +2714,16 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
   // shift the needed bits into place.
   SDLoc DL(N);
   unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
+  uint64_t LsrImm = LSB;
+  if (Src->hasOneUse() &&
+      isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
+      (LsrImm + LSB) < BitWidth) {
+    Src = Src->getOperand(0);
+    LsrImm += LSB;
+  }
+
   SDNode *LSR = CurDAG->getMachineNode(
-      ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
+      ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
       CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
 
   // BFXIL is an alias of BFM, so translate to BFM operands.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll b/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll
new file mode 100644
index 0000000..9584ab8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s
+
+define i32 @lsr_bfi(i32 %a) {
+; CHECK-LABEL: lsr_bfi:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr w8, w0, #20
+; CHECK-NEXT:    bfi w0, w8, #4, #4
+; CHECK-NEXT:    ret
+  %and1 = and i32 %a, -241
+  %1 = lshr i32 %a, 16
+  %shl = and i32 %1, 240
+  %or = or i32 %shl, %and1
+  ret i32 %or
+}
+
+define i32 @negative_lsr_bfi0(i32 %a) {
+; CHECK-LABEL: negative_lsr_bfi0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w0, w0, #0xffffff0f
+; CHECK-NEXT:    ret
+  %and1 = and i32 %a, -241
+  %1 = lshr i32 %a, 28
+  %shl = and i32 %1, 240
+  %or = or i32 %shl, %and1
+  ret i32 %or
+}
+
+define i32 @negative_lsr_bfi1(i32 %a) {
+; CHECK-LABEL: negative_lsr_bfi1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr w8, w0, #16
+; CHECK-NEXT:    lsr w9, w8, #4
+; CHECK-NEXT:    bfi w0, w9, #4, #4
+; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    ret
+  %and1 = and i32 %a, -241
+  %1 = lshr i32 %a, 16
+  %shl = and i32 %1, 240
+  %or = or i32 %shl, %and1
+  %add = add i32 %or, %1
+  ret i32 %add
+}
+
+define i64 @lsr_bfix(i64 %a) {
+; CHECK-LABEL: lsr_bfix:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #20
+; CHECK-NEXT:    bfi x0, x8, #4, #4
+; CHECK-NEXT:    ret
+  %and1 = and i64 %a, -241
+  %1 = lshr i64 %a, 16
+  %shl = and i64 %1, 240
+  %or = or i64 %shl, %and1
+  ret i64 %or
+}
+
+define i64 @negative_lsr_bfix0(i64 %a) {
+; CHECK-LABEL: negative_lsr_bfix0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0xffffffffffffff0f
+; CHECK-NEXT:    ret
+  %and1 = and i64 %a, -241
+  %1 = lshr i64 %a, 60
+  %shl = and i64 %1, 240
+  %or = or i64 %shl, %and1
+  ret i64 %or
+}
+
+define i64 @negative_lsr_bfix1(i64 %a) {
+; CHECK-LABEL: negative_lsr_bfix1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #16
+; CHECK-NEXT:    lsr x9, x8, #4
+; CHECK-NEXT:    bfi x0, x9, #4, #4
+; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    ret
+  %and1 = and i64 %a, -241
+  %1 = lshr i64 %a, 16
+  %shl = and i64 %1, 240
+  %or = or i64 %shl, %and1
+  %add = add i64 %or, %1
+  ret i64 %add
+}
-- 
2.7.4
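
For readers who want the arithmetic behind the fold: it relies on the identity
(a >> n) >> m == a >> (n + m) whenever n + m is still smaller than the bit
width, which is what the (LsrImm + LSB) < BitWidth guard checks. The sketch
below is not part of the patch; it is a plain C++ model of the lsr_bfi test
case, with illustrative names only.

    #include <cstdint>

    // Model of lsr_bfi: insert bits [23:20] of A into bits [7:4] of A.
    // Before this change the selected code used lsr #16 followed by a second
    // lsr #4; with the fold a single lsr #20 feeds the bfi.
    static uint32_t lsr_bfi_model(uint32_t A) {
      // Combined shift: (A >> 16) >> 4 == A >> 20, valid because 16 + 4 < 32,
      // mirroring the (LsrImm + LSB) < BitWidth guard above.
      uint32_t Field = (A >> 20) & 0xFu;   // lsr w8, w0, #20 (4-bit field)
      return (A & ~0xF0u) | (Field << 4);  // bfi w0, w8, #4, #4
    }

The negative_lsr_bfi1/negative_lsr_bfix1 tests keep an extra use of the inner
lshr, so Src->hasOneUse() fails and two lsr instructions remain in the output.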