From d20c54cbdb9a4a12bbfb1d51c72e52abd3f039a6 Mon Sep 17 00:00:00 2001
From: wanglei
Date: Thu, 10 Nov 2022 21:01:05 +0800
Subject: [PATCH] [LoongArch] Override TargetFrameLowering::spillCalleeSavedRegisters

When using `llvm.returnaddress` intrinsic, special handling is required
for the spill of the `RA` register. Otherwise it will cause the verifier
to fail in some cases (e.g. pr17377.c of the GCC C Torture Suite).

Specifically:
```
*** Bad machine code: Using an undefined physical register ***
- function:    f
- basic block: %bb.0 entry (0xd94d18)
- instruction: ST_D killed $r1, $r22, -40 :: (store (s64) into %stack.2)
- operand 0:   killed $r1
```

Reviewed By: SixWeining

Differential Revision: https://reviews.llvm.org/D137387
---
 .../Target/LoongArch/LoongArchFrameLowering.cpp    |  23 +++++
 llvm/lib/Target/LoongArch/LoongArchFrameLowering.h |   4 +
 .../CodeGen/LoongArch/spill-ra-without-kill.ll     | 102 +++++++++++++++++++++
 3 files changed, 129 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index f45ad53..7c28ab9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -406,6 +406,29 @@ LoongArchFrameLowering::eliminateCallFramePseudoInstr(
   return MBB.erase(MI);
 }
 
+bool LoongArchFrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return true;
+
+  MachineFunction *MF = MBB.getParent();
+  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+
+  // Insert the spill to the stack frame.
+  for (auto &CS : CSI) {
+    Register Reg = CS.getReg();
+    // If the register is RA and the return address is taken by method
+    // LoongArchTargetLowering::lowerRETURNADDR, don't set kill flag.
+    bool IsKill =
+        !(Reg == LoongArch::R1 && MF->getFrameInfo().isReturnAddressTaken());
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC, TRI);
+  }
+
+  return true;
+}
+
 StackOffset LoongArchFrameLowering::getFrameIndexReference(
     const MachineFunction &MF, int FI, Register &FrameReg) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
index f14d9b8..414d671 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -41,6 +41,10 @@ public:
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const override;
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 ArrayRef<CalleeSavedInfo> CSI,
+                                 const TargetRegisterInfo *TRI) const override;
 
   StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
                                      Register &FrameReg) const override;
diff --git a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll
new file mode 100644
index 0000000..d800bef
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s
+
+;; This test case is reduced from pr17377.c of the GCC C Torture Suite using
+;; bugpoint.
+
+@calls = external dso_local global i32, align 4
+declare ptr @llvm.returnaddress(i32 immarg)
+
+define dso_local ptr @f(i32 noundef signext %i) "frame-pointer"="all" {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -48
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 48
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    st.d $ra, $fp, -40 # 8-byte Folded Spill
+; CHECK-NEXT:    move $a1, $a0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(calls)
+; CHECK-NEXT:    addi.d $a3, $a0, %pc_lo12(calls)
+; CHECK-NEXT:    ld.wu $a0, $a3, 0
+; CHECK-NEXT:    addi.d $a2, $a0, 1
+; CHECK-NEXT:    st.w $a2, $a3, 0
+; CHECK-NEXT:    st.w $a1, $fp, -28
+; CHECK-NEXT:    bnez $a0, .LBB0_2
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_1: # %if.then
+; CHECK-NEXT:    ld.d $a0, $fp, -40 # 8-byte Folded Reload
+; CHECK-NEXT:    st.d $a0, $fp, -24
+; CHECK-NEXT:    b .LBB0_7
+; CHECK-NEXT:  .LBB0_2: # %if.end
+; CHECK-NEXT:    ld.wu $a0, $fp, -28
+; CHECK-NEXT:    st.d $a0, $fp, -48 # 8-byte Folded Spill
+; CHECK-NEXT:    beqz $a0, .LBB0_5
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_3: # %if.end
+; CHECK-NEXT:    ld.d $a0, $fp, -48 # 8-byte Folded Reload
+; CHECK-NEXT:    ori $a1, $zero, 1
+; CHECK-NEXT:    bne $a0, $a1, .LBB0_6
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_4: # %sw.bb
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(f)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(f)
+; CHECK-NEXT:    st.d $a0, $fp, -24
+; CHECK-NEXT:    b .LBB0_7
+; CHECK-NEXT:  .LBB0_5: # %sw.bb1
+; CHECK-NEXT:    ld.d $a0, $fp, -40 # 8-byte Folded Reload
+; CHECK-NEXT:    st.d $a0, $fp, -24
+; CHECK-NEXT:    b .LBB0_7
+; CHECK-NEXT:  .LBB0_6: # %sw.epilog
+; CHECK-NEXT:    move $a0, $zero
+; CHECK-NEXT:    st.d $a0, $fp, -24
+; CHECK-NEXT:    b .LBB0_7
+; CHECK-NEXT:  .LBB0_7: # %return
+; CHECK-NEXT:    ld.d $a0, $fp, -24
+; CHECK-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 48
+; CHECK-NEXT:    ret
+entry:
+  %retval = alloca ptr, align 8
+  %i.addr = alloca i32, align 4
+  store i32 %i, ptr %i.addr, align 4
+  %0 = load i32, ptr @calls, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, ptr @calls, align 4
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %1 = call ptr @llvm.returnaddress(i32 0)
+  store ptr %1, ptr %retval, align 8
+  br label %return
+
+if.end:
+  %2 = load i32, ptr %i.addr, align 4
+  switch i32 %2, label %sw.epilog [
+    i32 1, label %sw.bb
+    i32 0, label %sw.bb1
+  ]
+
+sw.bb:
+  store ptr @f, ptr %retval, align 8
+  br label %return
+
+sw.bb1:
+  %3 = call ptr @llvm.returnaddress(i32 0)
+  store ptr %3, ptr %retval, align 8
+  br label %return
+
+sw.epilog:
+  store ptr null, ptr %retval, align 8
+  br label %return
+
+return:
+  %4 = load ptr, ptr %retval, align 8
+  ret ptr %4
+}
-- 
2.7.4