From: Xiaodong Liu Date: Tue, 15 Nov 2022 01:55:03 +0000 (+0800) Subject: [LoongArch] Handle register spill in BranchRelaxation pass X-Git-Tag: upstream/17.0.6~27674 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=03d07e181d97248a1cdfe34affe826dd3ad4b46a;p=platform%2Fupstream%2Fllvm.git [LoongArch] Handle register spill in BranchRelaxation pass When the range of the unconditional branch is overflow, the indirect branch way is used. The case when there is no scavenged register for indirect branch needs to spill register to stack. Reviewed By: SixWeining, wangleiat Differential Revision: https://reviews.llvm.org/D137821 --- diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp index 7c28ab9..a5c8438 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -118,12 +118,32 @@ void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const { MFI.setStackSize(FrameSize); } +static uint64_t estimateFunctionSizeInBytes(const LoongArchInstrInfo *TII, + const MachineFunction &MF) { + uint64_t FuncSize = 0; + for (auto &MBB : MF) + for (auto &MI : MBB) + FuncSize += TII->getInstSizeInBytes(MI); + return FuncSize; +} + void LoongArchFrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); const TargetRegisterClass &RC = LoongArch::GPRRegClass; + const LoongArchInstrInfo *TII = STI.getInstrInfo(); + LoongArchMachineFunctionInfo *LAFI = + MF.getInfo<LoongArchMachineFunctionInfo>(); MachineFrameInfo &MFI = MF.getFrameInfo(); + // Far branches beyond 27-bit offset require a spill slot for scratch register. 
+ if (!isInt<27>(estimateFunctionSizeInBytes(TII, MF))) { + int FI = MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), + false); + RS->addScavengingFrameIndex(FI); + if (LAFI->getBranchRelaxationSpillFrameIndex() == -1) + LAFI->setBranchRelaxationSpillFrameIndex(FI); + } // estimateStackSize has been observed to under-estimate the final stack // size, so give ourselves wiggle-room by checking for stack size // representable an 11-bit signed field rather than 12-bits. diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index eab5cdb6..58669f1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -371,6 +371,9 @@ void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + LoongArchMachineFunctionInfo *LAFI = + MF->getInfo<LoongArchMachineFunctionInfo>(); if (!isInt<32>(BrOffset)) report_fatal_error( @@ -379,26 +382,45 @@ void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass); auto II = MBB.end(); - MachineInstr &MI = + MachineInstr &PCALAU12I = *BuildMI(MBB, II, DL, get(LoongArch::PCALAU12I), ScratchReg) .addMBB(&DestBB, LoongArchII::MO_PCREL_HI); - BuildMI(MBB, II, DL, - get(STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W), - ScratchReg) - .addReg(ScratchReg) - .addMBB(&DestBB, LoongArchII::MO_PCREL_LO); + MachineInstr &ADDI = + *BuildMI(MBB, II, DL, + get(STI.is64Bit() ? 
LoongArch::ADDI_D : LoongArch::ADDI_W), + ScratchReg) + .addReg(ScratchReg) + .addMBB(&DestBB, LoongArchII::MO_PCREL_LO); BuildMI(MBB, II, DL, get(LoongArch::PseudoBRIND)) .addReg(ScratchReg, RegState::Kill) .addImm(0); RS->enterBasicBlockEnd(MBB); - Register Scav = RS->scavengeRegisterBackwards(LoongArch::GPRRegClass, - MI.getIterator(), false, 0); - // TODO: When there is no scavenged register, it needs to specify a register. - assert(Scav != LoongArch::NoRegister && "No register is scavenged!"); + Register Scav = RS->scavengeRegisterBackwards( + LoongArch::GPRRegClass, PCALAU12I.getIterator(), /*RestoreAfter=*/false, + /*SPAdj=*/0, /*AllowSpill=*/false); + if (Scav != LoongArch::NoRegister) + RS->setRegUsed(Scav); + else { + // When there is no scavenged register, it needs to specify a register. + // Specify t8 register because it won't be used too often. + Scav = LoongArch::R20; + int FrameIndex = LAFI->getBranchRelaxationSpillFrameIndex(); + if (FrameIndex == -1) + report_fatal_error("The function size is incorrectly estimated."); + storeRegToStackSlot(MBB, PCALAU12I, Scav, /*IsKill=*/true, FrameIndex, + &LoongArch::GPRRegClass, TRI); + TRI->eliminateFrameIndex(std::prev(PCALAU12I.getIterator()), + /*SpAdj=*/0, /*FIOperandNum=*/1); + PCALAU12I.getOperand(1).setMBB(&RestoreBB); + ADDI.getOperand(2).setMBB(&RestoreBB); + loadRegFromStackSlot(RestoreBB, RestoreBB.end(), Scav, FrameIndex, + &LoongArch::GPRRegClass, TRI); + TRI->eliminateFrameIndex(RestoreBB.back(), + /*SpAdj=*/0, /*FIOperandNum=*/1); + } MRI.replaceRegWith(ScratchReg, Scav); MRI.clearVirtRegs(); - RS->setRegUsed(Scav); } static unsigned getOppositeBranchOpc(unsigned Opc) { diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h index d4a6c88..47b021e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h @@ -32,6 +32,10 @@ private: /// Size of stack 
frame to save callee saved registers unsigned CalleeSavedStackSize = 0; + /// FrameIndex of the spill slot when there is no scavenged register in + /// insertIndirectBranch. + int BranchRelaxationSpillFrameIndex = -1; + public: LoongArchMachineFunctionInfo(const MachineFunction &MF) {} @@ -50,6 +54,13 @@ public: unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } + + int getBranchRelaxationSpillFrameIndex() { + return BranchRelaxationSpillFrameIndex; + } + void setBranchRelaxationSpillFrameIndex(int Index) { + BranchRelaxationSpillFrameIndex = Index; + } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll new file mode 100644 index 0000000..aa4a602c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll @@ -0,0 +1,313 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --filetype=obj --verify-machineinstrs < %s \ +; RUN: -o /dev/null 2>&1 +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s + +define void @relax_b28_spill() { +; CHECK-LABEL: relax_b28_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $sp, $sp, -48 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; CHECK-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s8, $sp, 4 # 4-byte Folded Spill +; CHECK-NEXT: 
.cfi_offset 1, -4 +; CHECK-NEXT: .cfi_offset 22, -8 +; CHECK-NEXT: .cfi_offset 23, -12 +; CHECK-NEXT: .cfi_offset 24, -16 +; CHECK-NEXT: .cfi_offset 25, -20 +; CHECK-NEXT: .cfi_offset 26, -24 +; CHECK-NEXT: .cfi_offset 27, -28 +; CHECK-NEXT: .cfi_offset 28, -32 +; CHECK-NEXT: .cfi_offset 29, -36 +; CHECK-NEXT: .cfi_offset 30, -40 +; CHECK-NEXT: .cfi_offset 31, -44 +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $zero, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $ra, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $tp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $fp, 
$zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beq $s7, $s8, .LBB0_1 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: st.w $t8, $sp, 0 +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB0_5) +; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB0_5) +; CHECK-NEXT: jr $t8 +; CHECK-NEXT: .LBB0_1: # %iftrue +; CHECK-NEXT: #APP +; CHECK-NEXT: .space 536870912 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_5: # %iftrue +; CHECK-NEXT: ld.w $t8, $sp, 0 +; CHECK-NEXT: # %bb.2: # %iffalse +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $zero +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $ra +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $tp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: 
#APP +; CHECK-NEXT: # reg use $t0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $fp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .LBB0_3: # %iftrue +; CHECK-NEXT: ld.w $s8, $sp, 4 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded 
Reload +; CHECK-NEXT: addi.w $sp, $sp, 48 +; CHECK-NEXT: ret + %zero = call i32 asm sideeffect "addi.w $$zero, $$zero, 1", "={r0}"() + %ra = call i32 asm sideeffect "addi.w $$ra, $$zero, 1", "={r1}"() + %tp = call i32 asm sideeffect "addi.w $$tp, $$zero, 1", "={r2}"() + %a0 = call i32 asm sideeffect "addi.w $$a0, $$zero, 1", "={r4}"() + %a1 = call i32 asm sideeffect "addi.w $$a1, $$zero, 1", "={r5}"() + %a2 = call i32 asm sideeffect "addi.w $$a2, $$zero, 1", "={r6}"() + %a3 = call i32 asm sideeffect "addi.w $$a3, $$zero, 1", "={r7}"() + %a4 = call i32 asm sideeffect "addi.w $$a4, $$zero, 1", "={r8}"() + %a5 = call i32 asm sideeffect "addi.w $$a5, $$zero, 1", "={r9}"() + %a6 = call i32 asm sideeffect "addi.w $$a6, $$zero, 1", "={r10}"() + %a7 = call i32 asm sideeffect "addi.w $$a7, $$zero, 1", "={r11}"() + %t0 = call i32 asm sideeffect "addi.w $$t0, $$zero, 1", "={r12}"() + %t1 = call i32 asm sideeffect "addi.w $$t1, $$zero, 1", "={r13}"() + %t2 = call i32 asm sideeffect "addi.w $$t2, $$zero, 1", "={r14}"() + %t3 = call i32 asm sideeffect "addi.w $$t3, $$zero, 1", "={r15}"() + %t4 = call i32 asm sideeffect "addi.w $$t4, $$zero, 1", "={r16}"() + %t5 = call i32 asm sideeffect "addi.w $$t5, $$zero, 1", "={r17}"() + %t6 = call i32 asm sideeffect "addi.w $$t6, $$zero, 1", "={r18}"() + %t7 = call i32 asm sideeffect "addi.w $$t7, $$zero, 1", "={r19}"() + %t8 = call i32 asm sideeffect "addi.w $$t8, $$zero, 1", "={r20}"() + ;; r21 Reserved (Non-allocatable) + %s9 = call i32 asm sideeffect "addi.w $$s9, $$zero, 1", "={r22}"() + %s0 = call i32 asm sideeffect "addi.w $$s0, $$zero, 1", "={r23}"() + %s1 = call i32 asm sideeffect "addi.w $$s1, $$zero, 1", "={r24}"() + %s2 = call i32 asm sideeffect "addi.w $$s2, $$zero, 1", "={r25}"() + %s3 = call i32 asm sideeffect "addi.w $$s3, $$zero, 1", "={r26}"() + %s4 = call i32 asm sideeffect "addi.w $$s4, $$zero, 1", "={r27}"() + %s5 = call i32 asm sideeffect "addi.w $$s5, $$zero, 1", "={r28}"() + %s6 = call i32 asm sideeffect "addi.w 
$$s6, $$zero, 1", "={r29}"() + %s7 = call i32 asm sideeffect "addi.w $$s7, $$zero, 1", "={r30}"() + %s8 = call i32 asm sideeffect "addi.w $$s8, $$zero, 1", "={r31}"() + + %cmp = icmp eq i32 %s7, %s8 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 536870912", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{r0}"(i32 %zero) + call void asm sideeffect "# reg use $0", "{r1}"(i32 %ra) + call void asm sideeffect "# reg use $0", "{r2}"(i32 %tp) + call void asm sideeffect "# reg use $0", "{r4}"(i32 %a0) + call void asm sideeffect "# reg use $0", "{r5}"(i32 %a1) + call void asm sideeffect "# reg use $0", "{r6}"(i32 %a2) + call void asm sideeffect "# reg use $0", "{r7}"(i32 %a3) + call void asm sideeffect "# reg use $0", "{r8}"(i32 %a4) + call void asm sideeffect "# reg use $0", "{r9}"(i32 %a5) + call void asm sideeffect "# reg use $0", "{r10}"(i32 %a6) + call void asm sideeffect "# reg use $0", "{r11}"(i32 %a7) + call void asm sideeffect "# reg use $0", "{r12}"(i32 %t0) + call void asm sideeffect "# reg use $0", "{r13}"(i32 %t1) + call void asm sideeffect "# reg use $0", "{r14}"(i32 %t2) + call void asm sideeffect "# reg use $0", "{r15}"(i32 %t3) + call void asm sideeffect "# reg use $0", "{r16}"(i32 %t4) + call void asm sideeffect "# reg use $0", "{r17}"(i32 %t5) + call void asm sideeffect "# reg use $0", "{r18}"(i32 %t6) + call void asm sideeffect "# reg use $0", "{r19}"(i32 %t7) + call void asm sideeffect "# reg use $0", "{r20}"(i32 %t8) + ;; r21 Reserved (Non-allocatable) + call void asm sideeffect "# reg use $0", "{r22}"(i32 %s9) + call void asm sideeffect "# reg use $0", "{r23}"(i32 %s0) + call void asm sideeffect "# reg use $0", "{r24}"(i32 %s1) + call void asm sideeffect "# reg use $0", "{r25}"(i32 %s2) + call void asm sideeffect "# reg use $0", "{r26}"(i32 %s3) + call void asm sideeffect "# reg use $0", "{r27}"(i32 %s4) + call void asm sideeffect "# reg use $0", "{r28}"(i32 %s5) + call void asm 
sideeffect "# reg use $0", "{r29}"(i32 %s6) + call void asm sideeffect "# reg use $0", "{r30}"(i32 %s7) + call void asm sideeffect "# reg use $0", "{r31}"(i32 %s8) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll new file mode 100644 index 0000000..93320e1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll @@ -0,0 +1,313 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --filetype=obj --verify-machineinstrs < %s \ +; RUN: -o /dev/null 2>&1 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s + +define void @relax_b28_spill() { +; CHECK-LABEL: relax_b28_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -96 +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s0, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s1, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s2, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s3, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s4, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s5, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s6, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s7, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s8, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: .cfi_offset 23, -24 +; CHECK-NEXT: .cfi_offset 24, -32 +; CHECK-NEXT: .cfi_offset 25, -40 +; CHECK-NEXT: .cfi_offset 26, -48 +; CHECK-NEXT: .cfi_offset 27, -56 +; CHECK-NEXT: .cfi_offset 28, -64 +; CHECK-NEXT: .cfi_offset 29, -72 +; CHECK-NEXT: .cfi_offset 30, -80 +; CHECK-NEXT: .cfi_offset 31, -88 +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $zero, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $ra, $zero, 1 +; 
CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $tp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $fp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s5, $zero, 1 +; CHECK-NEXT: #NO_APP +; 
CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beq $s7, $s8, .LBB0_1 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: st.d $t8, $sp, 0 +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB0_5) +; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB0_5) +; CHECK-NEXT: jr $t8 +; CHECK-NEXT: .LBB0_1: # %iftrue +; CHECK-NEXT: #APP +; CHECK-NEXT: .space 536870912 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_5: # %iftrue +; CHECK-NEXT: ld.d $t8, $sp, 0 +; CHECK-NEXT: # %bb.2: # %iffalse +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $zero +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $ra +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $tp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP 
+; CHECK-NEXT: # reg use $t7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $fp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .LBB0_3: # %iftrue +; CHECK-NEXT: ld.d $s8, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s7, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s6, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s5, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s4, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s3, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s2, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s1, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s0, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ret + %zero = call i64 asm sideeffect "addi.d $$zero, $$zero, 1", "={r0}"() + %ra = call i64 asm sideeffect "addi.d $$ra, $$zero, 1", "={r1}"() + %tp = call i64 asm sideeffect "addi.d $$tp, $$zero, 1", "={r2}"() + %a0 = call i64 asm sideeffect "addi.d $$a0, $$zero, 1", "={r4}"() + %a1 = call i64 asm sideeffect "addi.d $$a1, $$zero, 1", "={r5}"() + %a2 = call i64 asm sideeffect "addi.d $$a2, $$zero, 1", "={r6}"() + %a3 = call i64 asm sideeffect "addi.d 
$$a3, $$zero, 1", "={r7}"() + %a4 = call i64 asm sideeffect "addi.d $$a4, $$zero, 1", "={r8}"() + %a5 = call i64 asm sideeffect "addi.d $$a5, $$zero, 1", "={r9}"() + %a6 = call i64 asm sideeffect "addi.d $$a6, $$zero, 1", "={r10}"() + %a7 = call i64 asm sideeffect "addi.d $$a7, $$zero, 1", "={r11}"() + %t0 = call i64 asm sideeffect "addi.d $$t0, $$zero, 1", "={r12}"() + %t1 = call i64 asm sideeffect "addi.d $$t1, $$zero, 1", "={r13}"() + %t2 = call i64 asm sideeffect "addi.d $$t2, $$zero, 1", "={r14}"() + %t3 = call i64 asm sideeffect "addi.d $$t3, $$zero, 1", "={r15}"() + %t4 = call i64 asm sideeffect "addi.d $$t4, $$zero, 1", "={r16}"() + %t5 = call i64 asm sideeffect "addi.d $$t5, $$zero, 1", "={r17}"() + %t6 = call i64 asm sideeffect "addi.d $$t6, $$zero, 1", "={r18}"() + %t7 = call i64 asm sideeffect "addi.d $$t7, $$zero, 1", "={r19}"() + %t8 = call i64 asm sideeffect "addi.d $$t8, $$zero, 1", "={r20}"() + ;; r21 Reserved (Non-allocatable) + %s9 = call i64 asm sideeffect "addi.d $$s9, $$zero, 1", "={r22}"() + %s0 = call i64 asm sideeffect "addi.d $$s0, $$zero, 1", "={r23}"() + %s1 = call i64 asm sideeffect "addi.d $$s1, $$zero, 1", "={r24}"() + %s2 = call i64 asm sideeffect "addi.d $$s2, $$zero, 1", "={r25}"() + %s3 = call i64 asm sideeffect "addi.d $$s3, $$zero, 1", "={r26}"() + %s4 = call i64 asm sideeffect "addi.d $$s4, $$zero, 1", "={r27}"() + %s5 = call i64 asm sideeffect "addi.d $$s5, $$zero, 1", "={r28}"() + %s6 = call i64 asm sideeffect "addi.d $$s6, $$zero, 1", "={r29}"() + %s7 = call i64 asm sideeffect "addi.d $$s7, $$zero, 1", "={r30}"() + %s8 = call i64 asm sideeffect "addi.d $$s8, $$zero, 1", "={r31}"() + + %cmp = icmp eq i64 %s7, %s8 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 536870912", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{r0}"(i64 %zero) + call void asm sideeffect "# reg use $0", "{r1}"(i64 %ra) + call void asm sideeffect "# reg use $0", "{r2}"(i64 %tp) + call 
void asm sideeffect "# reg use $0", "{r4}"(i64 %a0) + call void asm sideeffect "# reg use $0", "{r5}"(i64 %a1) + call void asm sideeffect "# reg use $0", "{r6}"(i64 %a2) + call void asm sideeffect "# reg use $0", "{r7}"(i64 %a3) + call void asm sideeffect "# reg use $0", "{r8}"(i64 %a4) + call void asm sideeffect "# reg use $0", "{r9}"(i64 %a5) + call void asm sideeffect "# reg use $0", "{r10}"(i64 %a6) + call void asm sideeffect "# reg use $0", "{r11}"(i64 %a7) + call void asm sideeffect "# reg use $0", "{r12}"(i64 %t0) + call void asm sideeffect "# reg use $0", "{r13}"(i64 %t1) + call void asm sideeffect "# reg use $0", "{r14}"(i64 %t2) + call void asm sideeffect "# reg use $0", "{r15}"(i64 %t3) + call void asm sideeffect "# reg use $0", "{r16}"(i64 %t4) + call void asm sideeffect "# reg use $0", "{r17}"(i64 %t5) + call void asm sideeffect "# reg use $0", "{r18}"(i64 %t6) + call void asm sideeffect "# reg use $0", "{r19}"(i64 %t7) + call void asm sideeffect "# reg use $0", "{r20}"(i64 %t8) + ;; r21 Reserved (Non-allocatable) + call void asm sideeffect "# reg use $0", "{r22}"(i64 %s9) + call void asm sideeffect "# reg use $0", "{r23}"(i64 %s0) + call void asm sideeffect "# reg use $0", "{r24}"(i64 %s1) + call void asm sideeffect "# reg use $0", "{r25}"(i64 %s2) + call void asm sideeffect "# reg use $0", "{r26}"(i64 %s3) + call void asm sideeffect "# reg use $0", "{r27}"(i64 %s4) + call void asm sideeffect "# reg use $0", "{r28}"(i64 %s5) + call void asm sideeffect "# reg use $0", "{r29}"(i64 %s6) + call void asm sideeffect "# reg use $0", "{r30}"(i64 %s7) + call void asm sideeffect "# reg use $0", "{r31}"(i64 %s8) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation.ll index aecd4cb..7d064dd 100644 --- a/llvm/test/CodeGen/LoongArch/branch-relaxation.ll +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation.ll @@ -88,6 +88,8 @@ iffalse: define i32 @relax_b28(i1 %a) { ; LA32-LABEL: relax_b28: ; 
LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: bnez $a0, .LBB2_1 ; LA32-NEXT: # %bb.3: @@ -99,13 +101,17 @@ define i32 @relax_b28(i1 %a) { ; LA32-NEXT: .space 536870912 ; LA32-NEXT: #NO_APP ; LA32-NEXT: ori $a0, $zero, 1 +; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; LA32-NEXT: .LBB2_2: # %iffalse ; LA32-NEXT: move $a0, $zero +; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; ; LA64-LABEL: relax_b28: ; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 ; LA64-NEXT: andi $a0, $a0, 1 ; LA64-NEXT: bnez $a0, .LBB2_1 ; LA64-NEXT: # %bb.3: @@ -117,9 +123,11 @@ define i32 @relax_b28(i1 %a) { ; LA64-NEXT: .space 536870912 ; LA64-NEXT: #NO_APP ; LA64-NEXT: ori $a0, $zero, 1 +; LA64-NEXT: addi.d $sp, $sp, 16 ; LA64-NEXT: ret ; LA64-NEXT: .LBB2_2: # %iffalse ; LA64-NEXT: move $a0, $zero +; LA64-NEXT: addi.d $sp, $sp, 16 ; LA64-NEXT: ret br i1 %a, label %iftrue, label %iffalse