From 9c4b49db1928be41f9b531d3e050d1e4c22a77aa Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 11 Jan 2022 10:01:25 -0800 Subject: [PATCH] [ShrinkWrap] check for PPC's non-callee-saved LR As pointed out in https://reviews.llvm.org/D115688#inline-1108193, we don't want to sink the save point past an INLINEASM_BR; otherwise prologepilog may incorrectly sink a prolog past the MBB containing an INLINEASM_BR and into the wrong MBB. ShrinkWrap is getting this wrong because LR is not in the list of callee saved registers. Specifically, ShrinkWrap::useOrDefCSROrFI calls RegisterClassInfo::getLastCalleeSavedAlias which reads CalleeSavedAliases which was populated by RegisterClassInfo::runOnMachineFunction by iterating the list of MCPhysReg returned from MachineRegisterInfo::getCalleeSavedRegs. Because PPC's LR is non-allocatable, it's NOT considered callee saved. Add an interface to TargetRegisterInfo for such a case and use it in ShrinkWrap to ensure we don't sink a prolog past an INLINEASM or INLINEASM_BR that clobbers LR. 
Reviewed By: jyknight, efriedma, nemanjai, #powerpc Differential Revision: https://reviews.llvm.org/D116424 --- llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 7 +++++ llvm/lib/CodeGen/ShrinkWrap.cpp | 12 ++++++-- llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 4 +++ .../CodeGen/PowerPC/ppc64-inlineasm-clobber.ll | 34 ++++++++++------------ 4 files changed, 37 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 8483d07..c3b8420 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -1094,6 +1094,13 @@ public: inline MCRegister getSubReg(MCRegister Reg, unsigned Idx) const { return static_cast<const MCRegisterInfo *>(this)->getSubReg(Reg, Idx); } + + /// Some targets have non-allocatable registers that aren't technically part + /// of the explicit callee saved register list, but should be handled as such + /// in certain cases. + virtual bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const { + return false; + } }; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index f89069e..f6ad2b5 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -273,6 +273,8 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); for (const MachineOperand &MO : MI.operands()) { bool UseOrDefCSR = false; if (MO.isReg()) { @@ -288,8 +290,14 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, // separately. An SP mentioned by a call instruction, we can ignore, // though, as it's harmless and we do not want to effectively disable tail // calls by forcing the restore point to post-dominate them. 
- UseOrDefCSR = (!MI.isCall() && PhysReg == SP) || - RCI.getLastCalleeSavedAlias(PhysReg); + // PPC's LR is also not normally described as a callee-saved register in + // calling convention definitions, so we need to watch for it, too. An LR + // mentioned implicitly by a return (or "branch to link register") + // instruction we can ignore, otherwise we may pessimize shrinkwrapping. + UseOrDefCSR = + (!MI.isCall() && PhysReg == SP) || + RCI.getLastCalleeSavedAlias(PhysReg) || + (!MI.isReturn() && TRI->isNonallocatableRegisterCalleeSave(PhysReg)); } else if (MO.isRegMask()) { // Check if this regmask clobbers any of the CSRs. for (unsigned Reg : getCurrentCSRs(RS)) { diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 2e534dd..ce2a343 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -185,6 +185,10 @@ public: return RegName; } + + bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const override { + return Reg == PPC::LR || Reg == PPC::LR8; + } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll b/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll index 3d66683..41231e5 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll @@ -75,43 +75,41 @@ entry: define dso_local signext i32 @ClobberLR_BR(i32 signext %in) #0 { ; PPC64LE-LABEL: ClobberLR_BR: ; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -32(r1) ; PPC64LE-NEXT: #APP ; PPC64LE-NEXT: nop ; PPC64LE-NEXT: #NO_APP -; PPC64LE-NEXT: # %bb.1: # %return +; PPC64LE-NEXT: .LBB3_1: # %return ; PPC64LE-NEXT: extsw r3, r3 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .Ltmp0: # Block address taken -; PPC64LE-NEXT: .LBB3_2: # %return_early -; PPC64LE-NEXT: mflr r0 -; PPC64LE-NEXT: std r0, 16(r1) -; PPC64LE-NEXT: stdu r1, -32(r1) -; PPC64LE-NEXT: li 
r3, 0 ; PPC64LE-NEXT: addi r1, r1, 32 ; PPC64LE-NEXT: ld r0, 16(r1) ; PPC64LE-NEXT: mtlr r0 -; PPC64LE-NEXT: extsw r3, r3 ; PPC64LE-NEXT: blr +; PPC64LE-NEXT: .Ltmp0: # Block address taken +; PPC64LE-NEXT: .LBB3_2: # %return_early +; PPC64LE-NEXT: li r3, 0 +; PPC64LE-NEXT: b .LBB3_1 ; ; PPC64BE-LABEL: ClobberLR_BR: ; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) ; PPC64BE-NEXT: #APP ; PPC64BE-NEXT: nop ; PPC64BE-NEXT: #NO_APP -; PPC64BE-NEXT: # %bb.1: # %return +; PPC64BE-NEXT: .LBB3_1: # %return ; PPC64BE-NEXT: extsw r3, r3 -; PPC64BE-NEXT: blr -; PPC64BE-NEXT: .Ltmp0: # Block address taken -; PPC64BE-NEXT: .LBB3_2: # %return_early -; PPC64BE-NEXT: mflr r0 -; PPC64BE-NEXT: std r0, 16(r1) -; PPC64BE-NEXT: stdu r1, -48(r1) -; PPC64BE-NEXT: li r3, 0 ; PPC64BE-NEXT: addi r1, r1, 48 ; PPC64BE-NEXT: ld r0, 16(r1) ; PPC64BE-NEXT: mtlr r0 -; PPC64BE-NEXT: extsw r3, r3 ; PPC64BE-NEXT: blr +; PPC64BE-NEXT: .Ltmp0: # Block address taken +; PPC64BE-NEXT: .LBB3_2: # %return_early +; PPC64BE-NEXT: li r3, 0 +; PPC64BE-NEXT: b .LBB3_1 entry: callbr void asm sideeffect "nop", "X,~{lr}"(i8* blockaddress(@ClobberLR_BR, %return_early)) to label %return [label %return_early] -- 2.7.4