From 02141a17aea0603b89baee14febea6a3b89474d1 Mon Sep 17 00:00:00 2001 From: Victor Huang Date: Wed, 22 Apr 2020 10:55:34 -0500 Subject: [PATCH] [PowerPC][Future] Remove redundant r2 save and restore for indirect call Currently an indirect call produces the following sequence in PC-relative mode: extern void function( ); extern void (*ptrfunc) ( ); void g() { ptrfunc=function; } void f() { (*ptrfunc) ( ); } Producing paddi 3, 0, .LC0@PCREL, 1 ld 3, 0(3) std 2, 24(1) ld 12, 0(3) mtctr 12 bctrl ld 2, 24(1) Though the caller does not use or preserve r2, it is still saved and restored across a function call. This patch removes these redundant saves and restores for indirect calls. Differential Revision: https://reviews.llvm.org/D77749 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 52 ++++++++++++---------- .../PowerPC/pcrel-call-linkage-with-calls.ll | 25 ----------- llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll | 2 - llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll | 37 +++++++++++++++ 4 files changed, 65 insertions(+), 51 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 4f065df..effb165 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5152,6 +5152,12 @@ static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG, return true; } +// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls. 
+static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) { + return Subtarget.isAIXABI() || + (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()); +} + static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, const Function &Caller, const SDValue &Callee, @@ -5168,20 +5174,12 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, // pointer is modeled by using a pseudo instruction for the call opcode that // represents the 2 instruction sequence of an indirect branch and link, // immediately followed by a load of the TOC pointer from the the stack save - // slot into gpr2. - if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) - return PPCISD::BCTRL_LOAD_TOC; - - // An indirect call that does not need a TOC restore. - return PPCISD::BCTRL; + // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC + // as it is not saved or used. + return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC + : PPCISD::BCTRL; } - // FIXME: At this moment indirect calls are treated ahead of the - // PC Relative condition because binaries can still contain a possible - // mix of functions that use a TOC and functions that do not use a TOC. - // Once the PC Relative feature is complete this condition should be moved - // up ahead of the indirect calls and should return a PPCISD::BCTRL for - // that case. if (Subtarget.isUsingPCRelativeCalls()) { assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI."); return PPCISD::CALL_NOTOC; @@ -5439,7 +5437,9 @@ buildCallOperands(SmallVectorImpl &Ops, // pointer from the linkage area. The operand for the TOC restore is an add // of the TOC save offset to the stack pointer. This must be the second // operand: after the chain input but before any other variadic arguments. - if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { + // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not + // saved or used. 
+ if (isTOCSaveRestoreRequired(Subtarget)) { const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT); @@ -6509,17 +6509,21 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( // See prepareDescriptorIndirectCall and buildCallOperands for more // information about calls through function pointers in the 64-bit SVR4 ABI. if (CFlags.IsIndirect) { - assert(!CFlags.IsTailCall && "Indirect tails calls not supported"); - // Load r2 into a virtual register and store it to the TOC save area. - setUsesTOCBasePtr(DAG); - SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); - // TOC save area offset. - unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); - SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore( - Val.getValue(1), dl, Val, AddPtr, - MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); + // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the + // caller in the TOC save area. + if (isTOCSaveRestoreRequired(Subtarget)) { + assert(!CFlags.IsTailCall && "Indirect tails calls not supported"); + // Load r2 into a virtual register and store it to the TOC save area. + setUsesTOCBasePtr(DAG); + SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); + // TOC save area offset. + unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, + MachinePointerInfo::getStack( + DAG.getMachineFunction(), TOCSaveOffset)); + } // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. 
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll index ed96e73..010704f 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -328,14 +328,10 @@ entry: define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) local_unnamed_addr { ; CHECK-ALL-LABEL: IndirectCall1: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep13@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep13@l -; CHECK-S: .localentry IndirectCall1, .Lfunc_lep13-.Lfunc_gep13 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: std r2, 24(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: pld r12, indirectCall@PCREL(0), 1 @@ -343,7 +339,6 @@ define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) loca ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: mtctr r12 ; CHECK-S-NEXT: bctrl -; CHECK-S-NEXT: ld 2, 24(r1) ; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -362,14 +357,10 @@ entry: define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) local_unnamed_addr { ; CHECK-ALL-LABEL: IndirectCall2: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep14@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep14@l -; CHECK-S: .localentry IndirectCall2, .Lfunc_lep14-.Lfunc_gep14 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: std r2, 24(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: pld r12, indirectCall@PCREL(0), 1 @@ -377,7 +368,6 @@ define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) loca ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: mtctr r12 ; CHECK-S-NEXT: bctrl -; 
CHECK-S-NEXT: ld 2, 24(r1) ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 @@ -397,14 +387,10 @@ entry: define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr { ; CHECK-ALL-LABEL: IndirectCall3: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep15@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep15@l -; CHECK-S: .localentry IndirectCall3, .Lfunc_lep15-.Lfunc_gep15 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: std r2, 24(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: add r3, r4, r3 @@ -412,7 +398,6 @@ define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32 ; CHECK-S-NEXT: mtctr r5 ; CHECK-S-NEXT: mr r12, r5 ; CHECK-S-NEXT: bctrl -; CHECK-S-NEXT: ld 2, 24(r1) ; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -430,9 +415,6 @@ entry: define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr { ; CHECK-ALL-LABEL: IndirectCallNoGlobal: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep16@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep16@l -; CHECK-S: .localentry IndirectCallNoGlobal, .Lfunc_lep16-.Lfunc_gep16 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: .cfi_def_cfa_offset 48 @@ -443,10 +425,8 @@ define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext % ; CHECK-S-NEXT: stdu r1, -48(r1) ; CHECK-S-NEXT: mtctr r5 ; CHECK-S-NEXT: mr r12, r5 -; CHECK-S-NEXT: std r2, 24(r1) ; CHECK-S-NEXT: mr r30, r4 ; CHECK-S-NEXT: bctrl -; CHECK-S-NEXT: ld 2, 24(r1) ; CHECK-S-NEXT: add r3, r3, r30 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: addi r1, r1, 48 @@ -462,20 +442,15 @@ entry: define dso_local signext i32 
@IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr { ; CHECK-ALL-LABEL: IndirectCallOnly: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep17@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep17@l -; CHECK-S: .localentry IndirectCallOnly, .Lfunc_lep17-.Lfunc_gep17 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: std r2, 24(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: mtctr r4 ; CHECK-S-NEXT: mr r12, r4 ; CHECK-S-NEXT: bctrl -; CHECK-S-NEXT: ld 2, 24(r1) ; CHECK-S-NEXT: addi r1, r1, 32 ; CHECK-S-NEXT: ld r0, 16(r1) ; CHECK-S-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll index e9aeccd..a3404a8 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll @@ -219,14 +219,12 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -32(r1) -; CHECK-NEXT: std r2, 24(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctrl -; CHECK-NEXT: ld 2, 24(r1) ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll new file mode 100644 index 0000000..7806d69 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll @@ -0,0 +1,37 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names < %s | FileCheck %s + +; The test checks the behavior of PC Relative indirect calls. When using +; PC Relative, TOC save and restore are no longer required. 
Function pointer +; is passed as a parameter in this test. + +; Function Attrs: noinline +define dso_local void @IndirectCallExternFuncPtr(void ()* nocapture %ptrfunc) { +; CHECK-LABEL: IndirectCallExternFuncPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) + +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: mr r12, r3 +; CHECK-NEXT: bctrl + +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + tail call void %ptrfunc() + ret void +} + +define dso_local void @FuncPtrPassAsParam() { +entry: + tail call void @IndirectCallExternFuncPtr(void ()* nonnull @Function) + ret void +} + +declare void @Function() -- 2.7.4