From 90131ac26c8133e0a244039506c90793835f1f85 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 19 Oct 2012 21:47:33 +0000 Subject: [PATCH] [mips] Add code to do tail call optimization. Currently, it is enabled only if option "enable-mips-tail-calls" is given and all of the callee's arguments are passed in registers. llvm-svn: 166342 --- llvm/lib/Target/Mips/MipsISelLowering.cpp | 44 +++++++++++-- llvm/lib/Target/Mips/MipsISelLowering.h | 5 ++ llvm/test/CodeGen/Mips/tailcall.ll | 100 ++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/tailcall.ll diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index cc069fe..7b42686 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" #include "llvm/CallingConv.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -32,12 +33,19 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +STATISTIC(NumTailCalls, "Number of tail calls"); + +static cl::opt<bool> +EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, + cl::desc("MIPS: Enable tail calls."), cl::init(false)); + // If I is a shifted mask, set the size (Size) and the first bit of the // mask (Pos), and return true. // For example, if I is 0x003ff800, (Pos, Size) = (11, 11). 
@@ -2871,9 +2879,26 @@ PassByValArg64(SDValue Chain, DebugLoc dl, MemOpChains.push_back(Chain); } +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. +bool MipsTargetLowering:: +IsEligibleForTailCallOptimization(CallingConv::ID CalleeCC, + unsigned NextStackOffset) const { + if (!EnableMipsTailCalls) + return false; + + // Do not tail-call optimize if there is an argument passed on stack. + if (IsO32 && (CalleeCC != CallingConv::Fast)) { + if (NextStackOffset > 16) + return false; + } else if (NextStackOffset) + return false; + + return true; +} + /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. -/// TODO: isTailCall. SDValue MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { @@ -2888,9 +2913,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; - // MIPs target does not yet support tail call optimization. - isTailCall = false; - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); @@ -2921,11 +2943,20 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (IsO32 && (CallConv != CallingConv::Fast)) NextStackOffset = std::max(NextStackOffset, (unsigned)16); + // Check if it's really possible to do a tail call. + if (isTailCall) + isTailCall = IsEligibleForTailCallOptimization(CallConv, NextStackOffset); + + if (isTailCall) + ++NumTailCalls; + // Chain is the output chain of the last Load/Store or CopyToReg node. // ByValChain is the output chain of the last Memcpy node created for copying // byval arguments to the stack. 
SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); - Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); + + if (!isTailCall) + Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, IsN64 ? Mips::SP_64 : Mips::SP, @@ -3135,6 +3166,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (InFlag.getNode()) Ops.push_back(InFlag); + if (isTailCall) + return DAG.getNode(MipsISD::TailCall, dl, MVT::Other, &Ops[0], Ops.size()); + Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index f185f120..b75a513 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -208,6 +208,11 @@ namespace llvm { SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. 
+ bool IsEligibleForTailCallOptimization(CallingConv::ID CalleeCC, + unsigned NextStackOffset) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/llvm/test/CodeGen/Mips/tailcall.ll b/llvm/test/CodeGen/Mips/tailcall.ll new file mode 100644 index 0000000..4989636 --- /dev/null +++ b/llvm/test/CodeGen/Mips/tailcall.ll @@ -0,0 +1,100 @@ +; RUN: llc -march=mipsel -relocation-model=pic -enable-mips-tail-calls < %s | \ +; RUN: FileCheck %s -check-prefix=PIC32 +; RUN: llc -march=mipsel -relocation-model=static \ +; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=STATIC32 +; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -enable-mips-tail-calls \ +; RUN: < %s | FileCheck %s -check-prefix=N64 + +@g0 = common global i32 0, align 4 +@g1 = common global i32 0, align 4 +@g2 = common global i32 0, align 4 +@g3 = common global i32 0, align 4 +@g4 = common global i32 0, align 4 +@g5 = common global i32 0, align 4 +@g6 = common global i32 0, align 4 +@g7 = common global i32 0, align 4 +@g8 = common global i32 0, align 4 +@g9 = common global i32 0, align 4 + +define i32 @caller1(i32 %a0) nounwind { +entry: +; PIC32-NOT: jalr +; STATIC32-NOT: jal +; N64-NOT: jalr + + %call = tail call i32 @callee1(i32 1, i32 1, i32 1, i32 %a0) nounwind + ret i32 %call +} + +declare i32 @callee1(i32, i32, i32, i32) + +define i32 @caller2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { +entry: +; PIC32: jalr +; STATIC32: jal +; N64-NOT: jalr + + %call = tail call i32 @callee2(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind + ret i32 %call +} + +declare i32 @callee2(i32, i32, i32, i32, i32) + +define i32 @caller3(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind { +entry: +; PIC32: jalr +; STATIC32: jal +; N64-NOT: jalr + + %call = tail call i32 @callee3(i32 1, i32 1, i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind + ret i32 %call +} + +declare i32 @callee3(i32, i32, i32, i32, i32, i32, i32, i32) + 
+define i32 @caller4(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { +entry: +; PIC32: jalr +; STATIC32: jal +; N64: jalr + + %call = tail call i32 @callee4(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind + ret i32 %call +} + +declare i32 @callee4(i32, i32, i32, i32, i32, i32, i32, i32, i32) + +define i32 @caller5() nounwind readonly { +entry: +; PIC32-NOT: jalr +; STATIC32-NOT: jal +; N64-NOT: jalr + + %0 = load i32* @g0, align 4 + %1 = load i32* @g1, align 4 + %2 = load i32* @g2, align 4 + %3 = load i32* @g3, align 4 + %4 = load i32* @g4, align 4 + %5 = load i32* @g5, align 4 + %6 = load i32* @g6, align 4 + %7 = load i32* @g7, align 4 + %8 = load i32* @g8, align 4 + %9 = load i32* @g9, align 4 + %call = tail call fastcc i32 @callee5(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) + ret i32 %call +} + +define internal fastcc i32 @callee5(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9) nounwind readnone noinline { +entry: + %add = add nsw i32 %a1, %a0 + %add1 = add nsw i32 %add, %a2 + %add2 = add nsw i32 %add1, %a3 + %add3 = add nsw i32 %add2, %a4 + %add4 = add nsw i32 %add3, %a5 + %add5 = add nsw i32 %add4, %a6 + %add6 = add nsw i32 %add5, %a7 + %add7 = add nsw i32 %add6, %a8 + %add8 = add nsw i32 %add7, %a9 + ret i32 %add8 +} + -- 2.7.4