From df92f841105e23be1f2c3fe6bcc543d53cbf7576 Mon Sep 17 00:00:00 2001
From: "Diogo N. Sampaio"
Date: Mon, 3 Jun 2019 08:58:05 +0000
Subject: [PATCH] [ARM][FIX] Ran out of registers due to tail recursion

Summary:
- pr42062

When compiling for MinSize, ARMTargetLowering::LowerCall decides to lower
multiple calls to the same function as indirect calls. However, it disregards
the limitation that Thumb1 indirect calls require the callee to be in one of
the registers r0-r3 (an LLVM limitation). If all of those registers are taken
by arguments, the compiler fails with "error: run out of registers during
register allocation".

This patch passes down to IsEligibleForTailCallOptimization whether we intend
to perform an indirect call, so that tail call optimization is avoided in that
case.

Reviewers: dmgreen, efriedma

Reviewed By: efriedma

Subscribers: javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62683

llvm-svn: 362366
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 77 ++++++++++++++++-----------------
 llvm/lib/Target/ARM/ARMISelLowering.h   | 16 +++----
 llvm/test/CodeGen/ARM/pr42062.ll        | 38 ++++++++++++++++
 3 files changed, 82 insertions(+), 49 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/pr42062.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9231ad2..ad84f03 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1832,29 +1832,40 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   bool isVarArg = CLI.IsVarArg;
 
   MachineFunction &MF = DAG.getMachineFunction();
-  bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
-  bool isThisReturn   = false;
-  bool isSibCall      = false;
+  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+  bool isThisReturn = false;
   auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
+  bool PreferIndirect = false;
 
   // Disable tail calls if they're not supported.
   if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
     isTailCall = false;
 
+  if (isa<GlobalAddressSDNode>(Callee)) {
+    // If we're optimizing for minimum size and the function is called three or
+    // more times in this block, we can improve codesize by calling indirectly
+    // as BLXr has a 16-bit encoding.
+    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+    auto *BB = CLI.CS.getParent();
+    PreferIndirect =
+        Subtarget->isThumb() && Subtarget->hasMinSize() &&
+        count_if(GV->users(), [&BB](const User *U) {
+          return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
+        }) > 2;
+  }
   if (isTailCall) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
-                                                   Outs, OutVals, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(
+        Callee, CallConv, isVarArg, isStructRet,
+        MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
+        PreferIndirect);
     if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
       report_fatal_error("failed to perform tail call elimination on a call "
                          "site marked musttail");
     // We don't support GuaranteedTailCallOpt for ARM, only automatically
     // detected sibcalls.
-    if (isTailCall) {
+    if (isTailCall)
       ++NumTailCalls;
-      isSibCall = true;
-    }
   }
 
   // Analyze operands of the call, assigning locations to each operand.
@@ -1866,14 +1877,14 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
 
-  // For tail calls, memory operands are available in our caller's stack.
-  if (isSibCall)
+  if (isTailCall) {
+    // For tail calls, memory operands are available in our caller's stack.
     NumBytes = 0;
-
-  // Adjust the stack pointer for the new arguments...
-  // These operations are automatically eliminated by the prolog/epilog pass
-  if (!isSibCall)
+  } else {
+    // Adjust the stack pointer for the new arguments...
+    // These operations are automatically eliminated by the prolog/epilog pass
     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  }
 
   SDValue StackPtr =
       DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
@@ -1995,7 +2006,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                           Ops));
       }
-    } else if (!isSibCall) {
+    } else if (!isTailCall) {
       assert(VA.isMemLoc());
 
       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -2067,17 +2078,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
     }
   } else if (isa<GlobalAddressSDNode>(Callee)) {
-    // If we're optimizing for minimum size and the function is called three or
-    // more times in this block, we can improve codesize by calling indirectly
-    // as BLXr has a 16-bit encoding.
-    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
-    auto *BB = CLI.CS.getParent();
-    bool PreferIndirect =
-        Subtarget->isThumb() && Subtarget->hasMinSize() &&
-        count_if(GV->users(), [&BB](const User *U) {
-          return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
-        }) > 2;
-
     if (!PreferIndirect) {
       isDirect = true;
       bool isDef = GV->isStrongDefinitionForLinker();
@@ -2309,28 +2309,25 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization. Targets which want to do tail call
 /// optimization should implement this function.
-bool
-ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
-                                                     CallingConv::ID CalleeCC,
-                                                     bool isVarArg,
-                                                     bool isCalleeStructRet,
-                                                     bool isCallerStructRet,
-                                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                                     const SmallVectorImpl<SDValue> &OutVals,
-                                                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                                                     SelectionDAG& DAG) const {
+bool ARMTargetLowering::IsEligibleForTailCallOptimization(
+    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+    bool isCalleeStructRet, bool isCallerStructRet,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+    const bool isIndirect) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &CallerF = MF.getFunction();
   CallingConv::ID CallerCC = CallerF.getCallingConv();
 
   assert(Subtarget->supportsTailCall());
 
-  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
+  // Indirect tail calls cannot be optimized for Thumb1 if the args
   // to the call take up r0-r3. The reason is that there are no legal registers
   // left to hold the pointer to the function to be called.
   if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
-      !isa<GlobalAddressSDNode>(Callee.getNode()))
-    return false;
+      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
+    return false;
 
   // Look for obvious safe cases to perform tail call optimization that do not
   // require ABI changes. This is what gcc calls sibcall.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 8e254d7..c61135f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -763,15 +763,13 @@ class VectorType;
     /// IsEligibleForTailCallOptimization - Check whether the call is eligible
     /// for tail call optimization. Targets which want to do tail call
     /// optimization should implement this function.
-    bool IsEligibleForTailCallOptimization(SDValue Callee,
-                                           CallingConv::ID CalleeCC,
-                                           bool isVarArg,
-                                           bool isCalleeStructRet,
-                                           bool isCallerStructRet,
-                                           const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                           const SmallVectorImpl<SDValue> &OutVals,
-                                           const SmallVectorImpl<ISD::InputArg> &Ins,
-                                           SelectionDAG& DAG) const;
+    bool IsEligibleForTailCallOptimization(
+        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+        bool isCalleeStructRet, bool isCallerStructRet,
+        const SmallVectorImpl<ISD::OutputArg> &Outs,
+        const SmallVectorImpl<SDValue> &OutVals,
+        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+        const bool isIndirect) const;
 
     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                         bool isVarArg,
diff --git a/llvm/test/CodeGen/ARM/pr42062.ll b/llvm/test/CodeGen/ARM/pr42062.ll
new file mode 100644
index 0000000..612c9d6
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/pr42062.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s 2>&1 | FileCheck %s --implicit-check-not=error
+target triple = "thumbv8m.base-arm-none-eabi"
+@foo = external global i8
+declare i32 @bar(i8* nocapture, i32, i32, i8* nocapture)
+
+define void @food(i8* %a) #0 {
+; CHECK-LABEL: food:
+; CHECK:    mov [[ARG0:r[4-7]]], r0
+; CHECK-NEXT:    movs r1, #8
+; CHECK-NEXT:    movs r2, #1
+; CHECK-NEXT:    ldr [[FOO_R:r[4-7]]], [[FOO_ADDR:\..*]]
+; CHECK-NEXT:    ldr [[BAR_R:r[4-7]]], [[BAR_ADDR:\..*]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    movs r1, #9
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    mov r0, [[ARG0]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    movs r1, #7
+; CHECK-NEXT:    movs r2, #2
+; CHECK-NEXT:    mov r0, [[ARG0]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK: [[FOO_ADDR]]:
+; CHECK-NEXT:    .long foo
+; CHECK: [[BAR_ADDR]]:
+; CHECK-NEXT:    .long bar
+entry:
+  %0 = tail call i32 @bar(i8* %a, i32 8, i32 1, i8* nonnull @foo)
+  %1 = tail call i32 @bar(i8* %a, i32 9, i32 0, i8* nonnull @foo)
+  %2 = tail call i32 @bar(i8* %a, i32 7, i32 2, i8* nonnull @foo)
+  ret void
+}
+attributes #0 = { minsize "target-cpu"="cortex-m23" }
+
-- 
2.7.4
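
For readers who prefer source code over LLVM IR, the following is a rough C++
rendering of the scenario the new pr42062.ll test exercises. It is only an
illustrative sketch, not the original PR 42062 reproducer: the names mirror
the IR test above, and it assumes compilation with clang at -Oz (minsize) for
a Thumb1-class target such as cortex-m23.

// Hypothetical reduction corresponding to pr42062.ll (assumed, not the
// original reproducer). bar() takes four arguments, so they occupy r0-r3.
// Because bar() is called three times in this block under minimum size,
// LowerCall prefers an indirect call (BLXr has a 16-bit encoding); before
// this fix the final call could also be chosen as a tail call, leaving no
// register in r0-r3 to hold the callee address and triggering
// "error: run out of registers during register allocation".
extern char foo;
int bar(char *, int, int, char *);

void food(char *a) {
  bar(a, 8, 1, &foo);
  bar(a, 9, 0, &foo);
  bar(a, 7, 2, &foo); // in tail position; previously considered for a sibcall
}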