llvm_unreachable("target does not provide no preserved mask");
}
+ /// Return true if every bit set in \p mask0 is also set in \p mask1, i.e.
+ /// \p mask0 is a bitwise subset of (or equal to) \p mask1.
+ bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const;
+
/// Return all the call-preserved register masks defined for this target.
virtual ArrayRef<const uint32_t *> getRegMasks() const = 0;
virtual ArrayRef<const char *> getRegMaskNames() const = 0;
return false;
}
+/// Check whether the register mask \p mask0 is contained in \p mask1: every
+/// bit set in \p mask0 must also be set in \p mask1. Both masks are read as
+/// arrays of 32-bit words that together cover getNumRegs() bits.
+bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
+                                            const uint32_t *mask1) const {
+  // Round up so a trailing, partially used word is compared as well.
+  const unsigned NumWords = (getNumRegs() + 31) / 32;
+  for (unsigned Word = 0; Word != NumWords; ++Word) {
+    // Any bit present in mask0 but absent from mask1 breaks the subset
+    // relation.
+    if ((mask0[Word] & ~mask1[Word]) != 0)
+      return false;
+  }
+  return true;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void
TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
- // Disable tailcall for CXX_FAST_TLS when callee and caller have different
- // calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
- if (!CCMatch &&
- (CallerCC == CallingConv::CXX_FAST_TLS ||
- CalleeCC == CallingConv::CXX_FAST_TLS))
- return false;
-
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
CCAssignFnForCall(CalleeCC, isVarArg),
CCAssignFnForCall(CallerCC, isVarArg)))
return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ if (!CCMatch) {
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
+ TRI->getCallPreservedMask(MF, CalleeCC)))
+ return false;
+ }
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
- bool CCMatch = CallerCC == CalleeCC;
-
- // Disable tailcall for CXX_FAST_TLS when callee and caller have different
- // calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
- if (!CCMatch &&
- (CallerCC == CallingConv::CXX_FAST_TLS ||
- CalleeCC == CallingConv::CXX_FAST_TLS))
- return false;
assert(Subtarget->supportsTailCall());
CCAssignFnForNode(CalleeCC, true, isVarArg),
CCAssignFnForNode(CallerCC, true, isVarArg)))
return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ if (CalleeCC != CallerCC) {
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
+ TRI->getCallPreservedMask(MF, CalleeCC)))
+ return false;
+ }
// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
if (IsCalleeWin64 != IsCallerWin64)
return false;
- // Disable tailcall for CXX_FAST_TLS when callee and caller have different
- // calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
- if (!CCMatch &&
- (CallerCC == CallingConv::CXX_FAST_TLS ||
- CalleeCC == CallingConv::CXX_FAST_TLS))
- return false;
-
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
RetCC_X86, RetCC_X86))
return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ if (!CCMatch) {
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
+ TRI->getCallPreservedMask(MF, CalleeCC)))
+ return false;
+ }
unsigned StackArgsSize = 0;
--- /dev/null
+; RUN: llc -o - %s | FileCheck %s
+target triple="aarch64--"
+
+declare void @somefunc()
+define preserve_mostcc void @test_ccmismatch_notail() {
+; The caller is preserve_mostcc but somefunc uses the default convention and
+; does not preserve enough registers, so this must stay a regular call (bl).
+; CHECK-LABEL: test_ccmismatch_notail:
+; CHECK-NOT: b somefunc
+; CHECK: bl somefunc
+ tail call void @somefunc()
+ ret void
+}
+
+declare preserve_mostcc void @some_preserve_most_func()
+define void @test_ccmismatch_tail() {
+; The conventions differ, but the preserve_mostcc callee preserves every
+; register this caller has to preserve, so a tail call (b) is expected.
+; CHECK-LABEL: test_ccmismatch_tail:
+; CHECK-NOT: bl some_preserve_most_func
+; CHECK: b some_preserve_most_func
+ tail call preserve_mostcc void @some_preserve_most_func()
+ ret void
+}
ret void
}
+declare void @somefunc()
+define cxx_fast_tlscc void @test_ccmismatch_notail() {
+; No tail call here: the cxx_fast_tlscc caller must preserve more registers
+; than the default-convention callee somefunc does, so expect a bl.
+; CHECK-LABEL: test_ccmismatch_notail:
+; CHECK-NOT: b _somefunc
+; CHECK: bl _somefunc
+ tail call void @somefunc()
+ ret void
+}
+
+declare cxx_fast_tlscc void @some_fast_tls_func()
+define void @test_ccmismatch_tail() {
+; A tail call is fine here: the cxx_fast_tlscc callee preserves everything
+; this caller must preserve (and more), so expect a plain branch (b).
+; CHECK-LABEL: test_ccmismatch_tail:
+; CHECK-NOT: bl _some_fast_tls_func
+; CHECK: b _some_fast_tls_func
+ tail call cxx_fast_tlscc void @some_fast_tls_func()
+ ret void
+}
+
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }