/// If the target supports tail calls.
bool supportsTailCalls() const;
+ /// If the target supports tail calls on \p CB.
+ bool supportsTailCallFor(const CallBase *CB) const;
+
/// Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
ArrayRef<Type *> Tys) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool supportsTailCalls() = 0;
+ virtual bool supportsTailCallFor(const CallBase *CB) = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
}
bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
+ bool supportsTailCallFor(const CallBase *CB) override {
+ // Forward the per-call-site query to the concrete TTI implementation.
+ return Impl.supportsTailCallFor(CB);
+ }
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
// evl but no mask, on Power 9/10. Otherwise, we must scalarize.
return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}
+
+// Return true if a tail call can be supported for the call site \p CB on PPC.
+bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
+  // Subtargets using PC-Relative addressing are always supported.
+  if (ST->isUsingPCRelativeCalls())
+    return true;
+
+  const Function *Callee = CB->getCalledFunction();
+  // Indirect calls and variadic argument functions are not supported.
+  if (!Callee || Callee->isVarArg())
+    return false;
+
+  const Function *Caller = CB->getCaller();
+  // Otherwise support the tail call only if caller and callee can share the
+  // same TOC base, i.e. the callee can be assumed DSO-local.
+  return ST->getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(),
+                                                     Callee);
+}
// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
// This transformation is done only in the resume part of the coroutine that has
// identical signature and calling convention as the coro.resume call.
-static void addMustTailToCoroResumes(Function &F) {
+static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
bool changed = false;
// Collect potential resume instructions.
// Set musttail on those that are followed by a ret instruction.
for (CallInst *Call : Resumes)
- if (simplifyTerminatorLeadingToRet(Call->getNextNode())) {
+ // Skip targets which don't support tail calls for this particular call site.
+ if (TTI.supportsTailCallFor(Call) &&
+ simplifyTerminatorLeadingToRet(Call->getNextNode())) {
Call->setTailCallKind(CallInst::TCK_MustTail);
changed = true;
}
// FIXME: Could we support symmetric transfer effectively without musttail
// call?
if (TTI.supportsTailCalls())
- addMustTailToCoroResumes(*ResumeClone);
+ addMustTailToCoroResumes(*ResumeClone, TTI);
// Store addresses resume/destroy/cleanup functions in the coroutine frame.
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
--- /dev/null
+; Tests that a target (e.g. ppc) can support tail calls conditionally,
+; depending on the call site.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 --code-model=medium \
+; RUN: | FileCheck %s --check-prefix=CHECK-PCREL
+
+; A presplit coroutine with two suspend points. Each suspend is preceded by an
+; indirect fastcc resume call that coro-split may convert to a musttail call
+; when the target's TTI allows it for that call site.
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call i8* @malloc(i64 16) #3
+  %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  ; First suspend point: resume call in the ramp function (never musttail).
+  %save = call token @llvm.coro.save(i8* null)
+  %addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv1 = bitcast i8* %addr1 to void (i8*)*
+  call fastcc void %pv1(i8* null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  ; Second suspend point: this resume call ends up in @f.resume, where the
+  ; musttail conversion is gated on TTI.supportsTailCallFor.
+  %save2 = call token @llvm.coro.save(i8* null)
+  %addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv2 = bitcast i8* %addr2 to void (i8*)*
+  call fastcc void %pv2(i8* null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; Verify that in the initial (ramp) function the resume call is not marked with musttail.
+; CHECK-LABEL: @f(
+; CHECK: %[[addr1:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-NEXT: %[[pv1:.+]] = bitcast i8* %[[addr1]] to void (i8*)*
+; CHECK-NOT: musttail call fastcc void %[[pv1]](i8* null)
+
+; Verify that for a ppc target not using PC-Relative addressing, the resume
+; call in the resume part is not marked with musttail.
+; CHECK-LABEL: @f.resume(
+; CHECK: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
+; CHECK-NEXT: call fastcc void %[[pv2]](i8* null)
+
+; Verify that for a ppc target using PC-Relative addressing, the resume call
+; in the resume part is marked with musttail.
+; CHECK-PCREL-LABEL: @f.resume(
+; CHECK-PCREL: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-PCREL-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
+; CHECK-PCREL-NEXT: musttail call fastcc void %[[pv2]](i8* null)
+; CHECK-PCREL-NEXT: ret void
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare i8* @llvm.coro.begin(token, i8* writeonly) #2
+declare token @llvm.coro.save(i8*) #2
+declare i8* @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
+declare i1 @llvm.coro.end(i8*, i1) #2
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1
+declare i8* @malloc(i64)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }