From 1d04dc52dd24d791970e56053cdd67fe149b0554 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Mon, 18 Jan 2021 09:06:21 -0800 Subject: [PATCH] [Coroutine] Do not CoroElide if there are musttail calls This is to address https://bugs.llvm.org/show_bug.cgi?id=48626. When there are musttail calls that use parameters aliasing the newly created coroutine frame, the existing implementation will fatal. We simply cannot perform CoroElide in such cases. In theory a precise analysis can be done to check whether the parameters of the musttail call actually alias the frame, but it's very hard to do it before the transformation happens. Also in most cases the existence of musttail call is generated due to symmetric transfers, and in those cases alias analysis won't be able to tell that they don't alias anyway. Differential Revision: https://reviews.llvm.org/D94834 --- llvm/lib/Transforms/Coroutines/CoroElide.cpp | 23 ++++--- .../Transforms/Coroutines/coro-elide-musttail.ll | 74 ++++++++++++++++++++++ 2 files changed, 89 insertions(+), 8 deletions(-) create mode 100644 llvm/test/Transforms/Coroutines/coro-elide-musttail.ll diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index a0b8168..07a183c 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -83,14 +83,8 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) { Function &F = *Frame->getFunction(); for (Instruction &I : instructions(F)) if (auto *Call = dyn_cast(&I)) - if (Call->isTailCall() && operandReferences(Call, Frame, AA)) { - // FIXME: If we ever hit this check. Evaluate whether it is more - // appropriate to retain musttail and allow the code to compile. - if (Call->isMustTailCall()) - report_fatal_error("Call referring to the coroutine frame cannot be " - "marked as musttail"); + if (Call->isTailCall() && operandReferences(Call, Frame, AA)) Call->setTailCall(false); - } } // Given a resume function @f.resume(%f.frame* %frame), returns the size @@ -252,7 +246,20 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const { // If size of the set is the same as total number of coro.begin, that means we // found a coro.free or coro.destroy referencing each coro.begin, so we can // perform heap elision. - return ReferencedCoroBegins.size() == CoroBegins.size(); + if (ReferencedCoroBegins.size() != CoroBegins.size()) + return false; + + // If any call in the function is a musttail call, it usually won't work + // because we cannot drop the tailcall attribute, and a tail call will reuse + // the entire stack where we are going to put the new frame. In theory a more + // precise analysis can be done to check whether the new frame aliases with + // the call, however it's challenging to do so before the elision actually + // happened. + for (BasicBlock &BB : *F) + if (BB.getTerminatingMustTailCall()) + return false; + + return true; } void Lowerer::collectPostSplitCoroIds(Function *F) { diff --git a/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll b/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll new file mode 100644 index 0000000..7511904 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Only run with new pass manager since old pass manager's alias analysis isn't +; powerful enough to tell that the tailcall's arguments don't alias the frame. +; +; RUN: opt < %s -passes='coro-elide' -S | FileCheck %s + +%"bar.Frame" = type { void (%"bar.Frame"*)*, void (%"bar.Frame"*)*, %"struct.coroutine::promise_type", i1 } +%"struct.coroutine::promise_type" = type { i32 } +%"foo.Frame" = type { void (%"foo.Frame"*)*, void (%"foo.Frame"*)*, %"struct.coroutine::promise_type", i1, %"struct.coroutine::promise_type::final_awaitable" } +%"struct.coroutine::promise_type" = type { i32 } +%"struct.coroutine::promise_type::final_awaitable" = type { i8 } +@"bar.resumers" = private constant [3 x void (%"bar.Frame"*)*] [void (%"bar.Frame"*)* @"bar.resume", void (%"bar.Frame"*)* undef, void (%"bar.Frame"*)* undef] + +declare dso_local void @"bar"() align 2 +declare dso_local fastcc void @"bar.resume"(%"bar.Frame"*) align 2 + +define internal fastcc void @foo.resume_musttail(%"foo.Frame"* %FramePtr) { +; CHECK-LABEL: @foo.resume_musttail( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.coro.id +; CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.coro.alloc(token [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* @llvm.coro.begin(token [[TMP0]], i8* null) +; CHECK-NEXT: [[CALL34:%.*]] = call i8* undef() +; CHECK-NEXT: musttail call fastcc void undef(i8* [[CALL34]]) +; CHECK-NEXT: ret void +; +entry: + %0 = tail call token @llvm.coro.id(i32 16, i8* null, i8* bitcast (void ()* @"bar" to i8*), i8* bitcast ([3 x void (%"bar.Frame"*)*]* @"bar.resumers" to i8*)) + %1 = tail call i1 @llvm.coro.alloc(token %0) + %2 = tail call i8* @llvm.coro.begin(token %0, i8* null) + call i8* @llvm.coro.subfn.addr(i8* %2, i8 1) + %call34 = call i8* undef() + musttail call fastcc void undef(i8* %call34) + ret void +} + +define internal fastcc void @foo.resume_no_musttail(%"foo.Frame"* %FramePtr) { +; CHECK-LABEL: @foo.resume_no_musttail( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = alloca [24 x i8], align 8 +; CHECK-NEXT: [[VFRAME:%.*]] = bitcast [24 x i8]* [[TMP0]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call token @llvm.coro.id +; CHECK-NEXT: [[CALL34:%.*]] = call i8* undef() +; CHECK-NEXT: call fastcc void undef(i8* [[CALL34]]) +; CHECK-NEXT: ret void +; +entry: + %0 = tail call token @llvm.coro.id(i32 16, i8* null, i8* bitcast (void ()* @"bar" to i8*), i8* bitcast ([3 x void (%"bar.Frame"*)*]* @"bar.resumers" to i8*)) + %1 = tail call i1 @llvm.coro.alloc(token %0) + %2 = tail call i8* @llvm.coro.begin(token %0, i8* null) + call i8* @llvm.coro.subfn.addr(i8* %2, i8 1) + %call34 = call i8* undef() + tail call fastcc void undef(i8* %call34) + ret void +} + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0 + +; Function Attrs: argmemonly nounwind readonly +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 + +; Function Attrs: nounwind +declare i1 @llvm.coro.alloc(token) #2 + +; Function Attrs: nounwind +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 + +; Function Attrs: argmemonly nounwind readonly +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 + +attributes #0 = { argmemonly nofree nosync nounwind willreturn } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } -- 2.7.4