define i8* @f(i32 %n) {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
define i8* @f(i32 %n) {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%alloc = call noalias i8* @malloc(i32 24)
%0 = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
%frame = bitcast i8* %0 to %f.frame*
.. code-block:: none
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
.. code-block:: llvm
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
%need.dyn.free = icmp ne i8* %mem, null
br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
entry:
%promise = alloca i32
%pv = bitcast i32* %promise to i8*
- %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
%promise = alloca i32
%pv = bitcast i32* %promise to i8*
; the second argument to coro.id points to the coroutine promise.
- %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
...
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
...
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
- declare i8* @llvm.coro.free(i8* <frame>)
+ declare i8* @llvm.coro.free(token <id>, i8* <frame>)
Overview:
"""""""""
Arguments:
""""""""""
-A pointer to the coroutine frame. This should be the same pointer that was
-returned by prior `coro.begin` call.
+The first argument is a token returned by a call to '``llvm.coro.id``'
+identifying the coroutine.
+
+The second argument is a pointer to the coroutine frame. This should be the same
+pointer that was returned by a prior `coro.begin` call.
Example (custom deallocation function):
"""""""""""""""""""""""""""""""""""""""
.. code-block:: llvm
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %frame)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %frame)
%mem_not_null = icmp ne i8* %mem, null
br i1 %mem_not_null, label %if.then, label %if.end
if.then:
.. code-block:: llvm
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %frame)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %frame)
call void @free(i8* %mem)
ret void
.. code-block:: text
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%dyn.alloc.required = call i1 @llvm.coro.alloc(token %id)
br i1 %dyn.alloc.required, label %coro.alloc, label %coro.begin
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
- declare token @llvm.coro.id(i32 <align>, i8* <promise>, i8* <fnaddr>)
+ declare token @llvm.coro.id(i32 <align>, i8* <promise>, i8* <coroaddr>,
+ i8* <fnaddrs>)
Overview:
"""""""""
The second argument, if not `null`, designates a particular alloca instruction
to be a `coroutine promise`_.
-The third argument is `null` before coroutine is split, and later is replaced
+The third argument is `null` coming out of the frontend. The CoroEarly pass sets
+this argument to point to the function this coro.id belongs to.
+
+The fourth argument is `null` before coroutine is split, and later is replaced
to point to a private global constant array containing function pointers to
outlined resume and destroy parts of the coroutine.
This pass runs late to lower all coroutine related intrinsics not replaced by
earlier passes.
-Upstreaming sequence (rough plan)
-=================================
-#. Add documentation.
-#. Add coroutine intrinsics.
-#. Add empty coroutine passes.
-#. Add coroutine devirtualization + tests.
-#. Add CGSCC restart trigger + tests.
-#. Add coroutine heap elision + tests.
-#. Add custom allocation heap elision + tests. <== we are here
-#. Add coroutine splitting logic + tests.
-#. Add simple coroutine frame builder + tests.
-#. Add the rest of the logic + tests. (Maybe split further as needed).
-
Areas Requiring Attention
=========================
#. A coroutine frame is bigger than it could be. Adding stack packing and stack
// Coroutine Structure Intrinsics.
def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty,
- llvm_ptr_ty],
+ llvm_ptr_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrReadMem,
ReadNone<1>, ReadOnly<2>, NoCapture<2>]>;
def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>;
def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
[WriteOnly<1>]>;
-def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
- [IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>;
+def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly, ReadOnly<1>,
+ NoCapture<1>]>;
def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty], []>;
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
default:
break;
case Intrinsic::coro_id: {
- auto *InfoArg = CS.getArgOperand(2)->stripPointerCasts();
+ auto *InfoArg = CS.getArgOperand(3)->stripPointerCasts();
if (isa<ConstantPointerNull>(InfoArg))
break;
auto *GV = dyn_cast<GlobalVariable>(InfoArg);
if (CII->getInfo().isPreSplit()) {
F.addFnAttr(CORO_PRESPLIT_ATTR, UNPREPARED_FOR_SPLIT);
setCannotDuplicate(CII);
+ CII->setCoroutineSelf();
}
}
break;
Lowerer(Module &M) : LowererBase(M) {}
void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA);
+ bool shouldElide() const;
bool processCoroId(CoroIdInst *, AAResults &AA);
};
} // end anonymous namespace
CA->eraseFromParent();
}
- // To suppress deallocation code, we replace all llvm.coro.free intrinsics
- // associated with this coro.begin with null constant.
- auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C));
- for (auto *CF : CoroFrees) {
- CF->replaceAllUsesWith(NullPtr);
- CF->eraseFromParent();
- }
-
// FIXME: Design how to transmit alignment information for every alloca that
// is spilled into the coroutine frame and recreate the alignment information
// here. Possibly we will need to do a mini SROA here and break the coroutine
removeTailCallAttribute(Frame, AA);
}
+// Decides whether the heap allocation of the coroutine currently being
+// processed may be elided.
+//
+// Elision is attempted only when both of the following hold:
+//   * at least one coro.alloc was collected (without it there is no way to
+//     suppress the allocation), and
+//   * every entry of DestroyAddr takes its frame directly from a coro.begin
+//     SSA value, and the distinct coro.begins referenced that way are as many
+//     as the collected CoroBegins. If a handle had escaped, the destroy would
+//     reference a value reloaded from memory rather than the coro.begin
+//     itself, and the check fails.
+bool Lowerer::shouldElide() const {
+  if (CoroAllocs.empty())
+    return false;
+
+  SmallPtrSet<CoroBeginInst *, 8> DestroyedBegins;
+  for (CoroSubFnInst *DestroyRef : DestroyAddr) {
+    auto *Begin = dyn_cast<CoroBeginInst>(DestroyRef->getFrame());
+    // The frame operand is not a coro.begin: give up on elision.
+    if (!Begin)
+      return false;
+    DestroyedBegins.insert(Begin);
+  }
+
+  // The set only holds distinct coro.begins, so equal sizes mean each collected
+  // coro.begin has a destroy reference (presuming DestroyAddr only refers to
+  // coro.begins that are in CoroBegins).
+  return DestroyedBegins.size() == CoroBegins.size();
+}
+
bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) {
CoroBegins.clear();
CoroAllocs.clear();
+ CoroFrees.clear();
ResumeAddr.clear();
DestroyAddr.clear();
CoroBegins.push_back(CB);
else if (auto *CA = dyn_cast<CoroAllocInst>(U))
CoroAllocs.push_back(CA);
+ else if (auto *CF = dyn_cast<CoroFreeInst>(U))
+ CoroFrees.push_back(CF);
}
// Collect all coro.subfn.addrs associated with coro.begin.
replaceWithConstant(ResumeAddrConstant, ResumeAddr);
- if (DestroyAddr.empty())
- return true;
+ bool ShouldElide = shouldElide();
- auto *DestroyAddrConstant =
- ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex);
+ auto *DestroyAddrConstant = ConstantExpr::getExtractValue(
+ Resumers,
+ ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex);
replaceWithConstant(DestroyAddrConstant, DestroyAddr);
- // If there is a coro.alloc that llvm.coro.id refers to, we have the ability
- // to suppress dynamic allocation.
- if (!CoroAllocs.empty()) {
- // FIXME: The check above is overly lax. It only checks for whether we have
- // an ability to elide heap allocations, not whether it is safe to do so.
- // We need to do something like:
- // If for every exit from the function where coro.begin is
- // live, there is a coro.free or coro.destroy dominating that exit block,
- // then it is safe to elide heap allocation, since the lifetime of coroutine
- // is fully enclosed in its caller.
+ if (ShouldElide) {
auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant));
elideHeapAllocations(CoroId->getFunction(), FrameTy, AA);
+ coro::replaceCoroFree(CoroId, /*Elide=*/true);
}
+
return true;
}
Changed = replaceDevirtTrigger(F);
L->CoroIds.clear();
- L->CoroFrees.clear();
- // Collect all PostSplit coro.ids and all coro.free.
+ // Collect all PostSplit coro.ids.
for (auto &I : instructions(F))
- if (auto *CF = dyn_cast<CoroFreeInst>(&I))
- L->CoroFrees.push_back(CF);
- else if (auto *CII = dyn_cast<CoroIdInst>(&I))
+ if (auto *CII = dyn_cast<CoroIdInst>(&I))
if (CII->getInfo().isPostSplit())
- L->CoroIds.push_back(CII);
+ // If it is the coroutine itself, don't touch it.
+ if (CII->getCoroutine() != CII->getFunction())
+ L->CoroIds.push_back(CII);
// If we did not find any coro.id, there is nothing to do.
if (L->CoroIds.empty())
return Changed;
AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+
for (auto *CII : L->CoroIds)
Changed |= L->processCoroId(CII, AA);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
- AU.setPreservesCFG();
}
};
}
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
}
// Iterate propagating consumes and kills until they stop changing
- int Iteration = 0; (void)Iteration;
-
+ int Iteration = 0;
+ (void)Iteration;
+
bool Changed;
do {
DEBUG(dbgs() << "iteration " << ++Iteration);
/*IsVarArgs=*/false);
auto *FnPtrTy = FnTy->getPointerTo();
- if (Shape.CoroSuspends.size() > UINT32_MAX)
- report_fatal_error("Cannot handle coroutine with this many suspend points");
+ // Figure out how wide the integer type storing the suspend index must be.
+ unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
- SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, Type::getInt32Ty(C)};
+ SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, Type::getIntNTy(C, IndexBits)};
Value *CurrentDef = nullptr;
// Create an entry for every spilled value.
return FrameTy;
}
-// Returns the index of the last non-spill field in the coroutine frame.
-// 2 - if there is no coroutine promise specified or 3, if there is.
-static unsigned getLastNonSpillIndex(coro::Shape &Shape) {
- // TODO: Add support for coroutine promise.
- return 2;
-}
-
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
Value *CurrentValue = nullptr;
BasicBlock *CurrentBlock = nullptr;
Value *CurrentReload = nullptr;
- unsigned Index = getLastNonSpillIndex(Shape);
+ unsigned Index = coro::Shape::LastKnownField;
// We need to keep track of any allocas that need "spilling"
// since they will live in the coroutine frame now, all access to them
// frame.
if (isa<CoroBeginInst>(&I))
continue;
+ // A token returned by CoroIdInst is used to tie together the structural
+ // intrinsics in a coroutine. It should not be saved to the coroutine frame.
+ if (isa<CoroIdInst>(&I))
+ continue;
for (User *U : I.users())
if (Checker.isDefinitionAcrossSuspend(I, U)) {
// allows you to do things like:
//
// if (auto *SF = dyn_cast<CoroSubFnInst>(Inst))
-// ... SF->getFrame() ...
+// ... SF->getFrame() ...
//
// All intrinsic function calls are instances of the call instruction, so these
// are all subclasses of the CallInst class. Note that none of these classes
RestartTrigger = -1,
ResumeIndex,
DestroyIndex,
+ CleanupIndex,
IndexLast,
IndexFirst = RestartTrigger
};
/// This represents the llvm.coro.alloc instruction.
class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst {
- enum { AlignArg, PromiseArg, InfoArg };
+ enum { AlignArg, PromiseArg, CoroutineArg, InfoArg };
+
public:
// Info argument of coro.id is
// fresh out of the frontend: null ;
void setInfo(Constant *C) { setArgOperand(InfoArg, C); }
+  // Returns the function stored in the coroutine operand of this coro.id,
+  // looking through any pointer casts wrapped around it. cast<> expects the
+  // stripped operand to be a Function, so this must not be called while the
+  // operand is still null.
+  Function *getCoroutine() const {
+    auto *Stripped = getArgOperand(CoroutineArg)->stripPointerCasts();
+    return cast<Function>(Stripped);
+  }
+  // Points the coroutine operand of this coro.id at the function containing
+  // it, bitcast to i8*. The operand must still be null when this is called.
+  void setCoroutineSelf() {
+    assert(isa<ConstantPointerNull>(getArgOperand(CoroutineArg)) &&
+           "Coroutine argument is already assigned");
+    Type *BytePtrTy = Type::getInt8PtrTy(getContext());
+    Constant *Self = ConstantExpr::getBitCast(getFunction(), BytePtrTy);
+    setArgOperand(CoroutineArg, Self);
+  }
// Methods to support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntrinsicInst *I) {
/// This represents the llvm.coro.free instruction.
class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst {
+ enum { IdArg, FrameArg };
+
public:
+ // Returns the frame operand of this coro.free (operand index FrameArg, i.e.
+ // the one following the coro.id token).
+ Value *getFrame() const { return getArgOperand(FrameArg); }
+
// Methods to support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_free;
enum { IdArg, MemArg };
public:
- CoroIdInst *getId() const {
- return cast<CoroIdInst>(getArgOperand(IdArg));
- }
+ // Returns the coro.id passed as operand IdArg; cast<> requires that operand
+ // to be a CoroIdInst.
+ CoroIdInst *getId() const { return cast<CoroIdInst>(getArgOperand(IdArg)); }
Value *getMem() const { return getArgOperand(MemArg); }
bool declaresIntrinsics(Module &M, std::initializer_list<StringRef>);
void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement);
void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement);
+void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs,
CallGraph &CG, CallGraphSCC &SCC);
// Field Indexes for known coroutine frame fields.
enum {
- ResumeField = 0,
- DestroyField = 1,
- IndexField = 2,
+ ResumeField,
+ DestroyField,
+ IndexField,
+ LastKnownField = IndexField
};
StructType *FrameTy;
SwitchInst* ResumeSwitch;
bool HasFinalSuspend;
+  // Returns the integer type of the suspend-index field (IndexField) of the
+  // coroutine frame. Requires FrameTy to have been assigned already.
+  IntegerType* getIndexType() const {
+    assert(FrameTy && "frame type not assigned");
+    Type *IndexFieldTy = FrameTy->getElementType(IndexField);
+    return cast<IntegerType>(IndexFieldTy);
+  }
+  // Builds a constant of the frame's suspend-index type holding the given
+  // value.
+  ConstantInt *getIndex(uint64_t Value) const {
+    IntegerType *IndexTy = getIndexType();
+    return ConstantInt::get(IndexTy, Value);
+  }
+
Shape() = default;
explicit Shape(Function &F) { buildFrom(F); }
void buildFrom(Function &F);
uint32_t SuspendIndex = 0;
for (auto S : Shape.CoroSuspends) {
- ConstantInt *IndexVal = Builder.getInt32(SuspendIndex);
+ ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
// Replace CoroSave with a store to Index:
// %index.addr = getelementptr %f.frame... (index field number)
// resume or cleanup pass for every suspend point.
static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
BasicBlock *ResumeEntry, int8_t FnIndex) {
-
Module *M = F.getParent();
auto *FrameTy = Shape.FrameTy;
auto *FnPtrTy = cast<PointerType>(FrameTy->getElementType(0));
OldVFrame->replaceAllUsesWith(NewVFrame);
// Replace coro suspend with the appropriate resume index.
- auto *NewValue = Builder.getInt8(FnIndex);
+ // Replacing coro.suspend with (0) will result in control flow proceeding to
+ // a resume label associated with a suspend point, replacing it with (1) will
+ // result in control flow proceeding to a cleanup label associated with this
+ // suspend point.
+ auto *NewValue = Builder.getInt8(FnIndex ? 1 : 0);
for (CoroSuspendInst *CS : Shape.CoroSuspends) {
auto *MappedCS = cast<CoroSuspendInst>(VMap[CS]);
MappedCS->replaceAllUsesWith(NewValue);
// FIXME: coming in upcoming patches:
// replaceUnwindCoroEnds(Shape.CoroEnds, VMap);
- // Store the address of this clone in the coroutine frame.
- Builder.SetInsertPoint(Shape.FramePtr->getNextNode());
- auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr, 0,
- FnIndex, "fn.addr");
- Builder.CreateStore(NewF, G);
+ // We only store resume(0) and destroy(1) addresses in the coroutine frame.
+ // The cleanup(2) clone is only used during devirtualization when coroutine is
+ // eligible for heap elision and thus does not participate in indirect calls
+ // and does not need its address to be stored in the coroutine frame.
+ if (FnIndex < 2) {
+ // Store the address of this clone in the coroutine frame.
+ Builder.SetInsertPoint(Shape.FramePtr->getNextNode());
+ auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr,
+ 0, FnIndex, "fn.addr");
+ Builder.CreateStore(NewF, G);
+ }
+
+ // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
+ // to suppress deallocation code.
+ coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
+ /*Elide=*/FnIndex == 2);
+
NewF->setCallingConv(CallingConv::Fast);
return NewF;
return;
buildCoroutineFrame(F, Shape);
+ replaceFrameSize(Shape);
auto *ResumeEntry = createResumeEntryBlock(F, Shape);
- auto *ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0);
- auto *DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1);
+ auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0);
+ auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1);
+ auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2);
// We no longer need coro.end in F.
removeCoroEnds(Shape);
postSplitCleanup(F);
postSplitCleanup(*ResumeClone);
postSplitCleanup(*DestroyClone);
+ postSplitCleanup(*CleanupClone);
- replaceFrameSize(Shape);
-
- setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone});
- coro::updateCallGraph(F, {ResumeClone, DestroyClone}, CG, SCC);
+ setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone});
+ coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC);
}
// When we see the coroutine the first time, we insert an indirect call to a
return false;
}
+// Replaces and erases every coro.free that uses the token of the given CoroId.
+// When Elide is true each coro.free is replaced with a null i8*; otherwise
+// each one is replaced with the frame operand of the first coro.free found
+// among the users of CoroId.
+void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) {
+  // Collect the coro.frees up front so that erasing them below does not
+  // disturb the walk over CoroId's users.
+  SmallVector<CoroFreeInst *, 4> Frees;
+  for (User *Usr : CoroId->users()) {
+    auto *Free = dyn_cast<CoroFreeInst>(Usr);
+    if (Free)
+      Frees.push_back(Free);
+  }
+
+  // Nothing refers to this coro.id through a coro.free.
+  if (Frees.empty())
+    return;
+
+  Value *NewValue;
+  if (Elide)
+    NewValue =
+        ConstantPointerNull::get(Type::getInt8PtrTy(CoroId->getContext()));
+  else
+    NewValue = Frees.front()->getFrame();
+
+  for (CoroFreeInst *Free : Frees) {
+    Free->replaceAllUsesWith(NewValue);
+    Free->eraseFromParent();
+  }
+}
+
// FIXME: This code is stolen from CallGraph::addToCallGraph(Function *F), which
// happens to be private. It is better for this functionality exposed by the
// CallGraph.
// Canonicalize coro.suspend by inserting a coro.save if needed.
for (CoroSuspendInst *CS : CoroSuspends)
if (!CS->getCoroSave())
- createCoroSave(CoroBegin, CoroSuspends.back());
+ createCoroSave(CoroBegin, CS);
}
define i8* @f() {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
+ i8* bitcast (i8*()* @f to i8*),
i8* bitcast ([2 x void (i8*)*]* @f.resumers to i8*))
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
ret i8* %hdl
; CHECK-LABEL: @callResume(
define void @callResume() {
entry:
-; CHECK: call i8* @llvm.coro.begin
%hdl = call i8* @f()
-; CHECK-NEXT: call void @print(i32 0)
+; CHECK: call void @print(i32 0)
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
%1 = bitcast i8* %0 to void (i8*)*
call fastcc void %1(i8* %hdl)
; CHECK-LABEL: @eh(
define void @eh() personality i8* null {
entry:
-; CHECK: call i8* @llvm.coro.begin
%hdl = call i8* @f()
-; CHECK-NEXT: call void @print(i32 0)
+; CHECK: call void @print(i32 0)
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
%1 = bitcast i8* %0 to void (i8*)*
invoke void %1(i8* %hdl)
; no devirtualization here, since coro.begin info parameter is null
define void @no_devirt_info_null() {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
ret void
}
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i8* @llvm.coro.frame()
declare i8* @llvm.coro.subfn.addr(i8*, i8)
declare fastcc void @f.resume(%f.frame*)
declare fastcc void @f.destroy(%f.frame*)
+declare fastcc void @f.cleanup(%f.frame*)
declare void @may_throw()
declare i8* @CustomAlloc(i32)
declare void @CustomFree(i8*)
-@f.resumers = internal constant
- [2 x void (%f.frame*)*] [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy]
+@f.resumers = internal constant [3 x void (%f.frame*)*]
+ [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy, void (%f.frame*)* @f.cleanup]
; a coroutine start function
define i8* @f() personality i8* null {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
- i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
+ i8* bitcast (i8*()* @f to i8*),
+ i8* bitcast ([3 x void (%f.frame*)*]* @f.resumers to i8*))
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
ehcleanup:
%tok = cleanuppad within none []
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
%need.dyn.free = icmp ne i8* %mem, null
br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
; CHECK-NOT: tail call void @bar(
; CHECK: call void @bar(
tail call void @bar(i8* %hdl)
-; CHECK: tail call void @bar(
+; CHECK: tail call void @bar(
tail call void @bar(i8* null)
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
%1 = bitcast i8* %0 to void (i8*)*
call fastcc void %1(i8* %hdl)
-; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* %vFrame)
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
%2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
%3 = bitcast i8* %2 to void (i8*)*
call fastcc void %3(i8* %hdl)
define i8* @f_no_elision() personality i8* null {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
- i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
+ i8* bitcast (i8*()* @f_no_elision to i8*),
+ i8* bitcast ([3 x void (%f.frame*)*]* @f.resumers to i8*))
%alloc = call i8* @CustomAlloc(i32 4)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
ret i8* %hdl
ret void
}
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i8* @llvm.coro.frame(token)
declare i8* @llvm.coro.subfn.addr(i8*, i8)
define i8* @f() "coroutine.presplit"="1" {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
br label %cleanup
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
; CHECK: call void @free(
; CHECK: ret void
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare void @llvm.coro.end(i8*, i1)
define i8* @f() {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
br label %cleanup
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
call void @llvm.coro.resume(i8* %hdl)
ret i32 0
; CHECK-LABEL: @main(
-; CHECK: call i8* @malloc
-; CHECK-NOT: call void @free
; CHECK: call void @print(i32 0)
-; CHECK-NOT: call void @free
; CHECK: call void @print(i32 1)
-; CHECK: call void @free
; CHECK: ret i32 0
}
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare void @llvm.coro.end(i8*, i1)
define i8* @f(i32 %n) {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
br label %loop
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
; CHECK: ret i32 0
}
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.alloc(token)
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
define i8* @f(i32 %n) {
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
declare void @free(i8*)
declare void @print(i32)
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i32 @llvm.coro.size.i32()
declare i8* @llvm.coro.begin(token, i8*)
declare i8 @llvm.coro.suspend(token, i1)
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare void @llvm.coro.end(i8*, i1)
declare void @llvm.coro.resume(i8*)
--- /dev/null
+; Second example from Doc/Coroutines.rst (custom alloc and free functions)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+; Coroutine that prints %n, %n+1, ... (one value per resume) and allocates its
+; frame through @CustomAlloc / @CustomFree. The allocation is guarded by
+; llvm.coro.alloc and the deallocation by the result of llvm.coro.free.
+define i8* @f(i32 %n) {
+entry:
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+ ; Allocate only when llvm.coro.alloc reports that it is required.
+ %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+ br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+ %size = call i32 @llvm.coro.size.i32()
+ %alloc = call i8* @CustomAlloc(i32 %size)
+ br label %coro.begin
+coro.begin:
+ ; null when the dyn.alloc block was bypassed, the allocated memory otherwise.
+ %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+ %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+ br label %loop
+loop:
+ %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ]
+ %inc = add nsw i32 %n.val, 1
+ call void @print(i32 %n.val)
+ ; Suspend result: 0 continues the loop, 1 goes to cleanup, anything else
+ ; goes straight to the suspend block.
+ %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %0, label %suspend [i8 0, label %loop
+ i8 1, label %cleanup]
+cleanup:
+ ; Call @CustomFree only when llvm.coro.free returns a non-null pointer.
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+ %need.dyn.free = icmp ne i8* %mem, null
+ br i1 %need.dyn.free, label %dyn.free, label %suspend
+dyn.free:
+ call void @CustomFree(i8* %mem)
+ br label %suspend
+suspend:
+ call void @llvm.coro.end(i8* %hdl, i1 false)
+ ret i8* %hdl
+}
+
+; CHECK-LABEL: @main
+; Creates the coroutine with n = 4, resumes it twice and destroys it.
+define i32 @main() {
+entry:
+ %hdl = call i8* @f(i32 4)
+ call void @llvm.coro.resume(i8* %hdl)
+ call void @llvm.coro.resume(i8* %hdl)
+ call void @llvm.coro.destroy(i8* %hdl)
+ ret i32 0
+; The directives below require the three print calls (4, 5, 6) and the return
+; to appear on consecutive lines of the optimized output, so no other call
+; (in particular to the custom allocator or deallocator) may remain between
+; them.
+; CHECK: call void @print(i32 4)
+; CHECK-NEXT: call void @print(i32 5)
+; CHECK-NEXT: call void @print(i32 6)
+; CHECK-NEXT: ret i32 0
+}
+
+declare i8* @CustomAlloc(i32)
+declare void @CustomFree(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare void @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
--- /dev/null
+; Third example from Doc/Coroutines.rst (two suspend points)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+; Coroutine with two suspend points per loop iteration. Each iteration prints
+; the current value, suspends, then prints its bitwise complement
+; (xor with -1) and suspends again before continuing with value + 1.
+; The frame is allocated with @malloc and released with @free.
+define i8* @f(i32 %n) {
+entry:
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+ %size = call i32 @llvm.coro.size.i32()
+ %alloc = call i8* @malloc(i32 %size)
+ %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+ br label %loop
+loop:
+ %n.val = phi i32 [ %n, %entry ], [ %inc, %loop.resume ]
+ ; No attribute group is attached to this call: this file defines none.
+ call void @print(i32 %n.val)
+ ; First suspend point: 0 resumes at loop.resume, 1 goes to cleanup.
+ %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %0, label %suspend [i8 0, label %loop.resume
+ i8 1, label %cleanup]
+loop.resume:
+ %inc = add nsw i32 %n.val, 1
+ %sub = xor i32 %n.val, -1
+ call void @print(i32 %sub)
+ ; Second suspend point: 0 resumes at the top of the loop, 1 goes to cleanup.
+ %1 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %1, label %suspend [i8 0, label %loop
+ i8 1, label %cleanup]
+cleanup:
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+ call void @free(i8* %mem)
+ br label %suspend
+suspend:
+ call void @llvm.coro.end(i8* %hdl, i1 false)
+ ret i8* %hdl
+}
+
+; CHECK-LABEL: @main
+; Creates the coroutine with n = 4, resumes it twice and destroys it.
+define i32 @main() {
+entry:
+ %hdl = call i8* @f(i32 4)
+ call void @llvm.coro.resume(i8* %hdl)
+ call void @llvm.coro.resume(i8* %hdl)
+ call void @llvm.coro.destroy(i8* %hdl)
+ ret i32 0
+; Expected output: print(4), print(-5) and print(5) on consecutive lines; the
+; return only has to appear somewhere after them.
+; CHECK: call void @print(i32 4)
+; CHECK-NEXT: call void @print(i32 -5)
+; CHECK-NEXT: call void @print(i32 5)
+; CHECK: ret i32 0
+}
+
+declare i8* @malloc(i32)
+declare void @free(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare void @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1
define void @f() {
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
br label %cleanup
cleanup:
- %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
ret void
}
-declare token @llvm.coro.id(i32, i8*, i8*)
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.begin(token, i8*)
-declare i8* @llvm.coro.free(i8*)
+declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)