From b51325dbdb10f0b4ab2b9c5ec4a979dffc246794 Mon Sep 17 00:00:00 2001
From: Sanjoy Das
Date: Fri, 11 Mar 2016 19:08:34 +0000
Subject: [PATCH] Introduce @llvm.experimental.deoptimize

Summary:
This intrinsic, together with deoptimization operand bundles, allows
frontends to express transfer of control and frame-local state from one
(typically more specialized, hence faster) version of a function into
another (typically more generic, hence slower) version.

In languages with a fully integrated managed runtime, this intrinsic can
be used to implement "uncommon trap"-like functionality.  In unmanaged
languages like C and C++, this intrinsic can be used to represent the
slow paths of specialized functions.

Note: this change does not address how `@llvm.experimental.deoptimize`
is lowered.  That will be done in a later change.

Reviewers: chandlerc, rnk, atrick, reames

Subscribers: llvm-commits, kmod, mjacob, maksfb, mcrosier, JosephTremoulet

Differential Revision: http://reviews.llvm.org/D17732

llvm-svn: 263281
---
 llvm/docs/LangRef.rst                              | 70 +++++++++++++++++
 llvm/include/llvm/ADT/STLExtras.h                  |  7 ++
 llvm/include/llvm/IR/BasicBlock.h                  |  8 ++
 llvm/include/llvm/IR/CallSite.h                    |  4 +
 llvm/include/llvm/IR/Intrinsics.td                 |  4 +
 llvm/lib/IR/BasicBlock.cpp                         | 15 ++++
 llvm/lib/IR/Verifier.cpp                           | 23 ++++++
 llvm/lib/Transforms/Utils/InlineFunction.cpp       | 65 +++++++++++++++-
 .../test/Transforms/Inline/deoptimize-intrinsic.ll | 90 ++++++++++++++++++++++
 llvm/test/Verifier/deoptimize-intrinsic.ll         | 42 ++++++++++
 10 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/Inline/deoptimize-intrinsic.ll
 create mode 100644 llvm/test/Verifier/deoptimize-intrinsic.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index e1232cb..e47cc040 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1534,6 +1534,8 @@
 operand bundle to not miscompile programs containing it.
 
 More specific types of operand bundles are described below.
 
+.. _deopt_opbundles:
+
 Deoptimization Operand Bundles
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -12102,6 +12104,74 @@ Semantics:
 This intrinsic does nothing, and it's removed by optimizers and ignored
 by codegen.
 
+'``llvm.experimental.deoptimize``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare type @llvm.experimental.deoptimize(...) [ "deopt"(...) ]
+
+Overview:
+"""""""""
+
+This intrinsic, together with :ref:`deoptimization operand bundles
+<deopt_opbundles>`, allows frontends to express transfer of control and
+frame-local state from the currently executing (typically more specialized,
+hence faster) version of a function into another (typically more generic,
+hence slower) version.
+
+In languages with a fully integrated managed runtime, like Java and
+JavaScript, this intrinsic can be used to implement "uncommon trap"- or
+"side exit"-like functionality.  In unmanaged languages like C and C++, this
+intrinsic can be used to represent the slow paths of specialized functions.
+
+Arguments:
+""""""""""
+
+The intrinsic takes an arbitrary number of arguments, whose meaning is
+decided by the :ref:`lowering strategy <deoptimize_lowering>`.
+
+Semantics:
+""""""""""
+
+The ``@llvm.experimental.deoptimize`` intrinsic executes an attached
+deoptimization continuation (denoted using a :ref:`deoptimization
+operand bundle <deopt_opbundles>`) and returns the value returned by
+the deoptimization continuation.  Defining the semantic properties of
+the continuation itself is out of scope of the language reference --
+as far as LLVM is concerned, the deoptimization continuation can
+invoke arbitrary side effects, including reading from and writing to
+the entire heap.
+
+Deoptimization continuations expressed using ``"deopt"`` operand bundles always
+continue execution to the end of the physical frame containing them, so all
+calls to ``@llvm.experimental.deoptimize`` must be in "tail position":
+
+   - ``@llvm.experimental.deoptimize`` cannot be invoked.
+   - The call must immediately precede a :ref:`ret <i_ret>` instruction.
+   - The ``ret`` instruction must return the value produced by the
+     ``@llvm.experimental.deoptimize`` call if there is one, or void.
+
+Note that the above restrictions imply that the return type for a call to
+``@llvm.experimental.deoptimize`` will match the return type of its immediate
+caller.
+
+The inliner composes the ``"deopt"`` continuations of the caller into the
+``"deopt"`` continuations present in the inlinee, and also updates calls to
+this intrinsic to return directly from the frame of the function it inlined
+into.
+
+.. _deoptimize_lowering:
+
+Lowering:
+"""""""""
+
+Lowering for ``@llvm.experimental.deoptimize`` is not yet implemented; it is
+a work in progress.
+
 Stack Map Intrinsics
 --------------------
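To make the tail-position contract concrete at the API level, here is a
minimal C++ sketch of how a frontend might emit a deoptimizing exit.  The
helper name `emitDeoptimizeAndReturn` and its parameters are hypothetical,
not part of this patch, and the `OperandBundleDef` construction shown is one
plausible way to attach the "deopt" state:

// Hypothetical sketch (not part of this patch): emit
//   %v = call T (...) @llvm.experimental.deoptimize.T(<args>) [ "deopt"(<state>) ]
//   ret T %v            ; or `ret void` when T is void
// at the end of BB, satisfying the "tail position" rules documented above.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static void emitDeoptimizeAndReturn(BasicBlock *BB, ArrayRef<Value *> Args,
                                    ArrayRef<Value *> DeoptState) {
  Function *Caller = BB->getParent();
  // The intrinsic is overloaded on its return type, which must match the
  // return type of the enclosing function.
  Function *DeoptDecl = Intrinsic::getDeclaration(
      Caller->getParent(), Intrinsic::experimental_deoptimize,
      {Caller->getReturnType()});

  IRBuilder<> Builder(BB);
  OperandBundleDef DeoptBundle(
      "deopt", std::vector<Value *>(DeoptState.begin(), DeoptState.end()));
  CallInst *DeoptCall = Builder.CreateCall(DeoptDecl, Args, {DeoptBundle});

  // Tail position: the call must immediately precede the ret, and the ret
  // must return the call's value (or void).
  if (DeoptCall->getType()->isVoidTy())
    Builder.CreateRetVoid();
  else
    Builder.CreateRet(DeoptCall);
}

This declaration/call/ret shape is the same one the inliner re-creates below
when it retargets inlined deoptimize calls to the caller's return type.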
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 2668335..28f7594 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -393,6 +393,13 @@
 auto find_if(R &&Range, const T &Pred) -> decltype(Range.begin()) {
   return std::find_if(Range.begin(), Range.end(), Pred);
 }
 
+/// Provide wrappers to std::remove_if which take ranges instead of having to
+/// pass begin/end explicitly.
+template <typename R, class UnaryPredicate>
+auto remove_if(R &&Range, UnaryPredicate &&P) -> decltype(Range.begin()) {
+  return std::remove_if(Range.begin(), Range.end(), P);
+}
+
 //===----------------------------------------------------------------------===//
 //     Extra additions to <memory>
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index c6b54d3..e7daf6e 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -111,6 +111,14 @@
   TerminatorInst *getTerminator();
   const TerminatorInst *getTerminator() const;
 
+  /// \brief Returns the call instruction calling @llvm.experimental.deoptimize
+  /// prior to the terminating return instruction of this basic block, if such
+  /// a call is present.  Otherwise, returns null.
+  CallInst *getTerminatingDeoptimizeCall();
+  const CallInst *getTerminatingDeoptimizeCall() const {
+    return const_cast<BasicBlock *>(this)->getTerminatingDeoptimizeCall();
+  }
+
   /// \brief Returns the call instruction marked 'musttail' prior to the
   /// terminating return instruction of this basic block, if such a call is
   /// present.  Otherwise, returns null.
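A short usage sketch of the new accessor (the helper name is invented for
illustration, not part of this patch): collecting every block that ends in a
`call @llvm.experimental.deoptimize(...); ret` pair.

// Hypothetical usage sketch of BasicBlock::getTerminatingDeoptimizeCall.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Returns the deoptimize calls that terminate blocks of F, i.e. the
// "deoptimizing returns" the inliner special-cases below.
static SmallVector<CallInst *, 4> findDeoptimizingReturns(Function &F) {
  SmallVector<CallInst *, 4> DeoptCalls;
  for (BasicBlock &BB : F)
    if (CallInst *CI = BB.getTerminatingDeoptimizeCall())
      DeoptCalls.push_back(CI);
  return DeoptCalls;
}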
diff --git a/llvm/include/llvm/IR/CallSite.h b/llvm/include/llvm/IR/CallSite.h
index 55bd713..2bd836f 100644
--- a/llvm/include/llvm/IR/CallSite.h
+++ b/llvm/include/llvm/IR/CallSite.h
@@ -453,6 +453,10 @@
     CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
   }
 
+  unsigned countOperandBundlesOfType(uint32_t ID) const {
+    CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID));
+  }
+
   IterTy arg_begin() const {
     CALLSITE_DELEGATE_GETTER(arg_begin());
   }
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index a493eab..f5839ac 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -593,6 +593,10 @@
 def int_trap : Intrinsic<[], [], [IntrNoReturn]>,
                GCCBuiltin<"__builtin_trap">;
 def int_debugtrap : Intrinsic<[]>,
                     GCCBuiltin<"__builtin_debugtrap">;
 
+// Support for dynamic deoptimization (or de-specialization)
+def int_experimental_deoptimize : Intrinsic<[llvm_any_ty], [llvm_vararg_ty],
+                                            [Throws]>;
+
 // NOP: calls/invokes to this intrinsic are removed by codegen
 def int_donothing : Intrinsic<[], [], [IntrNoMem]>;
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 89a1d74..9f806fa 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -162,6 +162,21 @@ CallInst *BasicBlock::getTerminatingMustTailCall() {
   return nullptr;
 }
 
+CallInst *BasicBlock::getTerminatingDeoptimizeCall() {
+  if (InstList.empty())
+    return nullptr;
+  auto *RI = dyn_cast<ReturnInst>(&InstList.back());
+  if (!RI || RI == &InstList.front())
+    return nullptr;
+
+  if (auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode()))
+    if (Function *F = CI->getCalledFunction())
+      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize)
+        return CI;
+
+  return nullptr;
+}
+
 Instruction* BasicBlock::getFirstNonPHI() {
   for (Instruction &I : *this)
     if (!isa<PHINode>(I))
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 48792fb..9d9fe7d 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4082,6 +4082,29 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
            "masked_store: vector mask must be same length as data", CS);
     break;
   }
+
+  case Intrinsic::experimental_deoptimize: {
+    Assert(CS.isCall(), "experimental_deoptimize cannot be invoked", CS);
+    Assert(CS.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+           "experimental_deoptimize must have exactly one "
+           "\"deopt\" operand bundle");
+    Assert(CS.getType() == CS.getInstruction()->getFunction()->getReturnType(),
+           "experimental_deoptimize return type must match caller return type");
+
+    if (CS.isCall()) {
+      auto *DeoptCI = CS.getInstruction();
+      auto *RI = dyn_cast<ReturnInst>(DeoptCI->getNextNode());
+      Assert(RI,
+             "calls to experimental_deoptimize must be followed by a return");
+
+      if (!CS.getType()->isVoidTy() && RI)
+        Assert(RI->getReturnValue() == DeoptCI,
+               "calls to experimental_deoptimize must be followed by a return "
+               "of the value computed by experimental_deoptimize");
+    }
+
+    break;
+  }
   };
 }
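Restated as a standalone predicate (a hypothetical sketch, not code from this
patch), the placement rule the verifier enforces above is:

// Hypothetical restatement of the tail-position rule: the deoptimize call
// must immediately precede a ret that returns its value (or void).
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool isInDeoptTailPosition(const CallInst *CI) {
  const auto *RI = dyn_cast_or_null<ReturnInst>(CI->getNextNode());
  if (!RI)
    return false;
  // A void deoptimize call is followed by `ret void`; otherwise the ret
  // must return exactly the value the call produced.
  return CI->getType()->isVoidTy() ? RI->getReturnValue() == nullptr
                                   : RI->getReturnValue() == CI;
}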
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 491b18e..31cf5fb 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -427,6 +427,15 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
     if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
       continue;
 
+    // We do not need to (and in fact, cannot) convert possibly throwing calls
+    // to @llvm.experimental.deoptimize into invokes.  The caller's "segment"
+    // of the deoptimization continuation attached to the newly inlined
+    // @llvm.experimental.deoptimize call should contain the exception handling
+    // logic, if any.
+    if (auto *F = CI->getCalledFunction())
+      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize)
+        continue;
+
     if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
       // This call is nested inside a funclet.  If that funclet has an unwind
       // destination within the inlinee, then unwinding out of this call would
@@ -1613,7 +1622,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
   }
 
-  bool InlinedMustTailCalls = false;
+  bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
   if (InlinedFunctionInfo.ContainsCalls) {
     CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
     if (CallInst *CI = dyn_cast<CallInst>(TheCall))
@@ -1626,6 +1635,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       if (!CI)
         continue;
 
+      if (Function *F = CI->getCalledFunction())
+        InlinedDeoptimizeCalls |=
+            F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
+
       // We need to reduce the strength of any inlined tail calls.  For
       // musttail, we have to avoid introducing potential unbounded stack
       // growth.  For example, if functions 'f' and 'g' are mutually recursive
@@ -1799,6 +1812,56 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     }
   }
 
+  if (InlinedDeoptimizeCalls) {
+    // We need to at least remove the deoptimizing returns from the Return
+    // set, so that the control flow from those returns does not get merged
+    // into the caller (but terminates it instead).  If the caller's return
+    // type does not match the callee's return type, we also need to change
+    // the return type of the intrinsic.
+    if (Caller->getReturnType() == TheCall->getType()) {
+      auto NewEnd = remove_if(Returns, [](ReturnInst *RI) {
+        return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
+      });
+      Returns.erase(NewEnd, Returns.end());
+    } else {
+      SmallVector<ReturnInst *, 8> NormalReturns;
+      Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
+          Caller->getParent(), Intrinsic::experimental_deoptimize,
+          {Caller->getReturnType()});
+
+      for (ReturnInst *RI : Returns) {
+        CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
+        if (!DeoptCall) {
+          NormalReturns.push_back(RI);
+          continue;
+        }
+
+        auto *CurBB = RI->getParent();
+        RI->eraseFromParent();
+
+        SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
+                                         DeoptCall->arg_end());
+
+        SmallVector<OperandBundleDef, 1> OpBundles;
+        DeoptCall->getOperandBundlesAsDefs(OpBundles);
+        DeoptCall->eraseFromParent();
+        assert(!OpBundles.empty() &&
+               "Expected at least the deopt operand bundle");
+
+        IRBuilder<> Builder(CurBB);
+        Value *NewDeoptCall =
+            Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
+        if (NewDeoptCall->getType()->isVoidTy())
+          Builder.CreateRetVoid();
+        else
+          Builder.CreateRet(NewDeoptCall);
+      }
+
+      // Leave behind the normal returns so we can merge control flow.
+      std::swap(Returns, NormalReturns);
+    }
+  }
+
   // Handle any inlined musttail call sites.  In order for a new call site to
   // be musttail, the source of the clone and the inlined call site must have
   // been musttail.  Therefore it's safe to return without merging control
   // into the phi below.
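The erase-after-remove dance above relies on the new range-based
`llvm::remove_if` wrapper added to STLExtras.h earlier in this patch.  A
minimal, self-contained usage sketch (the helper `dropEvens` is hypothetical):

// Hypothetical usage sketch of the range-based llvm::remove_if wrapper,
// paired with erase() in the standard erase/remove idiom.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

static void dropEvens(SmallVectorImpl<int> &Values) {
  // remove_if shuffles kept elements to the front and returns the new end;
  // erase() then shrinks the container, just as InlineFunction.cpp does for
  // the deoptimizing returns in `Returns`.
  auto NewEnd = remove_if(Values, [](int V) { return V % 2 == 0; });
  Values.erase(NewEnd, Values.end());
}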
diff --git a/llvm/test/Transforms/Inline/deoptimize-intrinsic.ll b/llvm/test/Transforms/Inline/deoptimize-intrinsic.ll
new file mode 100644
index 0000000..84e54a0
--- /dev/null
+++ b/llvm/test/Transforms/Inline/deoptimize-intrinsic.ll
@@ -0,0 +1,90 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare i8 @llvm.experimental.deoptimize.i8(...)
+
+define i8 @callee(i1* %c) alwaysinline {
+  %c0 = load volatile i1, i1* %c
+  br i1 %c0, label %left, label %right
+
+left:
+  %c1 = load volatile i1, i1* %c
+  br i1 %c1, label %lleft, label %lright
+
+lleft:
+  %v0 = call i8(...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i32 1) ]
+  ret i8 %v0
+
+lright:
+  ret i8 10
+
+right:
+  %c2 = load volatile i1, i1* %c
+  br i1 %c2, label %rleft, label %rright
+
+rleft:
+  %v1 = call i8(...) @llvm.experimental.deoptimize.i8(i32 1, i32 300, float 500.0, <2 x i32*> undef) [ "deopt"(i32 1) ]
+  ret i8 %v1
+
+rright:
+  %v2 = call i8(...) @llvm.experimental.deoptimize.i8() [ "deopt"(i32 1) ]
+  ret i8 %v2
+}
+
+define void @caller_0(i1* %c, i8* %ptr) {
+; CHECK-LABEL: @caller_0(
+entry:
+  %v = call i8 @callee(i1* %c) [ "deopt"(i32 2) ]
+  store i8 %v, i8* %ptr
+  ret void
+
+; CHECK: lleft.i:
+; CHECK-NEXT:  call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT:  ret void
+
+; CHECK: rleft.i:
+; CHECK-NEXT:  call void (...) @llvm.experimental.deoptimize.isVoid(i32 1, i32 300, float 5.000000e+02, <2 x i32*> undef) [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT:  ret void
+
+; CHECK: rright.i:
+; CHECK-NEXT:  call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT:  ret void
+
+; CHECK: callee.exit:
+; CHECK-NEXT:  store i8 10, i8* %ptr
+; CHECK-NEXT:  ret void
+}
+
+define i32 @caller_1(i1* %c, i8* %ptr) personality i8 3 {
+; CHECK-LABEL: @caller_1(
+entry:
+  %v = invoke i8 @callee(i1* %c) [ "deopt"(i32 3) ]
+          to label %normal unwind label %unwind
+
+; CHECK: lleft.i:
+; CHECK-NEXT:  %0 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 1) [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT:  ret i32 %0
+
+; CHECK: rleft.i:
+; CHECK-NEXT:  %1 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 1, i32 300, float 5.000000e+02, <2 x i32*> undef) [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT:  ret i32 %1
+
+; CHECK: rright.i:
+; CHECK-NEXT:  %2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT:  ret i32 %2
+
+; CHECK: callee.exit:
+; CHECK-NEXT:  br label %normal
+
+; CHECK: normal:
+; CHECK-NEXT:  store i8 10, i8* %ptr
+; CHECK-NEXT:  ret i32 42
+
+unwind:
+  %lp = landingpad i32 cleanup
+  ret i32 43
+
+normal:
+  store i8 %v, i8* %ptr
+  ret i32 42
+}
diff --git a/llvm/test/Verifier/deoptimize-intrinsic.ll b/llvm/test/Verifier/deoptimize-intrinsic.ll
new file mode 100644
index 0000000..81e4d3a
--- /dev/null
+++ b/llvm/test/Verifier/deoptimize-intrinsic.ll
@@ -0,0 +1,42 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+declare i8 @llvm.experimental.deoptimize.i8(...)
+declare void @llvm.experimental.deoptimize.isVoid(...)
+
+declare void @unknown()
+
+define void @f_notail() {
+entry:
+  call void(...) @llvm.experimental.deoptimize.isVoid(i32 0) [ "deopt"() ]
+; CHECK: calls to experimental_deoptimize must be followed by a return
+  call void @unknown()
+  ret void
+}
+
+define void @f_nodeopt() {
+entry:
+  call void(...) @llvm.experimental.deoptimize.isVoid()
+; CHECK: experimental_deoptimize must have exactly one "deopt" operand bundle
+  ret void
+}
+
+define void @f_invoke() personality i8 3 {
+entry:
+  invoke void(...) @llvm.experimental.deoptimize.isVoid(i32 0, float 0.0)
+          to label %ok unwind label %not_ok
+; CHECK: experimental_deoptimize cannot be invoked
+
+ok:
+  ret void
+
+not_ok:
+  %0 = landingpad { i8*, i32 }
+          filter [0 x i8*] zeroinitializer
+  ret void
+}
+
+define i8 @f_incorrect_return() {
+entry:
+  %val = call i8(...) @llvm.experimental.deoptimize.i8() [ "deopt"() ]
+; CHECK: calls to experimental_deoptimize must be followed by a return of the value computed by experimental_deoptimize
+  ret i8 0
+}
-- 
2.7.4