Implement simple version of On Stack Replacement (OSR) (#32969)
authorAndy Ayers <andya@microsoft.com>
Tue, 17 Mar 2020 00:29:55 +0000 (17:29 -0700)
committerGitHub <noreply@github.com>
Tue, 17 Mar 2020 00:29:55 +0000 (17:29 -0700)
Add support to runtime and jit to allow switching from unoptimized to
optimized code for a method while the method has active stack frames.

Details in the included document.

120 files changed:
docs/design/features/OnStackReplacement.md [new file with mode: 0644]
src/coreclr/clrdefinitions.cmake
src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.cpp
src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.h
src/coreclr/src/ToolBox/superpmi/superpmi-shared/crlwmlist.h
src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h
src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h
src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h
src/coreclr/src/ToolBox/superpmi/superpmi-shared/runtimedetails.h
src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp
src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp
src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp
src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp
src/coreclr/src/inc/CrstTypes.def
src/coreclr/src/inc/clrconfigvalues.h
src/coreclr/src/inc/corinfo.h
src/coreclr/src/inc/corjitflags.h
src/coreclr/src/inc/crsttypes.h
src/coreclr/src/inc/jithelpers.h
src/coreclr/src/inc/patchpointinfo.h [new file with mode: 0644]
src/coreclr/src/jit/CMakeLists.txt
src/coreclr/src/jit/block.cpp
src/coreclr/src/jit/block.h
src/coreclr/src/jit/codegencommon.cpp
src/coreclr/src/jit/codegenxarch.cpp
src/coreclr/src/jit/compiler.cpp
src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/compphases.h
src/coreclr/src/jit/emitxarch.cpp
src/coreclr/src/jit/flowgraph.cpp
src/coreclr/src/jit/gcencode.cpp
src/coreclr/src/jit/gschecks.cpp
src/coreclr/src/jit/importer.cpp
src/coreclr/src/jit/jitconfigvalues.h
src/coreclr/src/jit/jitee.h
src/coreclr/src/jit/jiteh.cpp
src/coreclr/src/jit/lclvars.cpp
src/coreclr/src/jit/lsrabuild.cpp
src/coreclr/src/jit/morph.cpp
src/coreclr/src/jit/patchpoint.cpp [new file with mode: 0644]
src/coreclr/src/jit/rangecheck.cpp
src/coreclr/src/jit/valuenum.cpp
src/coreclr/src/tools/Common/JitInterface/CorInfoBase.cs
src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs
src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
src/coreclr/src/tools/crossgen2/jitinterface/jitinterface.h
src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp
src/coreclr/src/vm/CMakeLists.txt
src/coreclr/src/vm/ceemain.cpp
src/coreclr/src/vm/codeman.cpp
src/coreclr/src/vm/codeman.h
src/coreclr/src/vm/codeversion.cpp
src/coreclr/src/vm/codeversion.h
src/coreclr/src/vm/compile.cpp
src/coreclr/src/vm/debuginfostore.cpp
src/coreclr/src/vm/debuginfostore.h
src/coreclr/src/vm/eeconfig.cpp
src/coreclr/src/vm/eeconfig.h
src/coreclr/src/vm/jithelpers.cpp
src/coreclr/src/vm/jitinterface.cpp
src/coreclr/src/vm/jitinterface.h
src/coreclr/src/vm/loaderallocator.cpp
src/coreclr/src/vm/loaderallocator.hpp
src/coreclr/src/vm/method.hpp
src/coreclr/src/vm/onstackreplacement.cpp [new file with mode: 0644]
src/coreclr/src/vm/onstackreplacement.h [new file with mode: 0644]
src/coreclr/src/vm/prestub.cpp
src/coreclr/src/vm/tieredcompilation.cpp
src/coreclr/src/zap/zapinfo.cpp
src/coreclr/src/zap/zapinfo.h
src/coreclr/tests/src/JIT/opt/OSR/addressexposedlocal.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/addressexposedlocal.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/doublestackalloc.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/doublestackalloc.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/example.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/example.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/genericmethodpatchpoint.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/genericmethodpatchpoint.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/innerloop.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/innerloop.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/integersumloop.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/integersumloop.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/livelocaladdress.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/livelocaladdress.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/livelocalstackalloc.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/livelocalstackalloc.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainloop.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainloop.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainloop2.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainloop2.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainlooptry.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainlooptry.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainlooptry2.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainlooptry2.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainlooptry3.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainlooptry3.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainlooptry4.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/mainlooptry4.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/memoryargument.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/memoryargument.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/nesteddoloops.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/nesteddoloops.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/originalstackalloc.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/originalstackalloc.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/osrcontainstry.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/osrcontainstry.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/promoted.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/promoted.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/tailrecurse.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/tailrecurse.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry2.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry2.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods.csproj [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods1.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods1.csproj [new file with mode: 0644]

diff --git a/docs/design/features/OnStackReplacement.md b/docs/design/features/OnStackReplacement.md
new file mode 100644 (file)
index 0000000..9056968
--- /dev/null
@@ -0,0 +1,1111 @@
+# On Stack Replacement in the CLR
+
+Design Sketch and Prototype Assessment
+
+Andy Ayers
+
+Initial: 7 July 2019 &mdash;
+Revised: 25 February 2020
+
+## Overview
+
+On Stack Replacement allows the code executed by currently running methods to be
+changed in the middle of method execution, while those methods are active "on
+stack." This document describes design considerations and challenges involved in
+implementing basic On Stack Replacement for the CLR, presents the results of
+some prototype investigations, and describes how OSR might be used to re-host
+Edit and Continue and support more general transitions like deoptimization.
+
+* [Background](#1-Background)
+* [Design Principles](#2-Design-Principles)
+* [An Overview of OSR](#3-An-Overview-of-OSR)
+* [Complications](#4-Complications)
+* [The Prototype](#5-The-Prototype)
+* [Edit and Continue](#6-Edit-and-Continue)
+* [Deoptimization](#7-Deoptimization)
+* [References](#8-References)
+
+## 1. Background
+
+On Stack Replacement (hereafter _OSR_) refers to a set of techniques for
+migrating active stack frames from one version of code to another.
+
+The two versions of the code involved in OSR may arise from different program
+sources (as in Edit and Continue) or different approaches to compiling or
+executing a single program (say, unoptimized code versus optimized code). The
+goal of OSR is to transparently redirect execution from an old version of code
+into a new version, even when in the middle of executing the old version.
+
+Initial work on OSR was pioneered in Self [[1](#1)] as an approach for debugging
+optimized code. But in the years since, OSR has mainly seen adoption on
+platforms like Java [[2](#2), [3](#3)] and JavaScript that rely heavily on
+adaptive recompilation of code.
+
+The ability to adaptively recompile and switch code versions while methods are
+running provides some key advantages:
+
+* Platforms can offer both quick start up and excellent steady-state
+  performance, interpreting or quickly jitting to enable initial method
+  execution, and using OSR to update the methods with better performing or more
+  completely compiled versions as needed.
+
+* Platforms can take advantage of transient program facts and recover when those
+  facts no longer hold. For example, a platform may compile virtual or
+  interface calls as direct calls initially and use OSR to update to more
+  general versions of code when overriding methods or other interface
+  implementations arrive on scene.
+
+The CLR already supports various mechanisms for changing the code for a method
+in a runtime instance. Edit and Continue implements true OSR but is supported
+only on some architectures, works only when code is running under a managed
+debugger, and only for unoptimized-to-unoptimized code. Profiler
+rejit and tiered compilation can update code used in future invocations of
+methods, but not code running in currently active methods.
+
+In this document we will vary a bit from the literature and use OSR to refer
+strictly to the case where we are transitioning execution **from** an
+unoptimized code instance (either to another unoptimized instance or an
+optimized instance). We will use _deoptimization_ (_deopt_) to describe the
+transition from an optimized code instance to some other code instance
+(typically to an unoptimized instance).
+
+We envision OSR as a technology that will allow us to enable tiered compilation
+by default: performance-critical applications will no longer risk seeing key
+methods trapped in unoptimized tier0 code, and straightforwardly written
+microbenchmarks (e.g. all code in `main`) will perform as expected, since no
+matter how they are coded, they will be able to transition to optimized code.
+
+OSR also provides key building blocks for an eventual implementation of deopt
+and the ability of our platforms to make strong speculative bets in code
+generation.
+
+In addition, OSR will allow us to experiment with so-called _deferred
+compilation_, where the jit initially only compiles parts of methods that it
+believes likely to execute (say, based on heuristics or prior runs). If an
+uncompiled part of a method is reached at runtime, OSR can trigger recompilation
+of the missing part or recompilation of the entire method.
+
+The remainder of this document describes OSR in more detail, providing a design
+sketch and some key design choice points, the results and insights gained from
+creating a fully functional prototype, and a list of open issues and areas
+requiring further investigation. We will also mention _deopt_ in passing and
+describe why it presents a different and larger set of challenges.
+
+## 2. Design Principles
+
+As we consider proposals for implementing OSR, we will try to satisfy the
+following design principles:
+
+* Pay as you go. The costs of OSR should be limited to methods that can benefit
+  from OSR, and where possible, paid largely when OSR actually happens.
+
+* Impose few restrictions on optimized codegen. We should not have to restrict
+  or dumb down optimized codegen to allow transitions to it via OSR.
+
+* Anticipate likely changes in jit codegen strategy. We should support enabling
+  some optimizations (devirtualization, early branch pruning, some expression
+  opts) at Tier0 without having to radically alter our approach.
+
+* Design for testability. We should be able to force OSR transitions wherever
+  possible and with alternative stress strategies.
+
+* Full diagnostic experience. OSR should not inhibit users' ability to debug or
+  reason about logical behavior of their programs. OSR activities should be
+  tracked via suitable eventing mechanisms.
+
+## 3. An Overview of OSR
+
+OSR enables transitioning from older unoptimized code to new code
+while the old code is active in some stack frames. An implementation
+must come up with solutions to several related subproblems, which we
+describe briefly here, and in more detail below.
+
+* **Patchpoints**: Identify where in the original method OSR is possible.
+We will use the term _patchpoint_ to describe a particular location in a
+method's code that supports OSR transitions.
+* **Triggers**: Determine what will trigger an OSR transition.
+* **Alternatives**: Have a means to prepare a suitable alternative code
+version covering all or part of the method (loops, for instance), with
+one or possibly many entry points.
+* **Transitions**: Remap the stack frame(s) as needed to carry out the
+OSR transition.
+
+### 3.1 Patchpoints
+
+A _patchpoint_ is a point in a version of code where OSR is possible.
+Patchpoints are similar in many ways to GC safepoints. At a patchpoint, the live
+state of the ongoing computation must be identifiable (for a GC safepoint, only
+the live GC references need be so identified). All live registers and stack
+slots must be enumerable, and logically described in terms of concepts visible
+in the IL. Additional state like the return address, implicit arguments, and so
+on must also be accounted for.
+
+As with GC safepoints, patchpoints can be handled in a _fully interruptible_
+manner where most any instruction boundary is a patchpoint, or a _partially
+interruptible_ manner, where only some instruction boundaries are patchpoints.
+Also, as with GC, it is acceptable (if suboptimal) to over-identify the live
+state at a patchpoint. For instance, the live set can include values that never
+end up being consumed by the new method (the upshot here is that we can simply
+decide all the visible IL state is live everywhere, and so avoid running
+liveness analysis in Tier0).
+
+Also, as with GC safepoints, it is desirable to keep the volume of information
+that must be retained to describe patchpoints to a minimum. Most method
+executions will never undergo an OSR transition, and so the information generated
+will never be consulted. To try to keep OSR a _pay as you go_ technique, it is
+important that this information be cheap to generate and store.
+
+#### 3.1.1 Choosing Patchpoints
+
+Most commonly, patchpoints are chosen to be the places in the code that are
+targets of loop back edges. This is a partially interruptible scheme. This
+ensures that no loop in the method can iterate without hitting a patchpoint,
+so the method itself cannot execute indefinitely between patchpoints. Note that
+by this rule, methods that do not contain any loops will not have any
+patchpoints.
+
+From a compilation standpoint, it would be ideal if patchpoints were also IL
+stack empty points, as this tends to minimize and regularize the live state.
+However, there is no guarantee that execution of a method will reach stack empty
+points with any frequency. So, a fully general patchpoint mechanism must handle
+the case where the evaluation stack is not empty. However, it may be acceptable
+to only allow patchpoints at stack empty points, as loops that execute with
+non-empty evaluation stacks are likely rare.
+
+It is also beneficial if patchpoint selection works via a fairly simple set of
+rules, and here we propose that using the set of _lexical back edges_ or
+backwards branches in IL is a reasonable choice. These can be identified by a
+single scan over a method's IL.
+
+When generating unoptimized code, it is thus sufficient to note the target of
+any backwards branch in IL; the set of those locations (filtered to just the
+subset where the IL stack is empty) is the candidate patchpoint set for the method.
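+
+To make this concrete, here is a minimal sketch of the selection pass, assuming
+the IL has already been decoded into branch records and the stack-empty offsets
+have been computed separately. The names are illustrative only; the jit itself
+works directly over the raw IL bytes.
+```C#
+using System.Collections.Generic;
+
+// A decoded IL branch: the branch instruction's offset and its target offset.
+record IlBranch(int Offset, int Target);
+
+static class PatchpointSelector
+{
+    // Candidate patchpoints: targets of lexical back edges (Target <= Offset)
+    // that are also IL stack-empty points.
+    public static HashSet<int> Select(IEnumerable<IlBranch> branches,
+                                      ISet<int> stackEmptyOffsets)
+    {
+        var candidates = new HashSet<int>();
+        foreach (IlBranch b in branches)
+        {
+            if (b.Target <= b.Offset && stackEmptyOffsets.Contains(b.Target))
+                candidates.Add(b.Target);
+        }
+        return candidates;
+    }
+}
+```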
+
+We can also rely on the fact that in our current unoptimized code, no IL state
+is kept in registers across IL stack empty points&mdash;all the IL state is
+stored in the native stack frame. This means that each patchpoint's live state
+description is the same&mdash;the set of stack frame locations holding the IL
+state.
+
+So, with the above restrictions, a single patchpoint descriptor suffices for the
+entire method (analogous to the concept of _untracked_ GC lifetimes in the GC
+info). Further, this information is a superset of the current GC info, so the
+additional data needed to describe a patchpoint is simply the set of live non-GC
+slots on the native stack frame.
+
+[Note: more general schemes like _deopt_ will require something more
+sophisticated.]
+
+#### 3.1.2 Option I: non-stack-empty patchpoints
+
+If it turns out we must also allow patchpoints at non-stack-empty points, then
+some per-patchpoint state will be needed to map the logical state of the
+evaluation stack into actual stack slots on the method's frame. This state will
+vary from patchpoint to patchpoint.
+
+#### 3.1.3 Option II: fully interruptible patchpoints
+
+Patchpoints can be much more fine-grained, at any block boundary or even within
+blocks, so long as the correspondence of the generated code to the inspiring IL
+is well understood. However, fine-grained patchpoints in our proposed version of
+OSR do not seem to offer much in the way of advantages, given that we are also
+proposing synchronous triggers and transitions, and transitioning from
+unoptimized code. A fine-grained patchpoint mechanism would require more
+metadata to describe each transition point.
+
+#### 3.1.4 The Prototype
+
+In the prototype, patchpoints are the set of IL boundaries in a method that are
+stack-empty and the targets of lexical back edges. The live state of the
+original method is just the IL-visible locals and arguments, plus a few special
+values found in certain frames (the GS cookie, etc.).
+
+### 3.2 Triggers
+
+When OSR is used to transfer control from an unoptimized method into
+optimized code, the most natural trigger is a count of the number of times a
+patchpoint in the method is reached. Once a threshold is reached at a
+patchpoint, the system can begin preparation of the alternative code version
+that will work for that patchpoint.
+
+This counting can be done fairly efficiently, at least in comparison to the
+ambient unoptimized code in the method, by using counters on the local frame.
+When the threshold is reached, control can transfer to a local policy block;
+this can check whether an alternative version needs to be prepared, is already
+being prepared, or is ready for transfer. Since this policy logic is common to
+all patchpoints it most likely should be encapsulated as a helper. In
+pseudocode:
+
+```
+Patchpoint:   // each assigned a dense set of IDs
+
+       if (++counter[ppID] > threshold) call PatchpointHelper(ppID)
+```
+The helper can use the return address to determine which patchpoint is making
+the request. To keep overheads manageable, we might instead want to down-count
+and pass the counter address to the helper.
+```
+Patchpoint:   // each assigned a dense set of IDs
+
+       if (--counter[ppID] <= 0) call PatchpointHelper(ppID, &counter[ppID])
+```
+The helper logic would be similar to the following:
+```
+PatchpointHelper(int ppID, int* counter)
+{
+    void* patchpointSite = _ReturnAddress();
+    PPState s = GetState(patchpointSite);
+
+    switch (s)
+    {
+      case Unknown:
+        // First visit: arm the counter and start tracking this site.
+        *counter = initialThreshold;
+        SetState(patchpointSite, Active);
+        return;
+
+      case Active:
+        // Hit often enough to be interesting: request an alternative version.
+        *counter = checkThreshold;
+        SetState(patchpointSite, Pending);
+        RequestAlternative(ppID);
+        return;
+
+      case Pending:
+        // Alternative still being prepared; check back later.
+        *counter = checkThreshold;
+        return;
+
+      case Ready:
+        Transition(...); // does not return
+    }
+}
+```
+Here `RequestAlternative` would queue up a request to produce the alternative
+code version; when that request completes the patchpoint state would be set to
+Ready. So the cost for a patchpoint would be an initial helper call (to set the
+Active threshold), then counting, then a second helper call (to request and set
+the pending threshold), then counting, and, depending on how long the request
+took, more callbacks in pending state.
+
+Note that just because a patchpoint is hit often enough to reach Active state,
+there is no guarantee that the patchpoint will be reached again in the future.
+So, it is possible to trigger alternative version compilations that end up never
+getting used, if those alternative versions are patchpoint specific. In a
+pathological case a method might have an entire sequence of patchpoints that
+reach Active state and trigger alternative versions, none of which ever get
+used.
+
+In this scheme, the local frame of the method would have one local counter per
+patchpoint.
+
+#### 3.2.1 Option I: one global counter per patchpoint
+
+Instead of keeping the counters on the local frame, they could be kept in global
+storage associated with the method, to give an absolute count of patchpoint
+frequency over all invocations of the method. This would help trigger
+transitions in methods in use across multiple threads, or in methods that mix
+modest amounts of iteration and recursion. Because there would now be shared counter
+state, we'd have to think through how to handle the concurrent access. Likely
+we'd implement something like we do for IBC and have a method fetch and cache
+the address of its counter vector locally in the prolog.
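+
+A sketch of how such shared counters might be maintained, with `Interlocked`
+handling concurrent decrements from multiple frames; the names and storage
+layout here are assumptions for illustration, not the actual runtime
+implementation:
+```C#
+using System.Threading;
+
+static class PatchpointCounters
+{
+    // One global down-counter per patchpoint ID, allocated per method.
+    static int[] s_counters = new int[8];
+
+    // Called at each patchpoint; safe when many frames/threads share a counter.
+    public static void OnPatchpointHit(int ppId)
+    {
+        if (Interlocked.Decrement(ref s_counters[ppId]) <= 0)
+            PatchpointHelper(ppId, ref s_counters[ppId]);
+    }
+
+    static void PatchpointHelper(int ppId, ref int counter)
+    {
+        counter = 1_000; // re-arm; the real policy is the state machine above
+    }
+}
+```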
+
+#### 3.2.2 Option II: shared counters
+
+Alternatively, all patchpoints in a method could share one counter slot (either
+local or global); this would save space but would lead to somewhat more frequent
+callbacks into the runtime and a slightly higher likelihood that useless
+alternatives would be created.
+
+#### 3.2.3 Option III: synchronous OSR
+
+Independent of the counter scheme, the runtime could also block and
+synchronously produce and then transition to the alternative version. This would
+eliminate the potential for wasted alternates (though depending on other
+choices, we still might produce multiple alternates for a method). It would also
+hold up progress of the app, as the thread could just as well continue executing
+the unoptimized code past the patchpoint. We might consider transitioning to
+synchronous OSR selectively for methods that have a track record of generating
+useless versions. This is entirely a runtime policy and would not impact jitted
+codegen.
+
+Note: If OSR is used for EnC or for _deopt_ when an invariant changes, then
+synchronous transitions are required, as in general the old method cannot safely
+execute past a patchpoint. If the delay from jitting code is a concern, it may be
+possible to fall back to an interpreter for a time while the new version of the
+method is jitted, though this would require that the system also support
+OSR-style transitions from interpreted methods to compiled methods...
+
+#### 3.2.4 Option IV: share counter space with Tiered Compilation
+
+A final option here is to use global counters and also add a counter at method
+entry. The entry counter could be used for two purposes: first to trigger tiered
+jitting of the entire method, and second, to help normalize the per-patchpoint
+counters so as to provide relative profile weights for the blocks in the method
+when it is rejitted (either via tiering or OSR). We note that the set of
+observation points from patchpoint counters is fairly sparse (not as detailed as
+what we get from IBC, say) but it may be sufficient to build a reasonable
+profile.
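+
+For instance, the normalization might be as simple as the following sketch
+(illustrative only): a loop-top patchpoint counted 5000 times across 10 method
+entries suggests a relative weight of 500 iterations per call.
+```C#
+// Relative block weight from a patchpoint's global count and the method
+// entry count (both assumed global, per the scheme above).
+static double RelativeBlockWeight(long patchpointCount, long methodEntryCount)
+    => methodEntryCount == 0 ? 0.0 : (double)patchpointCount / methodEntryCount;
+```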
+
+#### 3.2.5 The Prototype
+
+In the prototype, OSR transitions are synchronous; there is one local patchpoint
+counter per frame shared by all patchpoints; patchpoint IDs are IL offsets.
+
+### 3.3 Alternative Versions
+
+When a patchpoint is hit often enough, the runtime should produce an alternative
+version of the code that can be transitioned to at that patchpoint.
+
+There are several choice points for alternatives:
+
+* Whether to tailor the alternative code specifically to that patchpoint or have
+  the alternative handle multiple (or perhaps all) of the patchpoints in a method.
+  We'll call the former a single-entry alternative, and the latter a
+  multi-entry alternative (and, in the limit, a whole-method alternative).
+
+* Whether the alternative version encompasses the remainder of the method, or
+  just some part of the method. We'll call these whole and partial
+  alternatives.
+
+* If a partial method alternative, whether the part of the method compiled
+  includes the entire remainder of the method, or just some fragment that
+  includes the patchpoint (say the enclosing loop nest).
+
+* Whether or not the alternative entry points include the code to build up the
+  alternative stack frames, or setup of the new frame happens via some runtime
+  logic.
+
+* Whether or not the alternate version is tailored to the actual runtime state
+  at the point of the trigger. For instance, specific argument or local values,
+  or actual types.
+
+The partial alternatives obviously are special versions that can only be used by
+OSR. The whole method alternative could also be conceivably used as the
+optimized version of the method, but the additional entry points may result in some
+loss of optimizations. So, in general, the OSR alternatives are likely distinct
+from the Tier-1 versions of methods and are used only for active frame
+transitions. New calls to methods can be handled via the existing tiering
+mechanisms.
+
+[Note there are some interesting dynamics here that may warrant further
+consideration. A method that is seldom called but has a hot loop will eventually
+trigger both OSR (from the loop) and Tier1 recompilation (from the calls). We
+might consider deferring tiered recompilation for such methods, as the
+unoptimized versions can readily transition to OSR alternates in code that
+matters for performance.]
+
+Taken together there are various combinations of these alternatives that make
+sense, and various tradeoffs to consider. We explore a few of these below.
+
+#### 3.3.1 Option 1: Partial Method with Transition Prolog
+
+In this option, the runtime invokes the jit with a method, IL offset, and the
+original method mapping of stack frame state to IL state at that offset. The jit
+uses the logical PC (IL offset) to determine the scope of the alternative
+fragment. Here the scope is the IL in the method reachable from the patchpoint.
+
+For the entry point it creates a specialized transition prolog that sets up a
+normal frame, takes the values of the locals from the old stack frame and
+copies them to the new stack slots, and pushes any live evaluation stack
+arguments. Arguments passed in registers are restored to the right registers.
+Control then transfers to the IL offset of the patchpoint. Any IL in the method
+not reachable from the patchpoint is dead code and can be removed (including the
+original method entry point). This new partial method is then jitted more or
+less normally (modulo the generation of the special prolog).
+
+It might be possible to express this new prolog in IL or something similar. At
+any rate it seems likely the impact on the jit overall can be mostly localized
+to the importer and prolog generation stages, and the rest of the jit would
+operate more or less as it does today.
+
+This alternative version can be used any time the original method reaches the
+inspiring patchpoint.
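+
+The logical effect of the transition prolog might be sketched as follows; the
+helper name and layout details are hypothetical, and the real prolog is
+generated machine code driven by the original method's frame description.
+```C#
+// For each live local, read the value from its slot on the original frame and
+// store it into the corresponding home (register or slot) on the new frame.
+unsafe static void CopyLiveState(byte* originalFramePtr, int[] slotOffsets,
+                                 long[] newFrameHomes)
+{
+    for (int i = 0; i < slotOffsets.Length; i++)
+    {
+        // Slot offsets are frame-pointer relative in this sketch.
+        newFrameHomes[i] = *(long*)(originalFramePtr + slotOffsets[i]);
+    }
+}
+```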
+
+#### 3.3.2 Option 2: Partial Tailored Method with Transition Prolog
+
+If the runtime also passes the triggering stack frame to the jit, the jit can
+incorporate the values in that frame (or information derived from the frame
+values) into the alternative method codegen. This creates a tailored alternative
+that can only be used at this patchpoint from this specific original method
+invocation. The potential benefit here is that the code in the method may be
+more optimizable with the additional context, and since OSR alternatives are
+likely to be lightly used there may not be much downside to specializing exactly
+for this trigger instance. This alternative likely implies synchronous OSR.
+
+#### 3.3.3 Option 3: Full Method with Multiple Entry Points
+
+Instead of generating an alternative that can only be used to transition from
+one specific patchpoint, the alternative method can offer multiple entry points
+to allow transition from some or all of the patchpoints in the original method.
+
+Note: After thinking about this a bit more, I think we can implement this
+variant without needing multiple prologs&mdash;instead we can pass the IL offset
+of the OSR entry point as a hidden argument to the OSR method, and have a switch
+on that argument in the first body block to jump to the right place in the
+method. This might be a viable option to control the potential explosion of OSR
+variants for methods with many patchpoints. This method would still be OSR
+specific&mdash;that is, it could not also serve as a normally callable Tier1
+method.
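+
+A source-level sketch of this dispatch, assuming a method with two patchpoints
+at hypothetical IL offsets 0x06 and 0x23 (the hidden argument and labels are
+illustrative):
+```C#
+// The OSR body receives the triggering patchpoint's IL offset as a hidden
+// argument and dispatches once, on entry, to the matching loop top.
+static int OsrBody(int entryIlOffset, int i, int to, int result)
+{
+    switch (entryIlOffset)
+    {
+        case 0x06: goto IL_06;   // patchpoint at the first loop top
+        case 0x23: goto IL_23;   // patchpoint at the second loop top
+        default: throw new System.ArgumentOutOfRangeException(nameof(entryIlOffset));
+    }
+
+IL_06:
+    while (i < to) { result += i; i++; }   // first loop
+IL_23:
+    while (result > to) { result--; }      // second loop
+    return result;
+}
+```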
+
+#### 3.3.4 Option 4: Method Fragment
+
+If the alternative method is just a fragment of the entire method, then in
+addition to a specialized entry point, the jit will have to create specialized
+exit points that either transition back to the unoptimized method, or else use
+synchronous OSR to invoke jitting of the method code that comes after the
+fragment.
+
+#### 3.3.5 The Prototype
+
+The prototype generates partial methods with a transition prolog. Per 4.1 below,
+the OSR method frame incorporates the (live portion of the) original method
+frame instead of supplanting it.
+
+### 3.4 Transitions
+
+A transition can happen once an OSR capable method reaches a patchpoint where a
+suitable alternative version is ready. Because transitions will likely require
+changes in stack frame size, it is much simpler to consider transitions only for
+methods at the top of the stack. This means that methods that are invoked
+recursively may be transitioned by OSR gradually as the stack unwinds.
+
+Abstractly, the actual transition could work something like the following: the
+runtime would copy the top stack frame into temporary storage, then carefully
+unwind the current frame. Then the alternative method would be put in place and
+invoked, being passed the copy of the original frame as an argument.
+
+However, the presence of original frame addresses and values derived from those
+addresses in the original frame's live state complicates matters (more on this
+in [Section 4.1](#Addresses-of-Locals)). So the OSR method needs to ensure that
+any "address-exposed" local ends up at the exact same stack location in the OSR
+frame as it did in the original method frame. The simplest way to accomplish
+this is to just leave the original frame in place, and have the OSR frame
+"incorporate" it as part of its frame.
+
+#### 3.4.1 The Prototype
+
+The original method conditionally calls the patchpoint helper at
+patchpoints. The helper will return if there is no transition.
+
+For a transition, the helper will capture context and virtually unwind itself
+and the original method from the stack to recover callee-save register values
+live into the original method, and then restore the callee FP and SP values into
+the context (preserving the original method frame); it then sets the context IP to
+the OSR method entry and restores the context. The OSR method incorporates the
+original method frame as part of its own frame.
+
+## 4. Complications
+
+### 4.1 Addresses of Locals
+
+If the live state at the patchpoint includes addresses of locals (or addresses
+of arguments, if the OSR transition pushes a new frame), either these addresses
+must be updated to properly reflect the new locations or the address-taken
+locals must end up in the same relative location in the frame. The jit might
+require some hardening to ensure that the address of a local is always properly
+described at patchpoints.
+
+Detection of address-taken locals (especially in a non-optimizing jit) may
+require some attention. We frequently see `ldloca` in IL that is consumed in a
+dereference before a stack empty point; such locals are transiently exposed but
+their addresses would not be live at our proposed set of patchpoints (note
+`ldflda` can cause similar issues if it exposes addresses of local struct
+fields).
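+
+To illustrate the distinction, consider the following C# sketch (hypothetical
+helpers): in `Transient` the local's address dies before the next stack-empty
+point, while in `LiveAcrossLoop` an address into the frame is live at the
+loop-top patchpoint.
+```C#
+static void Bump(ref int v) => v++;
+
+static int Transient()
+{
+    int x = 41;
+    Bump(ref x);    // IL: ldloca on x, consumed before the next stack-empty point
+    return x;
+}
+
+static unsafe int LiveAcrossLoop(int n)
+{
+    int total = 0;
+    int* p = &total;             // address is live across the loop below
+    for (int i = 0; i < n; i++)  // loop top is a patchpoint candidate
+        *p += i;
+    return total;
+}
+```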
+
+Arithmetic done on addresses of locals might not be stable across an OSR
+transition (that is, different values could be obtained for a given piece of
+code before and after the transition). While in general there is no guarantee
+about the values produced by this kind of code, it is not unreasonable to expect
+that the value would not change over the lifetime of a given method's
+execution. It is not clear how much code might depend on this.
+
+This problem could be partially solved by requiring any address-taken local to
+appear at the same stack location in the alternative method frame and by
+requiring that the OSR frame supplant the original frame (this is how EnC
+works). In that case all address-taken locals would be at the same address.
+Ensuring that this is possible likely entails other restrictions like reserving
+a maximally large register save area for the original method.
+
+However, it seems simplest to just preserve the original method frame, or at
+least the portion of it that contains the live state, and allow the OSR method
+to access the original frame values, either as initial values or as the actual
+homes for that state.
+
+### 4.2 Localloc
+
+Methods with localloc pose similar challenges to those posed by methods with
+address-taken locals. Room is made on the original method stack for the localloc
+storage, and a native pointer to that storage is part of the live state of the
+method. The live state may also include pointers and other values derived from
+that address. So, the alternative version must use that same location; a
+copy/fixup procedure to allow this storage to be relocated in some manner seems
+impractical.
+
+In addition, localloc makes describing the local frame more complex, as the size
+of the frame and the location of particular bits of live state can vary.
+Typically, the jit will use multiple frame pointers in a localloc frame to allow
+for relative addressing.
+
+In the most complex case, the original method will have executed one or more
+locallocs before hitting the patchpoint, and the OSR variant will then execute
+more locallocs. Such cases might require the OSR method to maintain 3 or more
+frame pointers.
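+
+In source terms, the problem case looks like the following sketch: the
+`stackalloc` storage lives on the original frame and its address is live at the
+loop-top patchpoint, so the OSR method must keep using that same storage.
+```C#
+unsafe static int SumBuffer(int n)
+{
+    int* buf = stackalloc int[64];   // storage carved out of the original frame
+    for (int i = 0; i < 64; i++)
+        buf[i] = i;
+    int total = 0;
+    for (int i = 0; i < n; i++)      // loop top: patchpoint; buf must stay valid
+        total += buf[i & 63];
+    return total;
+}
+```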
+
+### 4.3 Funclets
+
+When control is executing in a funclet there are effectively two activation
+records on the stack that share a single frame: the parent frame and the
+funclet frame. The funclet frame is largely a stub frame and most of the frame
+state is kept in the parent frame. 
+
+These two frames are not adjacent; they are separated by some number of runtime
+frames. This means it is going to be difficult for our system to handle
+patchpoints within funclets; even if we could update the code the funclet is
+running we would not be able to update the parent frame.
+
+The proposal here is to disallow patchpoints within funclets so that we do not
+attempt OSR transitions when the top-of-stack frame is a funclet frame. One
+hopes that performance-critical loops rarely appear in catch or finally clauses.
+
+EnC has similar restrictions.
+
+### 4.4 GC
+
+There is a brief window of time during the transition where there are GC live
+values on both the original and alternative frames (and the original frame may
+have been copied off-stack). Since the transition is done via a runtime helper,
+it seems prudent to forbid GC during this part of the transition, which should
+be relatively brief.
+
+### 4.5 Diagnostics
+
+Alternative methods will never be called&mdash;they are only transitioned to
+by active original methods, so likely no special work is needed to make them
+compatible with the current profiler guarantees for IL modifications ("new
+invocations" of the method invoke the new version).
+
+We may need to update the mechanisms that the runtime uses to notify profilers
+of new native code versions of a method.
+
+The jit will generate the same debug info mappings as it does today, and so the
+debugging experience when debugging an alternative should be similar to the
+experience debugging a Tier1 method. Likewise, the code publishing aspects
+should be common, so for instance active breakpoints should get applied.
+
+[Note: I have verified this on simple examples using the VS debugger; a source
+breakpoint set in the original method is applied to the OSR method too.]
+
+We need to decide what happens if the debugger tries to use SetIP on an OSR
+method for an IL offset that is not within the range of IL compiled; likely
+we'll just have to fail the request.
+
+Breakpoints set at native code addresses won't transfer to the corresponding
+points in OSR methods. We have the same issue with tiered compilation already.
+
+OSR (exclusive of EnC) will be disabled for debuggable code.
+
+Debugging through an OSR transition (say a single-step that triggers OSR) may
+require special consideration. This is something that needs further
+investigation.
+
+**Prototype: The OSR methods have somewhat unusual unwind records that may be
+confusing the (WinDbg) debugger stack trace.**
+
+### 4.6 Proposed Tier-0 Optimizations
+
+We have been assuming up until this point that the original method was not
+optimized in any way, and so its live state is safely over-approximated by the
+values of all locals, arguments, evaluation stack entries. This means that any
+value truly live at a reachable patchpoint (capable of influencing future
+computation) is included in the live set. The reported live set might well be
+larger, of course. The alternative method will likely run liveness and pick from
+this set only the values it sees as truly live.
+
+This means that we can run optimizations in the original method so long as they
+do not alter the computation of the over-approximated live set at any
+patchpoint.
+
+The proposed Tier0 optimizations fit into this category, so long as we restrict
+patchpoints to stack-empty points: we may prune away unreachable code paths (say
+from HW intrinsic checks or provably true or false predicate
+evaluations&mdash;patchpoints in pruned sections would be unreachable) and
+simplify computations.
+Optimizing expressions may reduce the truly live set, but so long as all stores
+to locals and args are kept live, the base values needed for any alternate
+version of the code will be available.
+
+### 4.7 Alternative Method Optimizations
+
+In options where the alternative method has multiple entry points, one must be
+wary of early aggressive optimizations done when optimizing the alternative. The
+original version of the method may hit a patchpoint while executing code that
+can be optimized away by the more aggressive alternative method compiler (e.g.
+it may be executing a series of type equality tests in a generic method that the
+optimizing jit can evaluate at jit time). But with our simple patchpoint
+recognition algorithm the alternate compiler can quickly verify that the
+patchpoint IL offset is a viable entry point and ensure that the code at that
+offset is not optimized away. If it turns out that the entry point code is
+optimizable then we may choose to peel one iteration from the entry point loop
+(because with our patchpoint strategy, execution in the alternate method will
+immediately hit a loop top once it is out of the prolog) and allow the in-loop
+versions to be optimized.
+
+### 4.8 Prologs and Unwind
+
+The alternative version of the method will, in all likelihood, need to save and
+restore a different set of callee-saves registers than the original version. But
+since the original stack frame has already saved some registers, the alternative
+version prolog will either need to save a superset of those registers or else
+restore the values of some registers. So, the alternative version
+needs to know which registers the original saved and where in the stack they are
+stored.
+
+If we want to preserve frame offsets for address-taken locals then we may face a
+conflict as altering the number of callee save slots may alter frame offsets for
+locals. One thought here is that we could perhaps implement a chained unwind
+scheme, where there is an initial prolog that emulates the original version
+prolog and duplicates its saves, and then a subsequent "shrink wrapped" prolog
+and epilog that saves any additional registers in a disjoint area.
+
+**Prototype:** When it is time to transition, the patchpoint helper virtually
+unwinds two frames from the stack&mdash;its own frame, and the frame for the
+original method. So the unwound context restores the callee saves done by the
+original method. That turns out to be sufficient.
+
+You might think the helper would need to carefully save all the register state
+on entry, but that's not the case. Because the original method is unoptimized,
+there isn't any live IL state in registers across the call to the patchpoint
+helper&mdash;all the live IL state for the method is on the original
+frame&mdash;so the argument and caller-save registers are dead at the
+patchpoint. Thus the only part of the register state that is significant for ongoing
+computation is the callee-saves, which are recovered via virtual unwind, and the
+frame and stack pointers of the original method, which are likewise recovered by
+virtual unwind.
+
+With this context in hand, the helper then "calls" the OSR method by restoring
+the context. The OSR method performs its own callee-saves as needed, and
+recovers the arguments/IL state from the original frame.
+
+If we were to support patchpoints in optimized code things would be more
+complicated.
+
+### 4.9 Synchronized Methods
+
+OSR methods only need to add the code to release the synchronized method monitor.
+This must still be done in a try-finally to ensure release even on exceptional
+exit.
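+
+In source terms, the OSR continuation's obligation might look like the
+following sketch (the original method entered the monitor; only the release
+half remains):
+```C#
+using System.Threading;
+
+// Sketch: syncObject's monitor is held on entry, taken by the original method.
+static void OsrContinuation(object syncObject)
+{
+    try
+    {
+        // ... remainder of the method's work ...
+    }
+    finally
+    {
+        Monitor.Exit(syncObject);   // release even on exceptional exit
+    }
+}
+```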
+
+### 4.10 Profile Enter/Leave Hooks
+
+OSR methods only need to support the method exit hook.
+
+## 5. The Prototype
+
+Based on the above, we developed a prototype implementation of OSR to gain
+experience, gather data, and test out assumptions.
+
+The prototype chose the following options:
+* Patchpoints: lexical back edge targets that are stack empty and not in try
+  regions; live state is all locals and args + specials (thus no liveness needed
+  at Tier0)
+* Trigger: one shared counter per frame. Initial value configurable at runtime.
+  Patchpoints decrement the counter and conditionally call the runtime helper if
+  the value is zero or negative.
+* Alternatives: partial method tailored to each patchpoint. OSR method
+  incorporates the original method frame.
+* Transition: synchronous&mdash;once the patchpoint has been hit often enough a
+  new alternative is jitted.
+
+The prototype works for x64 on Windows and Linux, and can pass the basic (pri0)
+test suites with an aggressive transition policy (produce the OSR method and
+transition the first time each patchpoint is hit).
+
+### 5.1 Example Codegen
+
+Consider the following simple method:
+```C#
+    public static int F(int from, int to)
+    {
+        int result = 0;
+        for (int i = from; i < to; i++)
+        {
+            result += i;
+        }
+        return result;
+    }
+
+```
+Normal (Tier0, x64 Windows) codegen for the method is:
+```asm
+; Tier-0 compilation
+
+G_M6138_IG01:
+       55                   push     rbp
+       4883EC10             sub      rsp, 16
+       488D6C2410           lea      rbp, [rsp+10H]
+       33C0                 xor      rax, rax
+       8945FC               mov      dword ptr [rbp-04H], eax    // result
+       8945F8               mov      dword ptr [rbp-08H], eax    // i
+       894D10               mov      dword ptr [rbp+10H], ecx    // from
+       895518               mov      dword ptr [rbp+18H], edx    // to
+
+G_M6138_IG02:
+       33C0                 xor      eax, eax
+       8945FC               mov      dword ptr [rbp-04H], eax
+       8B4510               mov      eax, dword ptr [rbp+10H]
+       8945F8               mov      dword ptr [rbp-08H], eax
+       EB11                 jmp      SHORT G_M6138_IG04
+
+G_M6138_IG03:
+       8B45FC               mov      eax, dword ptr [rbp-04H]
+       0345F8               add      eax, dword ptr [rbp-08H]    // result += i
+       8945FC               mov      dword ptr [rbp-04H], eax
+       8B45F8               mov      eax, dword ptr [rbp-08H]
+       FFC0                 inc      eax
+       8945F8               mov      dword ptr [rbp-08H], eax
+
+G_M6138_IG04:
+       8B45F8               mov      eax, dword ptr [rbp-08H]
+       3B4518               cmp      eax, dword ptr [rbp+18H]
+       7CE7                 jl       SHORT G_M6138_IG03          // i < to ?
+       8B45FC               mov      eax, dword ptr [rbp-04H]
+
+G_M6138_IG05:
+       488D6500             lea      rsp, [rbp]
+       5D                   pop      rbp
+       C3                   ret
+```
+with OSR enabled (and patchpoint counter initial value = 2), this becomes:
+```asm
+; Tier-0 compilation + Patchpoints
+
+G_M6138_IG01:
+       55                   push     rbp
+       4883EC30             sub      rsp, 48
+       488D6C2430           lea      rbp, [rsp+30H]
+       33C0                 xor      rax, rax
+       8945FC               mov      dword ptr [rbp-04H], eax    // result
+       8945F8               mov      dword ptr [rbp-08H], eax    // i
+       894D10               mov      dword ptr [rbp+10H], ecx    // from
+       895518               mov      dword ptr [rbp+18H], edx    // to
+
+G_M6138_IG02:
+       33C9                 xor      ecx, ecx
+       894DFC               mov      dword ptr [rbp-04H], ecx    // result = 0
+       8B4D10               mov      ecx, dword ptr [rbp+10H]
+       894DF8               mov      dword ptr [rbp-08H], ecx    // i = from
+       C745F002000000       mov      dword ptr [rbp-10H], 2      // patchpointCounter = 2
+       EB2D                 jmp      SHORT G_M6138_IG06
+
+G_M6138_IG03:
+       8B4DF0               mov      ecx, dword ptr [rbp-10H]    // patchpointCounter--
+       FFC9                 dec      ecx
+       894DF0               mov      dword ptr [rbp-10H], ecx
+       837DF000             cmp      dword ptr [rbp-10H], 0      // ... > 0 ?
+       7F0E                 jg       SHORT G_M6138_IG05         
+
+G_M6138_IG04:           ;; bbWeight=0.01
+       488D4DF0             lea      rcx, bword ptr [rbp-10H]    // &patchpointCounter
+       BA06000000           mov      edx, 6                      // ilOffset
+       E808CA465F           call     CORINFO_HELP_PATCHPOINT
+
+G_M6138_IG05:
+       8B45FC               mov      eax, dword ptr [rbp-04H]
+       0345F8               add      eax, dword ptr [rbp-08H]
+       8945FC               mov      dword ptr [rbp-04H], eax
+       8B45F8               mov      eax, dword ptr [rbp-08H]
+       FFC0                 inc      eax
+       8945F8               mov      dword ptr [rbp-08H], eax
+
+G_M6138_IG06:
+       8B4DF8               mov      ecx, dword ptr [rbp-08H]
+       3B4D18               cmp      ecx, dword ptr [rbp+18H]
+       7CCB                 jl       SHORT G_M6138_IG03
+       8B45FC               mov      eax, dword ptr [rbp-04H]
+
+G_M6138_IG07:
+       488D6500             lea      rsp, [rbp]
+       5D                   pop      rbp
+       C3                   ret
+```
+Because Tier0 is unoptimized code, the patchpoint sequence is currently
+unoptimized. This leads to a moderate amount of code bloat in methods with
+patchpoints. The overall code size impact of patchpoints (as measured by
+`jit-diff`) is around 2%, but this understates the impact on
+methods that have patchpoints, as most Tier0 methods won't require patchpoints.
+This is something that can be improved.
+
+The OSR method for this patchpoint is:
+```asm
+; Tier-1 compilation
+; OSR variant for entry point 0x6
+
+G_M6138_IG01:
+       8B542450             mov      edx, dword ptr [rsp+50H]    // to
+       8B4C2434             mov      ecx, dword ptr [rsp+34H]    // result
+       8B442430             mov      eax, dword ptr [rsp+30H]    // i
+
+G_M6138_IG02:           ;; bbWeight=8
+       03C8                 add      ecx, eax
+       FFC0                 inc      eax
+       3BC2                 cmp      eax, edx
+       7CF8                 jl       SHORT G_M6138_IG02
+
+G_M6138_IG03:
+       8BC1                 mov      eax, ecx
+
+G_M6138_IG04:
+       4883C438             add      rsp, 56
+       5D                   pop      rbp
+       C3                   ret
+```
+Here the live state is `result`, `i`, and `to`. These are kept in registers and
+initialized in the prolog to the values they had in the original frame. The jit
+request for the OSR method includes 'OSR_INFO' metadata (the `PatchpointInfo`
+of section 5.3) describing the original method frame, so the jit can compute
+the correct addresses for original frame slots in the OSR method.
+
+Because the OSR method is entered with the original method frame still active,
+the OSR method has asymmetric prolog and epilog sequences. This is reflected in
+the unwind data for the OSR method by recording a "phantom prolog" to account
+for actions taken by the original method. These are at code offset 0 and so happen
+"instantaneously" when the method is entered.
+```
+  UnwindCodes:
+    CodeOffset: 0x00 UnwindOp: UWOP_ALLOC_SMALL (2)     OpInfo: 6 * 8 + 8 = 56 = 0x38
+    CodeOffset: 0x00 UnwindOp: UWOP_PUSH_NONVOL (0)     OpInfo: rbp (5)
+```
+By way of comparison, here is the full Tier-1 version of the method.
+```asm
+G_M6138_IG01:
+
+G_M6138_IG02:
+       33C0                 xor      eax, eax
+       3BCA                 cmp      ecx, edx
+       7D08                 jge      SHORT G_M6138_IG04
+
+G_M6138_IG03:           ;; bbWeight=4
+       03C1                 add      eax, ecx
+       FFC1                 inc      ecx
+       3BCA                 cmp      ecx, edx
+       7CF8                 jl       SHORT G_M6138_IG03
+
+G_M6138_IG04:
+       C3                   ret
+```
+Note the inner loop codegen is very similar to the OSR variant. This is typical.
+It is often possible to diff the Tier1 and OSR codegen and see that the latter
+is just a partial version of the former, with different register usage and
+different stack offsets.
+
+### 5.2 More Complex Examples
+
+If the OSR method needs to save and restore registers, then the epilog will have
+two stack pointer adjustments: the first to reach the register save area on the
+OSR frame, the second to reach the saved RBP and return address on the original
+frame.
+
+For example:
+```asm
+       4883C440             add      rsp, 64
+       5B                   pop      rbx
+       5E                   pop      rsi
+       5F                   pop      rdi
+       4883C448             add      rsp, 72
+       5D                   pop      rbp
+       C3                   ret      
+```
+with unwind info:
+```
+  UnwindCodes:
+    CodeOffset: 0x07 UnwindOp: UWOP_ALLOC_SMALL (2)     OpInfo: 7 * 8 + 8 = 64 = 0x40
+    CodeOffset: 0x03 UnwindOp: UWOP_PUSH_NONVOL (0)     OpInfo: rbx (3)
+    CodeOffset: 0x02 UnwindOp: UWOP_PUSH_NONVOL (0)     OpInfo: rsi (6)
+    CodeOffset: 0x01 UnwindOp: UWOP_PUSH_NONVOL (0)     OpInfo: rdi (7)
+    CodeOffset: 0x00 UnwindOp: UWOP_ALLOC_SMALL (2)     OpInfo: 8 * 8 + 8 = 72 = 0x48
+    CodeOffset: 0x00 UnwindOp: UWOP_PUSH_NONVOL (0)     OpInfo: rbp (5)
+```
+
+If the OSR method needs to save RBP, we may see two RBP restores in the epilog;
+this does not appear to cause problems during execution, as the "last one wins"
+when unwinding.
+
+However, the debugger (at least WinDbg) may end up being confused; any tool
+simply following the RBP chain will see the original frame is still "linked"
+into the active stack.
+
+### 5.3 PatchpointInfo
+
+As noted above, when the jit is invoked to create the OSR method, it asks the
+runtime for some extra data:
+* The IL offset of the OSR entry point
+* `PatchpointInfo`: a description of the original method frame
+
+`PatchpointInfo` is produced by the jit when jitting the Tier0 method. It is
+allocated by the runtime similarly to other codegen metadata like GC info and
+unwind info and is likewise associated with the original method. When the
+runtime helper decides to kick off an OSR jit, it sets things up so that the jit
+can retrieve this data.
+
+Since the `PatchpointInfo` is produced and consumed by the jit, its format is
+largely opaque to the runtime. It has the following general layout:
+```C++
+struct PatchpointInfo
+{
+    unsigned m_patchpointInfoSize;
+    unsigned m_ilSize;
+    unsigned m_numberOfLocals;
+    int      m_fpToSpDelta;
+    int      m_genericContextArgOffset;
+    int      m_keptAliveThisOffset;
+    int      m_securityCookieOffset;
+    int      m_offsetAndExposureData[];
+};
+```
+The key values are the `fpToSpDelta` which describes the extent of the original
+frame, and the `offsetAndExposureData`, which describes the offset of each local
+on the original frame.
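+
+In C# terms, the address computation the OSR jit performs with this data might
+be sketched as follows (hypothetical helper; offsets are taken to be relative
+to the original frame pointer):
+```C#
+static unsafe class OsrFrameAccess
+{
+    // Given the original method's frame pointer and PatchpointInfo's offset
+    // table, recover the home of local `lclNum` on the preserved original frame.
+    public static byte* OriginalSlotAddress(byte* originalFp,
+                                            int* offsetAndExposureData,
+                                            int lclNum)
+        => originalFp + offsetAndExposureData[lclNum];
+}
+```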
+
+### 5.4 Performance Impact
+
+The prototype is mainly intended to show that OSR can be used to improve startup
+without compromising steady-state performance: with OSR, we can safely use the
+quick jit for almost all methods.
+
+We are currently evaluating the performance impact of OSR on some realistic
+scenarios.
+
+Initial data shows a general improvement in startup time, in particular for
+applications where startup was impacted by disabling quick-jitting of methods
+with loops (see dotnet/coreclr#24252).
+
+### 5.5 Prototype Limitations and Workarounds
+
+* x64 only
+* Struct promotion is currently disabled for OSR methods
+* No OSR for synchronized methods
+* No OSR for methods with profiler hooks
+* No OSR for methods with localloc
+* No OSR from "handler" regions (catch/finally/filter)
+
+The prototype trigger strategy is a hybrid: it has a per-frame local counter and
+a per-patchpoint global counter (kept by the runtime). This is probably
+something we need to re-assess.
+
+## 6. Edit and Continue
+
+As mentioned in the introduction, OSR is similar to Edit and Continue (EnC). EnC
+transitions from an original unoptimized version of a method to a new
+unoptimized version with slightly different IL. The CLR already supports EnC on
+some platforms, and we briefly review the current implementation here. Our main
+interest is in edits to method IL for an active method, so we focus on that
+aspect.
+
+### 6.1 Current EnC Support
+
+Method modification in EnC works roughly as follows. A process being debugged is
+stopped at a breakpoint. The user makes a source edit and hits apply. The source
+edit is vetted by the language compiler as suitable for EnC and metadata edits
+are sent to the runtime via the debugger. For method modifications these edits
+create a new version of method IL. Any subsequent invocations of that method
+will use that new IL.
+
+To update currently active versions, the debugger also adds special breakpoints
+at the plausible patchpoints in the original method's native code. Execution
+then resumes. When one of those special breakpoints is hit, an active frame of
+the original method is at the top of the stack, and that frame can be
+transitioned over to the new version. The remapping is done using the debug
+information generated by the jit for both the old and new versions of the
+method. As part of this the runtime verifies that locals remain at the same
+addresses in the old and new stack frames (thus avoiding the complication noted
+earlier in [Section 4.1](#Addresses-of-Locals)).
+
+The jit is notified when a method is potentially eligible for EnC and takes
+some precautions to ensure the runtime can handle the EnC transition: for
+instance, the jit will always save the same set of registers and use a frame
+pointer.
+
+So, for EnC, we see that:
+
+- The viable patchpoints are determined by the debugger (via
+  EnCSequencePointHelper). These are restricted to be stack-empty points (since
+  debug info will not describe the contents of the evaluation stack) that are
+  not in filters or handlers. They are a broader set than we envision needing
+  for OSR.
+- The necessary mapping information (stack frame layout, native to IL mapping,
+  and native offsets of stack empty points) is present in the debug stream
+  generated by the jit for the original method.
+- The trigger is a set of special breakpoints placed into the original method
+  native code by the debugger when an edit is applied to the method.
+- When an EnC breakpoint is hit, the debugger can choose whether or not to
+  initiate a transition.
+- If the debugger initiates a transition, it is done synchronously: the new
+  version of the method is jitted if necessary, and the currently active frame
+  is transitioned over to the new version via ResumeInUpdatedFunction. Of
+  interest here are the lower-level methods used to update the frame:
+  FixContextAndResume and FixContextForEnC.
+- The alternative version is a full method and can be used to transition from
+  any patchpoint in the original method.
+- The jit modifies its codegen somewhat to facilitate the transition. It does
+  not, however, explicitly model the alternate entry points.
+
+## 7 Deoptimization
+
+Up until this point we have been assuming the original method was not optimized
+or was optimized in a manner that did not alter its reported live state.
+
+More general optimizations break this property, so additional bookkeeping and
+some restrictions on optimization may be necessary to allow OSR transitions
+from optimized code. We touch on this briefly below.
+
+Optimizations can either increase or decrease live state.
+
+For instance, unused computations can be removed, and unused local updates
+("dead stores") can be skipped. Registers holding no-longer-live locals can be
+reused for other values (as can stack slots, though the current jit does not do
+this).
+
+Other optimizations can increase the live state. The classic example is
+inlining&mdash;a call to a method is expanded inline, and so at patchpoints
+within the inlined body, there are now arguments and locals of the original
+method plus arguments and locals of the inlined method. If we wish to make an
+OSR transition from such a patchpoint to, say, unoptimized code, we need to
+effectively undo the inlining, creating two frames (or more generally N
+frames) in place of the original frame, and two alternate methods (or N
+alternate methods).
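+
+A source-level illustration of the increased live state:
+```C++
+int Leaf(int x)
+{
+    int t = x * 2; // 't' exists only in the inlined body
+    int sum = 0;
+    for (int i = 0; i < t; i++) // patchpoint at loop top
+    {
+        sum += i;
+    }
+    return sum;
+}
+
+int Root(int y)
+{
+    int z = y + 1;
+    // If Leaf is inlined, the patchpoint in its loop now sees Root's
+    // 'y' and 'z' plus Leaf's 'x', 't', 'sum', and 'i' as live state.
+    return Leaf(z);
+}
+```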
+
+The general solution is to first ensure that the live state never decreases. The
+patchpoint locations are determined early, and any values truly live at a
+patchpoint at that initial stage of compilation are forced to remain live at
+that patchpoint always. So, some dead store elimination is inhibited, and some
+forms of code motion are inhibited (e.g. one cannot sink a store to a local out
+of a loop, as the patchpoint at loop top would not observe the updated value).
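+
+For example (illustrative source; the same reasoning applies to the jit's IL):
+```C++
+int SumArray(const int* a, int n)
+{
+    int sum = 0;
+    for (int i = 0; i < n; i++) // patchpoint at loop top observes 'sum'
+    {
+        // This store to 'sum' cannot be sunk out of the loop: the
+        // patchpoint must see the up-to-date value on every iteration,
+        // in case an OSR transition occurs mid-loop.
+        sum += a[i];
+    }
+    return sum;
+}
+```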
+
+With all the "naive" state guaranteed live at a patchpoint, and any additions to
+live state via inlining carefully tracked, one can transition from optimized
+code via OSR.
+
+Given the need to preserve address artifacts, this transition must be done
+gradually&mdash;first creating a frame for the innermost inlined method that
+extends the original frame, then, when this innermost method returns, creating
+a frame for the next innermost inlined method, and so on, until finally the
+root method returns and the optimized method frame can be cleaned up as well.
+
+Each of these (presumably unoptimized) deopt target methods will need to be
+custom-crafted to access the optimized method frame.
+
+This same consideration makes it challenging to implement deopt fallbacks to
+an interpreter; the interpreter will likewise need to keep some of its state
+in the original method frame.
+
+We currently don't have any need to transfer control out of jitted optimized
+code (Tier1), though one could imagine supporting this to better debug
+optimized code. The strongest motivations for deoptimization arise when the
+system has optimized based on "currently true" information that later becomes
+invalid.
+
+## 8 References
+
+1. <a id="1"></a> U. Holzle, C. Chambers and D. Ungar, "Debugging Optimized
+   Code with Dynamic Deoptimization," in _ACM PLDI_, 1992.
+2. <a id="2"></a> M. Paleczny, C. Vick and C. Click, "The Java Hotspot(tm)
+   Server Compiler," in _USENIX Java Virtual Machine Research and
+   Technology Symposium_, 2001.
+3. <a id="3"></a> S. Fink and F. Qian, "Design, Implementation and
+   Evaluation of Adaptive Recompilation with On-Stack Replacement," in
+   _International Symposium on Code Generation and Optimization (CGO)_, 2003.
index a58e3af..8ba4c0d 100644 (file)
@@ -197,6 +197,9 @@ endif(FEATURE_ENABLE_NO_ADDRESS_SPACE_RANDOMIZATION)
 add_definitions(-DFEATURE_SVR_GC)
 add_definitions(-DFEATURE_SYMDIFF)
 add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:CROSSGEN_COMPONENT>>>:FEATURE_TIERED_COMPILATION>)
+if (CLR_CMAKE_TARGET_ARCH_AMD64)
+   add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:CROSSGEN_COMPONENT>>>:FEATURE_ON_STACK_REPLACEMENT>)
+endif (CLR_CMAKE_TARGET_ARCH_AMD64)
 if (CLR_CMAKE_TARGET_WIN32)
     add_definitions(-DFEATURE_TYPEEQUIVALENCE)
 endif(CLR_CMAKE_TARGET_WIN32)
index b8c0060..5a6f82f 100644 (file)
@@ -326,6 +326,38 @@ bool CompileResult::repSetVars(CORINFO_METHOD_HANDLE* ftn, ULONG32* cVars, ICorD
     return true;
 }
 
+// Note - Ownership of patchpointInfo is transferred with this call. In replay icorjitinfo we should free it.
+void CompileResult::recSetPatchpointInfo(PatchpointInfo* patchpointInfo)
+{
+    if (SetPatchpointInfo == nullptr)
+        SetPatchpointInfo = new LightWeightMap<DWORD, Agnostic_SetPatchpointInfo>();
+
+    Agnostic_SetPatchpointInfo value;
+    value.index = (DWORD)SetPatchpointInfo->AddBuffer((const unsigned char*) patchpointInfo, patchpointInfo->PatchpointInfoSize());
+    SetPatchpointInfo->Add(0, value);
+}
+void CompileResult::dmpSetPatchpointInfo(DWORD key, const Agnostic_SetPatchpointInfo& value)
+{
+    PatchpointInfo* patchpointInfo = (PatchpointInfo*)SetPatchpointInfo->GetBuffer(value.index);
+    printf("SetPatchpointInfo key %u, index %u{", key, value.index);
+    // todo -- dump contents
+    printf("}");
+    SetPatchpointInfo->Unlock();
+}
+bool CompileResult::repSetPatchpointInfo(PatchpointInfo** patchpointInfo)
+{
+    if ((SetPatchpointInfo == nullptr) || (SetPatchpointInfo->GetCount() == 0))
+    {
+        *patchpointInfo = nullptr;
+        return false;
+    }
+
+    Agnostic_SetPatchpointInfo value;
+    value = SetPatchpointInfo->Get(0);
+    *patchpointInfo = (PatchpointInfo*)SetPatchpointInfo->GetBuffer(value.index);
+    return true;
+}
+
 void CompileResult::recAllocGCInfo(size_t size, void* retval)
 {
     allocGCInfoDets.size   = size;
index 9fa9e45..8fce7ab 100644 (file)
@@ -112,6 +112,10 @@ public:
         DWORD     cVars;
         DWORD     vars_offset;
     };
+    struct Agnostic_SetPatchpointInfo
+    {
+        DWORD     index;
+    };
     struct Agnostic_CORINFO_EH_CLAUSE2
     {
         DWORD Flags;
@@ -200,6 +204,10 @@ public:
     void dmpSetVars(DWORD key, const Agnostic_SetVars& value);
     bool repSetVars(CORINFO_METHOD_HANDLE* ftn, ULONG32* cVars, ICorDebugInfo::NativeVarInfo** vars);
 
+    void recSetPatchpointInfo(PatchpointInfo* patchpointInfo);
+    void dmpSetPatchpointInfo(DWORD key, const Agnostic_SetPatchpointInfo& value);
+    bool repSetPatchpointInfo(PatchpointInfo** patchpointInfo);
+
     void recAllocGCInfo(size_t size, void* retval);
     void recAllocGCInfoCapture();
     void dmpAllocGCInfo(DWORD key, const Agnostic_AllocGCInfo& value);
index 72fa540..b42077d 100644 (file)
@@ -41,6 +41,7 @@ LWM(SetEHcount, DWORD, DWORD)
 LWM(SetEHinfo, DWORD, CompileResult::Agnostic_CORINFO_EH_CLAUSE2)
 LWM(SetMethodAttribs, DWORDLONG, DWORD)
 LWM(SetVars, DWORD, CompileResult::Agnostic_SetVars)
+LWM(SetPatchpointInfo, DWORD, CompileResult::Agnostic_SetPatchpointInfo)
 
 #undef LWM
 #undef DENSELWM
index 349cec9..9183ad7 100644 (file)
@@ -180,6 +180,15 @@ void getGSCookie(GSCookie*  pCookieVal, // OUT
                  GSCookie** ppCookieVal // OUT
                  );
 
+// Provide patchpoint info for the method currently being jitted.
+void setPatchpointInfo(
+            PatchpointInfo* patchpointInfo
+            );
+
+PatchpointInfo* getOSRInfo(
+           unsigned * ilOffset   // OUT
+           );
+
 /**********************************************************************************/
 //
 // ICorModuleInfo
index c24e0e0..c8aa01e 100644 (file)
@@ -112,6 +112,7 @@ LWM(GetMethodSync, DWORDLONG, DLDL)
 LWM(GetMethodVTableOffset, DWORDLONG, DDD)
 LWM(GetNewArrHelper, DWORDLONG, DWORD)
 LWM(GetNewHelper, Agnostic_GetNewHelper, DD)
+LWM(GetOSRInfo, DWORD, Agnostic_GetOSRInfo)
 LWM(GetParentType, DWORDLONG, DWORDLONG)
 LWM(GetProfilingHandle, DWORD, Agnostic_GetProfilingHandle)
 LWM(GetReadyToRunHelper, GetReadyToRunHelper_TOKENin, GetReadyToRunHelper_TOKENout)
@@ -152,5 +153,6 @@ LWM(TryResolveToken, Agnostic_CORINFO_RESOLVED_TOKENin, TryResolveTokenValue)
 LWM(SatisfiesClassConstraints, DWORDLONG, DWORD)
 LWM(SatisfiesMethodConstraints, DLDL, DWORD)
 
+
 #undef LWM
 #undef DENSELWM
index 9ceb35a..c4a9f47 100644 (file)
@@ -727,6 +727,7 @@ void MethodContext::repCompileMethod(CORINFO_METHOD_INFO* info, unsigned* flags)
     info->locals.pSig  = (PCCOR_SIGNATURE)CompileMethod->GetBuffer(value.info.locals.pSig_Index);
     info->locals.scope = (CORINFO_MODULE_HANDLE)value.info.locals.scope;
     info->locals.token = (mdToken)value.info.locals.token;
+
     *flags             = (unsigned)value.flags;
     DEBUG_REP(dmpCompileMethod(0, value));
 }
@@ -3965,6 +3966,39 @@ void MethodContext::repGetGSCookie(GSCookie* pCookieVal, GSCookie** ppCookieVal)
         *ppCookieVal = (GSCookie*)value.B;
 }
 
+void MethodContext::recGetOSRInfo(PatchpointInfo* patchpointInfo, unsigned* ilOffset)
+{
+    if (GetOSRInfo == nullptr)
+    {
+        GetOSRInfo = new LightWeightMap<DWORD, Agnostic_GetOSRInfo>();
+    }
+
+    Agnostic_GetOSRInfo value;
+
+    value.index = (DWORD)GetOSRInfo->AddBuffer((const unsigned char*) patchpointInfo, patchpointInfo->PatchpointInfoSize());
+    value.ilOffset = *ilOffset;
+
+    // use 0 for key
+    DWORD key = 0;
+    GetOSRInfo->Add(key, value);
+    DEBUG_REC(dmpGetOSRInfo(key, value));
+}
+
+void MethodContext::dmpGetOSRInfo(DWORD key, const Agnostic_GetOSRInfo& value)
+{
+    // todo - dump patchpoint info?
+    printf("GetOSRInfo key %u, value patchpointInfo-%u {...} iloffset-%u\n",
+        key, value.index, value.ilOffset);
+}
+
+PatchpointInfo* MethodContext::repGetOSRInfo(unsigned* ilOffset)
+{
+    DWORD key = 0;
+    Agnostic_GetOSRInfo value = GetOSRInfo->Get(key);
+    *ilOffset = value.ilOffset;
+    return (PatchpointInfo*)GetOSRInfo->GetBuffer(value.index);
+}
+
 void MethodContext::recGetClassModuleIdForStatics(CORINFO_CLASS_HANDLE   cls,
                                                   CORINFO_MODULE_HANDLE* pModule,
                                                   void**                 ppIndirection,
index d0de0fd..5e2454d 100644 (file)
@@ -176,6 +176,11 @@ public:
         DWORD targetAbi;
         DWORD osType;
     };
+    struct Agnostic_GetOSRInfo
+    {
+        DWORD index;
+        unsigned ilOffset;
+    };
     struct Agnostic_GetFieldAddress
     {
         DWORDLONG ppIndirection;
@@ -986,6 +991,10 @@ public:
     void dmpGetGSCookie(DWORD key, DLDL value);
     void repGetGSCookie(GSCookie* pCookieVal, GSCookie** ppCookieVal);
 
+    void recGetOSRInfo(PatchpointInfo* patchpointInfo, unsigned* ilOffset);
+    void dmpGetOSRInfo(DWORD key, const Agnostic_GetOSRInfo& value);
+    PatchpointInfo* repGetOSRInfo(unsigned* ilOffset);
+
     void recGetClassModuleIdForStatics(CORINFO_CLASS_HANDLE   cls,
                                        CORINFO_MODULE_HANDLE* pModule,
                                        void**                 ppIndirection,
@@ -1309,7 +1318,7 @@ private:
 };
 
 // ********************* Please keep this up-to-date to ease adding more ***************
-// Highest packet number: 175
+// Highest packet number: 177
 // *************************************************************************************
 enum mcPackets
 {
@@ -1420,6 +1429,7 @@ enum mcPackets
     Packet_GetMethodVTableOffset                         = 78,
     Packet_GetNewArrHelper                               = 79,
     Packet_GetNewHelper                                  = 80,
+    Packet_GetOSRInfo                                    = 177, // Added 3/5/2020
     Packet_GetParentType                                 = 81,
     Packet_GetPInvokeUnmanagedTarget                     = 82, // Retired 2/18/2020
     Packet_GetProfilingHandle                            = 83,
@@ -1487,6 +1497,7 @@ enum mcPackets
     PacketCR_SetEHinfo                         = 128,
     PacketCR_SetMethodAttribs                  = 129,
     PacketCR_SetVars                           = 130,
+    PacketCR_SetPatchpointInfo                 = 176, // added 8/5/2019
     PacketCR_RecordCallSite                    = 146, // Added 10/28/2013 - to support indirect calls
 };
 
index 3d6c7e2..a8723ce 100644 (file)
@@ -25,6 +25,7 @@
 #include <mscoree.h>
 #include <corjit.h>
 #include <utilcode.h>
+#include <patchpointinfo.h>
 
 /// Turn back on direct access to a few OS level things...
 #undef HeapCreate
index 2768739..29dc318 100644 (file)
@@ -365,6 +365,23 @@ void interceptor_ICJI::getGSCookie(GSCookie*  pCookieVal, // OUT
     mc->recGetGSCookie(pCookieVal, ppCookieVal);
 }
 
+// Provide patchpoint info for the method currently being jitted.
+void interceptor_ICJI::setPatchpointInfo(PatchpointInfo* patchpointInfo)
+{
+    mc->cr->AddCall("setPatchpointInfo");
+    mc->cr->recSetPatchpointInfo(patchpointInfo); // Since the EE frees this, we must record it before it's sent to the EE.
+    original_ICorJitInfo->setPatchpointInfo(patchpointInfo);
+}
+
+// Get OSR info for the method currently being jitted
+PatchpointInfo* interceptor_ICJI::getOSRInfo(unsigned* ilOffset)
+{
+    mc->cr->AddCall("getOSRInfo");
+    PatchpointInfo* patchpointInfo = original_ICorJitInfo->getOSRInfo(ilOffset);
+    mc->recGetOSRInfo(patchpointInfo, ilOffset);
+    return patchpointInfo;
+}
+
 /**********************************************************************************/
 //
 // ICorModuleInfo
index 93d71b7..6a2dff8 100644 (file)
@@ -262,6 +262,20 @@ void interceptor_ICJI::getGSCookie(GSCookie*  pCookieVal, // OUT
     original_ICorJitInfo->getGSCookie(pCookieVal, ppCookieVal);
 }
 
+// Provide patchpoint info for the method currently being jitted.
+void interceptor_ICJI::setPatchpointInfo(PatchpointInfo* patchpointInfo)
+{
+    mcs->AddCall("setPatchpointInfo");
+    original_ICorJitInfo->setPatchpointInfo(patchpointInfo);
+}
+
+// Get OSR info for the method currently being jitted
+PatchpointInfo* interceptor_ICJI::getOSRInfo(unsigned* ilOffset)
+{
+    mcs->AddCall("getOSRInfo");
+    return original_ICorJitInfo->getOSRInfo(ilOffset);
+}
+
 /**********************************************************************************/
 //
 // ICorModuleInfo
index 5fee245..d043fde 100644 (file)
@@ -237,6 +237,19 @@ void interceptor_ICJI::getGSCookie(GSCookie*  pCookieVal, // OUT
     original_ICorJitInfo->getGSCookie(pCookieVal, ppCookieVal);
 }
 
+
+// Provide patchpoint info for the method currently being jitted.
+void interceptor_ICJI::setPatchpointInfo(PatchpointInfo* patchpointInfo)
+{
+    original_ICorJitInfo->setPatchpointInfo(patchpointInfo);
+}
+
+// Get OSR info for the method currently being jitted
+PatchpointInfo* interceptor_ICJI::getOSRInfo(unsigned* ilOffset)
+{
+    return original_ICorJitInfo->getOSRInfo(ilOffset);
+}
+
 /**********************************************************************************/
 //
 // ICorModuleInfo
index 084bf27..ba742ec 100644 (file)
@@ -288,6 +288,21 @@ void MyICJI::getGSCookie(GSCookie*  pCookieVal, // OUT
     jitInstance->mc->repGetGSCookie(pCookieVal, ppCookieVal);
 }
 
+// Provide patchpoint info for the method currently being jitted.
+void MyICJI::setPatchpointInfo(PatchpointInfo* patchpointInfo)
+{
+    jitInstance->mc->cr->AddCall("setPatchpointInfo");
+    jitInstance->mc->cr->recSetPatchpointInfo(patchpointInfo);
+    freeArray(patchpointInfo); // See note in recSetPatchpointInfo... we are responsible for freeing this array.
+}
+
+// Get OSR info for the method currently being jitted
+PatchpointInfo* MyICJI::getOSRInfo(unsigned* ilOffset)
+{
+    jitInstance->mc->cr->AddCall("getOSRInfo");
+    return jitInstance->mc->repGetOSRInfo(ilOffset);
+}
+
 /**********************************************************************************/
 //
 // ICorModuleInfo
index 5bbf1b2..06f900a 100644 (file)
@@ -351,6 +351,10 @@ End
 Crst JitGenericHandleCache
 End
 
+Crst JitPatchpoint
+    AcquiredBefore LoaderHeap
+End
+
 Crst JitPerf
     Unordered
 End
index d4ec91f..175a376 100644 (file)
@@ -645,6 +645,16 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_DeleteCallCountingStubsAfter, W("TC_DeleteC
 #endif
 
 ///
+/// On-Stack Replacement
+///
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_OSR_CounterBump, W("OSR_CounterBump"), 1000, "Counter reload value when a patchpoint is hit")
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_OSR_HitLimit, W("OSR_HitLimit"), 10, "Number of times a patchpoint must call back to trigger an OSR transition")
+CONFIG_DWORD_INFO(INTERNAL_OSR_LowId, W("OSR_LowId"), (DWORD)-1, "Low end of enabled patchpoint range (inclusive)");
+CONFIG_DWORD_INFO(INTERNAL_OSR_HighId, W("OSR_HighId"), 10000000, "High end of enabled patchpoint range (inclusive)");
+#endif
+
+///
 /// Entry point slot backpatch
 ///
 #ifndef CROSSGEN_COMPILE
index 3d20908..9978fac 100644 (file)
@@ -217,11 +217,11 @@ TODO: Talk about initializing strutures before use
 #endif
 #endif
 
-SELECTANY const GUID JITEEVersionIdentifier = { /* b2e40020-6125-41e4-a0fc-821127ec192a */
-    0xb2e40020,
-    0x6125,
-    0x41e4,
-    {0xa0, 0xfc, 0x82, 0x11, 0x27, 0xec, 0x19, 0x2a}
+SELECTANY const GUID JITEEVersionIdentifier = { /* c231d2d7-4764-4097-a9ef-5961041540df */
+    0xc231d2d7,
+    0x4764,
+    0x4097,
+    {0xa9, 0xef, 0x59, 0x61, 0x04, 0x15, 0x40, 0xdf}
 };
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -631,6 +631,8 @@ enum CorInfoHelpFunc
 
     CORINFO_HELP_STACK_PROBE,               // Probes each page of the allocated stack frame
 
+    CORINFO_HELP_PATCHPOINT,                // Notify runtime that code has reached a patchpoint
+
     CORINFO_HELP_COUNT,
 };
 
@@ -1083,6 +1085,11 @@ inline bool dontInline(CorInfoInline val) {
     return(val < 0);
 }
 
+// Patchpoint info is passed back and forth across the interface
+// but is opaque.
+
+struct PatchpointInfo;
+
 // Cookie types consumed by the code generator (these are opaque values
 // not inspected by the code generator):
 
@@ -2145,6 +2152,16 @@ public:
             GSCookie ** ppCookieVal                    // OUT
             ) = 0;
 
+    // Provide patchpoint info for the method currently being jitted.
+    virtual void setPatchpointInfo(
+            PatchpointInfo* patchpointInfo
+            ) = 0;
+
+    // Get patchpoint info and il offset for the method currently being jitted.
+    virtual PatchpointInfo* getOSRInfo(
+            unsigned                       *ilOffset        // [OUT] il offset of OSR entry point
+            ) = 0;
+
     /**********************************************************************************/
     //
     // ICorModuleInfo
index 1982e0c..18f24b5 100644 (file)
@@ -52,7 +52,7 @@ public:
 
     #endif // !defined(TARGET_X86)
 
-        CORJIT_FLAG_UNUSED6                 = 13,
+        CORJIT_FLAG_OSR                     = 13, // Generate alternate method for On Stack Replacement
 
     #if defined(TARGET_X86) || defined(TARGET_AMD64)
 
index 3638826..98d24c2 100644 (file)
@@ -89,88 +89,89 @@ enum CrstType
     CrstJit = 70,
     CrstJitGenericHandleCache = 71,
     CrstJitInlineTrackingMap = 72,
-    CrstJitPerf = 73,
-    CrstJumpStubCache = 74,
-    CrstLeafLock = 75,
-    CrstListLock = 76,
-    CrstLoaderAllocator = 77,
-    CrstLoaderAllocatorReferences = 78,
-    CrstLoaderHeap = 79,
-    CrstMda = 80,
-    CrstMetadataTracker = 81,
-    CrstMethodDescBackpatchInfoTracker = 82,
-    CrstModIntPairList = 83,
-    CrstModule = 84,
-    CrstModuleFixup = 85,
-    CrstModuleLookupTable = 86,
-    CrstMulticoreJitHash = 87,
-    CrstMulticoreJitManager = 88,
-    CrstMUThunkHash = 89,
-    CrstNativeBinderInit = 90,
-    CrstNativeImageCache = 91,
-    CrstNativeImageEagerFixups = 92,
-    CrstNls = 93,
-    CrstNotifyGdb = 94,
-    CrstObjectList = 95,
-    CrstOnEventManager = 96,
-    CrstPatchEntryPoint = 97,
-    CrstPEImage = 98,
-    CrstPEImagePDBStream = 99,
-    CrstPendingTypeLoadEntry = 100,
-    CrstPinHandle = 101,
-    CrstPinnedByrefValidation = 102,
-    CrstProfilerGCRefDataFreeList = 103,
-    CrstProfilingAPIStatus = 104,
-    CrstPublisherCertificate = 105,
-    CrstRCWCache = 106,
-    CrstRCWCleanupList = 107,
-    CrstRCWRefCache = 108,
-    CrstReadyToRunEntryPointToMethodDescMap = 109,
-    CrstReDacl = 110,
-    CrstReflection = 111,
-    CrstReJITGlobalRequest = 112,
-    CrstRemoting = 113,
-    CrstRetThunkCache = 114,
-    CrstRWLock = 115,
-    CrstSavedExceptionInfo = 116,
-    CrstSaveModuleProfileData = 117,
-    CrstSecurityStackwalkCache = 118,
-    CrstSharedAssemblyCreate = 119,
-    CrstSigConvert = 120,
-    CrstSingleUseLock = 121,
-    CrstSpecialStatics = 122,
-    CrstSqmManager = 123,
-    CrstStackSampler = 124,
-    CrstStressLog = 125,
-    CrstStrongName = 126,
-    CrstStubCache = 127,
-    CrstStubDispatchCache = 128,
-    CrstStubUnwindInfoHeapSegments = 129,
-    CrstSyncBlockCache = 130,
-    CrstSyncHashLock = 131,
-    CrstSystemBaseDomain = 132,
-    CrstSystemDomain = 133,
-    CrstSystemDomainDelayedUnloadList = 134,
-    CrstThreadIdDispenser = 135,
-    CrstThreadpoolEventCache = 136,
-    CrstThreadpoolTimerQueue = 137,
-    CrstThreadpoolWaitThreads = 138,
-    CrstThreadpoolWorker = 139,
-    CrstThreadStaticDataHashTable = 140,
-    CrstThreadStore = 141,
-    CrstTieredCompilation = 142,
-    CrstTPMethodTable = 143,
-    CrstTypeEquivalenceMap = 144,
-    CrstTypeIDMap = 145,
-    CrstUMEntryThunkCache = 146,
-    CrstUMThunkHash = 147,
-    CrstUniqueStack = 148,
-    CrstUnresolvedClassLock = 149,
-    CrstUnwindInfoTableLock = 150,
-    CrstVSDIndirectionCellLock = 151,
-    CrstWinRTFactoryCache = 152,
-    CrstWrapperTemplate = 153,
-    kNumberOfCrstTypes = 154
+    CrstJitPatchpoint = 73,
+    CrstJitPerf = 74,
+    CrstJumpStubCache = 75,
+    CrstLeafLock = 76,
+    CrstListLock = 77,
+    CrstLoaderAllocator = 78,
+    CrstLoaderAllocatorReferences = 79,
+    CrstLoaderHeap = 80,
+    CrstMda = 81,
+    CrstMetadataTracker = 82,
+    CrstMethodDescBackpatchInfoTracker = 83,
+    CrstModIntPairList = 84,
+    CrstModule = 85,
+    CrstModuleFixup = 86,
+    CrstModuleLookupTable = 87,
+    CrstMulticoreJitHash = 88,
+    CrstMulticoreJitManager = 89,
+    CrstMUThunkHash = 90,
+    CrstNativeBinderInit = 91,
+    CrstNativeImageCache = 92,
+    CrstNativeImageEagerFixups = 93,
+    CrstNls = 94,
+    CrstNotifyGdb = 95,
+    CrstObjectList = 96,
+    CrstOnEventManager = 97,
+    CrstPatchEntryPoint = 98,
+    CrstPEImage = 99,
+    CrstPEImagePDBStream = 100,
+    CrstPendingTypeLoadEntry = 101,
+    CrstPinHandle = 102,
+    CrstPinnedByrefValidation = 103,
+    CrstProfilerGCRefDataFreeList = 104,
+    CrstProfilingAPIStatus = 105,
+    CrstPublisherCertificate = 106,
+    CrstRCWCache = 107,
+    CrstRCWCleanupList = 108,
+    CrstRCWRefCache = 109,
+    CrstReadyToRunEntryPointToMethodDescMap = 110,
+    CrstReDacl = 111,
+    CrstReflection = 112,
+    CrstReJITGlobalRequest = 113,
+    CrstRemoting = 114,
+    CrstRetThunkCache = 115,
+    CrstRWLock = 116,
+    CrstSavedExceptionInfo = 117,
+    CrstSaveModuleProfileData = 118,
+    CrstSecurityStackwalkCache = 119,
+    CrstSharedAssemblyCreate = 120,
+    CrstSigConvert = 121,
+    CrstSingleUseLock = 122,
+    CrstSpecialStatics = 123,
+    CrstSqmManager = 124,
+    CrstStackSampler = 125,
+    CrstStressLog = 126,
+    CrstStrongName = 127,
+    CrstStubCache = 128,
+    CrstStubDispatchCache = 129,
+    CrstStubUnwindInfoHeapSegments = 130,
+    CrstSyncBlockCache = 131,
+    CrstSyncHashLock = 132,
+    CrstSystemBaseDomain = 133,
+    CrstSystemDomain = 134,
+    CrstSystemDomainDelayedUnloadList = 135,
+    CrstThreadIdDispenser = 136,
+    CrstThreadpoolEventCache = 137,
+    CrstThreadpoolTimerQueue = 138,
+    CrstThreadpoolWaitThreads = 139,
+    CrstThreadpoolWorker = 140,
+    CrstThreadStaticDataHashTable = 141,
+    CrstThreadStore = 142,
+    CrstTieredCompilation = 143,
+    CrstTPMethodTable = 144,
+    CrstTypeEquivalenceMap = 145,
+    CrstTypeIDMap = 146,
+    CrstUMEntryThunkCache = 147,
+    CrstUMThunkHash = 148,
+    CrstUniqueStack = 149,
+    CrstUnresolvedClassLock = 150,
+    CrstUnwindInfoTableLock = 151,
+    CrstVSDIndirectionCellLock = 152,
+    CrstWinRTFactoryCache = 153,
+    CrstWrapperTemplate = 154,
+    kNumberOfCrstTypes = 155
 };
 
 #endif // __CRST_TYPES_INCLUDED
@@ -254,6 +255,7 @@ int g_rgCrstLevelMap[] =
     8,          // CrstJit
     0,          // CrstJitGenericHandleCache
     16,         // CrstJitInlineTrackingMap
+    3,          // CrstJitPatchpoint
     -1,         // CrstJitPerf
     6,          // CrstJumpStubCache
     0,          // CrstLeafLock
@@ -413,6 +415,7 @@ LPCSTR g_rgCrstNameMap[] =
     "CrstJit",
     "CrstJitGenericHandleCache",
     "CrstJitInlineTrackingMap",
+    "CrstJitPatchpoint",
     "CrstJitPerf",
     "CrstJumpStubCache",
     "CrstLeafLock",
index 8e92cbd..cc726a9 100644 (file)
     JITHELPER(CORINFO_HELP_STACK_PROBE, NULL, CORINFO_HELP_SIG_UNDEF)
 #endif
 
+    JITHELPER(CORINFO_HELP_PATCHPOINT, JIT_Patchpoint, CORINFO_HELP_SIG_REG_ONLY)
+
 #undef JITHELPER
 #undef DYNAMICJITHELPER
 #undef JITHELPER
diff --git a/src/coreclr/src/inc/patchpointinfo.h b/src/coreclr/src/inc/patchpointinfo.h
new file mode 100644 (file)
index 0000000..135ad01
--- /dev/null
@@ -0,0 +1,145 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// --------------------------------------------------------------------------------
+// patchpointinfo.h
+// --------------------------------------------------------------------------------
+
+#include <clrtypes.h>
+
+#ifndef _PATCHPOINTINFO_H_
+#define _PATCHPOINTINFO_H_
+
+// --------------------------------------------------------------------------------
+// Describes information needed to make an OSR transition
+//  - location of IL-visible locals and other important state on the
+//    original (Tier0) method frame
+//  - total size of the original frame, and SP-FP delta
+//
+// Currently the patchpoint info is independent of the IL offset of the patchpoint.
+//
+// This data is produced when jitting a Tier0 method with OSR enabled, and consumed
+// by the Tier1/OSR jit request.
+//
+struct PatchpointInfo
+{
+    // Determine how much storage is needed to hold this info
+    static unsigned ComputeSize(unsigned localCount)
+    {
+        unsigned baseSize     = sizeof(PatchpointInfo);
+        unsigned variableSize = localCount * sizeof(int);
+        unsigned totalSize    = baseSize + variableSize;
+        return totalSize;
+    }
+
+    // Initialize
+    void Initialize(unsigned localCount, int fpToSpDelta)
+    {
+        m_fpToSpDelta             = fpToSpDelta;
+        m_numberOfLocals          = localCount;
+        m_genericContextArgOffset = -1;
+        m_keptAliveThisOffset     = -1;
+        m_securityCookieOffset    = -1;
+    }
+
+    // Total size of this patchpoint info record, in bytes
+    unsigned PatchpointInfoSize() const
+    {
+        return ComputeSize(m_numberOfLocals);
+    }
+
+    // FP to SP delta of the original method
+    int FpToSpDelta() const
+    {
+        return m_fpToSpDelta;
+    }
+
+    // Number of locals in the original method (including special locals)
+    unsigned NumberOfLocals() const
+    {
+        return m_numberOfLocals;
+    }
+
+    // Original method caller SP offset for generic context arg
+    int GenericContextArgOffset() const
+    {
+        return m_genericContextArgOffset;
+    }
+
+    void SetGenericContextArgOffset(int offset)
+    {
+        m_genericContextArgOffset = offset;
+    }
+
+    // Original method FP relative offset for kept-alive this
+    int KeptAliveThisOffset() const
+    {
+        return m_keptAliveThisOffset;
+    }
+
+    bool HasKeptAliveThis() const
+    {
+        return m_keptAliveThisOffset != -1;
+    }
+
+    void SetKeptAliveThisOffset(int offset)
+    {
+        m_keptAliveThisOffset = offset;
+    }
+
+    // Original method FP relative offset for security cookie
+    int SecurityCookieOffset() const
+    {
+        return m_securityCookieOffset;
+    }
+
+    bool HasSecurityCookie() const
+    {
+        return m_securityCookieOffset != -1;
+    }
+
+    void SetSecurityCookieOffset(int offset)
+    {
+        m_securityCookieOffset = offset;
+    }
+
+    // True if this local was address exposed in the original method
+    bool IsExposed(unsigned localNum) const
+    {
+        return ((m_offsetAndExposureData[localNum] & EXPOSURE_MASK) != 0);
+    }
+
+    void SetIsExposed(unsigned localNum)
+    {
+        m_offsetAndExposureData[localNum] |= EXPOSURE_MASK;
+    }
+
+    // FP relative offset of this local in the original method
+    int Offset(unsigned localNum) const
+    {
+        return (m_offsetAndExposureData[localNum] & ~EXPOSURE_MASK);
+    }
+
+    void SetOffset(unsigned localNum, int offset)
+    {
+        m_offsetAndExposureData[localNum] = offset;
+    }
+
+private:
+    enum
+    {
+        EXPOSURE_MASK = 0x1
+    };
+
+    unsigned m_numberOfLocals;
+    int      m_fpToSpDelta;
+    int      m_genericContextArgOffset;
+    int      m_keptAliveThisOffset;
+    int      m_securityCookieOffset;
+    int      m_offsetAndExposureData[];
+};
+
+typedef DPTR(struct PatchpointInfo) PTR_PatchpointInfo;
+
+#endif // _PATCHPOINTINFO_H_
index 2f4a3f2..5be8e3f 100644 (file)
@@ -67,6 +67,7 @@ set( JIT_SOURCES
   objectalloc.cpp
   optcse.cpp
   optimizer.cpp
+  patchpoint.cpp
   phase.cpp
   rangecheck.cpp
   rationalize.cpp
index 01058ba..fe186a3 100644 (file)
@@ -344,6 +344,14 @@ void BasicBlock::dspFlags()
     {
         printf("bwd ");
     }
+    if (bbFlags & BBF_BACKWARD_JUMP_TARGET)
+    {
+        printf("bwd-target ");
+    }
+    if (bbFlags & BBF_PATCHPOINT)
+    {
+        printf("ppoint ");
+    }
     if (bbFlags & BBF_RETLESS_CALL)
     {
         printf("retless ");
index 1589737..94adae3 100644 (file)
@@ -440,14 +440,15 @@ struct BasicBlock : private LIR::Range
                                            // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
                                            // finally.
 
-#define BBF_CLONED_FINALLY_BEGIN    0x100000000 // First block of a cloned finally region
-#define BBF_CLONED_FINALLY_END      0x200000000 // Last block of a cloned finally region
-#define BBF_HAS_CALL                0x400000000 // BB contains a call
+#define BBF_CLONED_FINALLY_BEGIN           0x100000000 // First block of a cloned finally region
+#define BBF_CLONED_FINALLY_END             0x200000000 // Last block of a cloned finally region
+#define BBF_HAS_CALL                       0x400000000 // BB contains a call
+#define BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY 0x800000000 // Block is dominated by exceptional entry.
+#define BBF_BACKWARD_JUMP_TARGET          0x1000000000 // Block is a target of a backward jump
+#define BBF_PATCHPOINT                    0x2000000000 // Block is a patchpoint
 
 // clang-format on
 
-#define BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY 0x800000000 // Block is dominated by exceptional entry.
-
 // Flags that relate blocks to loop structure.
 
 #define BBF_LOOP_FLAGS (BBF_LOOP_PREHEADER | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1)
index 70a5b22..c087fbf 100644 (file)
@@ -28,6 +28,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #include "gcinfoencoder.h"
 #endif
 
+#include "patchpointinfo.h"
+
 /*****************************************************************************/
 
 const BYTE genTypeSizes[] = {
@@ -2152,6 +2154,11 @@ void CodeGen::genGenerateMachineCode()
             printf("; ReadyToRun compilation\n");
         }
 
+        if (compiler->opts.IsOSR())
+        {
+            printf("; OSR variant for entry point 0x%x\n", compiler->info.compILEntry);
+        }
+
         if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
         {
             printf("; optimized code\n");
@@ -4488,6 +4495,12 @@ void CodeGen::genEnregisterIncomingStackArgs()
     }
 #endif
 
+    // OSR handles this specially
+    if (compiler->opts.IsOSR())
+    {
+        return;
+    }
+
     assert(compiler->compGeneratingProlog);
 
     unsigned varNum = 0;
@@ -4588,6 +4601,20 @@ void CodeGen::genCheckUseBlockInit()
             continue;
         }
 
+        // Initialization of OSR locals must be handled specially
+        if (compiler->lvaIsOSRLocal(varNum))
+        {
+            varDsc->lvMustInit = 0;
+            continue;
+        }
+
+        // Likewise, initialization of the GS cookie is handled specially for OSR.
+        // Could do this for non-OSR too (likewise for the dummy).
+        if (compiler->opts.IsOSR() && varNum == compiler->lvaGSSecurityCookie)
+        {
+            continue;
+        }
+
         if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
         {
             noway_assert(varDsc->lvRefCnt() == 0);
@@ -6628,6 +6655,127 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
             inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
         }
     }
+
+    // Initialize args and locals for OSR. Note this may include promoted fields.
+    if (compiler->opts.IsOSR())
+    {
+        PatchpointInfo* patchpointInfo = compiler->info.compPatchpointInfo;
+
+        // basic sanity checks (make sure we're OSRing the right method)
+        assert(patchpointInfo->NumberOfLocals() == compiler->info.compLocalsCount);
+
+        const int      originalFrameSize = patchpointInfo->FpToSpDelta();
+        const unsigned patchpointInfoLen = patchpointInfo->NumberOfLocals();
+
+        for (unsigned varNum = 0; varNum < compiler->lvaCount; varNum++)
+        {
+            if (!compiler->lvaIsOSRLocal(varNum))
+            {
+                continue;
+            }
+
+            LclVarDsc* const varDsc = compiler->lvaGetDesc(varNum);
+
+            if (!varDsc->lvIsInReg())
+            {
+                JITDUMP("---OSR--- V%02u in memory\n", varNum);
+                continue;
+            }
+
+            if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+            {
+                JITDUMP("---OSR--- V%02u (reg) not live at entry\n", varNum);
+                continue;
+            }
+
+            int      fieldOffset = 0;
+            unsigned lclNum      = varNum;
+
+            if (varDsc->lvIsStructField)
+            {
+                lclNum = varDsc->lvParentLcl;
+                assert(lclNum < patchpointInfoLen);
+
+                fieldOffset = varDsc->lvFldOffset;
+                JITDUMP("---OSR--- V%02u is promoted field of V%02u at offset %d\n", varNum, lclNum, fieldOffset);
+            }
+
+            // Note we are always reading from the original frame here
+            const var_types lclTyp  = genActualType(varDsc->lvType);
+            const emitAttr  size    = emitTypeSize(lclTyp);
+            const int       stkOffs = patchpointInfo->Offset(lclNum) + fieldOffset;
+
+            // Original frames always use frame pointers, so
+            // stkOffs is the original frame-relative offset
+            // to the variable.
+            //
+            // We need to determine the stack or frame-pointer relative
+            // offset for this variable in the current frame.
+            //
+            // If current frame does not use a frame pointer, we need to
+            // add the SP-to-FP delta of this frame and the SP-to-FP delta
+            // of the original frame; that translates from this frame's
+            // stack pointer the old frame frame pointer.
+            //
+            // We then add the original frame's frame-pointer relative
+            // offset (note this offset is usually negative -- the stack
+            // grows down, so locals are below the frame pointer).
+            //
+            // /-----original frame-----/
+            // / return address         /
+            // / saved RBP   --+        /  <--- Original frame ptr   --+
+            // / ...           |        /                              |
+            // / ...       (stkOffs)    /                              |
+            // / ...           |        /                              |
+            // / variable    --+        /                              |
+            // / ...                    /                (original frame sp-fp delta)
+            // / ...                    /                              |
+            // /-----OSR frame ---------/                              |
+            // / pseudo return address  /                            --+
+            // / ...                    /                              |
+            // / ...                    /                    (this frame sp-fp delta)
+            // / ...                    /                              |
+            // /------------------------/  <--- Stack ptr            --+
+            //
+            // If the current frame is using a frame pointer, we need to
+            // add the SP-to-FP delta of the original frame and then add
+            // the original frame's frame-pointer relative offset.
+            //
+            // /-----original frame-----/
+            // / return address         /
+            // / saved RBP   --+        /  <--- Original frame ptr   --+
+            // / ...           |        /                              |
+            // / ...       (stkOffs)    /                              |
+            // / ...           |        /                              |
+            // / variable    --+        /                              |
+            // / ...                    /                (original frame sp-fp delta)
+            // / ...                    /                              |
+            // /-----OSR frame ---------/                              |
+            // / pseudo return address  /                            --+
+            // / saved RBP              /  <--- Frame ptr            --+
+            // / ...                    /
+            // / ...                    /
+            // / ...                    /
+            // /------------------------/
+
+            int offset = originalFrameSize + stkOffs;
+
+            if (isFramePointerUsed())
+            {
+                // also adjust for saved RBP on this frame
+                offset += TARGET_POINTER_SIZE;
+            }
+            else
+            {
+                offset += genSPtoFPdelta();
+            }
+
+            JITDUMP("---OSR--- V%02u (reg) old rbp offset %d old frame %d this frame sp-fp %d new offset %d (%02xH)\n",
+                    varNum, stkOffs, originalFrameSize, genSPtoFPdelta(), offset, offset);
+
+            GetEmitter()->emitIns_R_AR(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset);
+        }
+    }
 }
 
 /*-----------------------------------------------------------------------------
@@ -6641,6 +6789,12 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
 
 void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
 {
+    // For OSR the original method has set this up for us.
+    if (compiler->opts.IsOSR())
+    {
+        return;
+    }
+
     assert(compiler->compGeneratingProlog);
 
     bool reportArg = compiler->lvaReportParamTypeArg();
@@ -7295,6 +7449,19 @@ void CodeGen::genFnProlog()
         psiBegProlog();
     }
 
+#if defined(TARGET_XARCH)
+    // For OSR there is a "phantom prolog" to account for the actions taken
+    // in the original frame that impact RBP and RSP on entry to the OSR method.
+    if (compiler->opts.IsOSR())
+    {
+        PatchpointInfo* patchpointInfo    = compiler->info.compPatchpointInfo;
+        const int       originalFrameSize = patchpointInfo->FpToSpDelta();
+
+        compiler->unwindPush(REG_FPBASE);
+        compiler->unwindAllocStack(originalFrameSize);
+    }
+#endif
+
 #ifdef DEBUG
 
     if (compiler->compJitHaltMethod())
@@ -7486,7 +7653,8 @@ void CodeGen::genFnProlog()
         }
     }
 
-    assert((genInitStkLclCnt > 0) == hasUntrLcl);
+    // TODO-Cleanup: Add suitable assert for the OSR case.
+    assert(compiler->opts.IsOSR() || ((genInitStkLclCnt > 0) == hasUntrLcl));
 
 #ifdef DEBUG
     if (verbose)
@@ -7592,7 +7760,9 @@ void CodeGen::genFnProlog()
     // This way, the varargs iterator will be able to retrieve the
     // call arguments properly since both the arg regs and the stack allocated
     // args will be contiguous.
-    if (compiler->info.compIsVarArgs)
+    //
+    // OSR methods can skip this, as the setup is done by the original method.
+    if (compiler->info.compIsVarArgs && !compiler->opts.IsOSR())
     {
         GetEmitter()->spillIntArgRegsToShadowSlots();
     }
@@ -7800,7 +7970,11 @@ void CodeGen::genFnProlog()
 #ifdef PROFILING_SUPPORTED
 
     // Insert a function entry callback for profiling, if requested.
-    genProfilingEnterCallback(initReg, &initRegZeroed);
+    // OSR methods aren't called, so they don't have enter hooks.
+    if (!compiler->opts.IsOSR())
+    {
+        genProfilingEnterCallback(initReg, &initRegZeroed);
+    }
 
 #endif // PROFILING_SUPPORTED
 
@@ -7839,37 +8013,43 @@ void CodeGen::genFnProlog()
     // Update the arg initial register locations.
     compiler->lvaUpdateArgsWithInitialReg();
 
-    FOREACH_REGISTER_FILE(regState)
+    // Home incoming arguments and generate any required inits.
+    // OSR handles this by moving the values from the original frame.
+    //
+    if (!compiler->opts.IsOSR())
     {
-        if (regState->rsCalleeRegArgMaskLiveIn)
+        FOREACH_REGISTER_FILE(regState)
         {
-            // If we need an extra register to shuffle around the incoming registers
-            // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
-            // if we don't need to use the xtraReg then this flag will stay false
-            //
-            regNumber xtraReg;
-            bool      xtraRegClobbered = false;
-
-            if (genRegMask(initReg) & RBM_ARG_REGS)
-            {
-                xtraReg = initReg;
-            }
-            else
+            if (regState->rsCalleeRegArgMaskLiveIn)
             {
-                xtraReg       = REG_SCRATCH;
-                initRegZeroed = false;
-            }
+                // If we need an extra register to shuffle around the incoming registers
+                // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
+                // if we don't need to use the xtraReg then this flag will stay false
+                //
+                regNumber xtraReg;
+                bool      xtraRegClobbered = false;
 
-            genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
+                if (genRegMask(initReg) & RBM_ARG_REGS)
+                {
+                    xtraReg = initReg;
+                }
+                else
+                {
+                    xtraReg       = REG_SCRATCH;
+                    initRegZeroed = false;
+                }
 
-            if (xtraRegClobbered)
-            {
-                initRegZeroed = false;
+                genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
+
+                if (xtraRegClobbered)
+                {
+                    initRegZeroed = false;
+                }
             }
         }
     }
 
-    // Home the incoming arguments
+    // Home the incoming arguments.
     genEnregisterIncomingStackArgs();
 
     /* Initialize any must-init registers variables now */
@@ -8439,6 +8619,24 @@ void CodeGen::genFnEpilog(BasicBlock* block)
         }
 
         genPopCalleeSavedRegisters();
+
+        // Extra OSR adjust to get to where RBP was saved by the original frame, and
+        // restore RBP.
+        //
+        // Note the other callee saves made in that frame are dead, the OSR method
+        // will save and restore what it needs.
+        if (compiler->opts.IsOSR())
+        {
+            PatchpointInfo* patchpointInfo    = compiler->info.compPatchpointInfo;
+            const int       originalFrameSize = patchpointInfo->FpToSpDelta();
+
+            // Use add since we know the SP-to-FP delta of the original method.
+            //
+            // If we ever allow the original method to have localloc this will
+            // need to change.
+            inst_RV_IV(INS_add, REG_SPBASE, originalFrameSize, EA_PTRSIZE);
+            inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+        }
     }
     else
     {
@@ -8470,9 +8668,11 @@ void CodeGen::genFnEpilog(BasicBlock* block)
 
             if (compiler->compLocallocUsed)
             {
+                // OSR not yet ready for localloc
+                assert(!compiler->opts.IsOSR());
+
                 // ESP may be variable if a localloc was actually executed. Reset it.
                 //    lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
-
                 needLea = true;
             }
             else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
@@ -8542,10 +8742,26 @@ void CodeGen::genFnEpilog(BasicBlock* block)
         //
         // Pop the callee-saved registers (if any)
         //
-
         genPopCalleeSavedRegisters();
 
 #ifdef TARGET_AMD64
+        // Extra OSR adjust to get to where RBP was saved by the original frame.
+        //
+        // Note the other callee saves made in that frame are dead, the current method
+        // will save and restore what it needs.
+        if (compiler->opts.IsOSR())
+        {
+            PatchpointInfo* patchpointInfo    = compiler->info.compPatchpointInfo;
+            const int       originalFrameSize = patchpointInfo->FpToSpDelta();
+
+            // Use add since we know the SP-to-FP delta of the original method.
+            // We also need to skip over the slot where we pushed RBP.
+            //
+            // If we ever allow the original method to have localloc this will
+            // need to change.
+            inst_RV_IV(INS_add, REG_SPBASE, originalFrameSize + TARGET_POINTER_SIZE, EA_PTRSIZE);
+        }
+
         assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
 #else  // !TARGET_AMD64
         if (needMovEspEbp)
index fd923c0..52095ec 100644 (file)
@@ -22,6 +22,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #include "lower.h"
 #include "gcinfo.h"
 #include "gcinfoencoder.h"
+#include "patchpointinfo.h"
 
 /*****************************************************************************
  *
@@ -69,6 +70,12 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
         return;
     }
 
+    if (compiler->opts.IsOSR() && compiler->info.compPatchpointInfo->HasSecurityCookie())
+    {
+        // Security cookie is on original frame and was initialized there.
+        return;
+    }
+
     if (compiler->gsGlobalSecurityCookieAddr == nullptr)
     {
         noway_assert(compiler->gsGlobalSecurityCookieVal != 0);
index cf93c9b..6febed1 100644 (file)
@@ -22,6 +22,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #include "lower.h"
 #include "stacklevelsetter.h"
 #include "jittelemetry.h"
+#include "patchpointinfo.h"
 
 #if defined(DEBUG)
 // Column settings for COMPlus_JitDumpIR.  We could(should) make these programmable.
@@ -3228,6 +3229,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
             printf("OPTIONS: Tier-1/FullOpts compilation, switched to MinOpts\n");
         }
 
+        if (jitFlags->IsSet(JitFlags::JIT_FLAG_OSR))
+        {
+            printf("OPTIONS: OSR variant with entry point 0x%x\n", info.compILEntry);
+        }
+
         printf("OPTIONS: compCodeOpt = %s\n",
                (opts.compCodeOpt == BLENDED_CODE)
                    ? "BLENDED_CODE"
@@ -4278,6 +4284,10 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
     //
     DoPhase(this, PHASE_INDXCALL, &Compiler::fgTransformIndirectCalls);
 
+    // Expand any patchpoints
+    //
+    DoPhase(this, PHASE_PATCHPOINTS, &Compiler::fgTransformPatchpoints);
+
     // PostImportPhase: cleanup inlinees
     //
     auto postImportPhase = [this]() {
@@ -4925,6 +4935,9 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
     }
 #endif
 
+    // Generate PatchpointInfo
+    generatePatchpointInfo();
+
     RecordStateAtEndOfCompilation();
 
 #ifdef FEATURE_TRACELOGGING
@@ -4953,6 +4966,86 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
 }
 
 //------------------------------------------------------------------------
+// generatePatchpointInfo: allocate and fill in patchpoint info data,
+//    and report it to the VM
+//
+void Compiler::generatePatchpointInfo()
+{
+    if (!doesMethodHavePatchpoints())
+    {
+        // Nothing to report
+        return;
+    }
+
+    // Patchpoints are only found in Tier0 code, which is unoptimized, and so
+    // should always have frame pointer.
+    assert(codeGen->isFramePointerUsed());
+
+    // Allocate patchpoint info storage from runtime, and fill in initial bits of data.
+    const unsigned        patchpointInfoSize = PatchpointInfo::ComputeSize(info.compLocalsCount);
+    PatchpointInfo* const patchpointInfo     = (PatchpointInfo*)info.compCompHnd->allocateArray(patchpointInfoSize);
+
+    // The +TARGET_POINTER_SIZE here is to account for the extra slot the runtime
+    // creates when it simulates calling the OSR method (the "pseudo return address" slot).
+    patchpointInfo->Initialize(info.compLocalsCount, codeGen->genSPtoFPdelta() + TARGET_POINTER_SIZE);
+
+    JITDUMP("--OSR--- FP-SP delta is %d\n", patchpointInfo->FpToSpDelta());
+
+    // We record offsets for all the "locals" here. Could restrict
+    // this to just the IL locals with some extra logic, and save a bit of space,
+    // but would need to adjust all consumers, too.
+    for (unsigned lclNum = 0; lclNum < info.compLocalsCount; lclNum++)
+    {
+        LclVarDsc* const varDsc = lvaGetDesc(lclNum);
+
+        // We expect all these to have stack homes, and be FP relative
+        assert(varDsc->lvOnFrame);
+        assert(varDsc->lvFramePointerBased);
+
+        // Record FramePtr relative offset (no localloc yet)
+        patchpointInfo->SetOffset(lclNum, varDsc->lvStkOffs);
+
+        // Note if IL stream contained an address-of that potentially leads to exposure.
+        // This bit of IL may be skipped by OSR partial importation.
+        if (varDsc->lvHasLdAddrOp)
+        {
+            patchpointInfo->SetIsExposed(lclNum);
+        }
+
+        JITDUMP("--OSR-- V%02u is at offset %d%s\n", lclNum, patchpointInfo->Offset(lclNum),
+                patchpointInfo->IsExposed(lclNum) ? " (exposed)" : "");
+    }
+
+    // Special offsets
+
+    if (lvaReportParamTypeArg() || lvaKeepAliveAndReportThis())
+    {
+        const int offset = lvaToCallerSPRelativeOffset(lvaCachedGenericContextArgOffset(), true);
+        patchpointInfo->SetGenericContextArgOffset(offset);
+        JITDUMP("--OSR-- cached generic context offset is CallerSP %d\n", patchpointInfo->GenericContextArgOffset());
+    }
+
+    if (lvaKeepAliveAndReportThis())
+    {
+        const int offset = lvaCachedGenericContextArgOffset();
+        patchpointInfo->SetKeptAliveThisOffset(offset);
+        JITDUMP("--OSR-- kept-alive this offset is FP %d\n", patchpointInfo->KeptAliveThisOffset());
+    }
+
+    if (compGSReorderStackLayout)
+    {
+        assert(lvaGSSecurityCookie != BAD_VAR_NUM);
+        LclVarDsc* const varDsc = lvaGetDesc(lvaGSSecurityCookie);
+        patchpointInfo->SetSecurityCookieOffset(varDsc->lvStkOffs);
+        JITDUMP("--OSR-- security cookie V%02u offset is FP %d\n", lvaGSSecurityCookie,
+                patchpointInfo->SecurityCookieOffset());
+    }
+
+    // Register this with the runtime.
+    info.compCompHnd->setPatchpointInfo(patchpointInfo);
+}
+
+//------------------------------------------------------------------------
 // ResetOptAnnotations: Clear annotations produced during global optimizations.
 //
 // Notes:
@@ -5208,6 +5301,19 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
     info.compMethodHnd  = methodHnd;
     info.compMethodInfo = methodInfo;
 
+    if (compIsForInlining())
+    {
+        compileFlags->Clear(JitFlags::JIT_FLAG_OSR);
+        info.compILEntry        = 0;
+        info.compPatchpointInfo = nullptr;
+    }
+    else if (compileFlags->IsSet(JitFlags::JIT_FLAG_OSR))
+    {
+        // Fetch OSR info from the runtime
+        info.compPatchpointInfo = info.compCompHnd->getOSRInfo(&info.compILEntry);
+        assert(info.compPatchpointInfo != nullptr);
+    }
+
     virtualStubParamInfo = new (this, CMK_Unknown) VirtualStubParamInfo(IsTargetAbi(CORINFO_CORERT_ABI));
 
     // compMatchedVM is set to true if both CPU/ABI and OS are matching the execution engine requirements
@@ -6019,9 +6125,9 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
 #ifdef DEBUG
     if ((JitConfig.DumpJittedMethods() == 1) && !compIsForInlining())
     {
-        printf("Compiling %4d %s::%s, IL size = %u, hash=0x%08x %s%s\n", Compiler::jitTotalMethodCompiled,
+        printf("Compiling %4d %s::%s, IL size = %u, hash=0x%08x %s%s%s\n", Compiler::jitTotalMethodCompiled,
                info.compClassName, info.compMethodName, info.compILCodeSize, info.compMethodHash(),
-               compGetTieringName(), compGetStressMessage());
+               compGetTieringName(), opts.IsOSR() ? " OSR" : "", compGetStressMessage());
     }
     if (compIsForInlining())
     {
@@ -9183,3 +9289,36 @@ bool Compiler::killGCRefs(GenTree* tree)
 
     return false;
 }
+
+//------------------------------------------------------------------------
+// lvaIsOSRLocal: check if this local var is one that requires special
+//     treatment for OSR compilations.
+//
+// Arguments:
+//    varNum     - variable of interest
+//
+// Return Value:
+//    true       - this is an OSR compile and this local requires special treatment
+//    false      - not an OSR compile, or not an interesting local for OSR
+
+bool Compiler::lvaIsOSRLocal(unsigned varNum)
+{
+    if (!opts.IsOSR())
+    {
+        return false;
+    }
+
+    if (varNum < info.compLocalsCount)
+    {
+        return true;
+    }
+
+    LclVarDsc* varDsc = lvaGetDesc(varNum);
+
+    if (varDsc->lvIsStructField)
+    {
+        return (varDsc->lvParentLcl < info.compLocalsCount);
+    }
+
+    return false;
+}
index 8b88349..2a87d9c 100644 (file)
@@ -2406,6 +2406,8 @@ public:
 
     EHblkDsc* ehInitTryBlockRange(BasicBlock* blk, BasicBlock** tryBeg, BasicBlock** tryLast);
 
+    void fgSetTryBeg(EHblkDsc* handlerTab, BasicBlock* newTryBeg);
+
     void fgSetTryEnd(EHblkDsc* handlerTab, BasicBlock* newTryLast);
 
     void fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast);
@@ -3203,6 +3205,10 @@ public:
     int lvaToInitialSPRelativeOffset(unsigned offset, bool isFpBased);
     int lvaGetInitialSPRelativeOffset(unsigned varNum);
 
+    // True if this is an OSR compilation and this local is potentially
+    // located on the original method stack frame.
+    bool lvaIsOSRLocal(unsigned varNum);
+
     //------------------------ For splitting types ----------------------------
 
     void lvaInitTypeRef();
@@ -3525,8 +3531,7 @@ public:
 
 public:
     void impInit();
-
-    void impImport(BasicBlock* method);
+    void impImport();
 
     CORINFO_CLASS_HANDLE impGetRefAnyClass();
     CORINFO_CLASS_HANDLE impGetRuntimeArgumentHandle();
@@ -4142,6 +4147,7 @@ public:
     BasicBlock* fgFirstBB;        // Beginning of the basic block list
     BasicBlock* fgLastBB;         // End of the basic block list
     BasicBlock* fgFirstColdBlock; // First block to be placed in the cold section
+    BasicBlock* fgEntryBB;        // For OSR, the original method's entry point
 #if defined(FEATURE_EH_FUNCLETS)
     BasicBlock* fgFirstFuncletBB; // First block of outlined funclets (to allow block insertion before the funclets)
 #endif
@@ -4355,6 +4361,8 @@ public:
 
     void fgTransformIndirectCalls();
 
+    void fgTransformPatchpoints();
+
     void fgInline();
 
     void fgRemoveEmptyTry();
@@ -5264,11 +5272,10 @@ public:
 public:
     void fgInsertStmtAtEnd(BasicBlock* block, Statement* stmt);
     Statement* fgNewStmtAtEnd(BasicBlock* block, GenTree* tree);
+    Statement* fgNewStmtNearEnd(BasicBlock* block, GenTree* tree);
 
 private:
     void fgInsertStmtNearEnd(BasicBlock* block, Statement* stmt);
-    Statement* fgNewStmtNearEnd(BasicBlock* block, GenTree* tree);
-
     void fgInsertStmtAtBeg(BasicBlock* block, Statement* stmt);
     Statement* fgNewStmtAtBeg(BasicBlock* block, GenTree* tree);
 
@@ -6348,6 +6355,7 @@ public:
 #define OMF_HAS_OBJSTACKALLOC 0x00000040    // Method contains an object allocated on the stack.
 #define OMF_HAS_GUARDEDDEVIRT 0x00000080    // Method contains guarded devirtualization candidate
 #define OMF_HAS_EXPRUNTIMELOOKUP 0x00000100 // Method contains a runtime lookup to an expandable dictionary.
+#define OMF_HAS_PATCHPOINT 0x00000200       // Method contains patchpoints
 
     bool doesMethodHaveFatPointer()
     {
@@ -6404,6 +6412,16 @@ public:
 
     void addExpRuntimeLookupCandidate(GenTreeCall* call);
 
+    bool doesMethodHavePatchpoints()
+    {
+        return (optMethodFlags & OMF_HAS_PATCHPOINT) != 0;
+    }
+
+    void setMethodHasPatchpoint()
+    {
+        optMethodFlags |= OMF_HAS_PATCHPOINT;
+    }
+
     unsigned optMethodFlags;
 
     bool doesMethodHaveNoReturnCalls()
@@ -8447,6 +8465,18 @@ public:
         }
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+        bool IsOSR() const
+        {
+            return jitFlags->IsSet(JitFlags::JIT_FLAG_OSR);
+        }
+#else
+        bool IsOSR() const
+        {
+            return false;
+        }
+#endif
+
         // true if we should use the PINVOKE_{BEGIN,END} helpers instead of generating
         // PInvoke transitions inline (e.g. when targeting CoreRT).
         bool ShouldUsePInvokeHelpers()
@@ -8783,11 +8813,13 @@ public:
         // The following holds the class attributes for the method we're compiling.
         unsigned compClassAttr;
 
-        const BYTE*    compCode;
-        IL_OFFSET      compILCodeSize;     // The IL code size
-        IL_OFFSET      compILImportSize;   // Estimated amount of IL actually imported
-        UNATIVE_OFFSET compNativeCodeSize; // The native code size, after instructions are issued. This
-                                           // is less than (compTotalHotCodeSize + compTotalColdCodeSize) only if:
+        const BYTE*     compCode;
+        IL_OFFSET       compILCodeSize;     // The IL code size
+        IL_OFFSET       compILImportSize;   // Estimated amount of IL actually imported
+        IL_OFFSET       compILEntry;        // The IL entry point (normally 0)
+        PatchpointInfo* compPatchpointInfo; // Patchpoint data for OSR (normally nullptr)
+        UNATIVE_OFFSET  compNativeCodeSize; // The native code size, after instructions are issued. This
+        // is less than (compTotalHotCodeSize + compTotalColdCodeSize) only if:
         // (1) the code is not hot/cold split, and we issued less code than we expected, or
         // (2) the code is hot/cold split, and we issued less code than we expected
         // in the cold section (the hot section will always be padded out to compTotalHotCodeSize).
@@ -9070,6 +9102,8 @@ public:
 
     ArenaAllocator* compGetArenaAllocator();
 
+    void generatePatchpointInfo();
+
 #if MEASURE_MEM_ALLOC
     static bool s_dspMemStats; // Display per-phase memory statistics for every function
 #endif                         // MEASURE_MEM_ALLOC
index f9e0684..cf0463c 100644 (file)
@@ -27,6 +27,7 @@
 CompPhaseNameMacro(PHASE_PRE_IMPORT,             "Pre-import",                     "PRE-IMP",  false, -1, false)
 CompPhaseNameMacro(PHASE_IMPORTATION,            "Importation",                    "IMPORT",   false, -1, true)
 CompPhaseNameMacro(PHASE_INDXCALL,               "Indirect call transform",        "INDXCALL", false, -1, true)
+CompPhaseNameMacro(PHASE_PATCHPOINTS,            "Expand patchpoints",             "PPOINT",   false, -1, true)
 CompPhaseNameMacro(PHASE_POST_IMPORT,            "Post-import",                    "POST-IMP", false, -1, false)
 CompPhaseNameMacro(PHASE_IBCINSTR,               "IBC instrumentation",            "IBCINSTR", false, -1, false)
 CompPhaseNameMacro(PHASE_MORPH_INIT,             "Morph - Init",                   "MOR-INIT" ,false, -1, false)
index dabf06f..d2629c3 100644 (file)
@@ -1981,9 +1981,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
                     LclVarDsc* varDsc         = emitComp->lvaTable + var;
                     bool       isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
                     // Register passed args could have a stack offset of 0.
-                    noway_assert((int)offs < 0 || isRegPassedArg);
+                    noway_assert((int)offs < 0 || isRegPassedArg || emitComp->opts.IsOSR());
 #else  // !UNIX_AMD64_ABI
-                    noway_assert((int)offs < 0);
+
+                    // An OSR method transitioning to an RBP frame currently can have a
+                    // mid-frame FP, so stack offsets here may be non-negative.
+                    noway_assert(((int)offs < 0) || emitComp->opts.IsOSR());
 #endif // !UNIX_AMD64_ABI
                 }
 
index 63d8090..8ba6d2f 100644 (file)
@@ -65,6 +65,7 @@ void Compiler::fgInit()
     fgFirstBB        = nullptr;
     fgLastBB         = nullptr;
     fgFirstColdBlock = nullptr;
+    fgEntryBB        = nullptr;
 
 #if defined(FEATURE_EH_FUNCLETS)
     fgFirstFuncletBB  = nullptr;
@@ -5177,16 +5178,19 @@ void Compiler::fgObserveInlineConstants(OPCODE opcode, const FgStack& stack, boo
     }
 }
 
-/*****************************************************************************
- *
- *  Finally link up the bbJumpDest of the blocks together
- */
+//------------------------------------------------------------------------
+// fgMarkBackwardJump: mark blocks indicating there is a jump backwards in
+//   IL, from a higher to lower IL offset.
+//
+// Arguments:
+//   targetBlock -- target of the jump
+//   sourceBlock -- source of the jump
 
-void Compiler::fgMarkBackwardJump(BasicBlock* startBlock, BasicBlock* endBlock)
+void Compiler::fgMarkBackwardJump(BasicBlock* targetBlock, BasicBlock* sourceBlock)
 {
-    noway_assert(startBlock->bbNum <= endBlock->bbNum);
+    noway_assert(targetBlock->bbNum <= sourceBlock->bbNum);
 
-    for (BasicBlock* block = startBlock; block != endBlock->bbNext; block = block->bbNext)
+    for (BasicBlock* block = targetBlock; block != sourceBlock->bbNext; block = block->bbNext)
     {
         if ((block->bbFlags & BBF_BACKWARD_JUMP) == 0)
         {
@@ -5194,6 +5198,8 @@ void Compiler::fgMarkBackwardJump(BasicBlock* startBlock, BasicBlock* endBlock)
             compHasBackwardJump = true;
         }
     }
+
+    targetBlock->bbFlags |= BBF_BACKWARD_JUMP_TARGET;
 }
 
 /*****************************************************************************
@@ -5983,6 +5989,28 @@ void Compiler::fgFindBasicBlocks()
         return;
     }
 
+    // If we are doing OSR, add an entry block that simply branches to the right IL offset.
+    if (opts.IsOSR())
+    {
+        // Remember the original entry block in case this method is tail recursive.
+        fgEntryBB = fgLookupBB(0);
+
+        // Find the OSR entry block.
+        assert(info.compILEntry >= 0);
+        BasicBlock* bbTarget = fgLookupBB(info.compILEntry);
+
+        fgEnsureFirstBBisScratch();
+        fgFirstBB->bbJumpKind = BBJ_ALWAYS;
+        fgFirstBB->bbJumpDest = bbTarget;
+        fgAddRefPred(bbTarget, fgFirstBB);
+
+        JITDUMP("OSR: redirecting flow at entry via " FMT_BB " to " FMT_BB " (il offset 0x%x)\n", fgFirstBB->bbNum,
+                bbTarget->bbNum, info.compILEntry);
+
+        // Rebuild the lookup table. We should be able to avoid this by leaving room up front.
+        fgInitBBLookup();
+    }
+
     /* Mark all blocks within 'try' blocks as such */
 
     if (info.compXcptnsCount == 0)
@@ -6859,7 +6887,7 @@ unsigned Compiler::fgGetNestingLevel(BasicBlock* block, unsigned* pFinallyNestin
 
 void Compiler::fgImport()
 {
-    impImport(fgFirstBB);
+    impImport();
 
     // Estimate how much of method IL was actually imported.
     //
@@ -9693,118 +9721,238 @@ void Compiler::fgSimpleLowering()
 #endif
 }
 
-/*****************************************************************************
- *
- *  Find and remove any basic blocks that are useless (e.g. they have not been
- *  imported because they are not reachable, or they have been optimized away).
- */
-
+//------------------------------------------------------------------------
+// fgRemoveEmptyBlocks: clean up flow graph after importation
+//
+// Notes:
+//
+//  Find and remove any basic blocks that are useless (e.g. they have not been
+//  imported because they are not reachable, or they have been optimized away).
+//
+//  Remove try regions where no blocks in the try were imported.
+//  Update the end of try and handler regions where trailing blocks were not imported.
+//  Update the start of try regions that were partially imported (OSR)
+//
 void Compiler::fgRemoveEmptyBlocks()
 {
+    JITDUMP("\n*************** In fgRemoveEmptyBlocks\n");
+
     BasicBlock* cur;
     BasicBlock* nxt;
 
-    /* If we remove any blocks, we'll have to do additional work */
-
+    // If we remove any blocks, we'll have to do additional work
     unsigned removedBlks = 0;
 
     for (cur = fgFirstBB; cur != nullptr; cur = nxt)
     {
-        /* Get hold of the next block (in case we delete 'cur') */
-
+        // Get hold of the next block (in case we delete 'cur')
         nxt = cur->bbNext;
 
-        /* Should this block be removed? */
-
+        // Should this block be removed?
         if (!(cur->bbFlags & BBF_IMPORTED))
         {
             noway_assert(cur->isEmpty());
 
             if (ehCanDeleteEmptyBlock(cur))
             {
-                /* Mark the block as removed */
+                JITDUMP(FMT_BB " was not imported, marking as removed (%d)\n", cur->bbNum, removedBlks);
 
                 cur->bbFlags |= BBF_REMOVED;
-
-                /* Remember that we've removed a block from the list */
-
                 removedBlks++;
 
-#ifdef DEBUG
-                if (verbose)
-                {
-                    printf(FMT_BB " was not imported, marked as removed (%d)\n", cur->bbNum, removedBlks);
-                }
-#endif // DEBUG
-
-                /* Drop the block from the list */
-
+                // Drop the block from the list.
+                //
+                // We rely on the fact that this does not clear out
+                // cur->bbNext or cur->bbPrev in the code that
+                // follows.
                 fgUnlinkBlock(cur);
             }
             else
             {
-                // We were prevented from deleting this block by EH normalization. Mark the block as imported.
+                // We were prevented from deleting this block by EH
+                // normalization. Mark the block as imported.
                 cur->bbFlags |= BBF_IMPORTED;
             }
         }
     }
 
-    /* If no blocks were removed, we're done */
-
+    // If no blocks were removed, we're done
     if (removedBlks == 0)
     {
         return;
     }
 
-    /*  Update all references in the exception handler table.
-     *  Mark the new blocks as non-removable.
-     *
-     *  We may have made the entire try block unreachable.
-     *  Check for this case and remove the entry from the EH table.
-     */
-
+    // Update all references in the exception handler table.
+    //
+    // We may have made the entire try block unreachable.
+    // Check for this case and remove the entry from the EH table.
+    //
+    // For OSR, just the initial part of a try range may become
+    // unreachable; if so we need to shrink the try range down
+    // to the portion that was imported.
     unsigned  XTnum;
     EHblkDsc* HBtab;
-    INDEBUG(unsigned delCnt = 0;)
+    unsigned  delCnt = 0;
 
+    // Walk the EH regions from inner to outer
     for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
     {
     AGAIN:
-        /* If the beginning of the try block was not imported, we
-         * need to remove the entry from the EH table. */
 
+        // If start of a try region was not imported, then we either
+        // need to trim the region extent, or remove the region
+        // entirely.
+        //
+        // In normal importation, it is not valid to jump into the
+        // middle of a try, so if the try entry was not imported, the
+        // entire try can be removed.
+        //
+        // In OSR importation the entry patchpoint may be in the
+        // middle of a try, and we need to determine how much of the
+        // try ended up getting imported.  Because of backwards
+        // branches we may end up importing the entire try even though
+        // execution starts in the middle.
+        //
+        // Note it is common in both cases for the ends of try regions (and
+        // associated handlers) to end up not getting imported, so if
+        // the try region is not removed, we always check if we need
+        // to trim the ends.
+        //
         if (HBtab->ebdTryBeg->bbFlags & BBF_REMOVED)
         {
-            noway_assert(!(HBtab->ebdTryBeg->bbFlags & BBF_IMPORTED));
-#ifdef DEBUG
-            if (verbose)
+            // Usual case is that the entire try can be removed.
+            bool removeTryRegion = true;
+
+            if (opts.IsOSR())
             {
-                printf("Beginning of try block (" FMT_BB ") not imported "
-                       "- remove index #%u from the EH table\n",
-                       HBtab->ebdTryBeg->bbNum, XTnum + delCnt);
-            }
-            delCnt++;
-#endif // DEBUG
+                // For OSR we may need to trim the try region start.
+                //
+                // We rely on the fact that removed blocks have been snipped from
+                // the main block list, but that those removed blocks have kept
+                // their bbPrev (and bbNext) links.
+                //
+                // Find the first unremoved block before the try entry block.
+                //
+                BasicBlock* const oldTryEntry  = HBtab->ebdTryBeg;
+                BasicBlock*       tryEntryPrev = oldTryEntry->bbPrev;
+                while ((tryEntryPrev != nullptr) && ((tryEntryPrev->bbFlags & BBF_REMOVED) != 0))
+                {
+                    tryEntryPrev = tryEntryPrev->bbPrev;
+                }
 
-            fgRemoveEHTableEntry(XTnum);
+                // Because we've added an unremovable scratch block as
+                // fgFirstBB, this backwards walk should always find
+                // some block.
+                assert(tryEntryPrev != nullptr);
 
-            if (XTnum < compHndBBtabCount)
-            {
-                // There are more entries left to process, so do more. Note that
-                // HBtab now points to the next entry, that we copied down to the
-                // current slot. XTnum also stays the same.
-                goto AGAIN;
+                // If there is a next block of this prev block, and that block is
+                // contained in the current try, we'd like to make that block
+                // the new start of the try, and keep the region.
+                BasicBlock* newTryEntry    = tryEntryPrev->bbNext;
+                bool        updateTryEntry = false;
+
+                if ((newTryEntry != nullptr) && bbInTryRegions(XTnum, newTryEntry))
+                {
+                    // We want to trim the begin extent of the current try region to newTryEntry.
+                    //
+                    // This method is invoked after EH normalization, so we may need to ensure all
+                    // try regions begin at blocks that are not the start or end of some other try.
+                    //
+                    // So, see if this block is already the start or end of some other EH region.
+                    if (bbIsTryBeg(newTryEntry))
+                    {
+                        // We've already end-trimmed the inner try. Do the same now for the
+                        // current try, so it is easier to detect when they mutually protect.
+                        // (we will call this again later, which is harmless).
+                        fgSkipRmvdBlocks(HBtab);
+
+                        // If this try and the inner try form a "mutually protected try region"
+                        // then we must continue to share the try entry block.
+                        EHblkDsc* const HBinner = ehGetBlockTryDsc(newTryEntry);
+                        assert(HBinner->ebdTryBeg == newTryEntry);
+
+                        if (HBtab->ebdTryLast != HBinner->ebdTryLast)
+                        {
+                            updateTryEntry = true;
+                        }
+                    }
+                    // Also, a try and handler cannot start at the same block
+                    else if (bbIsHandlerBeg(newTryEntry))
+                    {
+                        updateTryEntry = true;
+                    }
+
+                    if (updateTryEntry)
+                    {
+                        // We need to trim the current try to begin at a different block. Normally
+                        // this would be problematic as we don't have enough context to redirect
+                        // all the incoming edges, but we know oldTryEntry is unreachable.
+                        // So there are no incoming edges to worry about.
+                        //
+                        assert(!tryEntryPrev->bbFallsThrough());
+
+                        // What follows is similar to fgNewBBInRegion, but we can't call that
+                        // here as the oldTryEntry is no longer in the main bb list.
+                        newTryEntry = bbNewBasicBlock(BBJ_NONE);
+                        newTryEntry->bbFlags |= (BBF_IMPORTED | BBF_INTERNAL);
+
+                        // Set the right EH region indices on this new block.
+                        //
+                        // Patchpoints currently cannot be inside handler regions,
+                        // and so likewise the old and new try region entries.
+                        assert(!oldTryEntry->hasHndIndex());
+                        newTryEntry->setTryIndex(XTnum);
+                        newTryEntry->clearHndIndex();
+                        fgInsertBBafter(tryEntryPrev, newTryEntry);
+
+                        JITDUMP("OSR: changing start of try region #%u from " FMT_BB " to new " FMT_BB "\n",
+                                XTnum + delCnt, oldTryEntry->bbNum, newTryEntry->bbNum);
+                    }
+                    else
+                    {
+                        // We can just trim the try to newTryEntry as it is not part of some inner try or handler.
+                        JITDUMP("OSR: changing start of try region #%u from " FMT_BB " to " FMT_BB "\n", XTnum + delCnt,
+                                oldTryEntry->bbNum, newTryEntry->bbNum);
+                    }
+
+                    // Update the handler table
+                    fgSetTryBeg(HBtab, newTryEntry);
+
+                    // Try entry blocks get specially marked and have special protection.
+                    HBtab->ebdTryBeg->bbFlags |= BBF_DONT_REMOVE | BBF_TRY_BEG | BBF_HAS_LABEL;
+
+                    // We are keeping this try region
+                    removeTryRegion = false;
+                }
             }
 
-            break; // no more entries (we deleted the last one), so exit the loop
-        }
+            if (removeTryRegion)
+            {
+                // In the dump, refer to the region by its original index.
+                JITDUMP("Try region #%u (" FMT_BB " -- " FMT_BB ") not imported, removing try from the EH table\n",
+                        XTnum + delCnt, HBtab->ebdTryBeg->bbNum, HBtab->ebdTryLast->bbNum);
 
-/* At this point we know we have a valid try block */
+                delCnt++;
 
-#ifdef DEBUG
+                fgRemoveEHTableEntry(XTnum);
+
+                if (XTnum < compHndBBtabCount)
+                {
+                    // There are more entries left to process, so do more. Note that
+                    // HBtab now points to the next entry, that we copied down to the
+                    // current slot. XTnum also stays the same.
+                    goto AGAIN;
+                }
+
+                // no more entries (we deleted the last one), so exit the loop
+                break;
+            }
+        }
+
+        // If we get here, the try entry block was not removed.
+        // Check some invariants.
         assert(HBtab->ebdTryBeg->bbFlags & BBF_IMPORTED);
         assert(HBtab->ebdTryBeg->bbFlags & BBF_DONT_REMOVE);
-
         assert(HBtab->ebdHndBeg->bbFlags & BBF_IMPORTED);
         assert(HBtab->ebdHndBeg->bbFlags & BBF_DONT_REMOVE);
 
@@ -9813,10 +9961,10 @@ void Compiler::fgRemoveEmptyBlocks()
             assert(HBtab->ebdFilter->bbFlags & BBF_IMPORTED);
             assert(HBtab->ebdFilter->bbFlags & BBF_DONT_REMOVE);
         }
-#endif // DEBUG
 
+        // Finally, do region end trimming -- update try and handler ends to reflect removed blocks.
         fgSkipRmvdBlocks(HBtab);
-    } /* end of the for loop over XTnum */
+    }
 
     // Renumber the basic blocks
     JITDUMP("\nRenumbering the basic blocks for fgRemoveEmptyBlocks\n");
@@ -13222,6 +13370,10 @@ void Compiler::fgComputeCalledCount(BasicBlock::weight_t returnWeight)
         {
             fgFirstBB->bbFlags |= BBF_RUN_RARELY;
         }
+        else
+        {
+            fgFirstBB->bbFlags &= ~BBF_RUN_RARELY;
+        }
     }
 
 #if DEBUG
@@ -20570,6 +20722,12 @@ bool BBPredsChecker::CheckEhTryDsc(BasicBlock* block, BasicBlock* blockPred, EHb
         return true;
     }
 
+    // For OSR, we allow the firstBB to branch to the middle of a try.
+    if (comp->opts.IsOSR() && (blockPred == comp->fgFirstBB))
+    {
+        return true;
+    }
+
     printf("Jump into the middle of try region: " FMT_BB " branches to " FMT_BB "\n", blockPred->bbNum, block->bbNum);
     assert(!"Jump into middle of try region");
     return false;
@@ -20752,6 +20910,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
 #endif // DEBUG
 
     fgDebugCheckBlockLinks();
+    fgFirstBBisScratch();
 
     if (fgBBcount > 10000 && expensiveDebugCheckLevel < 1)
     {
index 5d05f96..a346b9f 100644 (file)
@@ -22,6 +22,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #endif
 
 #include "gcinfotypes.h"
+#include "patchpointinfo.h"
 
 ReturnKind GCTypeToReturnKind(CorInfoGCType gcType)
 {
@@ -3888,20 +3889,43 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
                 assert(false);
         }
 
-        gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(
-            compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(),
-                                                  compiler->isFramePointerUsed()),
-            ctxtParamType);
+        int offset = 0;
+
+        if (compiler->opts.IsOSR())
+        {
+            PatchpointInfo* ppInfo = compiler->info.compPatchpointInfo;
+            offset                 = ppInfo->GenericContextArgOffset();
+            assert(offset != -1);
+        }
+        else
+        {
+            offset = compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(),
+                                                           compiler->isFramePointerUsed());
+        }
+
+        gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(offset, ctxtParamType);
     }
     // As discussed above, handle the case where the generics context is obtained via
     // the method table of "this".
     else if (compiler->lvaKeepAliveAndReportThis())
     {
         assert(compiler->info.compThisArg != BAD_VAR_NUM);
-        gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(
-            compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(),
-                                                  compiler->isFramePointerUsed()),
-            GENERIC_CONTEXTPARAM_THIS);
+
+        int offset = 0;
+
+        if (compiler->opts.IsOSR())
+        {
+            PatchpointInfo* ppInfo = compiler->info.compPatchpointInfo;
+            offset                 = ppInfo->GenericContextArgOffset();
+            assert(offset != -1);
+        }
+        else
+        {
+            offset = compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(),
+                                                           compiler->isFramePointerUsed());
+        }
+
+        gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(offset, GENERIC_CONTEXTPARAM_THIS);
     }
 
     if (compiler->getNeedsGSSecurityCookie())
@@ -3909,12 +3933,27 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
         assert(compiler->lvaGSSecurityCookie != BAD_VAR_NUM);
 
         // The lv offset is FP-relative, and the using code expects caller-sp relative, so translate.
+        int offset = compiler->lvaGetCallerSPRelativeOffset(compiler->lvaGSSecurityCookie);
+
+        if (compiler->opts.IsOSR())
+        {
+            // The offset computed above already includes the OSR frame adjustment, plus the
+            // pop of the "pseudo return address" from the OSR frame.
+            //
+            // To get to caller-SP, we need to subtract off the original frame size and the
+            // pushed RA and RBP for that frame. But ppInfo's FpToSpDelta also accounts for the
+            // pseudo RA between the original method frame and the OSR frame. So the net adjustment
+            // is simply FpToSpDelta plus one register.
+            PatchpointInfo* ppInfo     = compiler->info.compPatchpointInfo;
+            int             adjustment = ppInfo->FpToSpDelta() + REGSIZE_BYTES;
+            offset -= adjustment;
+            JITDUMP("OSR cookie adjustment %d, final caller-SP offset %d\n", adjustment, offset);
+        }
+
         // The code offset ranges assume that the GS Cookie slot is initialized in the prolog, and is valid
         // through the remainder of the method.  We will not query for the GS Cookie while we're in an epilog,
         // so the question of where in the epilog it becomes invalid is moot.
-        gcInfoEncoderWithLog->SetGSCookieStackSlot(compiler->lvaGetCallerSPRelativeOffset(
-                                                       compiler->lvaGSSecurityCookie),
-                                                   prologSize, methodSize);
+        gcInfoEncoderWithLog->SetGSCookieStackSlot(offset, prologSize, methodSize);
     }
     else if (compiler->lvaReportParamTypeArg() || compiler->lvaKeepAliveAndReportThis())
     {
index cc1bb6f..e31998d 100644 (file)
@@ -24,7 +24,7 @@ void Compiler::gsGSChecksInitCookie()
 {
     var_types type = TYP_I_IMPL;
 
-    lvaGSSecurityCookie = lvaGrabTemp(false DEBUGARG("GSSecurityCookie"));
+    lvaGSSecurityCookie = lvaGrabTempWithImplicitUse(false DEBUGARG("GSSecurityCookie"));
 
     // Prevent cookie init/check from being optimized
     lvaSetVarAddrExposed(lvaGSSecurityCookie);
index a22f850..04079ba 100644 (file)
@@ -8481,10 +8481,37 @@ DONE:
     if ((tailCallFlags != 0) && canTailCall && gtIsRecursiveCall(methHnd))
     {
         assert(verCurrentState.esStackDepth == 0);
+        BasicBlock* loopHead = nullptr;
+        if (opts.IsOSR())
+        {
+            // We might not have been planning on importing the method
+            // entry block, but now we must.
+
+            // We should have remembered the real method entry block.
+            assert(fgEntryBB != nullptr);
+
+            JITDUMP("\nOSR: found tail recursive call in the method, scheduling " FMT_BB " for importation\n",
+                    fgEntryBB->bbNum);
+            impImportBlockPending(fgEntryBB);
+
+            // Note there is no explicit flow to this block yet,
+            // make sure it stays around until we actually try
+            // the optimization.
+            fgEntryBB->bbFlags |= BBF_DONT_REMOVE;
+
+            loopHead = fgEntryBB;
+        }
+        else
+        {
+            // For normal jitting we'll branch back to the firstBB; this
+            // should already be imported.
+            loopHead = fgFirstBB;
+        }
+
         JITDUMP("\nFound tail recursive call in the method. Mark " FMT_BB " to " FMT_BB
                 " as having a backward branch.\n",
-                fgFirstBB->bbNum, compCurBB->bbNum);
-        fgMarkBackwardJump(fgFirstBB, compCurBB);
+                loopHead->bbNum, compCurBB->bbNum);
+        fgMarkBackwardJump(loopHead, compCurBB);
     }
 
     // Note: we assume that small return types are already normalized by the managed callee
@@ -10504,6 +10531,35 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 
     impBeginTreeList();
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+
+    // Are there any places in the method where we might add a patchpoint?
+    if (compHasBackwardJump)
+    {
+        // Are patchpoints enabled?
+        if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_OnStackReplacement() > 0))
+        {
+            // We don't inline at Tier0; if we ever do, we may need to rethink our approach.
+            // Could probably support inlines that don't introduce flow.
+            assert(!compIsForInlining());
+
+            // Is the start of this block a suitable patchpoint?
+            // The current strategy is to mark blocks that are stack-empty backward branch targets.
+            if (((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) && (verCurrentState.esStackDepth == 0))
+            {
+                block->bbFlags |= BBF_PATCHPOINT;
+                setMethodHasPatchpoint();
+            }
+        }
+    }
+    else
+    {
+        // Should not see backward branch targets w/o backwards branches
+        assert((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) == 0);
+    }
+
+#endif // FEATURE_ON_STACK_REPLACEMENT
+
     /* Walk the opcodes that comprise the basic block */
 
     const BYTE* codeAddr = info.compCode + block->bbCodeOffs;
@@ -16728,10 +16784,13 @@ void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart)
                     assert(HBtab->HasFaultHandler());
                 }
             }
+        }
 
-            /* Recursively process the handler block */
-            BasicBlock* hndBegBB = HBtab->ebdHndBeg;
+        // Recursively process the handler block, if we haven't already done so.
+        BasicBlock* hndBegBB = HBtab->ebdHndBeg;
 
+        if (((hndBegBB->bbFlags & BBF_IMPORTED) == 0) && (impGetPendingBlockMember(hndBegBB) == 0))
+        {
             //  Construct the proper verification stack state
             //   either empty or one that contains just
             //   the Exception Object that we are dealing with
@@ -16767,18 +16826,22 @@ void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart)
             // Queue up the handler for importing
             //
             impImportBlockPending(hndBegBB);
+        }
 
-            if (HBtab->HasFilter())
-            {
-                /* @VERIFICATION : Ideally the end of filter state should get
-                   propagated to the catch handler, this is an incompleteness,
-                   but is not a security/compliance issue, since the only
-                   interesting state is the 'thisInit' state.
-                   */
+        // Process the filter block, if we haven't already done so.
+        if (HBtab->HasFilter())
+        {
+            /* @VERIFICATION : Ideally the end of filter state should get
+               propagated to the catch handler, this is an incompleteness,
+               but is not a security/compliance issue, since the only
+               interesting state is the 'thisInit' state.
+            */
 
-                verCurrentState.esStackDepth = 0;
+            BasicBlock* filterBB = HBtab->ebdFilter;
 
-                BasicBlock* filterBB = HBtab->ebdFilter;
+            if (((filterBB->bbFlags & BBF_IMPORTED) == 0) && (impGetPendingBlockMember(filterBB) == 0))
+            {
+                verCurrentState.esStackDepth = 0;
 
                 // push catch arg the stack, spill to a temp if necessary
                 // Note: can update HBtab->ebdFilter!
@@ -16788,7 +16851,9 @@ void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart)
                 impImportBlockPending(filterBB);
             }
         }
-        else if (verTrackObjCtorInitState && HBtab->HasFaultHandler())
+
+        // Note: this check may be redundant with the handler processing above.
+        if (verTrackObjCtorInitState && HBtab->HasFaultHandler())
         {
             /* Recursively process the handler block */
 
@@ -17855,7 +17920,7 @@ void Compiler::impSpillCliqueSetMember(SpillCliqueDir predOrSucc, BasicBlock* bl
  *  basic flowgraph has already been constructed and is passed in.
  */
 
-void Compiler::impImport(BasicBlock* method)
+void Compiler::impImport()
 {
 #ifdef DEBUG
     if (verbose)
@@ -17917,21 +17982,45 @@ void Compiler::impImport(BasicBlock* method)
 
     impPendingList = impPendingFree = nullptr;
 
-    /* Add the entry-point to the worker-list */
+    // Skip leading internal blocks.
+    // These can arise from needing a leading scratch BB, from EH normalization, and from OSR entry redirects.
+    //
+    // We expect a linear flow to the first non-internal block, but not necessarily straight-line flow.
+    BasicBlock* entryBlock = fgFirstBB;
 
-    // Skip leading internal blocks. There can be one as a leading scratch BB, and more
-    // from EH normalization.
-    // NOTE: It might be possible to always just put fgFirstBB on the pending list, and let everything else just fall
-    // out.
-    for (; method->bbFlags & BBF_INTERNAL; method = method->bbNext)
+    while (entryBlock->bbFlags & BBF_INTERNAL)
     {
-        // Treat these as imported.
-        assert(method->bbJumpKind == BBJ_NONE); // We assume all the leading ones are fallthrough.
-        JITDUMP("Marking leading BBF_INTERNAL block " FMT_BB " as BBF_IMPORTED\n", method->bbNum);
-        method->bbFlags |= BBF_IMPORTED;
+        JITDUMP("Marking leading BBF_INTERNAL block " FMT_BB " as BBF_IMPORTED\n", entryBlock->bbNum);
+        entryBlock->bbFlags |= BBF_IMPORTED;
+
+        if (entryBlock->bbJumpKind == BBJ_NONE)
+        {
+            entryBlock = entryBlock->bbNext;
+        }
+        else if (entryBlock->bbJumpKind == BBJ_ALWAYS)
+        {
+            // Only expected for OSR
+            assert(opts.IsOSR());
+            entryBlock = entryBlock->bbJumpDest;
+        }
+        else
+        {
+            assert(!"unexpected bbJumpKind in entry sequence");
+        }
     }
 
-    impImportBlockPending(method);
+    // Note for OSR we'd like to be able to verify this block must be
+    // stack empty, but won't know that until we've imported...so instead
+    // we'll BADCODE out if we mess up.
+    //
+    // (the concern here is that the runtime asks us to OSR a
+    // different IL version than the one that matched the method that
+    // triggered OSR).  This should not happen but I might have the
+    // IL versioning stuff wrong.
+    //
+    // TODO: we also currently expect this block to be a join point,
+    // which we should verify over when we find jump targets.
+    impImportBlockPending(entryBlock);
 
     /* Import blocks in the worker-list until there are no more */
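
For intuition on the patchpoint placement policy above: only stack-empty backward-branch targets are marked, which in practice means loop heads. In source terms (C++ standing in for any IL producer; the method is hypothetical):

    // At Tier0, the loop head below is a stack-empty backward-branch target,
    // so its block would be marked BBF_PATCHPOINT and later expanded by
    // fgTransformPatchpoints.
    int sumTo(int n)
    {
        int s = 0;
        for (int i = 0; i < n; i++) // loop head: patchpoint candidate
        {
            s += i;
        }
        return s;
    }
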
 
index 70f65eb..65b92b5 100644 (file)
@@ -394,6 +394,11 @@ CONFIG_INTEGER(JitGuardedDevirtualizationGuessUniqueInterface, W("JitGuardedDevi
 CONFIG_INTEGER(JitGuardedDevirtualizationGuessBestClass, W("JitGuardedDevirtualizationGuessBestClass"), 1)
 #endif // DEBUG
 
+// Enable insertion of patchpoints into Tier0 methods with loops.
+CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 0)
+// Initial patchpoint counter value used by jitted code
+CONFIG_INTEGER(TC_OnStackReplacement_InitialCounter, W("TC_OnStackReplacement_InitialCounter"), 1000)
+
 #if defined(DEBUG)
 // JitFunctionFile: Name of a file that contains a list of functions. If the currently compiled function is in the
 // file, certain other JIT config variables will be active. If the currently compiled function is not in the file,
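
A minimal sketch of how Tier0 code is expected to consume these knobs (the helper name and shape are illustrative, not the runtime's actual API): the prolog seeds one per-frame counter with the initial value, and every patchpoint decrements it and calls the helper once it reaches zero:

    #include <cstdio>

    // Illustrative stand-in for the runtime patchpoint helper.
    static void HelpPatchpoint(int ilOffset, int* counter)
    {
        std::printf("patchpoint helper called at IL offset 0x%x\n", ilOffset);
        *counter = 1000; // re-arm with TC_OnStackReplacement_InitialCounter's default
    }

    int sumToTier0(int n)
    {
        int ppCounter = 1000; // seeded once in the prolog, shared by all patchpoints
        int s         = 0;
        for (int i = 0; i < n; i++)
        {
            if (--ppCounter <= 0) // patchpoint check at the loop head
            {
                HelpPatchpoint(0x10, &ppCounter);
            }
            s += i;
        }
        return s;
    }

    int main()
    {
        std::printf("%d\n", sumToTier0(5000));
        return 0;
    }
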
index 4f9df37..473bfc5 100644 (file)
@@ -39,7 +39,7 @@ public:
 
     #endif // !defined(TARGET_X86)
 
-        JIT_FLAG_UNUSED6                 = 13,
+        JIT_FLAG_OSR                     = 13, // Generate alternate version for On Stack Replacement
 
     #if defined(TARGET_X86) || defined(TARGET_AMD64)
 
index b17785c..4c9f39e 100644 (file)
@@ -1213,6 +1213,25 @@ EHblkDsc* Compiler::ehInitTryBlockRange(BasicBlock* blk, BasicBlock** tryBeg, Ba
 }
 
 /*****************************************************************************
+ *  This method updates the value of ebdTryBeg.
+ */
+
+void Compiler::fgSetTryBeg(EHblkDsc* handlerTab, BasicBlock* newTryBeg)
+{
+    assert(newTryBeg != nullptr);
+
+    // Check if we are going to change the existing value of ebdTryBeg
+    //
+    if (handlerTab->ebdTryBeg != newTryBeg)
+    {
+        // Update the EH table with the newTryBeg block
+        handlerTab->ebdTryBeg = newTryBeg;
+
+        JITDUMP("EH#%u: New first block of try: " FMT_BB "\n", ehGetIndex(handlerTab), handlerTab->ebdTryBeg->bbNum);
+    }
+}
+
+/*****************************************************************************
  *  This method updates the value of ebdTryLast.
  */
 
index d0d00b7..76e17f0 100644 (file)
@@ -21,6 +21,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #include "emit.h"
 #include "register_arg_convention.h"
 #include "jitstd/algorithm.h"
+#include "patchpointinfo.h"
 
 /*****************************************************************************/
 
@@ -278,6 +279,17 @@ void Compiler::lvaInitTypeRef()
             CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->locals, localsSig);
             lvaSetClass(varNum, clsHnd);
         }
+
+        if (opts.IsOSR() && info.compPatchpointInfo->IsExposed(varNum))
+        {
+            JITDUMP("-- V%02u is OSR exposed\n", varNum);
+            varDsc->lvHasLdAddrOp = 1;
+
+            if (varDsc->lvType != TYP_STRUCT)
+            {
+                lvaSetVarAddrExposed(varNum);
+            }
+        }
     }
 
     if ( // If there already exist unsafe buffers, don't mark more structs as unsafe
@@ -1027,6 +1039,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
             lvaSetVarAddrExposed(varDscInfo->varNum);
 #endif // !TARGET_X86
         }
+
+        if (opts.IsOSR() && info.compPatchpointInfo->IsExposed(varDscInfo->varNum))
+        {
+            JITDUMP("-- V%02u is OSR exposed\n", varDscInfo->varNum);
+            varDsc->lvHasLdAddrOp = 1;
+            lvaSetVarAddrExposed(varDscInfo->varNum);
+        }
+
     } // for each user arg
 
 #ifdef TARGET_ARM
@@ -1829,6 +1849,13 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum)
         return false;
     }
 
+    // TODO-CQ: enable promotion for OSR locals
+    if (compiler->lvaIsOSRLocal(lclNum))
+    {
+        JITDUMP("  struct promotion of V%02u is disabled because it is an OSR local\n", lclNum);
+        return false;
+    }
+
     CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
     return CanPromoteStructType(typeHnd);
 }
@@ -4780,6 +4807,13 @@ void Compiler::lvaFixVirtualFrameOffsets()
         assert(varDsc->lvFramePointerBased); // We always access it RBP-relative.
         assert(!varDsc->lvMustInit);         // It is never "must init".
         varDsc->lvStkOffs = codeGen->genCallerSPtoInitialSPdelta() + lvaLclSize(lvaOutgoingArgSpaceVar);
+
+        // With OSR the new frame RBP points at the base of the new frame, but the virtual offsets
+        // are from the base of the old frame. Adjust.
+        if (opts.IsOSR())
+        {
+            varDsc->lvStkOffs -= info.compPatchpointInfo->FpToSpDelta();
+        }
     }
 #endif
 
@@ -4788,9 +4822,11 @@ void Compiler::lvaFixVirtualFrameOffsets()
 
 #ifdef TARGET_XARCH
     delta += REGSIZE_BYTES; // pushed PC (return address) for x86/x64
+    JITDUMP("--- delta bump %d for RA\n", REGSIZE_BYTES);
 
     if (codeGen->doubleAlignOrFramePointerUsed())
     {
+        JITDUMP("--- delta bump %d for FP\n", REGSIZE_BYTES);
         delta += REGSIZE_BYTES; // pushed EBP (frame pointer)
     }
 #endif
@@ -4798,6 +4834,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
     if (!codeGen->isFramePointerUsed())
     {
         // pushed registers, return address, and padding
+        JITDUMP("--- delta bump %d for RSP frame\n", codeGen->genTotalFrameSize());
         delta += codeGen->genTotalFrameSize();
     }
 #if defined(TARGET_ARM)
@@ -4810,10 +4847,21 @@ void Compiler::lvaFixVirtualFrameOffsets()
     else
     {
         // FP is used.
+        JITDUMP("--- delta bump %d for RBP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta());
         delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta();
     }
 #endif // TARGET_AMD64
 
+    // For OSR, update the delta to reflect the current policy that
+    // RBP points at the base of the new frame, and RSP is relative to that RBP.
+    if (opts.IsOSR())
+    {
+        JITDUMP("--- delta bump %d for OSR\n", info.compPatchpointInfo->FpToSpDelta());
+        delta += info.compPatchpointInfo->FpToSpDelta();
+    }
+
+    JITDUMP("--- virtual stack offset to actual stack offset delta is %d\n", delta);
+
     unsigned lclNum;
     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
     {
@@ -4856,6 +4904,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
 
         if (doAssignStkOffs)
         {
+            JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->lvStkOffs, varDsc->lvStkOffs + delta);
             varDsc->lvStkOffs += delta;
 
 #if DOUBLE_ALIGN
@@ -4875,8 +4924,9 @@ void Compiler::lvaFixVirtualFrameOffsets()
             }
 #endif
             // On System V environments the stkOffs could be 0 for params passed in registers.
-            assert(codeGen->isFramePointerUsed() ||
-                   varDsc->lvStkOffs >= 0); // Only EBP relative references can have negative offsets
+            //
+            // For normal methods only EBP relative references can have negative offsets.
+            assert(codeGen->isFramePointerUsed() || varDsc->lvStkOffs >= 0);
         }
     }
 
@@ -5578,7 +5628,9 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
  */
 void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
 {
-    int stkOffs = 0;
+    int stkOffs              = 0;
+    int originalFrameStkOffs = 0;
+    int originalFrameSize    = 0;
     // codeGen->isFramePointerUsed is set in regalloc phase. Initialize it to a guess for pre-regalloc layout.
     if (lvaDoneFrameLayout <= PRE_REGALLOC_FRAME_LAYOUT)
     {
@@ -5613,6 +5665,15 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
     // On x86/amd64, the return address has already been pushed by the call instruction in the caller.
     stkOffs -= TARGET_POINTER_SIZE; // return address;
 
+    // If we are an OSR method, we "inherit" the frame of the original method,
+    // and the stack is already double aligned on entry (since the return address push
+    // and any special alignment push happened "before").
+    if (opts.IsOSR())
+    {
+        originalFrameSize    = info.compPatchpointInfo->FpToSpDelta();
+        originalFrameStkOffs = stkOffs;
+        stkOffs -= originalFrameSize;
+    }
     // TODO-AMD64-CQ: for X64 eventually this should be pushed with all the other
     // calleeregs.  When you fix this, you'll also need to fix
     // the assert at the bottom of this method
@@ -5686,10 +5747,16 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
     //     boundary we would have to use movups when offset turns out unaligned.  Movaps is more
     //     performant than movups.
     unsigned calleeFPRegsSavedSize = genCountBits(compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
-    if (calleeFPRegsSavedSize > 0 && ((stkOffs % XMM_REGSIZE_BYTES) != 0))
+
+    // For OSR the alignment pad computation should not take the original frame into account.
+    // Original frame size includes the pseudo-saved RA and so is always = 8 mod 16.
+    const int offsetForAlign = -(stkOffs + originalFrameSize);
+
+    if ((calleeFPRegsSavedSize > 0) && ((offsetForAlign % XMM_REGSIZE_BYTES) != 0))
     {
         // Take care of alignment
-        int alignPad = (int)AlignmentPad((unsigned)-stkOffs, XMM_REGSIZE_BYTES);
+        int alignPad = (int)AlignmentPad((unsigned)offsetForAlign, XMM_REGSIZE_BYTES);
+        assert(alignPad != 0);
         stkOffs -= alignPad;
         lvaIncrementFrameSize(alignPad);
     }
@@ -5772,7 +5839,19 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
     }
 #endif // JIT32_GCENCODER
 
-    if (lvaReportParamTypeArg())
+    // OSR methods use the original method slot for the cached kept alive this,
+    // so we don't need to allocate a slot on the new frame.
+    if (opts.IsOSR())
+    {
+        if (lvaKeepAliveAndReportThis())
+        {
+            PatchpointInfo* ppInfo = info.compPatchpointInfo;
+            assert(ppInfo->HasKeptAliveThis());
+            int originalOffset             = ppInfo->KeptAliveThisOffset();
+            lvaCachedGenericContextArgOffs = originalFrameStkOffs + originalOffset;
+        }
+    }
+    else if (lvaReportParamTypeArg())
     {
 #ifdef JIT32_GCENCODER
         noway_assert(codeGen->isFramePointerUsed());
@@ -5816,7 +5895,11 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
     if (compGSReorderStackLayout)
     {
         assert(getNeedsGSSecurityCookie());
-        stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+
+        if (!opts.IsOSR() || !info.compPatchpointInfo->HasSecurityCookie())
+        {
+            stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+        }
     }
 
     /*
@@ -5909,7 +5992,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
                In other words, we will not calculate the "base" address of the struct local if
                the promotion type is PROMOTION_TYPE_FIELD_DEPENDENT.
             */
-            if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+            if (!opts.IsOSR() && lvaIsFieldOfDependentlyPromotedStruct(varDsc))
             {
                 continue;
             }
@@ -5930,6 +6013,29 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
                 allocateOnFrame = false;
             }
 
+            // For OSR args and locals, we use the slots on the original frame.
+            //
+            // Note we must do this even for "non frame" locals, as we sometimes
+            // will refer to their memory homes.
+            if (lvaIsOSRLocal(lclNum))
+            {
+                // TODO-CQ: enable struct promotion for OSR locals; when that
+                // happens, figure out how to properly refer to the original
+                // frame slots for the promoted fields.
+                assert(!varDsc->lvIsStructField);
+
+                // Add the frame-pointer-relative offset of this OSR live local in the original frame
+                // to the offset of the original frame in our new frame.
+                int originalOffset = info.compPatchpointInfo->Offset(lclNum);
+                int offset         = originalFrameStkOffs + originalOffset;
+
+                JITDUMP("---OSR--- V%02u (on old frame) old rbp offset %d old frame offset %d new virt offset %d\n",
+                        lclNum, originalOffset, originalFrameStkOffs, offset);
+
+                lvaTable[lclNum].lvStkOffs = offset;
+                continue;
+            }
+
             /* Ignore variables that are not on the stack frame */
 
             if (!allocateOnFrame)
@@ -5951,7 +6057,21 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
             }
             else if (lvaGSSecurityCookie == lclNum && getNeedsGSSecurityCookie())
             {
-                continue; // This is allocated outside of this loop.
+                // Special case for OSR. If the original method had a cookie,
+                // we use its slot on the original frame.
+                if (opts.IsOSR() && info.compPatchpointInfo->HasSecurityCookie())
+                {
+                    int originalOffset = info.compPatchpointInfo->SecurityCookieOffset();
+                    int offset         = originalFrameStkOffs + originalOffset;
+
+                    JITDUMP("---OSR--- V%02u (on old frame, security cookie) old rbp offset %d old frame offset %d new "
+                            "virt offset %d\n",
+                            lclNum, originalOffset, originalFrameStkOffs, offset);
+
+                    lvaTable[lclNum].lvStkOffs = offset;
+                }
+
+                continue;
             }
 
             // These need to be located as the very first variables (highest memory address)
@@ -6144,8 +6264,11 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
 
     if (getNeedsGSSecurityCookie() && !compGSReorderStackLayout)
     {
-        // LOCALLOC used, but we have no unsafe buffer.  Allocated cookie last, close to localloc buffer.
-        stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+        if (!opts.IsOSR() || !info.compPatchpointInfo->HasSecurityCookie())
+        {
+            // LOCALLOC used, but we have no unsafe buffer.  Allocated cookie last, close to localloc buffer.
+            stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+        }
     }
 
     if (tempsAllocated == false)
@@ -6275,7 +6398,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
     pushedCount += 1; // pushed PC (return address)
 #endif
 
-    noway_assert(compLclFrameSize == (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE)));
+    noway_assert(compLclFrameSize + originalFrameSize ==
+                 (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE)));
 }
 
 int Compiler::lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs)
@@ -7218,6 +7342,21 @@ int Compiler::lvaToCallerSPRelativeOffset(int offset, bool isFpBased) const
 {
     assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
 
+    // TODO-Cleanup
+    //
+    // This currently should not be called for OSR as caller SP relative
+    // offsets computed below do not reflect the extra stack space
+    // taken up by the original method frame.
+    //
+    // We should make it work.
+    //
+    // Instead we record the needed offsets in the patchpoint info
+    // when doing the original method compile (see special offsets
+    // in generatePatchpointInfo) and consume those values in the OSR
+    // compile. If we fix this we may be able to reduce the size
+    // of the patchpoint info and have less special casing for these
+    // frame slots.
+
     if (isFpBased)
     {
         offset += codeGen->genCallerSPtoFPdelta();
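
As a worked example of the OSR delta computation in lvaFixVirtualFrameOffsets above (hypothetical x64 numbers, RBP frame):

    #include <cstdio>

    int main()
    {
        const int REGSIZE_BYTES = 8;
        int genTotalFrameSize   = 0x40; // the OSR method's own frame (hypothetical)
        int genSPtoFPdelta      = 0x40; // RBP at the base of the new frame
        int fpToSpDelta         = 0x58; // original Tier0 frame, from PatchpointInfo

        int delta = 0;
        delta += REGSIZE_BYTES;                      // pushed return address
        delta += REGSIZE_BYTES;                      // pushed RBP
        delta += genTotalFrameSize - genSPtoFPdelta; // 0 when RBP sits at frame base
        delta += fpToSpDelta;                        // OSR: step over the original frame

        std::printf("virtual-to-actual stack offset delta: 0x%x\n", delta);
        return 0;
    }
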
index c45aad7..91ac9fd 100644 (file)
@@ -1732,15 +1732,7 @@ BasicBlock* getNonEmptyBlock(BasicBlock* block)
 {
     while (block != nullptr && block->GetFirstLIRNode() == nullptr)
     {
-        BasicBlock* nextBlock = block->bbNext;
-        // Note that here we use the version of NumSucc that does not take a compiler.
-        // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
-        // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
-        // case we don't need the version that takes a compiler.
-        assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE)));
-        // sometimes the first block is empty and ends with an uncond branch
-        // assert( block->GetSucc(0) == nextBlock);
-        block = nextBlock;
+        block = block->GetUniqueSucc();
     }
     assert(block != nullptr && block->GetFirstLIRNode() != nullptr);
     return block;
@@ -1785,12 +1777,21 @@ void LinearScan::insertZeroInitRefPositions()
             Interval* interval = getIntervalForLocalVar(varIndex);
             if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
             {
+                varDsc->lvMustInit = true;
+
+                // OSR will handle init of locals and promoted fields thereof
+                if (compiler->lvaIsOSRLocal(compiler->lvaTrackedIndexToLclNum(varIndex)))
+                {
+                    JITDUMP(" will be initialized by OSR\n");
+                    // setIntervalAsSpilled(interval);
+                    varDsc->lvMustInit = false;
+                }
+
                 JITDUMP(" creating ZeroInit\n");
                 GenTree*     firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
                 RefPosition* pos =
                     newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
                 pos->setRegOptional(true);
-                varDsc->lvMustInit = true;
             }
             else
             {
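
A minimal sketch of the zero-init decision above, with illustrative names: OSR locals map to original-frame slots that already hold live Tier0 values, so the prolog store is suppressed even though the zero-init ref position is still created:

    // Whether a tracked local needs a prolog zero-init (sketch only).
    bool needsZeroInit(bool isOsrCompile, bool isOsrLocal, bool initMemOrGCRef)
    {
        if (isOsrCompile && isOsrLocal)
        {
            return false; // the original frame slot already holds the live value
        }
        return initMemOrGCRef;
    }
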
index 1f705e1..4af4823 100644 (file)
@@ -8180,14 +8180,26 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa
     // Remove the call
     fgRemoveStmt(block, lastStmt);
 
-    // Set the loop edge.  Ensure we have a scratch block and then target the
-    // next block.  Loop detection needs to see a pred out of the loop, so
-    // mark the scratch block BBF_DONT_REMOVE to prevent empty block removal
-    // on it.
-    fgEnsureFirstBBisScratch();
-    fgFirstBB->bbFlags |= BBF_DONT_REMOVE;
+    // Set the loop edge.
+    if (opts.IsOSR())
+    {
+        // Todo: this may not look like a viable loop header.
+        // Might need the moral equivalent of a scratch BB.
+        block->bbJumpDest = fgEntryBB;
+    }
+    else
+    {
+        // Ensure we have a scratch block and then target the next
+        // block.  Loop detection needs to see a pred out of the loop,
+        // so mark the scratch block BBF_DONT_REMOVE to prevent empty
+        // block removal on it.
+        fgEnsureFirstBBisScratch();
+        fgFirstBB->bbFlags |= BBF_DONT_REMOVE;
+        block->bbJumpDest = fgFirstBB->bbNext;
+    }
+
+    // Finish hooking things up.
     block->bbJumpKind = BBJ_ALWAYS;
-    block->bbJumpDest = fgFirstBB->bbNext;
     block->bbJumpDest->bbFlags |= BBF_JMP_TARGET;
     fgAddRefPred(block->bbJumpDest, block);
     block->bbFlags &= ~BBF_HAS_JMP;
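
Conceptually, fgMorphRecursiveFastTailCallIntoLoop rewrites a recursive tail call as a backward branch; for OSR that branch must target the true method entry (fgEntryBB) rather than the scratch block that jumps into the middle of the method. A C++ rendering of the transformation (illustrative only):

    // Before: a recursive tail call.
    int fact(int n, int acc)
    {
        if (n <= 1)
            return acc;
        return fact(n - 1, acc * n); // tail call
    }

    // After (sketch): the call becomes parameter reassignment plus a branch.
    // For OSR, 'entry' corresponds to fgEntryBB (IL offset 0), not the OSR
    // entry redirect block.
    int factLoop(int n, int acc)
    {
    entry:
        if (n <= 1)
            return acc;
        acc = acc * n;
        n   = n - 1;
        goto entry;
    }
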
diff --git a/src/coreclr/src/jit/patchpoint.cpp b/src/coreclr/src/jit/patchpoint.cpp
new file mode 100644 (file)
index 0000000..e9aca3b
--- /dev/null
@@ -0,0 +1,238 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+//------------------------------------------------------------------------
+// PatchpointTransformer
+//
+// Insert patchpoint checks into Tier0 methods, based on locations identified
+// during importation (see impImportBlockCode).
+//
+// Policy decisions implemented here:
+//
+//   * One counter per stack frame, regardless of the number of patchpoints.
+//   * Shared counter value initialized in the method entry block to a
+//       configurable starting value (see TransformEntry).
+//   * Patchpoint trees fully expanded into jit IR. Deferring expansion could
+//       lead to more compact code and lessen size overhead for Tier0.
+//
+// Workarounds and limitations:
+//
+//   * no patchpoints in handler regions
+//   * no patchpoints for localloc methods
+//   * no patchpoints in try regions (workaround)
+//   * no patchpoints for synchronized methods (workaround)
+//
+class PatchpointTransformer
+{
+    unsigned  ppCounterLclNum;
+    const int HIGH_PROBABILITY = 99;
+    Compiler* compiler;
+
+public:
+    PatchpointTransformer(Compiler* compiler) : compiler(compiler)
+    {
+        ppCounterLclNum                            = compiler->lvaGrabTemp(true DEBUGARG("patchpoint counter"));
+        compiler->lvaTable[ppCounterLclNum].lvType = TYP_INT;
+    }
+
+    //------------------------------------------------------------------------
+    // Run: run the transformation for each block.
+    //
+    // Returns:
+    //   Number of patchpoints transformed.
+    int Run()
+    {
+        // If the first block is a patchpoint, insert a scratch block.
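+        // The entry block must hold the counter initialization (see
+        // TransformEntry) and so cannot itself be a patchpoint.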
+        if (compiler->fgFirstBB->bbFlags & BBF_PATCHPOINT)
+        {
+            compiler->fgEnsureFirstBBisScratch();
+        }
+
+        BasicBlock* block = compiler->fgFirstBB;
+        TransformEntry(block);
+
+        int count = 0;
+        for (block = block->bbNext; block != nullptr; block = block->bbNext)
+        {
+            if (block->bbFlags & BBF_PATCHPOINT)
+            {
+                // If block is in a handler region, don't insert a patchpoint.
+                // We can't OSR from funclets.
+                //
+                // TODO: check this earlier, somehow, and fall back to fully
+                // optimizing the method (a la QJFL=0).
+                if (compiler->ehGetBlockHndDsc(block) != nullptr)
+                {
+                    JITDUMP("Patchpoint: skipping patchpoint for " FMT_BB " as it is in a handler\n", block->bbNum);
+                    continue;
+                }
+
+                JITDUMP("Patchpoint: instrumenting " FMT_BB "\n", block->bbNum);
+                assert(block != compiler->fgFirstBB);
+                TransformBlock(block);
+                count++;
+            }
+        }
+
+        return count;
+    }
+
+private:
+    //------------------------------------------------------------------------
+    // CreateAndInsertBasicBlock: ask compiler to create a new basic block
+    // and insert it into the basic block list.
+    //
+    // Arguments:
+    //    jumpKind - jump kind for the new basic block
+    //    insertAfter - basic block after which the new block is inserted.
+    //
+    // Return Value:
+    //    new basic block.
+    BasicBlock* CreateAndInsertBasicBlock(BBjumpKinds jumpKind, BasicBlock* insertAfter)
+    {
+        BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true);
+        if ((insertAfter->bbFlags & BBF_INTERNAL) == 0)
+        {
+            block->bbFlags &= ~BBF_INTERNAL;
+            block->bbFlags |= BBF_IMPORTED;
+        }
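+        // (a new block following imported code is itself marked imported
+        // rather than internal)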
+        return block;
+    }
+
+    //------------------------------------------------------------------------
+    // TransformBlock: expand current block to include patchpoint logic.
+    //
+    //  S;
+    //
+    //  ==>
+    //
+    //  if (--ppCounter <= 0)
+    //  {
+    //     ppHelper(&ppCounter, ilOffset);
+    //  }
+    //  S;
+    //
+    void TransformBlock(BasicBlock* block)
+    {
+        // Capture the IL offset
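+        // (used to identify this patchpoint to the runtime helper)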
+        IL_OFFSET ilOffset = block->bbCodeOffs;
+        assert(ilOffset != BAD_IL_OFFSET);
+
+        // Current block now becomes the test block
+        BasicBlock* remainderBlock = compiler->fgSplitBlockAtBeginning(block);
+        BasicBlock* helperBlock    = CreateAndInsertBasicBlock(BBJ_NONE, block);
+
+        // Update flow and flags
+        block->bbJumpKind = BBJ_COND;
+        block->bbJumpDest = remainderBlock;
+        helperBlock->bbFlags |= BBF_BACKWARD_JUMP;
+
+        // Update weights
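+        //
+        // The helper path should be rarely taken; the helper block inherits
+        // only (100 - HIGH_PROBABILITY) percent of the original block weight.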
+        remainderBlock->inheritWeight(block);
+        helperBlock->inheritWeightPercentage(block, 100 - HIGH_PROBABILITY);
+
+        // Fill in test block
+        //
+        // --ppCounter;
+        GenTree* ppCounterBefore = compiler->gtNewLclvNode(ppCounterLclNum, TYP_INT);
+        GenTree* ppCounterAfter  = compiler->gtNewLclvNode(ppCounterLclNum, TYP_INT);
+        GenTree* one             = compiler->gtNewIconNode(1, TYP_INT);
+        GenTree* ppCounterSub    = compiler->gtNewOperNode(GT_SUB, TYP_INT, ppCounterBefore, one);
+        GenTree* ppCounterAsg    = compiler->gtNewOperNode(GT_ASG, TYP_INT, ppCounterAfter, ppCounterSub);
+
+        compiler->fgNewStmtAtEnd(block, ppCounterAsg);
+
+        // if (ppCounter > 0), bypass helper call
+        GenTree* ppCounterUpdated = compiler->gtNewLclvNode(ppCounterLclNum, TYP_INT);
+        GenTree* zero             = compiler->gtNewIconNode(0, TYP_INT);
+        GenTree* compare          = compiler->gtNewOperNode(GT_GT, TYP_INT, ppCounterUpdated, zero);
+        GenTree* jmp              = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, compare);
+
+        compiler->fgNewStmtAtEnd(block, jmp);
+
+        // Fill in helper block
+        //
+        // call PPHelper(&ppCounter, ilOffset)
+        GenTree*          ilOffsetNode  = compiler->gtNewIconNode(ilOffset, TYP_INT);
+        GenTree*          ppCounterRef  = compiler->gtNewLclvNode(ppCounterLclNum, TYP_INT);
+        GenTree*          ppCounterAddr = compiler->gtNewOperNode(GT_ADDR, TYP_I_IMPL, ppCounterRef);
+        GenTreeCall::Use* helperArgs    = compiler->gtNewCallArgs(ppCounterAddr, ilOffsetNode);
+        GenTreeCall*      helperCall    = compiler->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT, TYP_VOID, helperArgs);
+
+        compiler->fgNewStmtAtEnd(helperBlock, helperCall);
+    }
+
+    //------------------------------------------------------------------------
+    // TransformEntry: set the initial patchpoint counter value in the
+    // method entry block.
+    //
+    //  ppCounter = <initial value>
+    //
+    void TransformEntry(BasicBlock* block)
+    {
+        assert((block->bbFlags & BBF_PATCHPOINT) == 0);
+
+        int initialCounterValue = JitConfig.TC_OnStackReplacement_InitialCounter();
+
+        if (initialCounterValue < 0)
+        {
+            initialCounterValue = 0;
+        }
+
+        GenTree* initialCounterNode = compiler->gtNewIconNode(initialCounterValue, TYP_INT);
+        GenTree* ppCounterRef       = compiler->gtNewLclvNode(ppCounterLclNum, TYP_INT);
+        GenTree* ppCounterAsg       = compiler->gtNewOperNode(GT_ASG, TYP_INT, ppCounterRef, initialCounterNode);
+
+        compiler->fgNewStmtNearEnd(block, ppCounterAsg);
+    }
+};
+
+//------------------------------------------------------------------------
+// fgTransformPatchpoints: expansion of patchpoints into control flow.
+//
+// Notes:
+//
+// Patchpoint locations are marked on blocks (BBF_PATCHPOINT) during
+// importation and are expanded here into explicit counter updates and
+// conditional helper calls.
+//
+void Compiler::fgTransformPatchpoints()
+{
+    JITDUMP("\n*************** in fgTransformPatchpoints\n");
+
+    if (!doesMethodHavePatchpoints())
+    {
+        JITDUMP(" -- no patchpoints to transform\n");
+        return;
+    }
+
+    // We should only be adding patchpoints at Tier0, so we should not be in an inlinee.
+    assert(!compIsForInlining());
+
+    // We currently can't do OSR in methods with localloc.
+    // Such methods don't have a fixed relationship between frame and stack pointers.
+    //
+    // This is true whether or not the localloc was executed in the original method.
+    //
+    // TODO: handle this case, or else check this earlier and fall back to fully
+    // optimizing the method (a la QJFL=0).
+    if (compLocallocUsed)
+    {
+        JITDUMP(" -- unable to handle methods with localloc\n");
+        return;
+    }
+
+    // We currently can't do OSR in synchronized methods. We would need to
+    // alter the logic in fgAddSyncMethodEnterExit so that the OSR method does
+    // not try to obtain the monitor (the original method will already have
+    // done so) and instead sets the monitor-obtained flag to true (or reuses
+    // the original method's slot value).
+    if ((info.compFlags & CORINFO_FLG_SYNCH) != 0)
+    {
+        JITDUMP(" -- unable to handle synchronized methods\n");
+        return;
+    }
+
+    PatchpointTransformer ppTransformer(this);
+    int                   count = ppTransformer.Run();
+    JITDUMP("\n*************** After fgTransformPatchpoints() [%d patchpoints transformed]\n", count);
+    INDEBUG(if (verbose) { fgDispBasicBlocks(true); });
+}
index 14044bf..3019824 100644 (file)
@@ -763,7 +763,7 @@ void RangeCheck::MergeAssertion(BasicBlock* block, GenTree* op, Range* pRange DE
         assertions = block->bbAssertionIn;
     }
 
-    if (!BitVecOps::MayBeUninit(assertions))
+    if (!BitVecOps::MayBeUninit(assertions) && (m_pCompiler->GetAssertionCount() > 0))
     {
         // Perform the merge step to fine tune the range value.
         MergeEdgeAssertions(op->AsLclVarCommon(), assertions, pRange);
@@ -889,7 +889,7 @@ Range RangeCheck::ComputeRangeForLocalDef(BasicBlock*          block,
     }
 #endif
     Range range = GetRange(ssaDef->GetBlock(), ssaDef->GetAssignment()->gtGetOp2(), monIncreasing DEBUGARG(indent));
-    if (!BitVecOps::MayBeUninit(block->bbAssertionIn))
+    if (!BitVecOps::MayBeUninit(block->bbAssertionIn) && (m_pCompiler->GetAssertionCount() > 0))
     {
         JITDUMP("Merge assertions from " FMT_BB ":%s for assignment about [%06d]\n", block->bbNum,
                 BitVecOps::ToString(m_pCompiler->apTraits, block->bbAssertionIn),
index 66f5454..679e408 100644 (file)
@@ -5794,9 +5794,17 @@ void Compiler::fgValueNumber()
             // these are variables that are read before being initialized (at least on some control flow paths)
             // if they are not must-init, then they get VNF_InitVal(i), as with the param case.)
 
-            bool      isZeroed = (info.compInitMem || varDsc->lvMustInit);
-            ValueNum  initVal  = ValueNumStore::NoVN; // We must assign a new value to initVal
-            var_types typ      = varDsc->TypeGet();
+            bool isZeroed = (info.compInitMem || varDsc->lvMustInit);
+
+            // For OSR, locals or promoted fields of locals may be missing the initial def
+            // because of partial importation. We can't assume they are zero.
+            if (lvaIsOSRLocal(lclNum))
+            {
+                isZeroed = false;
+            }
+
+            ValueNum  initVal = ValueNumStore::NoVN; // We must assign a new value to initVal
+            var_types typ     = varDsc->TypeGet();
 
             switch (typ)
             {
@@ -6010,7 +6018,8 @@ void Compiler::fgValueNumberBlock(BasicBlock* blk)
                 BasicBlock::MemoryPhiArg* phiArgs = blk->bbMemorySsaPhiFunc[memoryKind];
                 assert(phiArgs != BasicBlock::EmptyMemoryPhiDef);
                 // There should be > 1 args to a phi.
-                assert(phiArgs->m_nextArg != nullptr);
+                // But OSR might leave around "dead" try entry blocks...
+                assert((phiArgs->m_nextArg != nullptr) || opts.IsOSR());
                 ValueNum phiAppVN = vnStore->VNForIntCon(phiArgs->GetSsaNum());
                 JITDUMP("  Building phi application: $%x = SSA# %d.\n", phiAppVN, phiArgs->GetSsaNum());
                 bool     allSame = true;
index 850cb24..6803336 100644 (file)
@@ -62,6 +62,10 @@ namespace Internal.JitInterface
         [UnmanagedFunctionPointerAttribute(default(CallingConvention))]
         delegate void __getGSCookie(IntPtr _this, IntPtr* ppException, IntPtr* pCookieVal, IntPtr** ppCookieVal);
         [UnmanagedFunctionPointerAttribute(default(CallingConvention))]
+        delegate void __setPatchpointInfo(IntPtr _this, IntPtr* ppException, PatchpointInfo* patchpointInfo);
+        [UnmanagedFunctionPointerAttribute(default(CallingConvention))]
+        delegate PatchpointInfo* __getOSRInfo(IntPtr _this, IntPtr* ppException, ref uint ilOffset);
+        [UnmanagedFunctionPointerAttribute(default(CallingConvention))]
         delegate void __resolveToken(IntPtr _this, IntPtr* ppException, ref CORINFO_RESOLVED_TOKEN pResolvedToken);
         [UnmanagedFunctionPointerAttribute(default(CallingConvention))]
         delegate void __tryResolveToken(IntPtr _this, IntPtr* ppException, ref CORINFO_RESOLVED_TOKEN pResolvedToken);
@@ -687,6 +691,33 @@ namespace Internal.JitInterface
             }
         }
 
+        static void _setPatchpointInfo(IntPtr thisHandle, IntPtr* ppException, PatchpointInfo* patchpointInfo)
+        {
+            var _this = GetThis(thisHandle);
+            try
+            {
+                _this.setPatchpointInfo(patchpointInfo);
+            }
+            catch (Exception ex)
+            {
+                *ppException = _this.AllocException(ex);
+            }
+        }
+
+        static PatchpointInfo* _getOSRInfo(IntPtr thisHandle, IntPtr* ppException, ref uint ilOffset)
+        {
+            var _this = GetThis(thisHandle);
+            try
+            {
+                return _this.getOSRInfo(ref ilOffset);
+            }
+            catch (Exception ex)
+            {
+                *ppException = _this.AllocException(ex);
+                return default(PatchpointInfo*);
+            }
+        }
+
         static void _resolveToken(IntPtr thisHandle, IntPtr* ppException, ref CORINFO_RESOLVED_TOKEN pResolvedToken)
         {
             var _this = GetThis(thisHandle);
@@ -2643,8 +2674,8 @@ namespace Internal.JitInterface
 
         static IntPtr GetUnmanagedCallbacks(out Object keepAlive)
         {
-            IntPtr * callbacks = (IntPtr *)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 167);
-            Object[] delegates = new Object[167];
+            IntPtr * callbacks = (IntPtr *)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 169);
+            Object[] delegates = new Object[169];
 
             var d0 = new __getMethodAttribs(_getMethodAttribs);
             callbacks[0] = Marshal.GetFunctionPointerForDelegate(d0);
@@ -2721,432 +2752,438 @@ namespace Internal.JitInterface
             var d24 = new __getGSCookie(_getGSCookie);
             callbacks[24] = Marshal.GetFunctionPointerForDelegate(d24);
             delegates[24] = d24;
-            var d25 = new __resolveToken(_resolveToken);
+            var d25 = new __setPatchpointInfo(_setPatchpointInfo);
             callbacks[25] = Marshal.GetFunctionPointerForDelegate(d25);
             delegates[25] = d25;
-            var d26 = new __tryResolveToken(_tryResolveToken);
+            var d26 = new __getOSRInfo(_getOSRInfo);
             callbacks[26] = Marshal.GetFunctionPointerForDelegate(d26);
             delegates[26] = d26;
-            var d27 = new __findSig(_findSig);
+            var d27 = new __resolveToken(_resolveToken);
             callbacks[27] = Marshal.GetFunctionPointerForDelegate(d27);
             delegates[27] = d27;
-            var d28 = new __findCallSiteSig(_findCallSiteSig);
+            var d28 = new __tryResolveToken(_tryResolveToken);
             callbacks[28] = Marshal.GetFunctionPointerForDelegate(d28);
             delegates[28] = d28;
-            var d29 = new __getTokenTypeAsHandle(_getTokenTypeAsHandle);
+            var d29 = new __findSig(_findSig);
             callbacks[29] = Marshal.GetFunctionPointerForDelegate(d29);
             delegates[29] = d29;
-            var d30 = new __isValidToken(_isValidToken);
+            var d30 = new __findCallSiteSig(_findCallSiteSig);
             callbacks[30] = Marshal.GetFunctionPointerForDelegate(d30);
             delegates[30] = d30;
-            var d31 = new __isValidStringRef(_isValidStringRef);
+            var d31 = new __getTokenTypeAsHandle(_getTokenTypeAsHandle);
             callbacks[31] = Marshal.GetFunctionPointerForDelegate(d31);
             delegates[31] = d31;
-            var d32 = new __getStringLiteral(_getStringLiteral);
+            var d32 = new __isValidToken(_isValidToken);
             callbacks[32] = Marshal.GetFunctionPointerForDelegate(d32);
             delegates[32] = d32;
-            var d33 = new __asCorInfoType(_asCorInfoType);
+            var d33 = new __isValidStringRef(_isValidStringRef);
             callbacks[33] = Marshal.GetFunctionPointerForDelegate(d33);
             delegates[33] = d33;
-            var d34 = new __getClassName(_getClassName);
+            var d34 = new __getStringLiteral(_getStringLiteral);
             callbacks[34] = Marshal.GetFunctionPointerForDelegate(d34);
             delegates[34] = d34;
-            var d35 = new __getClassNameFromMetadata(_getClassNameFromMetadata);
+            var d35 = new __asCorInfoType(_asCorInfoType);
             callbacks[35] = Marshal.GetFunctionPointerForDelegate(d35);
             delegates[35] = d35;
-            var d36 = new __getTypeInstantiationArgument(_getTypeInstantiationArgument);
+            var d36 = new __getClassName(_getClassName);
             callbacks[36] = Marshal.GetFunctionPointerForDelegate(d36);
             delegates[36] = d36;
-            var d37 = new __appendClassName(_appendClassName);
+            var d37 = new __getClassNameFromMetadata(_getClassNameFromMetadata);
             callbacks[37] = Marshal.GetFunctionPointerForDelegate(d37);
             delegates[37] = d37;
-            var d38 = new __isValueClass(_isValueClass);
+            var d38 = new __getTypeInstantiationArgument(_getTypeInstantiationArgument);
             callbacks[38] = Marshal.GetFunctionPointerForDelegate(d38);
             delegates[38] = d38;
-            var d39 = new __canInlineTypeCheck(_canInlineTypeCheck);
+            var d39 = new __appendClassName(_appendClassName);
             callbacks[39] = Marshal.GetFunctionPointerForDelegate(d39);
             delegates[39] = d39;
-            var d40 = new __getClassAttribs(_getClassAttribs);
+            var d40 = new __isValueClass(_isValueClass);
             callbacks[40] = Marshal.GetFunctionPointerForDelegate(d40);
             delegates[40] = d40;
-            var d41 = new __isStructRequiringStackAllocRetBuf(_isStructRequiringStackAllocRetBuf);
+            var d41 = new __canInlineTypeCheck(_canInlineTypeCheck);
             callbacks[41] = Marshal.GetFunctionPointerForDelegate(d41);
             delegates[41] = d41;
-            var d42 = new __getClassModule(_getClassModule);
+            var d42 = new __getClassAttribs(_getClassAttribs);
             callbacks[42] = Marshal.GetFunctionPointerForDelegate(d42);
             delegates[42] = d42;
-            var d43 = new __getModuleAssembly(_getModuleAssembly);
+            var d43 = new __isStructRequiringStackAllocRetBuf(_isStructRequiringStackAllocRetBuf);
             callbacks[43] = Marshal.GetFunctionPointerForDelegate(d43);
             delegates[43] = d43;
-            var d44 = new __getAssemblyName(_getAssemblyName);
+            var d44 = new __getClassModule(_getClassModule);
             callbacks[44] = Marshal.GetFunctionPointerForDelegate(d44);
             delegates[44] = d44;
-            var d45 = new __LongLifetimeMalloc(_LongLifetimeMalloc);
+            var d45 = new __getModuleAssembly(_getModuleAssembly);
             callbacks[45] = Marshal.GetFunctionPointerForDelegate(d45);
             delegates[45] = d45;
-            var d46 = new __LongLifetimeFree(_LongLifetimeFree);
+            var d46 = new __getAssemblyName(_getAssemblyName);
             callbacks[46] = Marshal.GetFunctionPointerForDelegate(d46);
             delegates[46] = d46;
-            var d47 = new __getClassModuleIdForStatics(_getClassModuleIdForStatics);
+            var d47 = new __LongLifetimeMalloc(_LongLifetimeMalloc);
             callbacks[47] = Marshal.GetFunctionPointerForDelegate(d47);
             delegates[47] = d47;
-            var d48 = new __getClassSize(_getClassSize);
+            var d48 = new __LongLifetimeFree(_LongLifetimeFree);
             callbacks[48] = Marshal.GetFunctionPointerForDelegate(d48);
             delegates[48] = d48;
-            var d49 = new __getHeapClassSize(_getHeapClassSize);
+            var d49 = new __getClassModuleIdForStatics(_getClassModuleIdForStatics);
             callbacks[49] = Marshal.GetFunctionPointerForDelegate(d49);
             delegates[49] = d49;
-            var d50 = new __canAllocateOnStack(_canAllocateOnStack);
+            var d50 = new __getClassSize(_getClassSize);
             callbacks[50] = Marshal.GetFunctionPointerForDelegate(d50);
             delegates[50] = d50;
-            var d51 = new __getClassAlignmentRequirement(_getClassAlignmentRequirement);
+            var d51 = new __getHeapClassSize(_getHeapClassSize);
             callbacks[51] = Marshal.GetFunctionPointerForDelegate(d51);
             delegates[51] = d51;
-            var d52 = new __getClassGClayout(_getClassGClayout);
+            var d52 = new __canAllocateOnStack(_canAllocateOnStack);
             callbacks[52] = Marshal.GetFunctionPointerForDelegate(d52);
             delegates[52] = d52;
-            var d53 = new __getClassNumInstanceFields(_getClassNumInstanceFields);
+            var d53 = new __getClassAlignmentRequirement(_getClassAlignmentRequirement);
             callbacks[53] = Marshal.GetFunctionPointerForDelegate(d53);
             delegates[53] = d53;
-            var d54 = new __getFieldInClass(_getFieldInClass);
+            var d54 = new __getClassGClayout(_getClassGClayout);
             callbacks[54] = Marshal.GetFunctionPointerForDelegate(d54);
             delegates[54] = d54;
-            var d55 = new __checkMethodModifier(_checkMethodModifier);
+            var d55 = new __getClassNumInstanceFields(_getClassNumInstanceFields);
             callbacks[55] = Marshal.GetFunctionPointerForDelegate(d55);
             delegates[55] = d55;
-            var d56 = new __getNewHelper(_getNewHelper);
+            var d56 = new __getFieldInClass(_getFieldInClass);
             callbacks[56] = Marshal.GetFunctionPointerForDelegate(d56);
             delegates[56] = d56;
-            var d57 = new __getNewArrHelper(_getNewArrHelper);
+            var d57 = new __checkMethodModifier(_checkMethodModifier);
             callbacks[57] = Marshal.GetFunctionPointerForDelegate(d57);
             delegates[57] = d57;
-            var d58 = new __getCastingHelper(_getCastingHelper);
+            var d58 = new __getNewHelper(_getNewHelper);
             callbacks[58] = Marshal.GetFunctionPointerForDelegate(d58);
             delegates[58] = d58;
-            var d59 = new __getSharedCCtorHelper(_getSharedCCtorHelper);
+            var d59 = new __getNewArrHelper(_getNewArrHelper);
             callbacks[59] = Marshal.GetFunctionPointerForDelegate(d59);
             delegates[59] = d59;
-            var d60 = new __getTypeForBox(_getTypeForBox);
+            var d60 = new __getCastingHelper(_getCastingHelper);
             callbacks[60] = Marshal.GetFunctionPointerForDelegate(d60);
             delegates[60] = d60;
-            var d61 = new __getBoxHelper(_getBoxHelper);
+            var d61 = new __getSharedCCtorHelper(_getSharedCCtorHelper);
             callbacks[61] = Marshal.GetFunctionPointerForDelegate(d61);
             delegates[61] = d61;
-            var d62 = new __getUnBoxHelper(_getUnBoxHelper);
+            var d62 = new __getTypeForBox(_getTypeForBox);
             callbacks[62] = Marshal.GetFunctionPointerForDelegate(d62);
             delegates[62] = d62;
-            var d63 = new __getReadyToRunHelper(_getReadyToRunHelper);
+            var d63 = new __getBoxHelper(_getBoxHelper);
             callbacks[63] = Marshal.GetFunctionPointerForDelegate(d63);
             delegates[63] = d63;
-            var d64 = new __getReadyToRunDelegateCtorHelper(_getReadyToRunDelegateCtorHelper);
+            var d64 = new __getUnBoxHelper(_getUnBoxHelper);
             callbacks[64] = Marshal.GetFunctionPointerForDelegate(d64);
             delegates[64] = d64;
-            var d65 = new __getHelperName(_getHelperName);
+            var d65 = new __getReadyToRunHelper(_getReadyToRunHelper);
             callbacks[65] = Marshal.GetFunctionPointerForDelegate(d65);
             delegates[65] = d65;
-            var d66 = new __initClass(_initClass);
+            var d66 = new __getReadyToRunDelegateCtorHelper(_getReadyToRunDelegateCtorHelper);
             callbacks[66] = Marshal.GetFunctionPointerForDelegate(d66);
             delegates[66] = d66;
-            var d67 = new __classMustBeLoadedBeforeCodeIsRun(_classMustBeLoadedBeforeCodeIsRun);
+            var d67 = new __getHelperName(_getHelperName);
             callbacks[67] = Marshal.GetFunctionPointerForDelegate(d67);
             delegates[67] = d67;
-            var d68 = new __getBuiltinClass(_getBuiltinClass);
+            var d68 = new __initClass(_initClass);
             callbacks[68] = Marshal.GetFunctionPointerForDelegate(d68);
             delegates[68] = d68;
-            var d69 = new __getTypeForPrimitiveValueClass(_getTypeForPrimitiveValueClass);
+            var d69 = new __classMustBeLoadedBeforeCodeIsRun(_classMustBeLoadedBeforeCodeIsRun);
             callbacks[69] = Marshal.GetFunctionPointerForDelegate(d69);
             delegates[69] = d69;
-            var d70 = new __getTypeForPrimitiveNumericClass(_getTypeForPrimitiveNumericClass);
+            var d70 = new __getBuiltinClass(_getBuiltinClass);
             callbacks[70] = Marshal.GetFunctionPointerForDelegate(d70);
             delegates[70] = d70;
-            var d71 = new __canCast(_canCast);
+            var d71 = new __getTypeForPrimitiveValueClass(_getTypeForPrimitiveValueClass);
             callbacks[71] = Marshal.GetFunctionPointerForDelegate(d71);
             delegates[71] = d71;
-            var d72 = new __areTypesEquivalent(_areTypesEquivalent);
+            var d72 = new __getTypeForPrimitiveNumericClass(_getTypeForPrimitiveNumericClass);
             callbacks[72] = Marshal.GetFunctionPointerForDelegate(d72);
             delegates[72] = d72;
-            var d73 = new __compareTypesForCast(_compareTypesForCast);
+            var d73 = new __canCast(_canCast);
             callbacks[73] = Marshal.GetFunctionPointerForDelegate(d73);
             delegates[73] = d73;
-            var d74 = new __compareTypesForEquality(_compareTypesForEquality);
+            var d74 = new __areTypesEquivalent(_areTypesEquivalent);
             callbacks[74] = Marshal.GetFunctionPointerForDelegate(d74);
             delegates[74] = d74;
-            var d75 = new __mergeClasses(_mergeClasses);
+            var d75 = new __compareTypesForCast(_compareTypesForCast);
             callbacks[75] = Marshal.GetFunctionPointerForDelegate(d75);
             delegates[75] = d75;
-            var d76 = new __isMoreSpecificType(_isMoreSpecificType);
+            var d76 = new __compareTypesForEquality(_compareTypesForEquality);
             callbacks[76] = Marshal.GetFunctionPointerForDelegate(d76);
             delegates[76] = d76;
-            var d77 = new __getParentType(_getParentType);
+            var d77 = new __mergeClasses(_mergeClasses);
             callbacks[77] = Marshal.GetFunctionPointerForDelegate(d77);
             delegates[77] = d77;
-            var d78 = new __getChildType(_getChildType);
+            var d78 = new __isMoreSpecificType(_isMoreSpecificType);
             callbacks[78] = Marshal.GetFunctionPointerForDelegate(d78);
             delegates[78] = d78;
-            var d79 = new __satisfiesClassConstraints(_satisfiesClassConstraints);
+            var d79 = new __getParentType(_getParentType);
             callbacks[79] = Marshal.GetFunctionPointerForDelegate(d79);
             delegates[79] = d79;
-            var d80 = new __isSDArray(_isSDArray);
+            var d80 = new __getChildType(_getChildType);
             callbacks[80] = Marshal.GetFunctionPointerForDelegate(d80);
             delegates[80] = d80;
-            var d81 = new __getArrayRank(_getArrayRank);
+            var d81 = new __satisfiesClassConstraints(_satisfiesClassConstraints);
             callbacks[81] = Marshal.GetFunctionPointerForDelegate(d81);
             delegates[81] = d81;
-            var d82 = new __getArrayInitializationData(_getArrayInitializationData);
+            var d82 = new __isSDArray(_isSDArray);
             callbacks[82] = Marshal.GetFunctionPointerForDelegate(d82);
             delegates[82] = d82;
-            var d83 = new __canAccessClass(_canAccessClass);
+            var d83 = new __getArrayRank(_getArrayRank);
             callbacks[83] = Marshal.GetFunctionPointerForDelegate(d83);
             delegates[83] = d83;
-            var d84 = new __getFieldName(_getFieldName);
+            var d84 = new __getArrayInitializationData(_getArrayInitializationData);
             callbacks[84] = Marshal.GetFunctionPointerForDelegate(d84);
             delegates[84] = d84;
-            var d85 = new __getFieldClass(_getFieldClass);
+            var d85 = new __canAccessClass(_canAccessClass);
             callbacks[85] = Marshal.GetFunctionPointerForDelegate(d85);
             delegates[85] = d85;
-            var d86 = new __getFieldType(_getFieldType);
+            var d86 = new __getFieldName(_getFieldName);
             callbacks[86] = Marshal.GetFunctionPointerForDelegate(d86);
             delegates[86] = d86;
-            var d87 = new __getFieldOffset(_getFieldOffset);
+            var d87 = new __getFieldClass(_getFieldClass);
             callbacks[87] = Marshal.GetFunctionPointerForDelegate(d87);
             delegates[87] = d87;
-            var d88 = new __getFieldInfo(_getFieldInfo);
+            var d88 = new __getFieldType(_getFieldType);
             callbacks[88] = Marshal.GetFunctionPointerForDelegate(d88);
             delegates[88] = d88;
-            var d89 = new __isFieldStatic(_isFieldStatic);
+            var d89 = new __getFieldOffset(_getFieldOffset);
             callbacks[89] = Marshal.GetFunctionPointerForDelegate(d89);
             delegates[89] = d89;
-            var d90 = new __getBoundaries(_getBoundaries);
+            var d90 = new __getFieldInfo(_getFieldInfo);
             callbacks[90] = Marshal.GetFunctionPointerForDelegate(d90);
             delegates[90] = d90;
-            var d91 = new __setBoundaries(_setBoundaries);
+            var d91 = new __isFieldStatic(_isFieldStatic);
             callbacks[91] = Marshal.GetFunctionPointerForDelegate(d91);
             delegates[91] = d91;
-            var d92 = new __getVars(_getVars);
+            var d92 = new __getBoundaries(_getBoundaries);
             callbacks[92] = Marshal.GetFunctionPointerForDelegate(d92);
             delegates[92] = d92;
-            var d93 = new __setVars(_setVars);
+            var d93 = new __setBoundaries(_setBoundaries);
             callbacks[93] = Marshal.GetFunctionPointerForDelegate(d93);
             delegates[93] = d93;
-            var d94 = new __allocateArray(_allocateArray);
+            var d94 = new __getVars(_getVars);
             callbacks[94] = Marshal.GetFunctionPointerForDelegate(d94);
             delegates[94] = d94;
-            var d95 = new __freeArray(_freeArray);
+            var d95 = new __setVars(_setVars);
             callbacks[95] = Marshal.GetFunctionPointerForDelegate(d95);
             delegates[95] = d95;
-            var d96 = new __getArgNext(_getArgNext);
+            var d96 = new __allocateArray(_allocateArray);
             callbacks[96] = Marshal.GetFunctionPointerForDelegate(d96);
             delegates[96] = d96;
-            var d97 = new __getArgType(_getArgType);
+            var d97 = new __freeArray(_freeArray);
             callbacks[97] = Marshal.GetFunctionPointerForDelegate(d97);
             delegates[97] = d97;
-            var d98 = new __getArgClass(_getArgClass);
+            var d98 = new __getArgNext(_getArgNext);
             callbacks[98] = Marshal.GetFunctionPointerForDelegate(d98);
             delegates[98] = d98;
-            var d99 = new __getHFAType(_getHFAType);
+            var d99 = new __getArgType(_getArgType);
             callbacks[99] = Marshal.GetFunctionPointerForDelegate(d99);
             delegates[99] = d99;
-            var d100 = new __GetErrorHRESULT(_GetErrorHRESULT);
+            var d100 = new __getArgClass(_getArgClass);
             callbacks[100] = Marshal.GetFunctionPointerForDelegate(d100);
             delegates[100] = d100;
-            var d101 = new __GetErrorMessage(_GetErrorMessage);
+            var d101 = new __getHFAType(_getHFAType);
             callbacks[101] = Marshal.GetFunctionPointerForDelegate(d101);
             delegates[101] = d101;
-            var d102 = new __FilterException(_FilterException);
+            var d102 = new __GetErrorHRESULT(_GetErrorHRESULT);
             callbacks[102] = Marshal.GetFunctionPointerForDelegate(d102);
             delegates[102] = d102;
-            var d103 = new __HandleException(_HandleException);
+            var d103 = new __GetErrorMessage(_GetErrorMessage);
             callbacks[103] = Marshal.GetFunctionPointerForDelegate(d103);
             delegates[103] = d103;
-            var d104 = new __ThrowExceptionForJitResult(_ThrowExceptionForJitResult);
+            var d104 = new __FilterException(_FilterException);
             callbacks[104] = Marshal.GetFunctionPointerForDelegate(d104);
             delegates[104] = d104;
-            var d105 = new __ThrowExceptionForHelper(_ThrowExceptionForHelper);
+            var d105 = new __HandleException(_HandleException);
             callbacks[105] = Marshal.GetFunctionPointerForDelegate(d105);
             delegates[105] = d105;
-            var d106 = new __runWithErrorTrap(_runWithErrorTrap);
+            var d106 = new __ThrowExceptionForJitResult(_ThrowExceptionForJitResult);
             callbacks[106] = Marshal.GetFunctionPointerForDelegate(d106);
             delegates[106] = d106;
-            var d107 = new __getEEInfo(_getEEInfo);
+            var d107 = new __ThrowExceptionForHelper(_ThrowExceptionForHelper);
             callbacks[107] = Marshal.GetFunctionPointerForDelegate(d107);
             delegates[107] = d107;
-            var d108 = new __getJitTimeLogFilename(_getJitTimeLogFilename);
+            var d108 = new __runWithErrorTrap(_runWithErrorTrap);
             callbacks[108] = Marshal.GetFunctionPointerForDelegate(d108);
             delegates[108] = d108;
-            var d109 = new __getMethodDefFromMethod(_getMethodDefFromMethod);
+            var d109 = new __getEEInfo(_getEEInfo);
             callbacks[109] = Marshal.GetFunctionPointerForDelegate(d109);
             delegates[109] = d109;
-            var d110 = new __getMethodName(_getMethodName);
+            var d110 = new __getJitTimeLogFilename(_getJitTimeLogFilename);
             callbacks[110] = Marshal.GetFunctionPointerForDelegate(d110);
             delegates[110] = d110;
-            var d111 = new __getMethodNameFromMetadata(_getMethodNameFromMetadata);
+            var d111 = new __getMethodDefFromMethod(_getMethodDefFromMethod);
             callbacks[111] = Marshal.GetFunctionPointerForDelegate(d111);
             delegates[111] = d111;
-            var d112 = new __getMethodHash(_getMethodHash);
+            var d112 = new __getMethodName(_getMethodName);
             callbacks[112] = Marshal.GetFunctionPointerForDelegate(d112);
             delegates[112] = d112;
-            var d113 = new __findNameOfToken(_findNameOfToken);
+            var d113 = new __getMethodNameFromMetadata(_getMethodNameFromMetadata);
             callbacks[113] = Marshal.GetFunctionPointerForDelegate(d113);
             delegates[113] = d113;
-            var d114 = new __getSystemVAmd64PassStructInRegisterDescriptor(_getSystemVAmd64PassStructInRegisterDescriptor);
+            var d114 = new __getMethodHash(_getMethodHash);
             callbacks[114] = Marshal.GetFunctionPointerForDelegate(d114);
             delegates[114] = d114;
-            var d115 = new __getThreadTLSIndex(_getThreadTLSIndex);
+            var d115 = new __findNameOfToken(_findNameOfToken);
             callbacks[115] = Marshal.GetFunctionPointerForDelegate(d115);
             delegates[115] = d115;
-            var d116 = new __getInlinedCallFrameVptr(_getInlinedCallFrameVptr);
+            var d116 = new __getSystemVAmd64PassStructInRegisterDescriptor(_getSystemVAmd64PassStructInRegisterDescriptor);
             callbacks[116] = Marshal.GetFunctionPointerForDelegate(d116);
             delegates[116] = d116;
-            var d117 = new __getAddrOfCaptureThreadGlobal(_getAddrOfCaptureThreadGlobal);
+            var d117 = new __getThreadTLSIndex(_getThreadTLSIndex);
             callbacks[117] = Marshal.GetFunctionPointerForDelegate(d117);
             delegates[117] = d117;
-            var d118 = new __getHelperFtn(_getHelperFtn);
+            var d118 = new __getInlinedCallFrameVptr(_getInlinedCallFrameVptr);
             callbacks[118] = Marshal.GetFunctionPointerForDelegate(d118);
             delegates[118] = d118;
-            var d119 = new __getFunctionEntryPoint(_getFunctionEntryPoint);
+            var d119 = new __getAddrOfCaptureThreadGlobal(_getAddrOfCaptureThreadGlobal);
             callbacks[119] = Marshal.GetFunctionPointerForDelegate(d119);
             delegates[119] = d119;
-            var d120 = new __getFunctionFixedEntryPoint(_getFunctionFixedEntryPoint);
+            var d120 = new __getHelperFtn(_getHelperFtn);
             callbacks[120] = Marshal.GetFunctionPointerForDelegate(d120);
             delegates[120] = d120;
-            var d121 = new __getMethodSync(_getMethodSync);
+            var d121 = new __getFunctionEntryPoint(_getFunctionEntryPoint);
             callbacks[121] = Marshal.GetFunctionPointerForDelegate(d121);
             delegates[121] = d121;
-            var d122 = new __getLazyStringLiteralHelper(_getLazyStringLiteralHelper);
+            var d122 = new __getFunctionFixedEntryPoint(_getFunctionFixedEntryPoint);
             callbacks[122] = Marshal.GetFunctionPointerForDelegate(d122);
             delegates[122] = d122;
-            var d123 = new __embedModuleHandle(_embedModuleHandle);
+            var d123 = new __getMethodSync(_getMethodSync);
             callbacks[123] = Marshal.GetFunctionPointerForDelegate(d123);
             delegates[123] = d123;
-            var d124 = new __embedClassHandle(_embedClassHandle);
+            var d124 = new __getLazyStringLiteralHelper(_getLazyStringLiteralHelper);
             callbacks[124] = Marshal.GetFunctionPointerForDelegate(d124);
             delegates[124] = d124;
-            var d125 = new __embedMethodHandle(_embedMethodHandle);
+            var d125 = new __embedModuleHandle(_embedModuleHandle);
             callbacks[125] = Marshal.GetFunctionPointerForDelegate(d125);
             delegates[125] = d125;
-            var d126 = new __embedFieldHandle(_embedFieldHandle);
+            var d126 = new __embedClassHandle(_embedClassHandle);
             callbacks[126] = Marshal.GetFunctionPointerForDelegate(d126);
             delegates[126] = d126;
-            var d127 = new __embedGenericHandle(_embedGenericHandle);
+            var d127 = new __embedMethodHandle(_embedMethodHandle);
             callbacks[127] = Marshal.GetFunctionPointerForDelegate(d127);
             delegates[127] = d127;
-            var d128 = new __getLocationOfThisType(_getLocationOfThisType);
+            var d128 = new __embedFieldHandle(_embedFieldHandle);
             callbacks[128] = Marshal.GetFunctionPointerForDelegate(d128);
             delegates[128] = d128;
-            var d129 = new __getAddressOfPInvokeTarget(_getAddressOfPInvokeTarget);
+            var d129 = new __embedGenericHandle(_embedGenericHandle);
             callbacks[129] = Marshal.GetFunctionPointerForDelegate(d129);
             delegates[129] = d129;
-            var d130 = new __GetCookieForPInvokeCalliSig(_GetCookieForPInvokeCalliSig);
+            var d130 = new __getLocationOfThisType(_getLocationOfThisType);
             callbacks[130] = Marshal.GetFunctionPointerForDelegate(d130);
             delegates[130] = d130;
-            var d131 = new __canGetCookieForPInvokeCalliSig(_canGetCookieForPInvokeCalliSig);
+            var d131 = new __getAddressOfPInvokeTarget(_getAddressOfPInvokeTarget);
             callbacks[131] = Marshal.GetFunctionPointerForDelegate(d131);
             delegates[131] = d131;
-            var d132 = new __getJustMyCodeHandle(_getJustMyCodeHandle);
+            var d132 = new __GetCookieForPInvokeCalliSig(_GetCookieForPInvokeCalliSig);
             callbacks[132] = Marshal.GetFunctionPointerForDelegate(d132);
             delegates[132] = d132;
-            var d133 = new __GetProfilingHandle(_GetProfilingHandle);
+            var d133 = new __canGetCookieForPInvokeCalliSig(_canGetCookieForPInvokeCalliSig);
             callbacks[133] = Marshal.GetFunctionPointerForDelegate(d133);
             delegates[133] = d133;
-            var d134 = new __getCallInfo(_getCallInfo);
+            var d134 = new __getJustMyCodeHandle(_getJustMyCodeHandle);
             callbacks[134] = Marshal.GetFunctionPointerForDelegate(d134);
             delegates[134] = d134;
-            var d135 = new __canAccessFamily(_canAccessFamily);
+            var d135 = new __GetProfilingHandle(_GetProfilingHandle);
             callbacks[135] = Marshal.GetFunctionPointerForDelegate(d135);
             delegates[135] = d135;
-            var d136 = new __isRIDClassDomainID(_isRIDClassDomainID);
+            var d136 = new __getCallInfo(_getCallInfo);
             callbacks[136] = Marshal.GetFunctionPointerForDelegate(d136);
             delegates[136] = d136;
-            var d137 = new __getClassDomainID(_getClassDomainID);
+            var d137 = new __canAccessFamily(_canAccessFamily);
             callbacks[137] = Marshal.GetFunctionPointerForDelegate(d137);
             delegates[137] = d137;
-            var d138 = new __getFieldAddress(_getFieldAddress);
+            var d138 = new __isRIDClassDomainID(_isRIDClassDomainID);
             callbacks[138] = Marshal.GetFunctionPointerForDelegate(d138);
             delegates[138] = d138;
-            var d139 = new __getStaticFieldCurrentClass(_getStaticFieldCurrentClass);
+            var d139 = new __getClassDomainID(_getClassDomainID);
             callbacks[139] = Marshal.GetFunctionPointerForDelegate(d139);
             delegates[139] = d139;
-            var d140 = new __getVarArgsHandle(_getVarArgsHandle);
+            var d140 = new __getFieldAddress(_getFieldAddress);
             callbacks[140] = Marshal.GetFunctionPointerForDelegate(d140);
             delegates[140] = d140;
-            var d141 = new __canGetVarArgsHandle(_canGetVarArgsHandle);
+            var d141 = new __getStaticFieldCurrentClass(_getStaticFieldCurrentClass);
             callbacks[141] = Marshal.GetFunctionPointerForDelegate(d141);
             delegates[141] = d141;
-            var d142 = new __constructStringLiteral(_constructStringLiteral);
+            var d142 = new __getVarArgsHandle(_getVarArgsHandle);
             callbacks[142] = Marshal.GetFunctionPointerForDelegate(d142);
             delegates[142] = d142;
-            var d143 = new __emptyStringLiteral(_emptyStringLiteral);
+            var d143 = new __canGetVarArgsHandle(_canGetVarArgsHandle);
             callbacks[143] = Marshal.GetFunctionPointerForDelegate(d143);
             delegates[143] = d143;
-            var d144 = new __getFieldThreadLocalStoreID(_getFieldThreadLocalStoreID);
+            var d144 = new __constructStringLiteral(_constructStringLiteral);
             callbacks[144] = Marshal.GetFunctionPointerForDelegate(d144);
             delegates[144] = d144;
-            var d145 = new __setOverride(_setOverride);
+            var d145 = new __emptyStringLiteral(_emptyStringLiteral);
             callbacks[145] = Marshal.GetFunctionPointerForDelegate(d145);
             delegates[145] = d145;
-            var d146 = new __addActiveDependency(_addActiveDependency);
+            var d146 = new __getFieldThreadLocalStoreID(_getFieldThreadLocalStoreID);
             callbacks[146] = Marshal.GetFunctionPointerForDelegate(d146);
             delegates[146] = d146;
-            var d147 = new __GetDelegateCtor(_GetDelegateCtor);
+            var d147 = new __setOverride(_setOverride);
             callbacks[147] = Marshal.GetFunctionPointerForDelegate(d147);
             delegates[147] = d147;
-            var d148 = new __MethodCompileComplete(_MethodCompileComplete);
+            var d148 = new __addActiveDependency(_addActiveDependency);
             callbacks[148] = Marshal.GetFunctionPointerForDelegate(d148);
             delegates[148] = d148;
-            var d149 = new __getTailCallCopyArgsThunk(_getTailCallCopyArgsThunk);
+            var d149 = new __GetDelegateCtor(_GetDelegateCtor);
             callbacks[149] = Marshal.GetFunctionPointerForDelegate(d149);
             delegates[149] = d149;
-            var d150 = new __convertPInvokeCalliToCall(_convertPInvokeCalliToCall);
+            var d150 = new __MethodCompileComplete(_MethodCompileComplete);
             callbacks[150] = Marshal.GetFunctionPointerForDelegate(d150);
             delegates[150] = d150;
-            var d151 = new __allocMem(_allocMem);
+            var d151 = new __getTailCallCopyArgsThunk(_getTailCallCopyArgsThunk);
             callbacks[151] = Marshal.GetFunctionPointerForDelegate(d151);
             delegates[151] = d151;
-            var d152 = new __reserveUnwindInfo(_reserveUnwindInfo);
+            var d152 = new __convertPInvokeCalliToCall(_convertPInvokeCalliToCall);
             callbacks[152] = Marshal.GetFunctionPointerForDelegate(d152);
             delegates[152] = d152;
-            var d153 = new __allocUnwindInfo(_allocUnwindInfo);
+            var d153 = new __allocMem(_allocMem);
             callbacks[153] = Marshal.GetFunctionPointerForDelegate(d153);
             delegates[153] = d153;
-            var d154 = new __allocGCInfo(_allocGCInfo);
+            var d154 = new __reserveUnwindInfo(_reserveUnwindInfo);
             callbacks[154] = Marshal.GetFunctionPointerForDelegate(d154);
             delegates[154] = d154;
-            var d155 = new __setEHcount(_setEHcount);
+            var d155 = new __allocUnwindInfo(_allocUnwindInfo);
             callbacks[155] = Marshal.GetFunctionPointerForDelegate(d155);
             delegates[155] = d155;
-            var d156 = new __setEHinfo(_setEHinfo);
+            var d156 = new __allocGCInfo(_allocGCInfo);
             callbacks[156] = Marshal.GetFunctionPointerForDelegate(d156);
             delegates[156] = d156;
-            var d157 = new __logMsg(_logMsg);
+            var d157 = new __setEHcount(_setEHcount);
             callbacks[157] = Marshal.GetFunctionPointerForDelegate(d157);
             delegates[157] = d157;
-            var d158 = new __doAssert(_doAssert);
+            var d158 = new __setEHinfo(_setEHinfo);
             callbacks[158] = Marshal.GetFunctionPointerForDelegate(d158);
             delegates[158] = d158;
-            var d159 = new __reportFatalError(_reportFatalError);
+            var d159 = new __logMsg(_logMsg);
             callbacks[159] = Marshal.GetFunctionPointerForDelegate(d159);
             delegates[159] = d159;
-            var d160 = new __allocMethodBlockCounts(_allocMethodBlockCounts);
+            var d160 = new __doAssert(_doAssert);
             callbacks[160] = Marshal.GetFunctionPointerForDelegate(d160);
             delegates[160] = d160;
-            var d161 = new __getMethodBlockCounts(_getMethodBlockCounts);
+            var d161 = new __reportFatalError(_reportFatalError);
             callbacks[161] = Marshal.GetFunctionPointerForDelegate(d161);
             delegates[161] = d161;
-            var d162 = new __recordCallSite(_recordCallSite);
+            var d162 = new __allocMethodBlockCounts(_allocMethodBlockCounts);
             callbacks[162] = Marshal.GetFunctionPointerForDelegate(d162);
             delegates[162] = d162;
-            var d163 = new __recordRelocation(_recordRelocation);
+            var d163 = new __getMethodBlockCounts(_getMethodBlockCounts);
             callbacks[163] = Marshal.GetFunctionPointerForDelegate(d163);
             delegates[163] = d163;
-            var d164 = new __getRelocTypeHint(_getRelocTypeHint);
+            var d164 = new __recordCallSite(_recordCallSite);
             callbacks[164] = Marshal.GetFunctionPointerForDelegate(d164);
             delegates[164] = d164;
-            var d165 = new __getExpectedTargetArchitecture(_getExpectedTargetArchitecture);
+            var d165 = new __recordRelocation(_recordRelocation);
             callbacks[165] = Marshal.GetFunctionPointerForDelegate(d165);
             delegates[165] = d165;
-            var d166 = new __getJitFlags(_getJitFlags);
+            var d166 = new __getRelocTypeHint(_getRelocTypeHint);
             callbacks[166] = Marshal.GetFunctionPointerForDelegate(d166);
             delegates[166] = d166;
+            var d167 = new __getExpectedTargetArchitecture(_getExpectedTargetArchitecture);
+            callbacks[167] = Marshal.GetFunctionPointerForDelegate(d167);
+            delegates[167] = d167;
+            var d168 = new __getJitFlags(_getJitFlags);
+            callbacks[168] = Marshal.GetFunctionPointerForDelegate(d168);
+            delegates[168] = d168;
 
             keepAlive = delegates;
             return (IntPtr)callbacks;
index e5a47be..1647c4f 100644 (file)
@@ -91,6 +91,10 @@ namespace Internal.JitInterface
     public struct CORINFO_VarArgInfo
     {
     }
+
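+    // Opaque patchpoint info blob; defined by the runtime and only passed
+    // around by reference on the managed side.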
+    public struct PatchpointInfo
+    {
+    }
 
     public enum _EXCEPTION_POINTERS
     { }
@@ -777,6 +781,11 @@ namespace Internal.JitInterface
         CORJIT_FUNC_FILTER         // a funclet associated with an EH filter
     }
 
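+    // OSR compilation request info: the IL offset of the triggering
+    // patchpoint and the runtime's patchpoint info for the original method.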
+    public unsafe struct CORINFO_OSR_INFO
+    {
+        public uint ILOffset;
+        public void* PatchpointInfo;
+    }
 
     public unsafe struct CORINFO_METHOD_INFO
     {
@@ -790,6 +799,7 @@ namespace Internal.JitInterface
         public CorInfoRegionKind regionKind;
         public CORINFO_SIG_INFO args;
         public CORINFO_SIG_INFO locals;
+        public CORINFO_OSR_INFO osrInfo;
     }
     //
     // what type of code region we are in
index b608352..c3f4f21 100644 (file)
@@ -95,6 +95,7 @@ CORINFO_GENERICHANDLE_RESULT*,ref CORINFO_GENERICHANDLE_RESULT,void*
 CORINFO_METHOD_INFO*,CORINFO_METHOD_INFO*,void*
 CORINFO_FIELD_INFO*,CORINFO_FIELD_INFO*,void*
 CORINFO_CALL_INFO*,CORINFO_CALL_INFO*,void*
+PatchpointInfo*,PatchpointInfo*,void*
 DelegateCtorArgs*,ref DelegateCtorArgs,void*
 ICorDynamicInfo*,IntPtr,void*
 va_list,IntPtr
@@ -184,6 +185,8 @@ FUNCTIONS
     void methodMustBeLoadedBeforeCodeIsRun(        CORINFO_METHOD_HANDLE       method        );
     CORINFO_METHOD_HANDLE mapMethodDeclToMethodImpl(        CORINFO_METHOD_HANDLE       method        );
     void getGSCookie(        GSCookie * pCookieVal,        GSCookie ** ppCookieVal        );
+    void setPatchpointInfo(PatchpointInfo* patchpointInfo);
+    PatchpointInfo* getOSRInfo(unsigned * ilOffset);
     void resolveToken(CORINFO_RESOLVED_TOKEN * pResolvedToken);
     void tryResolveToken(CORINFO_RESOLVED_TOKEN * pResolvedToken);
     void findSig(        CORINFO_MODULE_HANDLE       module,        unsigned                    sigTOK,        CORINFO_CONTEXT_HANDLE      context,        CORINFO_SIG_INFO           *sig        );
index 2e72b4c..62f8e33 100644 (file)
@@ -2175,6 +2175,24 @@ namespace Internal.JitInterface
             *ppCookieVal = (IntPtr *)ObjectToHandle(_compilation.NodeFactory.GetReadyToRunHelperCell(ReadyToRunHelper.GSCookie));
         }
 
+        /// <summary>
+        /// Record patchpoint info for the method
+        /// </summary>
+        private void setPatchpointInfo(PatchpointInfo* patchpointInfo)
+        {
+            // No patchpoint info when prejitting
+            throw new NotImplementedException();
+        }
+
+        /// <summary>
+        /// Retrieve OSR info for the method
+        /// </summary>
+        private PatchpointInfo* getOSRInfo(ref uint ilOffset)
+        {
+            // No patchpoint info when prejitting
+            throw new NotImplementedException();
+        }
+
         private void getMethodVTableOffset(CORINFO_METHOD_STRUCT_* method, ref uint offsetOfIndirection, ref uint offsetAfterIndirection, ref bool isRelative)
         { throw new NotImplementedException("getMethodVTableOffset"); }
         private void expandRawHandleIntrinsic(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref CORINFO_GENERICHANDLE_RESULT pResult)
index d7622f0..f000e5f 100644 (file)
@@ -35,6 +35,8 @@ struct JitInterfaceCallbacks
     void (* methodMustBeLoadedBeforeCodeIsRun)(void * thisHandle, CorInfoException** ppException, void* method);
     void* (* mapMethodDeclToMethodImpl)(void * thisHandle, CorInfoException** ppException, void* method);
     void (* getGSCookie)(void * thisHandle, CorInfoException** ppException, void* pCookieVal, void** ppCookieVal);
+    void (* setPatchpointInfo)(void * thisHandle, CorInfoException** ppException, void* patchpointInfo);
+    void* (* getOSRInfo)(void * thisHandle, CorInfoException** ppException, unsigned* ilOffset);
     void (* resolveToken)(void * thisHandle, CorInfoException** ppException, void* pResolvedToken);
     void (* tryResolveToken)(void * thisHandle, CorInfoException** ppException, void* pResolvedToken);
     void (* findSig)(void * thisHandle, CorInfoException** ppException, void* module, unsigned sigTOK, void* context, void* sig);
@@ -407,6 +409,23 @@ public:
             throw pException;
     }
 
+    virtual void setPatchpointInfo(void* patchpointInfo)
+    {
+        CorInfoException* pException = nullptr;
+        _callbacks->setPatchpointInfo(_thisHandle, &pException, patchpointInfo);
+        if (pException != nullptr)
+            throw pException;
+    }
+
+    virtual void* getOSRInfo(unsigned* ilOffset)
+    {
+        CorInfoException* pException = nullptr;
+        void* _ret = _callbacks->getOSRInfo(_thisHandle, &pException, ilOffset);
+        if (pException != nullptr)
+            throw pException;
+        return _ret;
+    }
+
     virtual void resolveToken(void* pResolvedToken)
     {
         CorInfoException* pException = nullptr;
index 8a4b1f9..fbc0170 100644 (file)
@@ -27,11 +27,11 @@ private:
     uint64_t corJitFlags;
 };
 
-static const GUID JITEEVersionIdentifier = { /* b2e40020-6125-41e4-a0fc-821127ec192a */
-    0xb2e40020,
-    0x6125,
-    0x41e4,
-    {0xa0, 0xfc, 0x82, 0x11, 0x27, 0xec, 0x19, 0x2a}
+static const GUID JITEEVersionIdentifier = { /* c231d2d7-4764-4097-a9ef-5961041540df */
+    0xc231d2d7,
+    0x4764,
+    0x4097,
+    {0xa9, 0xef, 0x59, 0x61, 0x04, 0x15, 0x40, 0xdf}
 };
 
 class Jit
index 30471d4..449e731 100644 (file)
@@ -100,6 +100,7 @@ set(VM_SOURCES_DAC_AND_WKS_COMMON
     methodtable.cpp
     nativeimage.cpp
     object.cpp
+    onstackreplacement.cpp
     pefile.cpp
     peimage.cpp
     peimagelayout.cpp
@@ -207,6 +208,7 @@ set(VM_HEADERS_DAC_AND_WKS_COMMON
     methodtable.inl
     object.h
     object.inl
+    onstackreplacement.h
     pefile.h
     pefile.inl
     peimage.h
index e32fe31..ad38dfe 100644 (file)
@@ -660,6 +660,7 @@ void EEStartupHelper()
         CodeVersionManager::StaticInitialize();
         TieredCompilationManager::StaticInitialize();
         CallCountingManager::StaticInitialize();
+        OnStackReplacementManager::StaticInitialize();
 
         InitThreadManager();
         STRESS_LOG0(LF_STARTUP, LL_ALWAYS, "Returned successfully from InitThreadManager");
index d3ca9d9..5b3fcb8 100644 (file)
@@ -3581,12 +3581,20 @@ BOOL EEJitManager::GetBoundariesAndVars(
     if (pDebugInfo == NULL)
         return FALSE;
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    BOOL hasFlagByte = TRUE;
+#else
+    BOOL hasFlagByte = FALSE;
+#endif
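+
+    // The flag byte indicates whether patchpoint info is bundled with the
+    // debug info; it is present only when OSR is enabled.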
+
     // Uncompress. This allocates memory and may throw.
     CompressDebugInfo::RestoreBoundariesAndVars(
         fpNew, pNewData, // allocators
         pDebugInfo,      // input
-        pcMap, ppMap,
-        pcVars, ppVars); // output
+        pcMap, ppMap,    // output
+        pcVars, ppVars,  // output
+        hasFlagByte
+    );
 
     return TRUE;
 }
@@ -5521,8 +5529,9 @@ BOOL NativeImageJitManager::GetBoundariesAndVars(
     CompressDebugInfo::RestoreBoundariesAndVars(
         fpNew, pNewData, // allocators
         pDebugInfo,      // input
-        pcMap, ppMap,
-        pcVars, ppVars); // output
+        pcMap, ppMap,    // output
+        pcVars, ppVars,  // output
+        FALSE);          // no patchpoint info
 
     return TRUE;
 }
@@ -6735,8 +6744,9 @@ BOOL ReadyToRunJitManager::GetBoundariesAndVars(
     CompressDebugInfo::RestoreBoundariesAndVars(
         fpNew, pNewData, // allocators
         pDebugInfo,      // input
-        pcMap, ppMap,
-        pcVars, ppVars); // output
+        pcMap, ppMap,    // output
+        pcVars, ppVars,  // output
+        FALSE);          // no patchpoint info
 
     return TRUE;
 }
index 433ae89..38a475c 100644 (file)
@@ -262,6 +262,7 @@ public:
     {
         SUPPORTS_DAC;
         return pRealCodeHeader->phdrJitGCInfo;
+
     }
     PTR_MethodDesc          GetMethodDesc()
     {
index af833fc..6981e05 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "common.h"
 #include "codeversion.h"
+#include "patchpointinfo.h"
 
 #ifdef FEATURE_CODE_VERSIONING
 #include "threadsuspend.h"
@@ -52,7 +53,9 @@ NativeCodeVersionNode::NativeCodeVersionNode(
     NativeCodeVersionId id,
     MethodDesc* pMethodDesc,
     ReJITID parentId,
-    NativeCodeVersion::OptimizationTier optimizationTier)
+    NativeCodeVersion::OptimizationTier optimizationTier,
+    PatchpointInfo* patchpointInfo,
+    unsigned ilOffset)
     :
     m_pNativeCode(NULL),
     m_pMethodDesc(pMethodDesc),
@@ -65,6 +68,10 @@ NativeCodeVersionNode::NativeCodeVersionNode(
 #ifdef HAVE_GCCOVER
     m_gcCover(PTR_NULL),
 #endif
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    m_patchpointInfo(patchpointInfo),
+    m_ilOffset(ilOffset),
+#endif
     m_flags(0)
 {}
 #endif
@@ -153,6 +160,17 @@ void NativeCodeVersionNode::SetOptimizationTier(NativeCodeVersion::OptimizationT
 
 #endif // FEATURE_TIERED_COMPILATION
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+
+PatchpointInfo* NativeCodeVersionNode::GetOSRInfo(unsigned * ilOffset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+    *ilOffset = m_ilOffset;
+    return m_patchpointInfo;
+}
+
+#endif // FEATURE_ON_STACK_REPLACEMENT
+
 #ifdef HAVE_GCCOVER
 
 PTR_GCCoverageInfo NativeCodeVersionNode::GetGCCoverageInfo() const
@@ -334,6 +352,24 @@ void NativeCodeVersion::SetOptimizationTier(OptimizationTier tier)
 
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+
+PatchpointInfo * NativeCodeVersion::GetOSRInfo(unsigned * ilOffset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+    if (m_storageKind == StorageKind::Explicit)
+    {
+        return AsNode()->GetOSRInfo(ilOffset);
+    }
+    else
+    {
+        return NULL;
+    }
+}
+
+#endif
+
+
 #ifdef HAVE_GCCOVER
 
 PTR_GCCoverageInfo NativeCodeVersion::GetGCCoverageInfo() const
@@ -929,11 +965,14 @@ void ILCodeVersion::SetInstrumentedILMap(SIZE_T cMap, COR_IL_MAP * rgMap)
 HRESULT ILCodeVersion::AddNativeCodeVersion(
     MethodDesc* pClosedMethodDesc,
     NativeCodeVersion::OptimizationTier optimizationTier,
-    NativeCodeVersion* pNativeCodeVersion)
+    NativeCodeVersion* pNativeCodeVersion,
+    PatchpointInfo* patchpointInfo,
+    unsigned ilOffset
+    )
 {
     LIMITED_METHOD_CONTRACT;
     CodeVersionManager* pManager = GetModule()->GetCodeVersionManager();
-    HRESULT hr = pManager->AddNativeCodeVersion(*this, pClosedMethodDesc, optimizationTier, pNativeCodeVersion);
+    HRESULT hr = pManager->AddNativeCodeVersion(*this, pClosedMethodDesc, optimizationTier, pNativeCodeVersion, patchpointInfo, ilOffset);
     if (FAILED(hr))
     {
         _ASSERTE(hr == E_OUTOFMEMORY);
@@ -1555,7 +1594,9 @@ HRESULT CodeVersionManager::AddNativeCodeVersion(
     ILCodeVersion ilCodeVersion,
     MethodDesc* pClosedMethodDesc,
     NativeCodeVersion::OptimizationTier optimizationTier,
-    NativeCodeVersion* pNativeCodeVersion)
+    NativeCodeVersion* pNativeCodeVersion,
+    PatchpointInfo* patchpointInfo,
+    unsigned ilOffset)
 {
     LIMITED_METHOD_CONTRACT;
     _ASSERTE(IsLockOwnedByCurrentThread());
@@ -1569,7 +1610,7 @@ HRESULT CodeVersionManager::AddNativeCodeVersion(
     }
 
     NativeCodeVersionId newId = pMethodVersioningState->AllocateVersionId();
-    NativeCodeVersionNode* pNativeCodeVersionNode = new (nothrow) NativeCodeVersionNode(newId, pClosedMethodDesc, ilCodeVersion.GetVersionId(), optimizationTier);
+    NativeCodeVersionNode* pNativeCodeVersionNode = new (nothrow) NativeCodeVersionNode(newId, pClosedMethodDesc, ilCodeVersion.GetVersionId(), optimizationTier, patchpointInfo, ilOffset);
     if (pNativeCodeVersionNode == NULL)
     {
         return E_OUTOFMEMORY;
index 724d91a..f318e2e 100644 (file)
@@ -37,6 +37,11 @@ class GCCoverageInfo;
 typedef DPTR(class GCCoverageInfo) PTR_GCCoverageInfo;
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+struct PatchpointInfo;
+typedef DPTR(struct PatchpointInfo) PTR_PatchpointInfo;
+#endif
+
 class NativeCodeVersion
 {
 #ifdef FEATURE_CODE_VERSIONING
@@ -71,6 +76,7 @@ public:
     {
         OptimizationTier0,
         OptimizationTier1,
+        OptimizationTier1OSR,
         OptimizationTierOptimized, // may do less optimizations than tier 1
     };
 #ifdef FEATURE_TIERED_COMPILATION
@@ -80,6 +86,10 @@ public:
 #endif
 #endif // FEATURE_TIERED_COMPILATION
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    PatchpointInfo * GetOSRInfo(unsigned * iloffset);
+#endif // FEATURE_ON_STACK_REPLACEMENT
+
 #ifdef HAVE_GCCOVER
     PTR_GCCoverageInfo GetGCCoverageInfo() const;
     void SetGCCoverageInfo(PTR_GCCoverageInfo gcCover);
@@ -165,7 +175,8 @@ public:
     void SetIL(COR_ILMETHOD* pIL);
     void SetJitFlags(DWORD flags);
     void SetInstrumentedILMap(SIZE_T cMap, COR_IL_MAP * rgMap);
-    HRESULT AddNativeCodeVersion(MethodDesc* pClosedMethodDesc, NativeCodeVersion::OptimizationTier optimizationTier, NativeCodeVersion* pNativeCodeVersion);
+    HRESULT AddNativeCodeVersion(MethodDesc* pClosedMethodDesc, NativeCodeVersion::OptimizationTier optimizationTier, 
+        NativeCodeVersion* pNativeCodeVersion, PatchpointInfo* patchpointInfo = NULL, unsigned ilOffset = 0);
     HRESULT GetOrCreateActiveNativeCodeVersion(MethodDesc* pClosedMethodDesc, NativeCodeVersion* pNativeCodeVersion);
     HRESULT SetActiveNativeCodeVersion(NativeCodeVersion activeNativeCodeVersion);
 #endif //DACCESS_COMPILE
@@ -244,7 +255,8 @@ class NativeCodeVersionNode
 
 public:
 #ifndef DACCESS_COMPILE
-    NativeCodeVersionNode(NativeCodeVersionId id, MethodDesc* pMethod, ReJITID parentId, NativeCodeVersion::OptimizationTier optimizationTier);
+    NativeCodeVersionNode(NativeCodeVersionId id, MethodDesc* pMethod, ReJITID parentId, NativeCodeVersion::OptimizationTier optimizationTier, 
+        PatchpointInfo* patchpointInfo, unsigned ilOffset);
 #endif
 
     PTR_MethodDesc GetMethodDesc() const;
@@ -270,6 +282,10 @@ public:
     void SetGCCoverageInfo(PTR_GCCoverageInfo gcCover);
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    PatchpointInfo * GetOSRInfo(unsigned * ilOffset);
+#endif
+
 private:
     //union - could save a little memory?
     //{
@@ -286,6 +302,10 @@ private:
 #ifdef HAVE_GCCOVER
     PTR_GCCoverageInfo m_gcCover;
 #endif
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    PTR_PatchpointInfo m_patchpointInfo;
+    unsigned m_ilOffset;
+#endif
 
     enum NativeCodeVersionNodeFlags
     {
@@ -569,7 +589,8 @@ public:
     };
 
     HRESULT AddILCodeVersion(Module* pModule, mdMethodDef methodDef, ReJITID rejitId, ILCodeVersion* pILCodeVersion);
-    HRESULT AddNativeCodeVersion(ILCodeVersion ilCodeVersion, MethodDesc* pClosedMethodDesc, NativeCodeVersion::OptimizationTier optimizationTier, NativeCodeVersion* pNativeCodeVersion);
+    HRESULT AddNativeCodeVersion(ILCodeVersion ilCodeVersion, MethodDesc* pClosedMethodDesc, NativeCodeVersion::OptimizationTier optimizationTier, NativeCodeVersion* pNativeCodeVersion,
+        PatchpointInfo* patchpointInfo = NULL, unsigned ilOffset = 0);
     PCODE PublishVersionableCodeIfNecessary(
         MethodDesc* pMethodDesc,
         CallerGCMode callerGCMode,
index c250d5b..cbd51fe 100644 (file)
@@ -1018,7 +1018,7 @@ void CEECompileInfo::CompressDebugInfo(
 {
     STANDARD_VM_CONTRACT;
 
-    CompressDebugInfo::CompressBoundariesAndVars(pOffsetMapping, iOffsetMapping, pNativeVarInfo, iNativeVarInfo, pDebugInfoBuffer, NULL);
+    CompressDebugInfo::CompressBoundariesAndVars(pOffsetMapping, iOffsetMapping, pNativeVarInfo, iNativeVarInfo, NULL, pDebugInfoBuffer, NULL);
 }
 
 ICorJitHost* CEECompileInfo::GetJitHost()
index 295dbe5..e3a18ff 100644 (file)
@@ -8,6 +8,7 @@
 #include "common.h"
 #include "debuginfostore.h"
 #include "nibblestream.h"
+#include "patchpointinfo.h"
 
 
 #ifdef _DEBUG
@@ -440,6 +441,7 @@ PTR_BYTE CompressDebugInfo::CompressBoundariesAndVars(
     IN ULONG            iOffsetMapping,
     IN ICorDebugInfo::NativeVarInfo * pNativeVarInfo,
     IN ULONG            iNativeVarInfo,
+    IN PatchpointInfo * patchpointInfo,
     IN OUT SBuffer    * pDebugInfoBuffer,
     IN LoaderHeap     * pLoaderHeap
     )
@@ -451,6 +453,18 @@ PTR_BYTE CompressDebugInfo::CompressBoundariesAndVars(
         PRECONDITION((pDebugInfoBuffer != NULL) ^ (pLoaderHeap != NULL));
     } CONTRACTL_END;
 
+    // Patchpoint info is currently uncompressed.
+    DWORD cbPatchpointInfo = 0;
+
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    if (patchpointInfo != NULL)
+    {
+        cbPatchpointInfo = patchpointInfo->PatchpointInfoSize();
+    }
+#else
+    _ASSERTE(patchpointInfo == NULL);
+#endif
+
     // Actually do the compression. These will throw on oom.
     NibbleWriter boundsBuffer;
     DWORD cbBounds = 0;
@@ -479,7 +493,12 @@ PTR_BYTE CompressDebugInfo::CompressBoundariesAndVars(
     DWORD cbHeader;
     PVOID pHeader = w.GetBlob(&cbHeader);
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    S_UINT32 cbFinalSize = S_UINT32(1) + S_UINT32(cbPatchpointInfo) + S_UINT32(cbHeader) + S_UINT32(cbBounds) + S_UINT32(cbVars);
+#else
     S_UINT32 cbFinalSize = S_UINT32(cbHeader) + S_UINT32(cbBounds) + S_UINT32(cbVars);
+#endif
+
     if (cbFinalSize.IsOverflow())
         ThrowHR(COR_E_OVERFLOW);
 
@@ -497,6 +516,22 @@ PTR_BYTE CompressDebugInfo::CompressBoundariesAndVars(
 
     BYTE *ptr = ptrStart;
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+
+    // First byte is a flag byte:
+    //   0 - no patchpoint info
+    //   1 - patchpoint info
+
+    *ptr++ = (cbPatchpointInfo > 0) ? 1 : 0;
+
+    if (cbPatchpointInfo > 0)
+    {
+        memcpy(ptr, (BYTE*) patchpointInfo, cbPatchpointInfo);
+        ptr += cbPatchpointInfo;
+    }
+
+#endif
+
     memcpy(ptr, pHeader, cbHeader);
     ptr += cbHeader;
 
@@ -520,11 +555,6 @@ PTR_BYTE CompressDebugInfo::CompressBoundariesAndVars(
 #endif // DACCESS_COMPILE
 
 //-----------------------------------------------------------------------------
-// Compression routines
-// DAC only needs to run the uncompression routines.
-//-----------------------------------------------------------------------------
-
-//-----------------------------------------------------------------------------
 // Uncompression (restore) routines
 //-----------------------------------------------------------------------------
 
@@ -535,7 +565,8 @@ void CompressDebugInfo::RestoreBoundariesAndVars(
     OUT ULONG32                       * pcMap, // number of entries in ppMap
     OUT ICorDebugInfo::OffsetMapping **ppMap, // pointer to newly allocated array
     OUT ULONG32                         *pcVars,
-    OUT ICorDebugInfo::NativeVarInfo    **ppVars
+    OUT ICorDebugInfo::NativeVarInfo    **ppVars,
+    BOOL hasFlagByte
     )
 {
     CONTRACTL
@@ -552,6 +583,28 @@ void CompressDebugInfo::RestoreBoundariesAndVars(
     if (pcVars != NULL) *pcVars = 0;
     if (ppVars != NULL) *ppVars = NULL;
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    if (hasFlagByte)
+    {
+        // Check flag byte and skip over any patchpoint info
+        BYTE flagByte = *pDebugInfo;
+        pDebugInfo++;
+
+        if (flagByte == 1)
+        {
+            PTR_PatchpointInfo patchpointInfo = dac_cast<PTR_PatchpointInfo>(pDebugInfo);
+            pDebugInfo += patchpointInfo->PatchpointInfoSize();
+        }
+        else
+        {
+            _ASSERTE(flagByte == 0);
+        }
+    }
+
+#else
+    _ASSERTE(!hasFlagByte);
+#endif
+
     NibbleReader r(pDebugInfo, 12 /* maximum size of compressed 2 UINT32s */);
 
     ULONG cbBounds = r.ReadEncodedU32();
@@ -615,6 +668,39 @@ void CompressDebugInfo::RestoreBoundariesAndVars(
     }
 }
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+
+PatchpointInfo * CompressDebugInfo::RestorePatchpointInfo(IN PTR_BYTE pDebugInfo)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        SUPPORTS_DAC;
+    }
+    CONTRACTL_END;
+
+    PTR_PatchpointInfo patchpointInfo = NULL;
+
+    // Check flag byte.
+    BYTE flagByte = *pDebugInfo;
+    pDebugInfo++;
+
+    if (flagByte == 1)
+    {
+        patchpointInfo = dac_cast<PTR_PatchpointInfo>(pDebugInfo);
+    }
+    else
+    {
+        _ASSERTE(flagByte == 0);
+    }
+
+    return patchpointInfo;
+}
+
+#endif
+
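The debug-info blob for jitted code now carries an optional prefix ahead of the compressed payload: one flag byte, then, when the flag is 1, the raw (uncompressed) patchpoint record. EEJitManager passes hasFlagByte = TRUE while the prejit managers pass FALSE, since prejitted code never stores patchpoint info. A minimal sketch of a reader over that layout, assuming a hypothetical size-prefixed record; the real layout lives in patchpointinfo.h:

    #include <cstdint>
    #include <cstring>
    #include <vector>
    #include <cassert>

    // Hypothetical patchpoint record: size-prefixed, trailing payload elided.
    struct PatchpointRecord {
        uint32_t totalSize; // size of the whole record in bytes
    };

    // Returns a pointer to the compressed debug-info payload, skipping the
    // optional [flag byte][patchpoint record] prefix when present.
    const uint8_t* SkipPatchpointPrefix(const uint8_t* blob, bool hasFlagByte) {
        if (!hasFlagByte)
            return blob;                 // prejitted code: no prefix at all
        uint8_t flag = *blob++;
        if (flag == 1) {
            PatchpointRecord rec;
            memcpy(&rec, blob, sizeof(rec));
            blob += rec.totalSize;       // patchpoint info is stored uncompressed
        } else {
            assert(flag == 0);
        }
        return blob;
    }

    int main() {
        std::vector<uint8_t> blob = {1};      // flag byte: has patchpoint info
        PatchpointRecord rec;
        rec.totalSize = sizeof(rec);
        const uint8_t* p = (const uint8_t*)&rec;
        blob.insert(blob.end(), p, p + sizeof(rec));
        blob.push_back(0xAB);                 // first byte of compressed payload
        assert(*SkipPatchpointPrefix(blob.data(), true) == 0xAB);
        return 0;
    }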
 #ifdef DACCESS_COMPILE
 void CompressDebugInfo::EnumMemoryRegions(CLRDataEnumMemoryFlags flags, PTR_BYTE pDebugInfo)
 {
index 453e41f..5484330 100644 (file)
@@ -83,6 +83,7 @@ public:
         IN ULONG            iOffsetMapping,
         IN ICorDebugInfo::NativeVarInfo * pNativeVarInfo,
         IN ULONG            iNativeVarInfo,
+        IN PatchpointInfo * patchpointInfo,
         IN OUT SBuffer    * pDebugInfoBuffer,
         IN LoaderHeap     * pLoaderHeap
     );
@@ -95,9 +96,16 @@ public:
         OUT ULONG32                       * pcMap, // number of entries in ppMap
         OUT ICorDebugInfo::OffsetMapping **ppMap, // pointer to newly allocated array
         OUT ULONG32                         *pcVars,
-        OUT ICorDebugInfo::NativeVarInfo    **ppVars
+        OUT ICorDebugInfo::NativeVarInfo    **ppVars,
+        BOOL hasFlagByte
     );
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    static PatchpointInfo * RestorePatchpointInfo(
+        IN PTR_BYTE pDebugInfo
+    );
+#endif
+
 #ifdef DACCESS_COMPILE
     static void EnumMemoryRegions(CLRDataEnumMemoryFlags flags, PTR_BYTE pDebugInfo);
 #endif
index 205730d..dc43f14 100644 (file)
@@ -339,6 +339,11 @@ HRESULT EEConfig::Init()
     tieredCompilation_DeleteCallCountingStubsAfter = 0;
 #endif
 
+#if defined(FEATURE_ON_STACK_REPLACEMENT)
+    dwOSR_HitLimit = 10;
+    dwOSR_CounterBump = 5000;
+#endif
+
 #ifndef CROSSGEN_COMPILE
     backpatchEntryPointSlots = false;
 #endif
@@ -1265,6 +1270,16 @@ fTrackDynamicMethodDebugInfo = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_
     }
 #endif
 
+#if defined(FEATURE_ON_STACK_REPLACEMENT)
+    dwOSR_HitLimit = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_OSR_HitLimit);
+    dwOSR_CounterBump = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_OSR_CounterBump);
+#endif
+
+#if defined(FEATURE_ON_STACK_REPLACEMENT) && defined(_DEBUG)
+    dwOSR_LowId = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_OSR_LowId);
+    dwOSR_HighId = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_OSR_HighId);
+#endif
+
 #ifndef CROSSGEN_COMPILE
     backpatchEntryPointSlots = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_BackpatchEntryPointSlots) != 0;
 #endif
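These reads pick up the knobs defaulted earlier in EEConfig::Init (hit limit 10, counter bump 5000); the debug-only low/high id pair lets a stress run restrict which patchpoint ids are allowed to trigger. A stand-alone sketch of the same read-with-default pattern, assuming the usual COMPlus_ environment-variable spelling rather than the CLRConfig machinery:

    #include <cstdlib>
    #include <cstdio>

    // Read an integer setting from the environment, falling back to a default.
    static unsigned GetConfigValue(const char* name, unsigned defaultValue) {
        const char* raw = getenv(name);
        return (raw != nullptr) ? (unsigned)strtoul(raw, nullptr, 10) : defaultValue;
    }

    int main() {
        unsigned hitLimit    = GetConfigValue("COMPlus_OSR_HitLimit", 10);
        unsigned counterBump = GetConfigValue("COMPlus_OSR_CounterBump", 5000);
        printf("OSR_HitLimit=%u OSR_CounterBump=%u\n", hitLimit, counterBump);
        return 0;
    }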
index 3359a4a..e28f648 100644 (file)
@@ -290,6 +290,17 @@ public:
     DWORD         TieredCompilation_DeleteCallCountingStubsAfter() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_DeleteCallCountingStubsAfter; }
 #endif
 
+#if defined(FEATURE_ON_STACK_REPLACEMENT)
+    // OSR Config
+    DWORD         OSR_CounterBump() const { LIMITED_METHOD_CONTRACT; return dwOSR_CounterBump; }
+    DWORD         OSR_HitLimit() const { LIMITED_METHOD_CONTRACT; return dwOSR_HitLimit; }
+#endif
+
+#if defined(FEATURE_ON_STACK_REPLACEMENT) && defined(_DEBUG)
+    DWORD         OSR_LowId() const { LIMITED_METHOD_CONTRACT; return dwOSR_LowId; }
+    DWORD         OSR_HighId() const { LIMITED_METHOD_CONTRACT; return dwOSR_HighId; }
+#endif
+
 #ifndef CROSSGEN_COMPILE
     bool          BackpatchEntryPointSlots() const { LIMITED_METHOD_CONTRACT; return backpatchEntryPointSlots; }
 #endif
@@ -1023,6 +1034,16 @@ private: //----------------------------------------------------------------
     DWORD tieredCompilation_DeleteCallCountingStubsAfter;
 #endif
 
+#if defined(FEATURE_ON_STACK_REPLACEMENT)
+    DWORD dwOSR_HitLimit;
+    DWORD dwOSR_CounterBump;
+#endif
+
+#if defined(FEATURE_ON_STACK_REPLACEMENT) && defined(_DEBUG)
+    DWORD dwOSR_LowId;
+    DWORD dwOSR_HighId;
+#endif
+
 #ifndef CROSSGEN_COMPILE
     bool backpatchEntryPointSlots;
 #endif
index 6fcb1c8..bbc3607 100644 (file)
@@ -54,6 +54,7 @@
 
 #include "runtimehandles.h"
 #include "castcache.h"
+#include "onstackreplacement.h"
 
 //========================================================================
 //
@@ -5004,6 +5005,331 @@ HCIMPL0(void, JIT_DebugLogLoopCloning)
 }
 HCIMPLEND
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+
+// Helper method to jit the OSR version of a method.
+//
+// Returns the address of the jitted code.
+// Returns NULL if the OSR method can't be created.
+static PCODE JitPatchpointWorker(MethodDesc* pMD, EECodeInfo& codeInfo, int ilOffset)
+{
+    PCODE osrVariant = NULL;
+
+    GCX_PREEMP();
+
+    // Fetch the patchpoint info for the current method
+    EEJitManager* jitMgr = ExecutionManager::GetEEJitManager();
+    CodeHeader* codeHdr = jitMgr->GetCodeHeaderFromStartAddress(codeInfo.GetStartAddress());
+    PTR_BYTE debugInfo = codeHdr->GetDebugInfo();
+    PatchpointInfo* patchpointInfo = CompressDebugInfo::RestorePatchpointInfo(debugInfo);
+
+    if (patchpointInfo == NULL)
+    {
+        // Unexpected, but not fatal
+        STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "JitPatchpointWorker: failed to restore patchpoint info for Method=0x%pM\n", pMD);
+        return NULL;
+    }
+
+    // Set up a new native code version for the OSR variant of this method.
+    NativeCodeVersion osrNativeCodeVersion;
+    {
+        CodeVersionManager::LockHolder codeVersioningLockHolder;
+
+        NativeCodeVersion currentNativeCodeVersion = codeInfo.GetNativeCodeVersion();
+        ILCodeVersion ilCodeVersion = currentNativeCodeVersion.GetILCodeVersion();
+        HRESULT hr = ilCodeVersion.AddNativeCodeVersion(pMD, NativeCodeVersion::OptimizationTier1OSR, &osrNativeCodeVersion, patchpointInfo, ilOffset);
+        if (FAILED(hr))
+        {
+            // Unexpected, but not fatal
+            STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "JitPatchpointWorker: failed to add native code version for Method=0x%pM\n", pMD);
+            return NULL;
+        }
+    }
+
+    // Invoke the jit to compile the OSR version
+    LOG((LF_TIEREDCOMPILATION, LL_INFO10, "JitPatchpointWorker: creating OSR version of Method=0x%pM (%s::%s) at offset %d\n",
+        pMD, pMD->m_pszDebugClassName, pMD->m_pszDebugMethodName, ilOffset));
+
+    PrepareCodeConfigBuffer configBuffer(osrNativeCodeVersion);
+    PrepareCodeConfig *config = configBuffer.GetConfig();
+    osrVariant = pMD->PrepareCode(config);
+
+    return osrVariant;
+}
+
+// Helper method wrapper to set up a frame so we can invoke methods that might GC
+HCIMPL3(PCODE, JIT_Patchpoint_Framed, MethodDesc* pMD, EECodeInfo& codeInfo, int ilOffset)
+{
+    PCODE result = NULL;
+
+    HELPER_METHOD_FRAME_BEGIN_RET_0();
+
+    result = JitPatchpointWorker(pMD, codeInfo, ilOffset);
+
+    HELPER_METHOD_FRAME_END();
+
+    return result;
+}
+HCIMPLEND
+
+// Jit helper invoked at a patchpoint.
+//
+// Checks to see if this is a known patchpoint; if not,
+// an entry is added to the patchpoint table.
+//
+// When the patchpoint has been hit often enough to trigger
+// a transition, create an OSR method.
+//
+// Currently, counter is a pointer into the Tier0 method stack
+// frame so we have exclusive access.
+
+void JIT_Patchpoint(int* counter, int ilOffset)
+{
+    // This method may not return normally
+    STATIC_CONTRACT_GC_NOTRIGGER;
+    STATIC_CONTRACT_MODE_COOPERATIVE;
+
+    // Patchpoint identity is the helper return address
+    PCODE ip = (PCODE)_ReturnAddress();
+
+    // Fetch or set up patchpoint info for this patchpoint.
+    EECodeInfo codeInfo(ip);
+    MethodDesc* pMD = codeInfo.GetMethodDesc();
+    LoaderAllocator* allocator = pMD->GetLoaderAllocator();
+    OnStackReplacementManager* manager = allocator->GetOnStackReplacementManager();
+    PerPatchpointInfo * ppInfo = manager->GetPerPatchpointInfo(ip);
+
+    // In the current prototype, counter is shared by all patchpoints
+    // in a method, so no matter what happens below, we don't want to
+    // impair those other patchpoints.
+    //
+    // One might be tempted, for instance, to set the counter for
+    // invalid or ignored patchpoints to some high value to reduce
+    // the amount of back and forth with the runtime, but this would
+    // lock out other patchpoints in the method.
+    //
+    // So we always reset the counter to the bump value.
+    //
+    // In the prototype, counter is a location in a stack frame,
+    // so we can update it without worrying about other threads.
+    const int counterBump = g_pConfig->OSR_CounterBump();
+    *counter = counterBump;
+
+#if _DEBUG
+    const int ppId = ppInfo->m_patchpointId;
+#endif
+
+    // Is this a patchpoint that was previously marked as invalid? If so, just return to the Tier0 method.
+    if ((ppInfo->m_flags & PerPatchpointInfo::patchpoint_invalid) == PerPatchpointInfo::patchpoint_invalid)
+    {
+        LOG((LF_TIEREDCOMPILATION, LL_INFO1000, "Jit_Patchpoint: invalid patchpoint [%d] (0x%p) in Method=0x%pM (%s::%s) at offset %d\n",
+                ppId, ip, pMD, pMD->m_pszDebugClassName, pMD->m_pszDebugMethodName, ilOffset));
+        return;
+    }
+    
+    // See if we have an OSR method for this patchpoint.
+    PCODE osrMethodCode = ppInfo->m_osrMethodCode;
+    bool isNewMethod = false;
+    
+    if (osrMethodCode == NULL)
+    {
+        // No OSR method yet, let's see if we should create one.
+        //
+        // First, optionally ignore some patchpoints to increase
+        // coverage (stress mode).
+        // 
+        // Because there are multiple patchpoints in a method, and
+        // each OSR method covers the remainder of the method from
+        // that point until the method returns, if we trigger on an
+        // early patchpoint in a method, we may never see triggers on
+        // a later one.
+
+#ifdef _DEBUG
+        const int lowId = g_pConfig->OSR_LowId();
+        const int highId = g_pConfig->OSR_HighId();
+        
+        if ((ppId < lowId) || (ppId > highId))
+        {
+            LOG((LF_TIEREDCOMPILATION, LL_INFO10, "Jit_Patchpoint: ignoring patchpoint [%d] (0x%p) in Method=0x%pM (%s::%s) at offset %d\n",
+                    ppId, ip, pMD, pMD->m_pszDebugClassName, pMD->m_pszDebugMethodName, ilOffset));
+            return;
+        }
+#endif
+
+        // Second, only request the OSR method if this patchpoint has
+        // been hit often enough.
+        //
+        // Note the initial invocation of the helper depends on the
+        // initial counter value baked into jitted code (call this J);
+        // subsequent invocations depend on the counter bump (call
+        // this B).
+        //
+        // J and B may differ, so the total number of loop iterations
+        // before an OSR method is created is:
+        //
+        // J, if hitLimit <= 1;
+        // J + (hitLimit-1) * B, if hitLimit > 1.
+        //
+        // Current thinking is:
+        //
+        // J should be in the range of tens to hundreds, so that newly
+        // called Tier0 methods that already have OSR methods
+        // available can transition to OSR methods quickly, but
+        // methods called only a few times do not invoke this
+        // helper and so do not create PerPatchpoint runtime state.
+        //
+        // B should be in the range of hundreds to thousands, so that
+        // we're not too eager to create OSR methods (since there is
+        // some jit cost), but are eager enough to transition before
+        // we run too much Tier0 code.
+        //
+        const int hitLimit = g_pConfig->OSR_HitLimit();
+        const int hitCount = InterlockedIncrement(&ppInfo->m_patchpointCount);
+        const int hitLogLevel = (hitCount == 1) ? LL_INFO10 : LL_INFO1000;
+
+        LOG((LF_TIEREDCOMPILATION, hitLogLevel, "Jit_Patchpoint: patchpoint [%d] (0x%p) hit %d in Method=0x%pM (%s::%s) [il offset %d] (limit %d)\n",
+            ppId, ip, hitCount, pMD, pMD->m_pszDebugClassName, pMD->m_pszDebugMethodName, ilOffset, hitLimit));
+        
+        // Defer if we haven't yet reached the limit.
+        if (hitCount < hitLimit)
+        {
+            return;
+        }
+        
+        // Third, make sure no other thread is trying to create the OSR method.
+        LONG oldFlags = ppInfo->m_flags;
+        if ((oldFlags & PerPatchpointInfo::patchpoint_triggered) == PerPatchpointInfo::patchpoint_triggered)
+        {
+            LOG((LF_TIEREDCOMPILATION, LL_INFO1000, "Jit_Patchpoint: AWAITING OSR method for patchpoint [%d] (0x%p)\n", ppId, ip));
+            return;
+        }
+        
+        LONG newFlags = ppInfo->m_flags | PerPatchpointInfo::patchpoint_triggered;
+        BOOL triggerTransition = InterlockedCompareExchange(&ppInfo->m_flags, newFlags, oldFlags) == oldFlags;
+        
+        if (!triggerTransition)
+        {
+            LOG((LF_TIEREDCOMPILATION, LL_INFO1000, "Jit_Patchpoint: (lost race) AWAITING OSR method for patchpoint [%d] (0x%p)\n", ppId, ip));
+            return;
+        }
+        
+        // Time to create the OSR method.
+        //
+        // We currently do this synchronously. We could instead queue
+        // up a request on some worker thread, like we do for
+        // rejitting, and return control to the Tier0 method. It may
+        // eventually return here, if the patchpoint is hit often
+        // enough.
+        //
+        // There is a chance the async version will create methods
+        // that are never used (just like there is a chance that Tier1
+        // methods are never called).
+        //
+        // In this prototype we want to expose bugs in the jitted code
+        // for OSR methods, so we stick with synchronous creation.
+        LOG((LF_TIEREDCOMPILATION, LL_INFO10, "Jit_Patchpoint: patchpoint [%d] (0x%p) TRIGGER at count %d\n", ppId, ip, hitCount));
+        
+        // Invoke the helper to build the OSR method
+        osrMethodCode = HCCALL3(JIT_Patchpoint_Framed, pMD, codeInfo, ilOffset);
+        
+        // If that failed, mark the patchpoint as invalid.
+        if (osrMethodCode == NULL)
+        {
+            // Unexpected, but not fatal
+            STRESS_LOG4(LF_TIEREDCOMPILATION, LL_WARNING, "Jit_Patchpoint: patchpoint (0x%p) OSR method creation failed at count %d,"
+                " marking patchpoint invalid for Method=0x%pM il offset %d\n", ip, hitCount, pMD, ilOffset);
+            
+            InterlockedOr(&ppInfo->m_flags, (LONG)PerPatchpointInfo::patchpoint_invalid);
+            return;
+        }
+        
+        // We've successfully created the osr method; make it available.
+        _ASSERTE(ppInfo->m_osrMethodCode == NULL);
+        ppInfo->m_osrMethodCode = osrMethodCode;
+        isNewMethod = true;
+    }
+
+    // If we get here, we have code to transition to...
+    _ASSERTE(osrMethodCode != NULL);
+
+    Thread *pThread = GetThread();
+    
+#ifdef FEATURE_HIJACK
+    // We can't crawl the stack of a thread that currently has a hijack pending
+    // (since the hijack routine won't be recognized by any code manager). So we
+    // undo any hijack; the EE will re-attempt it later.
+    pThread->UnhijackThread();
+#endif
+    
+    // Find context for the original method
+    CONTEXT frameContext;
+    frameContext.ContextFlags = CONTEXT_FULL;
+    RtlCaptureContext(&frameContext);
+    
+    // Walk back to the original method frame
+    pThread->VirtualUnwindToFirstManagedCallFrame(&frameContext);
+    
+    // Remember the original method's FP and SP because the new method will inherit them.
+    UINT_PTR currentSP = GetSP(&frameContext);
+    UINT_PTR currentFP = GetFP(&frameContext);
+    
+    // We expect to be back at the right IP
+    if ((UINT_PTR)ip != GetIP(&frameContext))
+    {
+        // Should be fatal
+        STRESS_LOG2(LF_TIEREDCOMPILATION, LL_INFO10, "Jit_Patchpoint: patchpoint (0x%p) TRANSITION"
+            " unexpected context IP 0x%p\n", ip, GetIP(&frameContext));
+    }
+    
+    // Now unwind back to the original method caller frame.
+    EECodeInfo callerCodeInfo(GetIP(&frameContext));
+    frameContext.ContextFlags = CONTEXT_FULL;
+    ULONG_PTR establisherFrame = 0;
+    PVOID handlerData = NULL;
+    RtlVirtualUnwind(UNW_FLAG_NHANDLER, callerCodeInfo.GetModuleBase(), GetIP(&frameContext), callerCodeInfo.GetFunctionEntry(), 
+        &frameContext, &handlerData, &establisherFrame, NULL);
+    
+    // Now, set FP and SP back to the values they had just before this helper was called,
+    // since the new method must have access to the original method frame.
+    //
+    // TODO: if we access the patchpointInfo here, we can read out the FP-SP delta from there and
+    // use that to adjust the stack, likely saving some stack space.
+    
+#if defined(TARGET_AMD64)
+    // If calls push the return address, we need to simulate that here, so the OSR
+    // method sees the "expected" SP misalignment on entry.
+    _ASSERTE(currentSP % 16 == 0);
+    currentSP -= 8;
+#endif
+    
+    SetSP(&frameContext, currentSP);
+    frameContext.Rbp = currentFP;
+    
+    // Note we can get here w/o triggering, if there is an existing OSR method and
+    // we hit the patchpoint.
+    const int transitionLogLevel = isNewMethod ? LL_INFO10 : LL_INFO1000;
+    LOG((LF_TIEREDCOMPILATION, transitionLogLevel, "Jit_Patchpoint: patchpoint [%d] (0x%p) TRANSITION to ip 0x%p\n", ppId, ip, osrMethodCode));
+
+    // Install new entry point as IP
+    SetIP(&frameContext, osrMethodCode);
+    
+    // Transition!
+    RtlRestoreContext(&frameContext, NULL);
+}
+
+#else
+
+void JIT_Patchpoint(int* counter, int ilOffset)
+{
+    // Stub version if OSR feature is disabled
+    //
+    // Should not be called.
+
+    UNREACHABLE();
+}
+
+#endif // FEATURE_ON_STACK_REPLACEMENT
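The iteration-count formula in the helper's comment is easy to check with the defaults: a hit limit of 10 and a counter bump of 5000 give J + 9 * 5000 loop iterations before the OSR method is requested, where J is the initial counter value the jit bakes into the Tier0 code (the 1000 below is purely illustrative). A minimal sketch:

    #include <cstdio>

    // Iterations executed before the OSR transition is requested, per the
    // comment in JIT_Patchpoint: J on the first helper call, then B more
    // per call until the hit limit is reached.
    static long IterationsBeforeTrigger(long initialCounter /* J */,
                                        long counterBump    /* B */,
                                        long hitLimit) {
        if (hitLimit <= 1)
            return initialCounter;
        return initialCounter + (hitLimit - 1) * counterBump;
    }

    int main() {
        // Defaults from EEConfig::Init; the initial counter of 1000 is illustrative.
        printf("%ld iterations\n", IterationsBeforeTrigger(1000, 5000, 10));
        // Prints: 46000 iterations
        return 0;
    }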
+
 //========================================================================
 //
 //      INTEROP HELPERS
@@ -5177,7 +5503,7 @@ void InitJITHelpers2()
 
     g_pJitGenericHandleCacheCrst.Init(CrstJitGenericHandleCache, CRST_UNSAFE_COOPGC);
 
-    // Allocate and initialize the table
+    // Allocate and initialize the generic handle cache
     NewHolder <JitGenericHandleCache> tempGenericHandleCache (new JitGenericHandleCache());
     LockOwner sLock = {&g_pJitGenericHandleCacheCrst, IsOwnerOfCrst};
     if (!tempGenericHandleCache->Init(59, &sLock))
index 2c7b37d..a28d6bb 100644 (file)
@@ -7736,6 +7736,7 @@ getMethodInfoHelper(
         &methInfo->locals,
         ftn,
         true);
+
 } // getMethodInfoHelper
 
 //---------------------------------------------------------------------------------------
@@ -10957,6 +10958,50 @@ void CEEJitInfo::setVars(CORINFO_METHOD_HANDLE ftn, ULONG32 cVars, ICorDebugInfo
     EE_TO_JIT_TRANSITION();
 }
 
+void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo)
+{
+    CONTRACTL {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    JIT_TO_EE_TRANSITION();
+
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    // We receive ownership of the array
+    _ASSERTE(m_pPatchpointInfoFromJit == NULL);
+    m_pPatchpointInfoFromJit = patchpointInfo;
+#else
+    UNREACHABLE();
+#endif
+
+    EE_TO_JIT_TRANSITION();
+}
+
+PatchpointInfo* CEEJitInfo::getOSRInfo(unsigned* ilOffset)
+{
+    CONTRACTL {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    PatchpointInfo* result = NULL;
+    *ilOffset = 0;
+
+    JIT_TO_EE_TRANSITION();
+
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    result = m_pPatchpointInfoFromRuntime;
+    *ilOffset = m_ilOffset;
+#endif
+
+    EE_TO_JIT_TRANSITION();
+
+    return result;
+}
+
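Taken together, setPatchpointInfo and getOSRInfo form a simple handoff: the Tier0 compile hands the runtime a patchpoint record, the runtime stashes it on the native code version, and the later OSR compile reads it back along with the triggering IL offset. A minimal sketch of that round trip, with the record type and storage reduced to hypothetical stand-ins:

    #include <cassert>
    #include <cstdio>

    struct PatchpointRecord { int frameSize; };   // stand-in for PatchpointInfo

    // Stand-in for the per-compile state CEEJitInfo carries.
    struct JitCompileState {
        PatchpointRecord* fromJit     = nullptr;  // set during the Tier0 compile
        PatchpointRecord* fromRuntime = nullptr;  // set before an OSR compile
        unsigned          ilOffset    = 0;
    };

    // Tier0 compile: the jit reports patchpoint info to the runtime.
    void setPatchpointInfo(JitCompileState& s, PatchpointRecord* rec) {
        assert(s.fromJit == nullptr);             // at most one report per compile
        s.fromJit = rec;
    }

    // OSR compile: the jit asks the runtime for the saved info and IL offset.
    PatchpointRecord* getOSRInfo(JitCompileState& s, unsigned* ilOffset) {
        *ilOffset = s.ilOffset;
        return s.fromRuntime;
    }

    int main() {
        PatchpointRecord rec{64};
        JitCompileState tier0;
        setPatchpointInfo(tier0, &rec);           // Tier0 compile finishes

        JitCompileState osr;                      // runtime seeds the OSR compile
        osr.fromRuntime = tier0.fromJit;
        osr.ilOffset    = 42;

        unsigned ilOffset = 0;
        PatchpointRecord* got = getOSRInfo(osr, &ilOffset);
        printf("frameSize=%d ilOffset=%u\n", got->frameSize, ilOffset);
        return 0;
    }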
 void CEEJitInfo::CompressDebugInfo()
 {
     CONTRACTL {
@@ -10965,11 +11010,20 @@ void CEEJitInfo::CompressDebugInfo()
         MODE_PREEMPTIVE;
     } CONTRACTL_END;
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    PatchpointInfo* patchpointInfo = m_pPatchpointInfoFromJit;
+#else
+    PatchpointInfo* patchpointInfo = NULL;
+#endif
+
     // Don't track JIT info for DynamicMethods.
     if (m_pMethodBeingCompiled->IsDynamicMethod() && !g_pConfig->GetTrackDynamicMethodDebugInfo())
+    {
+        _ASSERTE(patchpointInfo == NULL);
         return;
+    }
 
-    if (m_iOffsetMapping == 0 && m_iNativeVarInfo == 0)
+    if ((m_iOffsetMapping == 0) && (m_iNativeVarInfo == 0) && (patchpointInfo == NULL))
         return;
 
     JIT_TO_EE_TRANSITION();
@@ -10979,6 +11033,7 @@ void CEEJitInfo::CompressDebugInfo()
         PTR_BYTE pDebugInfo = CompressDebugInfo::CompressBoundariesAndVars(
             m_pOffsetMapping, m_iOffsetMapping,
             m_pNativeVarInfo, m_iNativeVarInfo,
+            patchpointInfo,
             NULL,
             m_pMethodBeingCompiled->GetLoaderAllocator()->GetLowFrequencyHeap());
 
@@ -12116,7 +12171,7 @@ CorJitResult invokeCompileMethodHelper(EEJitManager *jitMgr,
                                                      info,
                                                      CORJIT_FLAGS::CORJIT_FLAG_CALL_GETJITFLAGS,
                                                      nativeEntry,
-                                                     nativeSizeOfCode );
+                                                     nativeSizeOfCode);
 
 #ifdef FEATURE_STACK_SAMPLING
         if (jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_SAMPLING_JIT_BACKGROUND))
@@ -12738,6 +12793,16 @@ PCODE UnsafeJitFunction(PrepareCodeConfig* config,
         jitInfo.SetReserveForJumpStubs(reserveForJumpStubs);
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+        // If this is an OSR jit request, grab the OSR info so we can pass it to the jit
+        if (flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_OSR))
+        {
+            unsigned ilOffset = 0;
+            PatchpointInfo* patchpointInfo = nativeCodeVersion.GetOSRInfo(&ilOffset);
+            jitInfo.SetOSRInfo(patchpointInfo, ilOffset);
+        }
+#endif
+
         MethodDesc * pMethodForSecurity = jitInfo.GetMethodForSecurity(ftnHnd);
 
         //Since the check could trigger a demand, we have to do this every time.
@@ -13898,6 +13963,18 @@ void CEEInfo::setVars(CORINFO_METHOD_HANDLE ftn, ULONG32 cVars, ICorDebugInfo::N
     UNREACHABLE();      // only called on derived class.
 }
 
+void CEEInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo)
+{
+    LIMITED_METHOD_CONTRACT;
+    UNREACHABLE();      // only called on derived class.
+}
+
+PatchpointInfo* CEEInfo::getOSRInfo(unsigned* ilOffset)
+{
+    LIMITED_METHOD_CONTRACT;
+    UNREACHABLE();      // only called on derived class.
+}
+
 void* CEEInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
                             void **            ppIndirection)  /* OUT */
 {
index 62faa9e..465dfce 100644 (file)
@@ -534,6 +534,9 @@ public:
     CORINFO_CLASS_HANDLE getBuiltinClass(CorInfoClassId classId);
     void getGSCookie(GSCookie * pCookieVal, GSCookie ** ppCookieVal);
 
+    void setPatchpointInfo(PatchpointInfo* patchpointInfo);
+    PatchpointInfo* getOSRInfo(unsigned* ilOffset);
+
     // "System.Int32" ==> CORINFO_TYPE_INT..
     CorInfoType getTypeForPrimitiveValueClass(
             CORINFO_CLASS_HANDLE        cls
@@ -1298,6 +1301,15 @@ public:
         m_iNativeVarInfo = 0;
         m_pNativeVarInfo = NULL;
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+        if (m_pPatchpointInfoFromJit != NULL)
+            delete [] ((BYTE*) m_pPatchpointInfoFromJit);
+
+        m_pPatchpointInfoFromJit = NULL;
+        m_pPatchpointInfoFromRuntime = NULL;
+        m_ilOffset = 0;
+#endif
+
 #ifdef FEATURE_EH_FUNCLETS
         m_moduleBase = NULL;
         m_totalUnwindSize = 0;
@@ -1360,6 +1372,17 @@ public:
     }
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    // Called by the runtime to supply patchpoint information to the jit.
+    void SetOSRInfo(PatchpointInfo* patchpointInfo, unsigned ilOffset)
+    {
+        _ASSERTE(m_pPatchpointInfoFromRuntime == NULL);
+        _ASSERTE(patchpointInfo != NULL);
+        m_pPatchpointInfoFromRuntime = patchpointInfo;
+        m_ilOffset = ilOffset;
+    }
+#endif
+
     CEEJitInfo(MethodDesc* fd,  COR_ILMETHOD_DECODER* header,
                EEJitManager* jm, bool fVerifyOnly, bool allowInlining = true)
         : CEEInfo(fd, fVerifyOnly, allowInlining),
@@ -1387,6 +1410,11 @@ public:
           m_pOffsetMapping(NULL),
           m_iNativeVarInfo(0),
           m_pNativeVarInfo(NULL),
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+          m_pPatchpointInfoFromJit(NULL),
+          m_pPatchpointInfoFromRuntime(NULL),
+          m_ilOffset(0),
+#endif
           m_gphCache()
     {
         CONTRACTL
@@ -1413,6 +1441,12 @@ public:
 
         if (m_pNativeVarInfo != NULL)
             delete [] ((BYTE*) m_pNativeVarInfo);
+
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+        if (m_pPatchpointInfoFromJit != NULL)
+            delete [] ((BYTE*) m_pPatchpointInfoFromJit);
+#endif
+
     }
 
     // ICorDebugInfo stuff.
@@ -1448,6 +1482,9 @@ public:
 
     void BackoutJitData(EEJitManager * jitMgr);
 
+    void setPatchpointInfo(PatchpointInfo* patchpointInfo);
+    PatchpointInfo* getOSRInfo(unsigned* ilOffset);
+
 protected :
     EEJitManager*           m_jitManager;   // responsible for allocating memory
     CodeHeader*             m_CodeHeader;   // descriptor for JITTED code
@@ -1483,6 +1520,12 @@ protected :
     ULONG32                 m_iNativeVarInfo;
     ICorDebugInfo::NativeVarInfo * m_pNativeVarInfo;
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    PatchpointInfo        * m_pPatchpointInfoFromJit;
+    PatchpointInfo        * m_pPatchpointInfoFromRuntime;
+    unsigned                m_ilOffset;
+#endif
+
     // The first time a call is made to CEEJitInfo::GetProfilingHandle() from this thread
     // for this method, these values are filled in.   Thereafter, these values are used
     // in lieu of calling into the base CEEInfo::GetProfilingHandle() again.  This protects the
index d39cf0a..406f45a 100644 (file)
@@ -58,6 +58,10 @@ LoaderAllocator::LoaderAllocator()
     m_callCountingManager = NULL;
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    m_onStackReplacementManager = NULL;
+#endif
+
     m_fGCPressure = false;
     m_fTerminated = false;
     m_fUnloaded = false;
@@ -1343,6 +1347,14 @@ void LoaderAllocator::Terminate()
     }
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    if (m_onStackReplacementManager != NULL)
+    {
+        delete m_onStackReplacementManager;
+        m_onStackReplacementManager = NULL;
+    }
+#endif
+
     // In collectible types we merge the low frequency and high frequency heaps
     // So don't destroy them twice.
     if ((m_pLowFrequencyHeap != NULL) && (m_pLowFrequencyHeap != m_pHighFrequencyHeap))
@@ -2019,3 +2031,34 @@ BOOL LoaderAllocator::InsertComInteropData(MethodTable* pMT, InteropMethodTableD
 #endif // FEATURE_COMINTEROP
 
 #endif // !DACCESS_COMPILE
+
+
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+#ifndef DACCESS_COMPILE
+PTR_OnStackReplacementManager LoaderAllocator::GetOnStackReplacementManager()
+{
+    CONTRACTL
+    {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_ANY;
+        INJECT_FAULT(COMPlusThrowOM(););
+    }
+    CONTRACTL_END;
+
+    if (m_onStackReplacementManager == NULL)
+    {
+        OnStackReplacementManager * newManager = new OnStackReplacementManager(this);
+
+        if (FastInterlockCompareExchangePointer(&m_onStackReplacementManager, newManager, NULL) != NULL)
+        {
+            // some thread swooped in and set the field
+            delete newManager;
+        }
+    }
+    _ASSERTE(m_onStackReplacementManager != NULL);
+    return m_onStackReplacementManager;
+}
+#endif // !DACCESS_COMPILE
+#endif // FEATURE_ON_STACK_REPLACEMENT
+
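GetOnStackReplacementManager is the usual lock-free lazy-init pattern: allocate, publish with a compare-exchange, and delete the loser's copy when another thread wins the race. A minimal sketch of the same shape, with std::atomic standing in for FastInterlockCompareExchangePointer:

    #include <atomic>
    #include <cstdio>

    struct Manager { /* ... */ };

    static std::atomic<Manager*> g_manager{nullptr};

    Manager* GetOrCreateManager() {
        Manager* current = g_manager.load(std::memory_order_acquire);
        if (current == nullptr) {
            Manager* fresh = new Manager();
            // Publish only if the field is still null; otherwise another thread won.
            if (g_manager.compare_exchange_strong(current, fresh,
                                                  std::memory_order_acq_rel)) {
                current = fresh;
            } else {
                delete fresh;       // some thread swooped in and set the field
            }
        }
        return current;
    }

    int main() {
        printf("%p\n", (void*)GetOrCreateManager());
        return 0;
    }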
index 4938b95..ea3da4a 100644 (file)
@@ -23,6 +23,7 @@ class FuncPtrStubs;
 #include "callcounting.h"
 #include "methoddescbackpatchinfo.h"
 #include "crossloaderallocatorhash.h"
+#include "onstackreplacement.h"
 
 #define VPTRU_LoaderAllocator 0x3200
 
@@ -283,6 +284,10 @@ private:
     MethodDescBackpatchInfoTracker m_methodDescBackpatchInfoTracker;
 #endif
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    PTR_OnStackReplacementManager m_onStackReplacementManager;
+#endif
+
 #ifndef DACCESS_COMPILE
 
 public:
@@ -611,6 +616,12 @@ public:
         return &m_methodDescBackpatchInfoTracker;
     }
 #endif
+
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+public:
+    PTR_OnStackReplacementManager GetOnStackReplacementManager();
+#endif // FEATURE_ON_STACK_REPLACEMENT
+
 };  // class LoaderAllocator
 
 typedef VPTR(LoaderAllocator) PTR_LoaderAllocator;
index 7efecae..9a558d2 100644 (file)
@@ -2038,6 +2038,9 @@ public:
     virtual BOOL SetNativeCode(PCODE pCode, PCODE * ppAlternateCodeToUse);
     virtual COR_ILMETHOD* GetILHeader();
     virtual CORJIT_FLAGS GetJitCompilationFlags();
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+    virtual unsigned GetILOffset() const { return 0; }
+#endif
     BOOL ProfilerRejectedPrecompiledCode();
     BOOL ReadyToRunRejectedPrecompiledCode();
     void SetProfilerRejectedPrecompiledCode();
@@ -2100,6 +2103,7 @@ public:
         Optimized,
         QuickJitted,
         OptimizedTier1,
+        OptimizedTier1OSR,
 
         Count
     };
diff --git a/src/coreclr/src/vm/onstackreplacement.cpp b/src/coreclr/src/vm/onstackreplacement.cpp
new file mode 100644 (file)
index 0000000..4bb80ef
--- /dev/null
@@ -0,0 +1,84 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// ===========================================================================
+// File: onstackreplacement.cpp
+//
+// ===========================================================================
+
+#include "common.h"
+#include "onstackreplacement.h"
+
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+
+
+CrstStatic OnStackReplacementManager::s_lock;
+
+#if _DEBUG
+int OnStackReplacementManager::s_patchpointId = 0;
+#endif
+
+#ifndef DACCESS_COMPILE
+
+void OnStackReplacementManager::StaticInitialize()
+{
+    WRAPPER_NO_CONTRACT;
+    s_lock.Init(CrstJitPatchpoint, CrstFlags(CRST_UNSAFE_COOPGC));
+}
+
+OnStackReplacementManager::OnStackReplacementManager(LoaderAllocator * loaderAllocator) : m_allocator(loaderAllocator), m_jitPatchpointTable()
+{
+    CONTRACTL
+    {
+        GC_NOTRIGGER;
+        CAN_TAKE_LOCK;
+        MODE_ANY;
+    }
+    CONTRACTL_END;
+
+    LockOwner lock = {&s_lock, IsOwnerOfCrst};
+    m_jitPatchpointTable.Init(INITIAL_TABLE_SIZE, &lock, m_allocator->GetLowFrequencyHeap());
+}
+
+// Fetch or create patchpoint info for this patchpoint.
+PerPatchpointInfo* OnStackReplacementManager::GetPerPatchpointInfo(PCODE ip)
+{
+    CONTRACTL
+    {
+        GC_NOTRIGGER;
+        CAN_TAKE_LOCK;
+        MODE_COOPERATIVE;
+    }
+    CONTRACTL_END;
+
+    PTR_PCODE ppId = dac_cast<PTR_PCODE>(ip);
+    PTR_PerPatchpointInfo ppInfo = NULL;
+
+    BOOL hasData = m_jitPatchpointTable.GetValueSpeculative(ppId, (HashDatum*)&ppInfo);
+
+    if (!hasData)
+    {
+        CrstHolder lock(&s_lock);
+        hasData = m_jitPatchpointTable.GetValue(ppId, (HashDatum*)&ppInfo);
+
+        if (!hasData)
+        {
+            void * pMem = m_allocator->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(sizeof(PerPatchpointInfo)));
+            ppInfo = dac_cast<PTR_PerPatchpointInfo>(new (pMem) PerPatchpointInfo());
+            m_jitPatchpointTable.InsertValue(ppId, (HashDatum)ppInfo);
+
+#if _DEBUG
+            ppInfo->m_patchpointId = ++s_patchpointId;
+#endif
+
+        }
+    }
+
+    return ppInfo;
+}
+
+#endif // !DACCESS_COMPILE
+
+#endif // FEATURE_ON_STACK_REPLACEMENT
+
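GetPerPatchpointInfo probes the table speculatively and only takes the lock to re-check and insert on a miss, so the steady-state lookup is lock-free. The sketch below keeps the get-or-create shape but holds the lock for both paths, since std::unordered_map (unlike the EE hash table's GetValueSpeculative) does not support lock-free readers concurrent with writers:

    #include <cstdint>
    #include <cstdio>
    #include <mutex>
    #include <unordered_map>

    struct PerPatchpointInfo { long hitCount = 0; };

    static std::mutex g_lock;
    static std::unordered_map<uintptr_t, PerPatchpointInfo*> g_table;

    // Fetch or create the per-patchpoint state keyed by the helper's
    // return address (the patchpoint's identity).
    PerPatchpointInfo* GetPerPatchpointInfo(uintptr_t ip) {
        std::lock_guard<std::mutex> hold(g_lock);
        auto it = g_table.find(ip);
        if (it != g_table.end())
            return it->second;
        PerPatchpointInfo* info = new PerPatchpointInfo();
        g_table.emplace(ip, info);
        return info;
    }

    int main() {
        PerPatchpointInfo* a = GetPerPatchpointInfo(0x1234);
        PerPatchpointInfo* b = GetPerPatchpointInfo(0x1234);
        printf("%s\n", (a == b) ? "same entry" : "bug");
        return 0;
    }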
diff --git a/src/coreclr/src/vm/onstackreplacement.h b/src/coreclr/src/vm/onstackreplacement.h
new file mode 100644 (file)
index 0000000..8ef3188
--- /dev/null
@@ -0,0 +1,113 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// ===========================================================================
+// File: onstackreplacement.h
+//
+// ===========================================================================
+
+#ifndef ON_STACK_REPLACEMENT_H
+#define ON_STACK_REPLACEMENT_H
+
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+
+#include "daccess.h"
+#include "eehash.h"
+
+// PerPatchpointInfo is the runtime state tracked for each active patchpoint.
+//
+// A patchpoint becomes active when the JIT_Patchpoint helper is invoked
+// by jitted code.
+//
+struct PerPatchpointInfo
+{
+    PerPatchpointInfo() : 
+        m_osrMethodCode(0),
+        m_patchpointCount(0),
+        m_flags(0)
+#if _DEBUG
+        , m_patchpointId(0)
+#endif
+    {
+    }
+
+    // Flag bits
+    enum 
+    {
+        patchpoint_triggered = 0x1,
+        patchpoint_invalid = 0x2
+    };
+
+    // The OSR method entry point for this patchpoint.
+    // NULL if no method has yet been jitted, or jitting failed.
+    PCODE m_osrMethodCode;
+    // Number of times jitted code has called the helper at this patchpoint.
+    LONG m_patchpointCount;
+    // Status of this patchpoint
+    LONG m_flags;
+
+#if _DEBUG
+    int m_patchpointId;
+#endif
+};
+
+typedef DPTR(PerPatchpointInfo) PTR_PerPatchpointInfo;
+typedef EEPtrHashTable JitPatchpointTable;
+
+// OnStackReplacementManager keeps track of the mapping from patchpoint ids to
+// per-patchpoint info.
+//
+// Patchpoint identity is currently the return address of the helper call
+//  in the jitted code.
+//
+class OnStackReplacementManager
+{
+#if DACCESS_COMPILE
+public:
+    OnStackReplacementManager(LoaderAllocator *) {};
+#else
+public:
+    static void StaticInitialize();
+
+public:
+    OnStackReplacementManager(LoaderAllocator * loaderAllocator);
+
+public:
+    PerPatchpointInfo* GetPerPatchpointInfo(PCODE ip);
+#endif // DACCESS_COMPILE
+
+private:
+
+    enum
+    {
+        INITIAL_TABLE_SIZE = 10
+    };
+
+    static CrstStatic s_lock;
+
+#if _DEBUG
+    static int s_patchpointId;
+#endif
+
+private:
+
+    PTR_LoaderAllocator m_allocator;
+    JitPatchpointTable m_jitPatchpointTable;
+};
+
+#else // FEATURE_ON_STACK_REPLACEMENT
+
+class OnStackReplacementManager
+{
+public:
+    static void StaticInitialize() {}
+public:
+
+    OnStackReplacementManager(LoaderAllocator *) {}
+};
+
+#endif // FEATURE_ON_STACK_REPLACEMENT
+
+typedef DPTR(OnStackReplacementManager) PTR_OnStackReplacementManager;
+
+#endif // ON_STACK_REPLACEMENT_H
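m_flags is the small state machine JIT_Patchpoint drives: a patchpoint goes from untriggered to triggered at most once (the compare-exchange guarantees a single thread jits the OSR method) and is marked invalid if that jit fails. A sketch of those transitions with std::atomic, for a single hypothetical patchpoint:

    #include <atomic>
    #include <cstdio>

    enum : long { patchpoint_triggered = 0x1, patchpoint_invalid = 0x2 };

    static std::atomic<long> g_flags{0};

    // Returns true for the one caller that wins the right to create the
    // OSR method; everyone else keeps running Tier0 code.
    bool TryTrigger() {
        long oldFlags = g_flags.load();
        if (oldFlags & patchpoint_triggered)
            return false;                       // someone already triggered
        long newFlags = oldFlags | patchpoint_triggered;
        return g_flags.compare_exchange_strong(oldFlags, newFlags);
    }

    void MarkInvalid() { g_flags.fetch_or(patchpoint_invalid); }

    int main() {
        printf("first: %d, second: %d\n", TryTrigger(), TryTrigger());
        // Prints: first: 1, second: 0
        return 0;
    }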
index 6d7eb7e..e2c9501 100644 (file)
@@ -997,10 +997,13 @@ PCODE MethodDesc::JitCompileCodeLocked(PrepareCodeConfig* pConfig, JitListLockEn
 
     // The profiler may have changed the code on the callback.  Need to
     // pick up the new code.
+    //
+    // (don't want this for OSR, need to see how it works)
     COR_ILMETHOD_DECODER ilDecoderTemp;
     COR_ILMETHOD_DECODER *pilHeader = GetAndVerifyILHeader(pConfig, &ilDecoderTemp);
     *pFlags = pConfig->GetJitCompilationFlags();
     PCODE pOtherCode = NULL;
+
     EX_TRY
     {
 #ifndef CROSSGEN_COMPILE
@@ -1287,6 +1290,9 @@ PrepareCodeConfig::JitOptimizationTier PrepareCodeConfig::GetJitOptimizationTier
                 case NativeCodeVersion::OptimizationTier1:
                     return JitOptimizationTier::OptimizedTier1;
 
+                case NativeCodeVersion::OptimizationTier1OSR:
+                    return JitOptimizationTier::OptimizedTier1OSR;
+
                 case NativeCodeVersion::OptimizationTierOptimized:
                     return JitOptimizationTier::Optimized;
 
@@ -1311,6 +1317,7 @@ const char *PrepareCodeConfig::GetJitOptimizationTierStr(PrepareCodeConfig *conf
         case JitOptimizationTier::Optimized: return "Optimized";
         case JitOptimizationTier::QuickJitted: return "QuickJitted";
         case JitOptimizationTier::OptimizedTier1: return "OptimizedTier1";
+        case JitOptimizationTier::OptimizedTier1OSR: return "OptimizedTier1OSR";
 
         default:
             UNREACHABLE();
index ffd986a..e2ff313 100644 (file)
@@ -933,6 +933,12 @@ CORJIT_FLAGS TieredCompilationManager::GetJitFlags(NativeCodeVersion nativeCodeV
             nativeCodeVersion.SetOptimizationTier(NativeCodeVersion::OptimizationTierOptimized);
             goto Optimized;
 
+#ifdef FEATURE_ON_STACK_REPLACEMENT
+        case NativeCodeVersion::OptimizationTier1OSR:
+            flags.Set(CORJIT_FLAGS::CORJIT_FLAG_OSR);
+            // fall through
+#endif
+
         case NativeCodeVersion::OptimizationTier1:
             flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER1);
             // fall through
index f844361..816661a 100644 (file)
@@ -509,7 +509,7 @@ void ZapInfo::CompileMethod()
                                                      &m_currentMethodInfo,
                                                      CORJIT_FLAGS::CORJIT_FLAG_CALL_GETJITFLAGS,
                                                      &pCode,
-                                                     &cCode );
+                                                     &cCode);
         if (FAILED(res))
         {
             // We will fall back to the "main" JIT on failure.
@@ -2988,6 +2988,18 @@ void ZapInfo::setVars(CORINFO_METHOD_HANDLE ftn,
     return;
 }
 
+void ZapInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo)
+{
+    // No patchpoint info when prejitting
+    UNREACHABLE();
+}
+
+PatchpointInfo* ZapInfo::getOSRInfo(unsigned * ilOffset)
+{
+    // No patchpoint info when prejitting
+    UNREACHABLE();
+}
+
 void * ZapInfo::allocateArray(size_t cBytes)
 {
     return new BYTE[cBytes];
index 22fb718..7f8b837 100644 (file)
@@ -701,6 +701,10 @@ public:
 
     void getGSCookie(GSCookie * pCookieVal,
                      GSCookie** ppCookieVal);
+
+    void setPatchpointInfo(PatchpointInfo * patchpointInfo);
+    PatchpointInfo * getOSRInfo(unsigned * ilOffset);
+
     // ICorErrorInfo
 
     HRESULT GetErrorHRESULT(struct _EXCEPTION_POINTERS *pExceptionPointers);
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/addressexposedlocal.cs b/src/coreclr/tests/src/JIT/opt/OSR/addressexposedlocal.cs
new file mode 100644 (file)
index 0000000..897c56b
--- /dev/null
@@ -0,0 +1,37 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// OSR method has an address-exposed local
+
+class AddressExposedLocal
+{
+    // [MethodImpl(MethodImplOptions.NoInlining)]
+    public static unsafe int I(ref int p) => p;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static unsafe void J(ref int p)  {}
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static unsafe int F(int from, int to)
+    {
+        int result = 0;
+        J(ref result);
+        for (int i = from; i < to; i++)
+        {
+            result = I(ref result) + i;
+        }
+        return result;
+    }
+
+    public static int Main()
+    {
+        Console.WriteLine($"starting sum");
+        int result = F(0, 1_000_000);
+        Console.WriteLine($"done, sum is {result}");
+        return (result == 1783293664) ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/addressexposedlocal.csproj b/src/coreclr/tests/src/JIT/opt/OSR/addressexposedlocal.csproj
new file mode 100644 (file)
index 0000000..881e48e
--- /dev/null
@@ -0,0 +1,25 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/doublestackalloc.cs b/src/coreclr/tests/src/JIT/opt/OSR/doublestackalloc.cs
new file mode 100644 (file)
index 0000000..7265d18
--- /dev/null
@@ -0,0 +1,32 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// OSR test case with two stackallocs.
+//
+// The original method frame is variable-sized when we reach the patchpoint;
+// the OSR method frame is also variable-sized.
+
+using System;
+
+class DoubleStackAlloc
+{
+    static int outerSize = 1000;
+    static int innerSize = 1;
+    public static unsafe int Main()
+    {
+        long* result = stackalloc long[outerSize];
+        *result = 0;
+        for (int i = 0; i < 1_000_000; i++)
+        {
+            if ((i % 8192) == 0)
+            {
+                long *nresult = stackalloc long[innerSize];
+                *nresult = *result;
+                result = nresult;
+            }
+            *result += i;
+        }
+        return *result == 499999500000 ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/doublestackalloc.csproj b/src/coreclr/tests/src/JIT/opt/OSR/doublestackalloc.csproj
new file mode 100644 (file)
index 0000000..881e48e
--- /dev/null
@@ -0,0 +1,25 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/example.cs b/src/coreclr/tests/src/JIT/opt/OSR/example.cs
new file mode 100644 (file)
index 0000000..1f360ee
--- /dev/null
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+// Example from the OSR doc
+
+class OSR_Example
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static double F(int from, int to)
+    {
+        double result = 0;
+        for (int i = from; i < to; i++)
+        {
+            result += (double)i;
+        }
+        return result;
+    }
+
+    public static int Main(string[] args)
+    {
+        int final = args.Length <= 0 ? 1_000_000 : Int32.Parse(args[0]);
+        long frequency = Stopwatch.Frequency;
+        long nanosecPerTick = (1000L*1000L*1000L) / frequency;
+        // Console.WriteLine($"computing sum over {final} ints");
+        // Get some of the initial jit cost out of the way
+        Stopwatch s = new Stopwatch();
+        s.Start();
+        s.Stop();
+
+        s = new Stopwatch();
+        s.Start();
+        double result = F(0, final);
+        s.Stop();
+        double elapsedTime = 1000.0 * (double) s.ElapsedTicks / (double) frequency;
+        Console.WriteLine($"{final} iterations took {elapsedTime:F2}ms");
+        return result == 499999500000 ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/example.csproj b/src/coreclr/tests/src/JIT/opt/OSR/example.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/genericmethodpatchpoint.cs b/src/coreclr/tests/src/JIT/opt/OSR/genericmethodpatchpoint.cs
new file mode 100644 (file)
index 0000000..a3f9dea
--- /dev/null
@@ -0,0 +1,31 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// Patchpoint in generic method
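+// (F<string> likely runs as shared generic code, so the OSR transition must also carry the generic context)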
+
+class GenericMethodPatchpoint
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static int F<T>(T[] data, int from, int to) where T : class
+    {
+        int result = 0;
+        for (int i = from; i < to; i++)
+        {
+            if (data[i] == null) result++;
+        }
+        return result;
+    }
+
+    public static int Main()
+    {
+        string[] a = new string[1000];
+        a[111] = "hello, world";
+        int result = F(a, 0, a.Length);
+        Console.WriteLine($"done, result is {result}");
+        return result == 999 ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/genericmethodpatchpoint.csproj b/src/coreclr/tests/src/JIT/opt/OSR/genericmethodpatchpoint.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/innerloop.cs b/src/coreclr/tests/src/JIT/opt/OSR/innerloop.cs
new file mode 100644 (file)
index 0000000..7dadb6d
--- /dev/null
@@ -0,0 +1,26 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+class InnerLoop
+{
+    public static int Main()
+    {
+        int[] a = new int[1000];
+        a[555] = 1;
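+        // a[555] is visited once for each outer i <= 555, so result is 556 and Main returns 556 - 456 = 100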
+
+        int result = 0;
+
+        for (int i = 0; i < 1000; i++)
+        {
+            for (int j = i; j < 1000; j++)
+            {
+                result += a[j];
+            }
+        }
+
+        return result - 456;
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/innerloop.csproj b/src/coreclr/tests/src/JIT/opt/OSR/innerloop.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/integersumloop.cs b/src/coreclr/tests/src/JIT/opt/OSR/integersumloop.cs
new file mode 100644 (file)
index 0000000..3742958
--- /dev/null
@@ -0,0 +1,36 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+class IntegerSumLoop
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static int F(int from, int to)
+    {
+        int result = 0;
+        for (int i = from; i < to; i++)
+        {
+            result += i;
+        }
+        return result;
+    }
+
+    public static int Main(string[] args)
+    {
+        int final = 1_000_000;
+        long frequency = Stopwatch.Frequency;
+        long nanosecPerTick = (1000L*1000L*1000L) / frequency;
+        F(0, 10);
+        Stopwatch s = new Stopwatch();
+        s.Start();
+        int result = F(0, final);
+        s.Stop();
+        double elapsedTime = 1000.0 * (double) s.ElapsedTicks / (double) frequency;
+        Console.WriteLine($"{final} iterations took {elapsedTime:F2}ms");
+        return result == 1783293664 ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/integersumloop.csproj b/src/coreclr/tests/src/JIT/opt/OSR/integersumloop.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/livelocaladdress.cs b/src/coreclr/tests/src/JIT/opt/OSR/livelocaladdress.cs
new file mode 100644 (file)
index 0000000..652475c
--- /dev/null
@@ -0,0 +1,26 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// An example where OSR must preserve original method addresses for locals
+
+class LiveLocalAddress
+{
+    public static unsafe int Main()        
+    {
+        long result = 0;
+        int a = 0;
+        int *c = &a;
+        int b = 0;
+        long distance = c - &b;
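+        // 'distance' captures the frame layout before any OSR transition; the loop below
+        // recomputes &a - &b afterwards, and the two must agree for the result to divide evenly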
+        
+        for (int i = 0; i < 100_000; i++)
+        {
+            result += &a - &b;
+        }
+
+        return (int)(result / (1000 * distance));
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/livelocaladdress.csproj b/src/coreclr/tests/src/JIT/opt/OSR/livelocaladdress.csproj
new file mode 100644 (file)
index 0000000..53a9cc3
--- /dev/null
@@ -0,0 +1,25 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/livelocalstackalloc.cs b/src/coreclr/tests/src/JIT/opt/OSR/livelocalstackalloc.cs
new file mode 100644 (file)
index 0000000..b6f1ec1
--- /dev/null
@@ -0,0 +1,30 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// Example where local address is live in a stackalloc region
+
+class LiveLocalStackalloc
+{
+    static int n = 100;
+    static int j = 30;
+
+    public static unsafe int Main()
+    {
+        int nn = n;
+        int** ptrs = stackalloc int*[nn];
+        int a = 100;
+        *(ptrs + j) = &a;
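+        // &a is now stored inside the stackalloc'd region, so it must remain valid if OSR replaces the frame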
+        int result = 0;
+
+        for (int i = 0; i < nn; i++)
+        {
+            int* p = *(ptrs + i);
+            if (p != null)  result += *p;
+        }
+
+        return result;
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/livelocalstackalloc.csproj b/src/coreclr/tests/src/JIT/opt/OSR/livelocalstackalloc.csproj
new file mode 100644 (file)
index 0000000..881e48e
--- /dev/null
@@ -0,0 +1,25 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainloop.cs b/src/coreclr/tests/src/JIT/opt/OSR/mainloop.cs
new file mode 100644 (file)
index 0000000..e63bdca
--- /dev/null
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// Simple OSR test case -- long running loop in Main
+
+class MainLoop
+{
+   public static int Main()
+   {
+       long result = 0;
+       for (int i = 0; i < 1_000_000; i++)
+       {
+           result += (long)i;
+       }
+       return result == 499999500000 ? 100 : -1;
+   }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainloop.csproj b/src/coreclr/tests/src/JIT/opt/OSR/mainloop.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainloop2.cs b/src/coreclr/tests/src/JIT/opt/OSR/mainloop2.cs
new file mode 100644 (file)
index 0000000..2654ea3
--- /dev/null
@@ -0,0 +1,24 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// Simple OSR test case -- nested loop in Main
+
+class MainNestedLoop
+{
+   public static int Main()
+   {
+       long result = 0;
+       for (int i = 0; i < 1_000; i++)
+       {
+           for (int j = 0; j < 1_000; j++)
+           {
+               result += (long)(i * 1_000 + j);
+           }
+       }
+
+       return result == 499999500000 ? 100 : -1;
+   }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainloop2.csproj b/src/coreclr/tests/src/JIT/opt/OSR/mainloop2.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry.cs b/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry.cs
new file mode 100644 (file)
index 0000000..0fb0b69
--- /dev/null
@@ -0,0 +1,28 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// OSR entry in a try region
+
+class MainLoopTry
+{
+   public static int Main()
+   {
+       Console.WriteLine($"starting sum");
+       int result = 0;
+       try 
+       {
+           for (int i = 0; i < 1_000_000; i++)
+           {
+               result += i;
+           }
+       }
+       finally
+       {
+           Console.WriteLine($"done, sum is {result}");
+       }
+       return result == 1783293664 ? 100 : -1;
+   }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry.csproj b/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry2.cs b/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry2.cs
new file mode 100644 (file)
index 0000000..2eef5c9
--- /dev/null
@@ -0,0 +1,39 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// Multiple patchpoints each in a try
+
+class MainLoopTry2
+{
+   public static int Main()
+   {
+       Console.WriteLine($"starting sum");
+       int result = 0;
+       try 
+       {
+           for (int i = 0; i < 1_000; i++)
+           {
+               int temp = result;
+               try 
+               {
+                   for (int j = 0; j < 1_000; j++)
+                   {
+                       temp += 1000 * i + j;
+                   }
+               }
+               finally
+               {
+                   result = temp;
+               }
+           }
+       }
+       finally
+       {
+           Console.WriteLine($"done, sum is {result}");
+       }
+       return result == 1783293664 ? 100 : -1;
+   }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry2.csproj b/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry2.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry3.cs b/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry3.cs
new file mode 100644 (file)
index 0000000..0c14692
--- /dev/null
@@ -0,0 +1,45 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Tricky case for OSR with patchpoint in try region.
+//
+// If we need to OSR at inner loop head, then both try
+// regions need trimming, but they can't trim down to the
+// same block, and the branch to the logical trimmed
+// entry point is not from the OSR entry.
+
+using System;
+
+class MainLoopCloselyNestedTry
+{
+   public static int Main()
+   {
+       Console.WriteLine($"starting sum");
+       int result = 0;
+       try 
+       {
+           try 
+           {
+               int temp = 0;
+               for (int i = 0; i < 1_000; i++)
+               {
+                   for (int j = 0; j < 1_000; j++)
+                   {
+                       temp += 1000 * i + j;
+                   }
+               }
+               result = temp;
+           }
+           catch (Exception)
+           {
+               
+           }
+       }
+       finally
+       {
+           Console.WriteLine($"done, sum is {result}");
+       }
+       return result == 1783293664 ? 100 : -1;
+   }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry3.csproj b/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry3.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry4.cs b/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry4.cs
new file mode 100644 (file)
index 0000000..8b7afbf
--- /dev/null
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Tricky case for OSR with patchpoint in try region.
+//
+// If we need to OSR at inner loop head, then both try
+// regions need trimming.
+//
+// Mutually protective try example.
+
+using System;
+
+class E : Exception {}
+
+class MainLoopMutuallyProtectiveTry
+{
+   public static int Main()
+   {
+       Console.WriteLine($"starting sum");
+       int result = 0;
+       try 
+       {
+           int temp = 0;
+           for (int i = 0; i < 1_000; i++)
+           {
+               for (int j = 0; j < 1_000; j++)
+               {
+                   temp += 1000 * i + j;
+               }
+           }
+           result = temp;
+       }
+       catch (E)
+       {
+       }
+       catch (Exception)
+       {
+       }
+       Console.WriteLine($"done, sum is {result}");
+       return result == 1783293664 ? 100 : -1;
+   }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry4.csproj b/src/coreclr/tests/src/JIT/opt/OSR/mainlooptry4.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/memoryargument.cs b/src/coreclr/tests/src/JIT/opt/OSR/memoryargument.cs
new file mode 100644 (file)
index 0000000..7b8045c
--- /dev/null
@@ -0,0 +1,29 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// OSR method must access memory argument
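+// (with six int arguments, 'from' and 'to' arrive on the stack under the win-x64 convention,
+// so the OSR method has to locate them in the original frame)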
+
+class MemoryArgument
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static int F(int a, int b, int c, int d, int from, int to)
+    {
+        int result = 0;
+        for (int i = from; i < to; i++)
+        {
+            result += i;
+        }
+        return result;
+    }
+
+    public static int Main(string[] args)
+    {
+        int final = 1_000_000;
+        int result = F(0, 0, 0, 0, 0, final);
+        return result == 1783293664 ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/memoryargument.csproj b/src/coreclr/tests/src/JIT/opt/OSR/memoryargument.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/nesteddoloops.cs b/src/coreclr/tests/src/JIT/opt/OSR/nesteddoloops.cs
new file mode 100644 (file)
index 0000000..160aeb5
--- /dev/null
@@ -0,0 +1,32 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// Nested do loops
+
+class NestedDoLoops
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static int F(int inner, int outer, int innerTo, int outerTo)
+    {
+        do {
+            do {} while (inner++ < innerTo);
+            inner = 0;
+        } 
+        while (outer++ < outerTo);
+
+        return outer;
+    }
+
+    public static int Main()
+    {
+        Console.WriteLine($"starting sum");
+        int result1 = F(0, 10, 0, 100_000);
+        int result2 = F(0, 100_000, 0, 10);
+        Console.WriteLine($"done, sum is {result1} and {result2}");
+        return (result1 == result2) && (result1 == 100_001) ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/nesteddoloops.csproj b/src/coreclr/tests/src/JIT/opt/OSR/nesteddoloops.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/originalstackalloc.cs b/src/coreclr/tests/src/JIT/opt/OSR/originalstackalloc.cs
new file mode 100644 (file)
index 0000000..cdb6cbf
--- /dev/null
@@ -0,0 +1,31 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// Original method has a stackalloc; the OSR method does not
+
+class OriginalStackalloc
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static unsafe int F(int from, int to, int s)
+    {
+        int* result = stackalloc int[s];
+        *result = 0;
+        for (int i = from; i < to; i++)
+        {
+            *result += i;
+        }
+        return *result;
+    }
+
+    public static int Main()
+    {
+        Console.WriteLine($"starting sum");
+        int result = F(0, 1_000_000, 1);
+        Console.WriteLine($"done, sum is {result}");
+        return (result == 1783293664) ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/originalstackalloc.csproj b/src/coreclr/tests/src/JIT/opt/OSR/originalstackalloc.csproj
new file mode 100644 (file)
index 0000000..881e48e
--- /dev/null
@@ -0,0 +1,25 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/osrcontainstry.cs b/src/coreclr/tests/src/JIT/opt/OSR/osrcontainstry.cs
new file mode 100644 (file)
index 0000000..ce89aef
--- /dev/null
@@ -0,0 +1,39 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// OSR method contains try
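+// (the OSR method presumably must rebuild the try/catch region even though it enters at the loop head)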
+
+class OSRContainsTry
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static unsafe int I(ref int p) => p;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static unsafe int F(int from, int to)
+    {
+        int result = 0;
+        for (int i = from; i < to; i++)
+        {
+            try 
+            {
+                result = I(ref result) + i;
+            }
+            catch (Exception)
+            {
+            }
+        }
+        return result;
+    }
+
+    public static int Main()
+    {
+        Console.WriteLine($"starting sum");
+        int result = F(0, 1_000_000);
+        Console.WriteLine($"done, sum is {result}");
+        return result == 1783293664 ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/osrcontainstry.csproj b/src/coreclr/tests/src/JIT/opt/OSR/osrcontainstry.csproj
new file mode 100644 (file)
index 0000000..881e48e
--- /dev/null
@@ -0,0 +1,25 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/promoted.cs b/src/coreclr/tests/src/JIT/opt/OSR/promoted.cs
new file mode 100644 (file)
index 0000000..0fde778
--- /dev/null
@@ -0,0 +1,53 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+// OSR complications with promoted structs
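+// (y is likely promoted to per-field locals in optimized code; the OSR method
+// has to reconstitute those fields from the tier0 frame)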
+
+struct Y
+{
+    public Y(int _a, int _b)
+    {
+        a = _a;
+        b = _b;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static void Init(int _a, int _b)
+    {
+        s_y = new Y(_a, _b);
+    }
+
+    public static Y s_y;
+    public int a;
+    public int b;
+}
+
+class OSRMethodStructPromotion
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static int F(int from, int to)
+    {
+        Y.Init(from, to);
+        Y y = Y.s_y;
+        int result = 0;
+        for (int i = y.a; i < y.b; i++)
+        {
+            result += i;
+        }
+        return result;
+    }
+
+    public static int Main(string[] args)
+    {
+        int final = 1_000_000;
+        F(0, 10);
+        int result = F(0, final);
+        int expected = 1783293664;
+        return result == expected ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/promoted.csproj b/src/coreclr/tests/src/JIT/opt/OSR/promoted.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/tailrecurse.cs b/src/coreclr/tests/src/JIT/opt/OSR/tailrecurse.cs
new file mode 100644 (file)
index 0000000..61a8294
--- /dev/null
@@ -0,0 +1,35 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// OSR and tail recursion
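+// (the jit may turn the self-call below into a loop or a fast tail call; either way
+// each 100k-iteration segment runs long enough to trigger OSR)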
+
+class OSRTailRecursion
+{
+    public static int F(int from, int to, int n, int a)
+    {
+        int result = a;
+        for (int i = from; i < to; i++)
+        {
+            result += i;
+        }
+
+        if (n <= 0) return result;
+
+        int delta = to - from;
+
+        return F(to, to + delta, n - 1, result);
+    }
+
+    public static int Main()
+    {
+        Console.WriteLine($"starting sum");
+        int result = F(0, 100_000, 9, 0);
+        bool ok = (result == 1783293664);
+        string msg = ok ? "Pass" : "Fail";
+        Console.WriteLine($"done, sum is {result}, {msg}");
+        return ok ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/tailrecurse.csproj b/src/coreclr/tests/src/JIT/opt/OSR/tailrecurse.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry.cs b/src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry.cs
new file mode 100644 (file)
index 0000000..c6481a6
--- /dev/null
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// Tail recursion, OSR entry in try region
+
+class TailRecursionWithOsrEntryInTry
+{
+    public static int F(int from, int to, int n, int a)
+    {
+        int result = a;
+
+        try 
+        {
+            for (int i = from; i < to; i++)
+            {
+                result += i;
+            }
+        }
+        catch(Exception)
+        {
+        }
+
+        if (n <= 0) return result;
+
+        int delta = to - from;
+
+        return F(to, to + delta, n - 1, result);
+    }
+
+    public static int Main()
+    {
+        Console.WriteLine($"starting sum");
+        int result = F(0, 100_000, 9, 0);
+        bool ok = (result == 1783293664);
+        string msg = ok ? "Pass" : "Fail";
+        Console.WriteLine($"done, sum is {result}, {msg}");
+        return ok ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry.csproj b/src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry2.cs b/src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry2.cs
new file mode 100644 (file)
index 0000000..dfe13ef
--- /dev/null
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// Tail recursion candidate with OSR entry in a try region
+
+class TailRecursionCandidateOSREntryInTry
+{
+    public unsafe static int F(int from, int to, int n, int result, int *x)
+    {
+        try 
+        {
+            for (int i = from; i < to; i++)
+            {
+                result += i;
+            }
+        }
+        catch(Exception)
+        {
+        }
+
+        if (n <= 0) return result;
+
+        int delta = to - from;
+
+        // Tail recursive site, but can't tail call
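+        // (the callee receives &result, a pointer into this frame, which blocks the tail call)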
+        return F(to, to + delta, n-1, result, &result);
+    }
+
+    public static unsafe int Main()
+    {
+        int x = 0;
+        int result = F(0, 100_000, 9, 0, &x);
+        bool ok = (result == 1783293664);
+        string msg = ok ? "Pass" : "Fail";
+        Console.WriteLine($"done, sum is {result}, {msg}");
+        return ok ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry2.csproj b/src/coreclr/tests/src/JIT/opt/OSR/tailrecursetry2.csproj
new file mode 100644 (file)
index 0000000..881e48e
--- /dev/null
@@ -0,0 +1,25 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods.cs b/src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods.cs
new file mode 100644 (file)
index 0000000..8128e00
--- /dev/null
@@ -0,0 +1,45 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// Method with two loops, each of which can create its own OSR method
+
+class TwoOSRMethods
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static void I(ref int p, int i) => p = p + i;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static int F(int from, int to, bool b)
+    {
+        int result = 0;
+
+        if (b)
+        {
+            for (int i = from; i < to; i++)
+            {
+                I(ref result, i);
+            }
+        }
+        else
+        {
+            for (int i = from; i < to; i++)
+            {
+                result += i;
+            }
+
+        }
+        return result;
+    }
+
+    public static int Main(string[] args)
+    {
+        int final = 1_000_000;
+        int result1 = F(0, final, true);
+        int result2 = F(0, final, false);
+        return (result1 == result2) && (result1 == 1783293664) ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods.csproj b/src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods1.cs b/src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods1.cs
new file mode 100644 (file)
index 0000000..42eb9dc
--- /dev/null
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// Two OSR methods from one original method
+
+class X
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public static int F(int from, int to, bool sumUp)
+    {
+        int result = 0;
+
+        if (sumUp)
+        {
+            for (int i = from; i < to; i++)
+            {
+                result += i;
+            }
+        }
+        else
+        {
+            for (int i = to; i > from; i--)
+            {
+                result += (i-1);
+            }
+
+        }
+        return result;
+    }
+
+    public static int Main(string[] args)
+    {
+        int final = 1_000_000;
+        int result1 = F(0, final, true);
+        int result2 = F(0, final, false);
+        return (result1 == result2) && (result1 == 1783293664) ? 100 : -1;
+    }  
+}
diff --git a/src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods1.csproj b/src/coreclr/tests/src/JIT/opt/OSR/twoosrmethods1.csproj
new file mode 100644 (file)
index 0000000..9620f75
--- /dev/null
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_TieredCompilation=1
+set COMPlus_TC_QuickJitForLoops=1
+set COMPlus_TC_OnStackReplacement=1
+]]></CLRTestBatchPreCommands>
+    <BashCLRTestPreCommands><![CDATA[
+$(BashCLRTestPreCommands)
+export COMPlus_TieredCompilation=1
+export COMPlus_TC_QuickJitForLoops=1
+export COMPlus_TC_OnStackReplacement=1
+]]></BashCLRTestPreCommands>
+  </PropertyGroup>
+</Project>