Implement genProfilingEnterCallback genProfilingLeaveCallback on Arm64 (dotnet/corecl...
author Egor Chesakov <Egor.Chesakov@microsoft.com>
Fri, 20 Sep 2019 03:35:34 +0000 (20:35 -0700)
committer GitHub <noreply@github.com>
Fri, 20 Sep 2019 03:35:34 +0000 (20:35 -0700)
* Split genProfilingEnterCallback and genProfilingLeaveCallback into architecture specific versions

* Remove redundant genStackLevel save/restore logic on Arm, Arm64, Amd64

* Implement JIT_ProfilerEnterLeaveTailcallStub in assembly

* Define RBM_PROFILER_{ENTER,LEAVE,TAILCALL}_TRASH for TARGET_ARM64

* Define REG_PROFILER_{ENTER,LEAVE}_ARG_FUNC_ID and RBM_PROFILER_{ENTER,LEAVE}_ARG_CALLER_SP

* Simplify r0Trashed logic in src/jit/codegenarm.cpp

* Remove wrong comment in src/jit/codegenarm.cpp

* On Arm genPrologPadForReJit does nothing so remove it in src/jit/codegenarm.cpp

* Implement LinearScan::BuildNode for GT_PROF_HOOK and GT_RETURN in src/jit/lsraarm64.cpp

* Shouldn't a call to CORINFO_HELP_PROF_FCN_TAILCALL be marked as a No-GC?

* Implement genProfilingEnterCallback genProfilingLeaveCallback in src/jit/codegenarm64.cpp

* Implement NYI profiler methods in src/vm/arm64/profiler.cpp

* Implement ProfileEnterNaked ProfileLeaveNaked ProfileTailcallNaked in src/vm/arm64/asmhelpers.S

* Implement profiler helpers on win-arm64

* Remove logic for !FINAL_FRAME_LAYOUT in codegenarm64.cpp

* Remove unused macro in src\jit\target.h

* genProfilingLeaveCallback ignores helper on arm in src\jit\codegenarm.cpp

* Refactor genProfilingLeaveCallback in src\jit\codegenarm.cpp

Commit migrated from https://github.com/dotnet/coreclr/commit/d88bc184d054fe8e4915964330ca65378d59ef27

16 files changed:
src/coreclr/src/jit/codegen.h
src/coreclr/src/jit/codegenarm.cpp
src/coreclr/src/jit/codegenarm64.cpp
src/coreclr/src/jit/codegencommon.cpp
src/coreclr/src/jit/codegenxarch.cpp
src/coreclr/src/jit/emit.cpp
src/coreclr/src/jit/lsraarm64.cpp
src/coreclr/src/jit/target.h
src/coreclr/src/vm/CMakeLists.txt
src/coreclr/src/vm/arm/asmhelpers.S
src/coreclr/src/vm/arm/asmhelpers.asm
src/coreclr/src/vm/arm64/asmhelpers.S
src/coreclr/src/vm/arm64/asmhelpers.asm
src/coreclr/src/vm/arm64/profiler.cpp [new file with mode: 0644]
src/coreclr/src/vm/arm64/stubs.cpp
src/coreclr/src/vm/jithelpers.cpp

index b298b36..7a7bd82 100644 (file)
@@ -414,7 +414,7 @@ protected:
 
 #ifdef PROFILING_SUPPORTED
     void genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed);
-    void genProfilingLeaveCallback(unsigned helper = CORINFO_HELP_PROF_FCN_LEAVE);
+    void genProfilingLeaveCallback(unsigned helper);
 #endif // PROFILING_SUPPORTED
 
     void genPrologPadForReJit();
index 32bb1d1..ffd797f 100644 (file)
@@ -1623,4 +1623,153 @@ void CodeGen::genCodeForMulLong(GenTreeMultiRegOp* node)
     genProduceReg(node);
 }
 
+#ifdef PROFILING_SUPPORTED
+
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback.
+//
+// Arguments:
+//     initReg        - register to use as scratch register
+//     pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+//                      not zero after this call.
+//
+// Return Value:
+//     None
+//
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+    assert(compiler->compGeneratingProlog); // this callback is only emitted while generating the prolog
+
+    // Give the profiler a chance to back out of hooking this method; nothing is emitted in that case.
+    if (!compiler->compIsProfilerHookNeeded())
+    {
+        return;
+    }
+
+    // On Arm the register arguments are pre-spilled to the stack, which frees r0-r3.
+    // The Enter callout needs two registers, and one of them has to be r0 to pass the profiler handle.
+    // The call target register could be any free register.
+    regNumber argReg     = REG_PROFILER_ENTER_ARG;
+    regMaskTP argRegMask = genRegMask(argReg);
+    assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0); // argReg must be one of the pre-spilled registers
+
+    if (compiler->compProfilerMethHndIndirected) // the handle has to be loaded through an indirection
+    {
+        getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
+        regSet.verifyRegUsed(argReg);
+    }
+    else // the handle value is set into argReg as an immediate
+    {
+        instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
+    }
+
+    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
+                      0,           // argSize. Reported as 0 (we lie about it); the handle is passed in argReg
+                      EA_UNKNOWN); // retSize
+
+    if (initReg == argReg) // argReg was overwritten above, so initReg can no longer be assumed to be zero
+    {
+        *pInitRegZeroed = false;
+    }
+}
+
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+//
+// Arguments:
+//     helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+//     None
+//
+void CodeGen::genProfilingLeaveCallback(unsigned helper)
+{
+    assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
+    // Only hook if profiler says it's okay.
+    if (!compiler->compIsProfilerHookNeeded())
+    {
+        return;
+    }
+
+    compiler->info.compProfilerCallback = true;
+
+    //
+    // Push the profilerHandle
+    //
+
+    // Contract between JIT and Profiler Leave callout on arm:
+    // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
+    // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
+    // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
+    // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
+    //
+    // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
+    // callback.
+    bool     r0InUse;
+    emitAttr attr = EA_UNKNOWN;
+
+    if (compiler->info.compRetType == TYP_VOID)
+    {
+        r0InUse = false;
+    }
+    else if (varTypeIsFloating(compiler->info.compRetType) ||
+             compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))
+    {
+        r0InUse = !compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP;
+    }
+    else
+    {
+        r0InUse = true;
+    }
+
+    if (r0InUse)
+    {
+        if (varTypeIsGC(compiler->info.compRetType))
+        {
+            attr = emitActualTypeSize(compiler->info.compRetType);
+        }
+        else
+        {
+            attr = EA_PTRSIZE;
+        }
+    }
+
+    if (r0InUse)
+    {
+        // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
+        // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
+        getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_R0);
+        genTransferRegGCState(REG_PROFILER_RET_SCRATCH, REG_R0);
+        regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
+    }
+
+    if (compiler->compProfilerMethHndIndirected)
+    {
+        getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+    }
+    else
+    {
+        instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+    }
+
+    gcInfo.gcMarkRegSetNpt(RBM_R0);
+    regSet.verifyRegUsed(REG_R0);
+
+    genEmitHelperCall(helper,
+                      0,           // argSize
+                      EA_UNKNOWN); // retSize
+
+    // Restore state that existed before profiler callback
+    if (r0InUse)
+    {
+        getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, REG_PROFILER_RET_SCRATCH);
+        genTransferRegGCState(REG_R0, REG_PROFILER_RET_SCRATCH);
+        gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
+    }
+}
+
+#endif // PROFILING_SUPPORTED
+
 #endif // _TARGET_ARM_
index b7c7c5e..a10fb40 100644 (file)
@@ -5883,6 +5883,96 @@ void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node)
 
 #endif // FEATURE_HW_INTRINSICS
 
+#ifdef PROFILING_SUPPORTED
+
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback.
+//
+// Arguments:
+//     initReg        - register to use as scratch register
+//     pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+//                      not zero after this call.
+//
+// Return Value:
+//     None
+//
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+    assert(compiler->compGeneratingProlog); // this callback is only emitted while generating the prolog
+
+    if (!compiler->compIsProfilerHookNeeded()) // nothing is emitted when the profiler hook is not needed
+    {
+        return;
+    }
+
+    if (compiler->compProfilerMethHndIndirected) // set the register to an address, then load the handle through it
+    {
+        instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_FUNC_ID,
+                               (ssize_t)compiler->compProfilerMethHnd);
+        getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_FUNC_ID, REG_PROFILER_ENTER_ARG_FUNC_ID);
+    }
+    else // the handle value itself is set into the register as a constant
+    {
+        genSetRegToIcon(REG_PROFILER_ENTER_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+    }
+
+    int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+    genInstrWithConstant(INS_add, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_CALLER_SP, genFramePointerReg(),
+                         (ssize_t)(-callerSPOffset), REG_PROFILER_ENTER_ARG_CALLER_SP); // frame reg + (-offset)
+
+    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); // argSize = 0, retSize = EA_UNKNOWN
+
+    if ((genRegMask(initReg) & RBM_PROFILER_ENTER_TRASH) != RBM_NONE) // initReg can no longer be assumed zero
+    {
+        *pInitRegZeroed = false;
+    }
+}
+
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+//
+// Arguments:
+//     helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+//     None
+//
+void CodeGen::genProfilingLeaveCallback(unsigned helper)
+{
+    assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
+    if (!compiler->compIsProfilerHookNeeded()) // nothing is emitted when the profiler hook is not needed
+    {
+        return;
+    }
+
+    compiler->info.compProfilerCallback = true; // flag set whenever a leave/tailcall callback is emitted
+
+    if (compiler->compProfilerMethHndIndirected) // set the register to an address, then load the handle through it
+    {
+        instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_LEAVE_ARG_FUNC_ID,
+                               (ssize_t)compiler->compProfilerMethHnd);
+        getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_FUNC_ID, REG_PROFILER_LEAVE_ARG_FUNC_ID);
+    }
+    else // the handle value itself is set into the register as a constant
+    {
+        genSetRegToIcon(REG_PROFILER_LEAVE_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+    }
+
+    gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_FUNC_ID); // presumably: the handle is not a GC pointer
+
+    int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+    genInstrWithConstant(INS_add, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_CALLER_SP, genFramePointerReg(),
+                         (ssize_t)(-callerSPOffset), REG_PROFILER_LEAVE_ARG_CALLER_SP); // frame reg + (-offset)
+
+    gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_CALLER_SP); // presumably: the SP value is not a GC pointer
+
+    genEmitHelperCall(helper, 0, EA_UNKNOWN); // argSize = 0, retSize = EA_UNKNOWN
+}
+
+#endif // PROFILING_SUPPORTED
+
 /*****************************************************************************
  * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
  * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
index ebfd9d6..1891b6f 100644 (file)
@@ -6512,581 +6512,6 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed
 #endif // !ARM64 !ARM
 }
 
-#ifdef PROFILING_SUPPORTED
-
-//-----------------------------------------------------------------------------------
-// genProfilingEnterCallback: Generate the profiling function enter callback.
-//
-// Arguments:
-//     initReg        - register to use as scratch register
-//     pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
-//                      not zero after this call.
-//
-// Return Value:
-//     None
-//
-// Notes:
-// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
-// VM\i386\asmhelpers.asm for details):
-// 1. The calling sequence for calling the helper is:
-//          push FunctionIDOrClientID
-//          call ProfileEnterHelper
-// 2. The calling function has an EBP frame.
-// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
-//    the following prolog is assumed:
-//          push ESP
-//          mov EBP, ESP
-// 4. All registers are preserved.
-// 5. The helper pops the FunctionIDOrClientID argument from the stack.
-//
-void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
-{
-    assert(compiler->compGeneratingProlog);
-
-    // Give profiler a chance to back out of hooking this method
-    if (!compiler->compIsProfilerHookNeeded())
-    {
-        return;
-    }
-
-#if defined(_TARGET_AMD64_)
-#if !defined(UNIX_AMD64_ABI)
-
-    unsigned   varNum;
-    LclVarDsc* varDsc;
-
-    // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
-    noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
-    noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
-
-    // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
-    // In case of vararg methods, arg regs are already homed.
-    //
-    // Note: Here we don't need to worry about updating gc'info since enter
-    // callback is generated as part of prolog which is non-gc interruptible.
-    // Moreover GC cannot kick while executing inside profiler callback which is a
-    // profiler requirement so it can examine arguments which could be obj refs.
-    if (!compiler->info.compIsVarArgs)
-    {
-        for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
-        {
-            noway_assert(varDsc->lvIsParam);
-
-            if (!varDsc->lvIsRegArg)
-            {
-                continue;
-            }
-
-            var_types storeType = varDsc->lvaArgType();
-            regNumber argReg    = varDsc->lvArgReg;
-
-            instruction store_ins = ins_Store(storeType);
-
-#ifdef FEATURE_SIMD
-            if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
-            {
-                store_ins = INS_mov;
-            }
-#endif // FEATURE_SIMD
-
-            getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
-        }
-    }
-
-    // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
-    // RCX = ProfilerMethHnd
-    if (compiler->compProfilerMethHndIndirected)
-    {
-        // Profiler hooks enabled during Ngen time.
-        // Profiler handle needs to be accessed through an indirection of a pointer.
-        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-    }
-    else
-    {
-        // No need to record relocations, if we are generating ELT hooks under the influence
-        // of COMPlus_JitELTHookEnabled=1
-        if (compiler->opts.compJitELTHookEnabled)
-        {
-            genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
-        }
-        else
-        {
-            instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-        }
-    }
-
-    // RDX = caller's SP
-    // Notes
-    //   1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
-    //   2) caller's SP relative offset to FramePointer will be negative.  We need to add absolute value
-    //      of that offset to FramePointer to obtain caller's SP value.
-    assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
-    int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
-    getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
-
-    // Can't have a call until we have enough padding for rejit
-    genPrologPadForReJit();
-
-    // This will emit either
-    // "call ip-relative 32-bit offset" or
-    // "mov rax, helper addr; call rax"
-    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
-
-    // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
-    // generation logic that moves args around as required by first BB entry point conditions
-    // computed by LSRA.  Code pointers for investigating this further: genFnPrologCalleeRegArgs()
-    // and genEnregisterIncomingStackArgs().
-    //
-    // Now reload arg registers from home locations.
-    // Vararg methods:
-    //   - we need to reload only known (i.e. fixed) reg args.
-    //   - if floating point type, also reload it into corresponding integer reg
-    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
-    {
-        noway_assert(varDsc->lvIsParam);
-
-        if (!varDsc->lvIsRegArg)
-        {
-            continue;
-        }
-
-        var_types loadType = varDsc->lvaArgType();
-        regNumber argReg   = varDsc->lvArgReg;
-
-        instruction load_ins = ins_Load(loadType);
-
-#ifdef FEATURE_SIMD
-        if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
-        {
-            load_ins = INS_mov;
-        }
-#endif // FEATURE_SIMD
-
-        getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
-
-#if FEATURE_VARARG
-        if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
-        {
-            regNumber   intArgReg = compiler->getCallArgIntRegister(argReg);
-            instruction ins       = ins_CopyFloatToInt(loadType, TYP_LONG);
-            inst_RV_RV(ins, argReg, intArgReg, loadType);
-        }
-#endif //  FEATURE_VARARG
-    }
-
-    // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
-    if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
-    {
-        *pInitRegZeroed = false;
-    }
-
-#else // !defined(UNIX_AMD64_ABI)
-
-    // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
-    // R14 = ProfilerMethHnd
-    if (compiler->compProfilerMethHndIndirected)
-    {
-        // Profiler hooks enabled during Ngen time.
-        // Profiler handle needs to be accessed through an indirection of a pointer.
-        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
-                                   (ssize_t)compiler->compProfilerMethHnd);
-    }
-    else
-    {
-        // No need to record relocations, if we are generating ELT hooks under the influence
-        // of COMPlus_JitELTHookEnabled=1
-        if (compiler->opts.compJitELTHookEnabled)
-        {
-            genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
-        }
-        else
-        {
-            instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-        }
-    }
-
-    // R15 = caller's SP
-    // Notes
-    //   1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
-    //   2) caller's SP relative offset to FramePointer will be negative.  We need to add absolute value
-    //      of that offset to FramePointer to obtain caller's SP value.
-    assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
-    int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
-    getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
-
-    // Can't have a call until we have enough padding for rejit
-    genPrologPadForReJit();
-
-    // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
-    // We use R11 here. This will emit either
-    // "call ip-relative 32-bit offset" or
-    // "mov r11, helper addr; call r11"
-    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
-
-    // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
-    if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
-    {
-        *pInitRegZeroed = false;
-    }
-
-#endif // !defined(UNIX_AMD64_ABI)
-
-#elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
-
-    unsigned saveStackLvl2 = genStackLevel;
-
-#if defined(_TARGET_X86_)
-// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
-// for x86 stack unwinding
-
-#if defined(UNIX_X86_ABI)
-    // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
-    getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
-#endif // UNIX_X86_ABI
-
-    // Push the profilerHandle
-    if (compiler->compProfilerMethHndIndirected)
-    {
-        getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
-    }
-    else
-    {
-        inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
-    }
-
-#elif defined(_TARGET_ARM_)
-    // On Arm arguments are prespilled on stack, which frees r0-r3.
-    // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
-    // The call target register could be any free register.
-    regNumber argReg     = REG_PROFILER_ENTER_ARG;
-    regMaskTP argRegMask = genRegMask(argReg);
-    assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
-
-    if (compiler->compProfilerMethHndIndirected)
-    {
-        getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
-        regSet.verifyRegUsed(argReg);
-    }
-    else
-    {
-        instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
-    }
-#else  // _TARGET_*
-    NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
-#endif // _TARGET_*
-
-    //
-    // Can't have a call until we have enough padding for rejit
-    //
-    genPrologPadForReJit();
-
-    // This will emit either
-    // "call ip-relative 32-bit offset" or
-    // "mov rax, helper addr; call rax"
-    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
-                      0,           // argSize. Again, we have to lie about it
-                      EA_UNKNOWN); // retSize
-
-#if defined(_TARGET_X86_)
-    // Check that we have place for the push.
-    assert(compiler->fgPtrArgCntMax >= 1);
-
-#if defined(UNIX_X86_ABI)
-    // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
-    getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
-#endif // UNIX_X86_ABI
-
-#elif defined(_TARGET_ARM_)
-    if (initReg == argReg)
-    {
-        *pInitRegZeroed = false;
-    }
-#else  // _TARGET_*
-    NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
-#endif // _TARGET_*
-
-    /* Restore the stack level */
-
-    SetStackLevel(saveStackLvl2);
-
-#else  // target
-    NYI("Emit Profiler Enter callback");
-#endif // target
-}
-
-//-----------------------------------------------------------------------------------
-// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
-// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
-//
-// Arguments:
-//     helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
-//
-// Return Value:
-//     None
-//
-// Notes:
-// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
-// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
-// 1. The calling sequence for calling the helper is:
-//          push FunctionIDOrClientID
-//          call ProfileLeaveHelper or ProfileTailcallHelper
-// 2. The calling function has an EBP frame.
-// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
-//    the following prolog is assumed:
-//          push ESP
-//          mov EBP, ESP
-// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
-//    helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
-// 5. The helper pops the FunctionIDOrClientID argument from the stack.
-//
-void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
-{
-    assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
-
-    // Only hook if profiler says it's okay.
-    if (!compiler->compIsProfilerHookNeeded())
-    {
-        return;
-    }
-
-    compiler->info.compProfilerCallback = true;
-
-    // Need to save on to the stack level, since the helper call will pop the argument
-    unsigned saveStackLvl2 = genStackLevel;
-
-#if defined(_TARGET_AMD64_)
-#if !defined(UNIX_AMD64_ABI)
-
-    // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
-    noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
-    noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
-
-    // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
-    // registers that profiler callback kills.
-    if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
-    {
-        regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
-        noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
-    }
-
-    // At this point return value is computed and stored in RAX or XMM0.
-    // On Amd64, Leave callback preserves the return register.  We keep
-    // RAX alive by not reporting as trashed by helper call.  Also note
-    // that GC cannot kick-in while executing inside profiler callback,
-    // which is a requirement of profiler as well since it needs to examine
-    // return value which could be an obj ref.
-
-    // RCX = ProfilerMethHnd
-    if (compiler->compProfilerMethHndIndirected)
-    {
-        // Profiler hooks enabled during Ngen time.
-        // Profiler handle needs to be accessed through an indirection of an address.
-        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-    }
-    else
-    {
-        // Don't record relocations, if we are generating ELT hooks under the influence
-        // of COMPlus_JitELTHookEnabled=1
-        if (compiler->opts.compJitELTHookEnabled)
-        {
-            genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
-        }
-        else
-        {
-            instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-        }
-    }
-
-    // RDX = caller's SP
-    // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
-    // of the stmnts to execute unconditionally and clean-up rest.
-    if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
-    {
-        // Caller's SP relative offset to FramePointer will be negative.  We need to add absolute
-        // value of that offset to FramePointer to obtain caller's SP value.
-        int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
-        getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
-    }
-    else
-    {
-        // If we are here means that it is a tentative frame layout during which we
-        // cannot use caller's SP offset since it is an estimate.  For now we require the
-        // method to have at least a single arg so that we can use it to obtain caller's
-        // SP.
-        LclVarDsc* varDsc = compiler->lvaTable;
-        NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
-
-        // lea rdx, [FramePointer + Arg0's offset]
-        getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
-    }
-
-    // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
-    // We use R8 here. This will emit either
-    // "call ip-relative 32-bit offset" or
-    // "mov r8, helper addr; call r8"
-    genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
-
-#else // !defined(UNIX_AMD64_ABI)
-
-    // RDI = ProfilerMethHnd
-    if (compiler->compProfilerMethHndIndirected)
-    {
-        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-    }
-    else
-    {
-        if (compiler->opts.compJitELTHookEnabled)
-        {
-            genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
-        }
-        else
-        {
-            instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-        }
-    }
-
-    // RSI = caller's SP
-    if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
-    {
-        int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
-        getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
-    }
-    else
-    {
-        LclVarDsc* varDsc = compiler->lvaTable;
-        NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
-
-        // lea rdx, [FramePointer + Arg0's offset]
-        getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
-    }
-
-    // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
-    // We use R11 here. This will emit either
-    // "call ip-relative 32-bit offset" or
-    // "mov r11, helper addr; call r11"
-    genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
-
-#endif // !defined(UNIX_AMD64_ABI)
-
-#elif defined(_TARGET_X86_)
-
-#if defined(UNIX_X86_ABI)
-    // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
-    getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
-    AddStackLevel(0xC);
-    AddNestedAlignment(0xC);
-#endif // UNIX_X86_ABI
-
-    //
-    // Push the profilerHandle
-    //
-
-    if (compiler->compProfilerMethHndIndirected)
-    {
-        getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
-    }
-    else
-    {
-        inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
-    }
-    genSinglePush();
-
-#if defined(UNIX_X86_ABI)
-    int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
-#else
-    int argSize = REGSIZE_BYTES;
-#endif
-    genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
-
-    // Check that we have place for the push.
-    assert(compiler->fgPtrArgCntMax >= 1);
-
-#if defined(UNIX_X86_ABI)
-    // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
-    getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
-    SubtractStackLevel(0x10);
-    SubtractNestedAlignment(0xC);
-#endif // UNIX_X86_ABI
-
-#elif defined(_TARGET_ARM_)
-    //
-    // Push the profilerHandle
-    //
-
-    // Contract between JIT and Profiler Leave callout on arm:
-    // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
-    // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
-    // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
-    // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
-    //
-    // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
-    // callback.
-    bool     r0Trashed;
-    emitAttr attr = EA_UNKNOWN;
-
-    if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
-                                                   (varTypeIsFloating(compiler->info.compRetType) ||
-                                                    compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
-    {
-        r0Trashed = false;
-    }
-    else
-    {
-        // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
-        // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
-        if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
-        {
-            attr = EA_GCREF;
-            gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
-        }
-        else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
-        {
-            attr = EA_BYREF;
-            gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
-        }
-        else
-        {
-            attr = EA_4BYTE;
-        }
-
-        getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
-        regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
-        gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
-        r0Trashed = true;
-    }
-
-    if (compiler->compProfilerMethHndIndirected)
-    {
-        getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-        regSet.verifyRegUsed(REG_ARG_0);
-    }
-    else
-    {
-        instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
-    }
-
-    genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
-                      0,           // argSize
-                      EA_UNKNOWN); // retSize
-
-    // Restore state that existed before profiler callback
-    if (r0Trashed)
-    {
-        getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
-        regSet.verifyRegUsed(REG_ARG_0);
-        gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
-    }
-
-#else  // target
-    NYI("Emit Profiler Leave callback");
-#endif // target
-
-    /* Restore the stack level */
-    SetStackLevel(saveStackLvl2);
-}
-
-#endif // PROFILING_SUPPORTED
-
 /*****************************************************************************
 
 Esp frames :
@@ -11708,7 +11133,7 @@ void CodeGen::genReturn(GenTree* treeNode)
             }
         }
 
-        genProfilingLeaveCallback();
+        genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_LEAVE);
 
         if (varTypeIsGC(compiler->info.compRetType))
         {
index 9bc231a..d6f26ed 100644 (file)
@@ -8754,4 +8754,517 @@ void CodeGen::genAmd64EmitterUnitTests()
 
 #endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
 
+#ifdef PROFILING_SUPPORTED
+
+#ifdef _TARGET_X86_
+
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback.
+//
+// Arguments:
+//     initReg        - register to use as scratch register (not referenced by this x86 implementation)
+//     pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+//                      not zero after this call (never written by this x86 implementation).
+//
+// Return Value:
+//     None
+//
+// Notes:
+// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
+// VM\i386\asmhelpers.asm for details):
+// 1. The calling sequence for calling the helper is:
+//          push FunctionIDOrClientID
+//          call ProfileEnterHelper
+// 2. The calling function has an EBP frame.
+// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
+//    the following prolog is assumed:
+//          push ESP
+//          mov EBP, ESP
+// 4. All registers are preserved.
+// 5. The helper pops the FunctionIDOrClientID argument from the stack.
+//
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+    assert(compiler->compGeneratingProlog);
+
+    // Give profiler a chance to back out of hooking this method
+    if (!compiler->compIsProfilerHookNeeded())
+    {
+        return;
+    }
+
+    unsigned saveStackLvl2 = genStackLevel;
+
+// Important note: when you change the enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
+// for x86 stack unwinding.
+
+#if defined(UNIX_X86_ABI)
+    // Manually align the stack to 16 bytes: 0xC bytes of padding here plus the 4-byte handle push below (0x10 total). This is similar to CodeGen::genAlignStackBeforeCall()
+    getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
+#endif // UNIX_X86_ABI
+
+    // Push the profilerHandle
+    if (compiler->compProfilerMethHndIndirected)
+    {
+        getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+    }
+    else
+    {
+        inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+    }
+
+    //
+    // Can't have a call until we have enough padding for rejit
+    //
+    genPrologPadForReJit();
+
+    // This will emit either
+    // "call ip-relative 32-bit offset" or
+    // "mov rax, helper addr; call rax" (NOTE(review): register names look copied from the AMD64 version - confirm for x86)
+    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
+                      0,           // argSize: passed as 0 even though the profiler handle was pushed above
+                      EA_UNKNOWN); // retSize
+
+    // Check that we have place for the push.
+    assert(compiler->fgPtrArgCntMax >= 1);
+
+#if defined(UNIX_X86_ABI)
+    // Restore alignment manually: removes the 0xC padding plus the 4 bytes of the pushed handle. This is similar to CodeGen::genRemoveAlignmentAfterCall()
+    getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
+#endif // UNIX_X86_ABI
+
+    /* Restore the stack level */
+
+    SetStackLevel(saveStackLvl2);
+}
+
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+//
+// Arguments:
+//     helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+//     None
+//
+// Notes:
+// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
+// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
+// 1. The calling sequence for calling the helper is:
+//          push FunctionIDOrClientID
+//          call ProfileLeaveHelper or ProfileTailcallHelper
+// 2. The calling function has an EBP frame.
+// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
+//    the following prolog is assumed:
+//          push ESP
+//          mov EBP, ESP
+// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
+//    helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
+// 5. The helper pops the FunctionIDOrClientID argument from the stack.
+//
+void CodeGen::genProfilingLeaveCallback(unsigned helper)
+{
+    assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
+    // Only hook if profiler says it's okay.
+    if (!compiler->compIsProfilerHookNeeded())
+    {
+        return;
+    }
+
+    compiler->info.compProfilerCallback = true;
+
+    // Remember the current stack level so it can be restored below, since the helper call will pop the argument
+    unsigned saveStackLvl2 = genStackLevel;
+
+#if defined(UNIX_X86_ABI)
+    // Manually align the stack to 16 bytes: 0xC bytes of padding here plus the 4-byte handle push below (0x10 total). This is similar to CodeGen::genAlignStackBeforeCall()
+    getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
+    AddStackLevel(0xC);
+    AddNestedAlignment(0xC);
+#endif // UNIX_X86_ABI
+
+    //
+    // Push the profilerHandle
+    //
+
+    if (compiler->compProfilerMethHndIndirected)
+    {
+        getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+    }
+    else
+    {
+        inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+    }
+    genSinglePush(); // account for the handle push in the tracked stack level
+
+#if defined(UNIX_X86_ABI)
+    int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
+#else
+    int argSize = REGSIZE_BYTES; // one pointer-sized argument (the pushed profiler handle)
+#endif
+    genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
+
+    // Check that we have place for the push.
+    assert(compiler->fgPtrArgCntMax >= 1);
+
+#if defined(UNIX_X86_ABI)
+    // Restore alignment manually: removes the 0xC padding plus the 4 bytes of the pushed handle. This is similar to CodeGen::genRemoveAlignmentAfterCall()
+    getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
+    SubtractStackLevel(0x10);
+    SubtractNestedAlignment(0xC);
+#endif // UNIX_X86_ABI
+
+    /* Restore the stack level */
+    SetStackLevel(saveStackLvl2);
+}
+
+#endif // _TARGET_X86_
+
+#ifdef _TARGET_AMD64_
+
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback.
+//
+// Arguments:
+//     initReg        - the prolog's init register; here it is only tested against RBM_CALLEE_TRASH
+//     pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+//                      one of RBM_CALLEE_TRASH (it may not be zero after the helper call).
+//
+// Return Value:
+//     None
+//
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+    assert(compiler->compGeneratingProlog);
+
+    // Give profiler a chance to back out of hooking this method
+    if (!compiler->compIsProfilerHookNeeded())
+    {
+        return;
+    }
+
+#if !defined(UNIX_AMD64_ABI)
+
+    unsigned   varNum;
+    LclVarDsc* varDsc;
+
+    // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+    noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+    noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+    // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
+    // In case of vararg methods, arg regs are already homed.
+    //
+    // Note: Here we don't need to worry about updating GC info since the enter
+    // callback is generated as part of the prolog, which is non-GC-interruptible.
+    // Moreover, GC cannot kick in while executing inside the profiler callback, which is a
+    // profiler requirement so that it can examine arguments which could be obj refs.
+    if (!compiler->info.compIsVarArgs)
+    {
+        for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+        {
+            noway_assert(varDsc->lvIsParam);
+
+            if (!varDsc->lvIsRegArg)
+            {
+                continue;
+            }
+
+            var_types storeType = varDsc->lvaArgType();
+            regNumber argReg    = varDsc->lvArgReg;
+
+            instruction store_ins = ins_Store(storeType);
+
+#ifdef FEATURE_SIMD
+            if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
+            {
+                store_ins = INS_mov;
+            }
+#endif // FEATURE_SIMD
+
+            getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
+        }
+    }
+
+    // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
+    // RCX = ProfilerMethHnd
+    if (compiler->compProfilerMethHndIndirected)
+    {
+        // Profiler hooks enabled during Ngen time.
+        // Profiler handle needs to be accessed through an indirection of a pointer.
+        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+    }
+    else
+    {
+        // No need to record relocations, if we are generating ELT hooks under the influence
+        // of COMPlus_JitELTHookEnabled=1
+        if (compiler->opts.compJitELTHookEnabled)
+        {
+            genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+        }
+        else
+        {
+            instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+        }
+    }
+
+    // RDX = caller's SP
+    // Notes
+    //   1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
+    //   2) caller's SP relative offset to FramePointer will be negative.  We need to add absolute value
+    //      of that offset to FramePointer to obtain caller's SP value.
+    assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+    int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+    getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+
+    // Can't have a call until we have enough padding for rejit
+    genPrologPadForReJit();
+
+    // This will emit either
+    // "call ip-relative 32-bit offset" or
+    // "mov rax, helper addr; call rax"
+    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
+
+    // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
+    // generation logic that moves args around as required by first BB entry point conditions
+    // computed by LSRA.  Code pointers for investigating this further: genFnPrologCalleeRegArgs()
+    // and genEnregisterIncomingStackArgs().
+    //
+    // Now reload arg registers from home locations.
+    // Vararg methods:
+    //   - we need to reload only known (i.e. fixed) reg args.
+    //   - if floating point type, also reload it into corresponding integer reg
+    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+    {
+        noway_assert(varDsc->lvIsParam);
+
+        if (!varDsc->lvIsRegArg)
+        {
+            continue;
+        }
+
+        var_types loadType = varDsc->lvaArgType();
+        regNumber argReg   = varDsc->lvArgReg;
+
+        instruction load_ins = ins_Load(loadType);
+
+#ifdef FEATURE_SIMD
+        if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
+        {
+            load_ins = INS_mov;
+        }
+#endif // FEATURE_SIMD
+
+        getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
+
+#if FEATURE_VARARG
+        if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
+        {
+            regNumber   intArgReg = compiler->getCallArgIntRegister(argReg);
+            instruction ins       = ins_CopyFloatToInt(loadType, TYP_LONG);
+            inst_RV_RV(ins, argReg, intArgReg, loadType);
+        }
+#endif //  FEATURE_VARARG
+    }
+
+    // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zeroed before using.
+    if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+    {
+        *pInitRegZeroed = false;
+    }
+
+#else // !defined(UNIX_AMD64_ABI)
+
+    // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
+    // R14 = ProfilerMethHnd
+    if (compiler->compProfilerMethHndIndirected)
+    {
+        // Profiler hooks enabled during Ngen time.
+        // Profiler handle needs to be accessed through an indirection of a pointer.
+        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
+                                   (ssize_t)compiler->compProfilerMethHnd);
+    }
+    else
+    {
+        // No need to record relocations, if we are generating ELT hooks under the influence
+        // of COMPlus_JitELTHookEnabled=1
+        if (compiler->opts.compJitELTHookEnabled)
+        {
+            genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+        }
+        else
+        {
+            instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+        }
+    }
+
+    // R15 = caller's SP
+    // Notes
+    //   1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
+    //   2) caller's SP relative offset to FramePointer will be negative.  We need to add absolute value
+    //      of that offset to FramePointer to obtain caller's SP value.
+    assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+    int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+    getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
+
+    // Can't have a call until we have enough padding for rejit
+    genPrologPadForReJit();
+
+    // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
+    // We use R11 here. This will emit either
+    // "call ip-relative 32-bit offset" or
+    // "mov r11, helper addr; call r11"
+    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
+
+    // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zeroed before using.
+    if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+    {
+        *pInitRegZeroed = false;
+    }
+
+#endif // !defined(UNIX_AMD64_ABI)
+}
+
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+//
+// Arguments:
+//     helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+//     None
+//
+void CodeGen::genProfilingLeaveCallback(unsigned helper)
+{
+    assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
+    // Only hook if profiler says it's okay.
+    if (!compiler->compIsProfilerHookNeeded())
+    {
+        return;
+    }
+
+    compiler->info.compProfilerCallback = true;
+
+#if !defined(UNIX_AMD64_ABI)
+
+    // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+    noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+    noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+    // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
+    // registers that profiler callback kills.
+    if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
+    {
+        regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
+        noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
+    }
+
+    // At this point return value is computed and stored in RAX or XMM0.
+    // On Amd64, Leave callback preserves the return register.  We keep
+    // RAX alive by not reporting as trashed by helper call.  Also note
+    // that GC cannot kick-in while executing inside profiler callback,
+    // which is a requirement of profiler as well since it needs to examine
+    // return value which could be an obj ref.
+
+    // RCX = ProfilerMethHnd
+    if (compiler->compProfilerMethHndIndirected)
+    {
+        // Profiler hooks enabled during Ngen time.
+        // Profiler handle needs to be accessed through an indirection of an address.
+        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+    }
+    else
+    {
+        // Don't record relocations, if we are generating ELT hooks under the influence
+        // of COMPlus_JitELTHookEnabled=1
+        if (compiler->opts.compJitELTHookEnabled)
+        {
+            genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+        }
+        else
+        {
+            instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+        }
+    }
+
+    // RDX = caller's SP
+    // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, keep only the "if" branch
+    // below (executed unconditionally) and remove the "else" branch.
+    if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+    {
+        // Caller's SP relative offset to FramePointer will be negative.  We need to add absolute
+        // value of that offset to FramePointer to obtain caller's SP value.
+        int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+        getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+    }
+    else
+    {
+        // If we are here, the frame layout is still tentative, so we
+        // cannot use caller's SP offset since it is an estimate.  For now we require the
+        // method to have at least a single arg so that we can use it to obtain caller's
+        // SP.
+        LclVarDsc* varDsc = compiler->lvaTable;
+        NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
+
+        // lea rdx, [FramePointer + Arg0's offset]
+        getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
+    }
+
+    // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
+    // We use R8 here. This will emit either
+    // "call ip-relative 32-bit offset" or
+    // "mov r8, helper addr; call r8"
+    genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
+
+#else // !defined(UNIX_AMD64_ABI)
+
+    // RDI = ProfilerMethHnd
+    if (compiler->compProfilerMethHndIndirected)
+    {
+        getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+    }
+    else
+    {
+        if (compiler->opts.compJitELTHookEnabled)
+        {
+            genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+        }
+        else
+        {
+            instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+        }
+    }
+
+    // RSI = caller's SP
+    if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+    {
+        int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+        getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+    }
+    else
+    {
+        LclVarDsc* varDsc = compiler->lvaTable;
+        NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
+
+        // lea rsi, [FramePointer + Arg0's offset]
+        getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
+    }
+
+    // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
+    // We use R11 here. This will emit either
+    // "call ip-relative 32-bit offset" or
+    // "mov r11, helper addr; call r11"
+    genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
+
+#endif // !defined(UNIX_AMD64_ABI)
+}
+
 #endif // _TARGET_AMD64_
+
+#endif // PROFILING_SUPPORTED
+
+#endif // _TARGET_XARCH_
index 02529bc..e12f7ea 100644 (file)
@@ -2301,9 +2301,7 @@ bool emitter::emitNoGChelper(CorInfoHelpFunc helpFunc)
 
         case CORINFO_HELP_PROF_FCN_LEAVE:
         case CORINFO_HELP_PROF_FCN_ENTER:
-#if defined(_TARGET_XARCH_)
         case CORINFO_HELP_PROF_FCN_TAILCALL:
-#endif
         case CORINFO_HELP_LLSH:
         case CORINFO_HELP_LRSH:
         case CORINFO_HELP_LRSZ:
@@ -7628,7 +7626,6 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper)
             assert(!"unknown arch");
 #endif
 
-#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM_)
         case CORINFO_HELP_PROF_FCN_ENTER:
             result = RBM_PROFILER_ENTER_TRASH;
             break;
@@ -7636,12 +7633,10 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper)
         case CORINFO_HELP_PROF_FCN_LEAVE:
             result = RBM_PROFILER_LEAVE_TRASH;
             break;
-#if defined(_TARGET_XARCH_)
+
         case CORINFO_HELP_PROF_FCN_TAILCALL:
             result = RBM_PROFILER_TAILCALL_TRASH;
             break;
-#endif // defined(_TARGET_XARCH_)
-#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM_)
 
 #if defined(_TARGET_ARMARCH_)
         case CORINFO_HELP_ASSIGN_REF:
index 198f285..7d64976 100644 (file)
@@ -126,9 +126,15 @@ int LinearScan::BuildNode(GenTree* tree)
         case GT_ARGPLACE:
         case GT_NO_OP:
         case GT_START_NONGC:
+            srcCount = 0;
+            assert(dstCount == 0);
+            break;
+
         case GT_PROF_HOOK:
             srcCount = 0;
             assert(dstCount == 0);
+            killMask = getKillSetForProfilerHook();
+            BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
             break;
 
         case GT_START_PREEMPTGC:
@@ -176,6 +182,8 @@ int LinearScan::BuildNode(GenTree* tree)
 
         case GT_RETURN:
             srcCount = BuildReturn(tree);
+            killMask = getKillSetForReturn();
+            BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
             break;
 
         case GT_RETFILT:
index 3a79634..cf84b2a 100644 (file)
@@ -1106,10 +1106,6 @@ typedef unsigned char   regNumberSmall;
   #define RBM_PROFILER_ENTER_ARG           RBM_R0
   #define REG_PROFILER_RET_SCRATCH         REG_R2
   #define RBM_PROFILER_RET_SCRATCH         RBM_R2
-  #define RBM_PROFILER_RET_USED            (RBM_R0 | RBM_R1 | RBM_R2)
-  #define REG_PROFILER_JMP_ARG             REG_R0
-  #define RBM_PROFILER_JMP_USED            RBM_R0
-  #define RBM_PROFILER_TAIL_USED           (RBM_R0 | RBM_R12 | RBM_LR)
   
   // The registers trashed by profiler enter/leave/tailcall hook
   // See vm\arm\asmhelpers.asm for more details.
@@ -1420,14 +1416,19 @@ typedef unsigned char   regNumberSmall;
   #define REG_PREV(reg)           ((regNumber)((unsigned)(reg) - 1))
 
   // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks
-  #define REG_PROFILER_ENTER_ARG           REG_R0
-  #define RBM_PROFILER_ENTER_ARG           RBM_R0
-  #define REG_PROFILER_RET_SCRATCH         REG_R2
-  #define RBM_PROFILER_RET_SCRATCH         RBM_R2
-  #define RBM_PROFILER_RET_USED            (RBM_R0 | RBM_R1 | RBM_R2)
-  #define REG_PROFILER_JMP_ARG             REG_R0
-  #define RBM_PROFILER_JMP_USED            RBM_R0
-  #define RBM_PROFILER_TAIL_USED           (RBM_R0 | RBM_R12 | RBM_LR)
+  #define REG_PROFILER_ENTER_ARG_FUNC_ID    REG_R10
+  #define RBM_PROFILER_ENTER_ARG_FUNC_ID    RBM_R10
+  #define REG_PROFILER_ENTER_ARG_CALLER_SP  REG_R11
+  #define RBM_PROFILER_ENTER_ARG_CALLER_SP  RBM_R11
+  #define REG_PROFILER_LEAVE_ARG_FUNC_ID    REG_R10
+  #define RBM_PROFILER_LEAVE_ARG_FUNC_ID    RBM_R10
+  #define REG_PROFILER_LEAVE_ARG_CALLER_SP  REG_R11
+  #define RBM_PROFILER_LEAVE_ARG_CALLER_SP  RBM_R11
+
+  // The registers trashed by profiler enter/leave/tailcall hook
+  #define RBM_PROFILER_ENTER_TRASH     (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_ARG_RET_BUFF|RBM_FLTARG_REGS|RBM_FP))
+  #define RBM_PROFILER_LEAVE_TRASH     (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_ARG_RET_BUFF|RBM_FLTARG_REGS|RBM_FP))
+  #define RBM_PROFILER_TAILCALL_TRASH  RBM_PROFILER_LEAVE_TRASH
 
   // Which register are int and long values returned in ?
   #define REG_INTRET               REG_R0
index 5ee54cb..b036efb 100644 (file)
@@ -847,6 +847,10 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
         exceptionhandling.h
     )
 
+    set(VM_SOURCES_WKS_ARCH
+        ${ARCH_SOURCES_DIR}/profiler.cpp
+    )
+
     if(CLR_CMAKE_PLATFORM_UNIX)
         list(APPEND VM_SOURCES_DAC_AND_WKS_ARCH
             ${ARCH_SOURCES_DIR}/arm64singlestepper.cpp
index 034e687..1234813 100644 (file)
@@ -561,6 +561,15 @@ ThePreStubPatchLabel:
 
         .endm
 
+#ifdef PROFILING_SUPPORTED
+
+//
+// EXTERN_C void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
+// Does nothing: returns straight to the caller without touching ProfilerHandle.
+LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+    bx lr
+LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+
 //
 // EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID);
 //
@@ -690,6 +699,8 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
     EPILOG_POP "{r1, r2, r4, r5, r7, r11, pc}"
 NESTED_END ProfileTailcallNaked, _TEXT
 
+#endif
+
 // EXTERN_C int __fastcall HelperMethodFrameRestoreState(
 //         INDEBUG_COMMA(HelperMethodFrame *pFrame)
 //         MachState *pState
index 35c8986..21e0f65 100644 (file)
@@ -937,6 +937,12 @@ PROFILE_ENTER           equ 1
 PROFILE_LEAVE           equ 2
 PROFILE_TAILCALL        equ 4
 
+        ; ------------------------------------------------------------------
+        ; void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
+        LEAF_ENTRY  JIT_ProfilerEnterLeaveTailcallStub
+        bx lr                           ; Nothing to do: return straight to the caller.
+        LEAF_END
+
         ; Define the layout of the PROFILE_PLATFORM_SPECIFIC_DATA we push on the stack for all profiler
         ; helpers.
         map 0
index 28daf9a..f323b94 100644 (file)
@@ -1378,3 +1378,57 @@ LEAF_ENTRY  JIT_Stelem_DoWrite, _TEXT
     // single or multi-proc code based on the current CPU
     b       C_FUNC(JIT_WriteBarrier)
 LEAF_END JIT_Stelem_DoWrite, _TEXT
+
+#ifdef PROFILING_SUPPORTED
+
+// ------------------------------------------------------------------
+LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+    ret     lr                              // Nothing to do: return straight to the caller.
+LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+
+// ------------------------------------------------------------------
+#define PROFILE_ENTER    1
+#define PROFILE_LEAVE    2
+#define PROFILE_TAILCALL 4
+#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 256
+
+// ------------------------------------------------------------------
+.macro GenerateProfileHelper helper, flags
+NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler
+    // On entry:
+    //   x10 = functionIDOrClientID
+    //   x11 = profiledSp
+    //   x12 = throwable (NOTE(review): x12 is overwritten below before it is ever read - confirm it is unused)
+    //
+    // On exit:
+    //   Values of x0-x8, q0-q7, fp are preserved.
+    //   Values of other volatile registers are not preserved.
+
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Allocate the data block and save fp, lr at its base (the Fp, Pc slots).
+    SAVE_ARGUMENT_REGISTERS sp, 16          // Save x8 and argument registers (x0-x7).
+    str     xzr, [sp, 88]                   // Clear functionId.
+    SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96    // Save floating-point/SIMD registers (q0-q7).
+    add     x12, fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Compute probeSp - initial value of Sp on entry to the helper.
+    stp     x12, x11, [sp, 224]             // Save probeSp, profiledSp.
+    str     xzr, [sp, 240]                  // Clear hiddenArg.
+    mov     w12, \flags
+    stp     w12, wzr, [sp, 248]             // Save flags and clear unused field.
+
+    mov     x0, x10                         // First argument: functionIDOrClientID.
+    mov     x1, sp                          // Second argument: address of the data block built above.
+    bl      \helper
+
+    RESTORE_ARGUMENT_REGISTERS sp, 16       // Restore x8 and argument registers.
+    RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 // Restore floating-point/SIMD registers.
+
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA
+    EPILOG_RETURN
+
+NESTED_END \helper\()Naked, _TEXT
+.endmacro
+
+GenerateProfileHelper ProfileEnter, PROFILE_ENTER
+GenerateProfileHelper ProfileLeave, PROFILE_LEAVE
+GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL
+
+#endif
index c9700db..c1f8429 100644 (file)
@@ -1600,7 +1600,69 @@ DoWrite
     ; Branch to the write barrier (which is already correctly overwritten with
     ; single or multi-proc code based on the current CPU
     b       JIT_WriteBarrier
-    LEAF_END 
-       
+    LEAF_END
+
+#ifdef PROFILING_SUPPORTED
+
+; ------------------------------------------------------------------
+; void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
+   LEAF_ENTRY  JIT_ProfilerEnterLeaveTailcallStub
+   ret      lr                         ; Nothing to do: return straight to the caller.
+   LEAF_END
+
+ #define PROFILE_ENTER    1
+ #define PROFILE_LEAVE    2
+ #define PROFILE_TAILCALL 4
+ #define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 256
+
+; ------------------------------------------------------------------
+    MACRO
+    GenerateProfileHelper $helper, $flags
+
+    LCLS __HelperNakedFuncName
+__HelperNakedFuncName SETS "$helper":CC:"Naked"
+    IMPORT $helper
+
+    NESTED_ENTRY $__HelperNakedFuncName
+        ; On entry:
+        ;   x10 = functionIDOrClientID
+        ;   x11 = profiledSp
+        ;   x12 = throwable (NOTE(review): x12 is overwritten below before it is ever read - confirm it is unused)
+        ;
+        ; On exit:
+        ;   Values of x0-x8, q0-q7, fp are preserved.
+        ;   Values of other volatile registers are not preserved.
+
+        PROLOG_SAVE_REG_PAIR fp, lr, -SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA! ; Allocate the data block and save fp, lr at its base (the Fp, Pc slots).
+        SAVE_ARGUMENT_REGISTERS sp, 16          ; Save x8 and argument registers (x0-x7).
+        str     xzr, [sp, #88]                  ; Clear functionId.
+        SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96    ; Save floating-point/SIMD registers (q0-q7).
+        add     x12, fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA ; Compute probeSp - initial value of Sp on entry to the helper.
+        stp     x12, x11, [sp, #224]            ; Save probeSp, profiledSp.
+        str     xzr, [sp, #240]                 ; Clear hiddenArg.
+        mov     w12, $flags
+        stp     w12, wzr, [sp, #248]            ; Save flags and clear unused field.
+
+        mov     x0, x10                         ; First argument: functionIDOrClientID.
+        mov     x1, sp                          ; Second argument: address of the data block built above.
+        bl $helper
+
+        RESTORE_ARGUMENT_REGISTERS sp, 16       ; Restore x8 and argument registers.
+        RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 ; Restore floating-point/SIMD registers.
+
+        EPILOG_RESTORE_REG_PAIR fp, lr, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA!
+        EPILOG_RETURN
+
+    NESTED_END
+0
+
+    MEND
+
+    GenerateProfileHelper ProfileEnter, PROFILE_ENTER
+    GenerateProfileHelper ProfileLeave, PROFILE_LEAVE
+    GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL
+
+#endif
+
 ; Must be at very end of file
     END
diff --git a/src/coreclr/src/vm/arm64/profiler.cpp b/src/coreclr/src/vm/arm64/profiler.cpp
new file mode 100644 (file)
index 0000000..91c4640
--- /dev/null
@@ -0,0 +1,256 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "common.h"
+
+#ifdef PROFILING_SUPPORTED
+#include "proftoeeinterfaceimpl.h"
+
+// Probe kinds recorded in PROFILE_PLATFORM_SPECIFIC_DATA::flags. The functions in this file test
+// them with bitwise AND, so each value is a distinct bit.
+// NOTE(review): the assembly helpers store their own PROFILE_* constants into the same field;
+// presumably those are defined with the same values - confirm against the assembly constants.
+#define PROFILE_ENTER    1
+#define PROFILE_LEAVE    2
+#define PROFILE_TAILCALL 4
+
+// Data block behind the opaque platform-specific handle: every function in this file casts the
+// handle back to this type.
+// NOTE(review): the Profile*Naked assembly helpers fill this block using literal byte offsets
+// (16, 88, 96, 224, 240 and 248 from its start), so field order and sizes must stay in sync
+// with the assembly.
+typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
+{
+    void*                  Fp;                     // Saved together with Pc by the helper prolog.
+    void*                  Pc;                     // Returned by ProfileGetIPFromPlatformSpecificHandle.
+    void*                  x8;                     // Returned as the return buffer address in the enter probe.
+    ArgumentRegisters      argumentRegisters;      // Saved argument registers; x[0] and x[1] are read below.
+    FunctionID             functionId;             // Set by ProfileSetFunctionIDInPlatformSpecificHandle.
+    FloatArgumentRegisters floatArgumentRegisters; // Saved floating-point/SIMD argument registers.
+    void*                  probeSp;                // Sp on entry to the assembly helper.
+    void*                  profiledSp;             // Sp value supplied by the caller of the assembly helper.
+    void*                  hiddenArg;              // Filled in by the ProfileArgIterator constructor when needed.
+    UINT32                 flags;                  // One of the PROFILE_* values above.
+    UINT32                 unused;
+} PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;
+
+// Returns the Pc value recorded in the profiler data block that the handle points to.
+UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void* pPlatformSpecificHandle)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    const PROFILE_PLATFORM_SPECIFIC_DATA* const pProbeData =
+        static_cast<const PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle);
+
+    void* const recordedPc = pProbeData->Pc;
+    return (UINT_PTR)recordedPc;
+}
+
+// Records functionId in the profiler data block that the handle points to.
+// Both the handle and the function ID are expected to be non-null (checked in debug builds).
+void ProfileSetFunctionIDInPlatformSpecificHandle(void* pPlatformSpecificHandle, FunctionID functionId)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    _ASSERTE(pPlatformSpecificHandle != nullptr);
+    _ASSERTE(functionId != 0);
+
+    static_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle)->functionId = functionId;
+}
+
+ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHandle)
+    : m_argIterator(pSig)
+{
+    WRAPPER_NO_CONTRACT;
+
+    _ASSERTE(pSig != nullptr);
+    _ASSERTE(pPlatformSpecificHandle != nullptr);
+
+    m_handle = pPlatformSpecificHandle;
+
+    PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle);
+#ifdef _DEBUG
+    // Unwind a frame and get the SP for the profiled method to make sure it matches
+    // what the JIT gave us
+
+    // Setup the context to represent the frame that called ProfileEnterNaked
+    CONTEXT ctx;
+    memset(&ctx, 0, sizeof(CONTEXT));
+
+    ctx.Sp = (DWORD64)pData->probeSp;
+    ctx.Fp = (DWORD64)pData->Fp;
+    ctx.Pc = (DWORD64)pData->Pc;
+
+    // Walk up a frame to the caller frame (called the managed method which called ProfileEnterNaked)
+    Thread::VirtualUnwindCallFrame(&ctx);
+
+    _ASSERTE(pData->profiledSp == (void*)ctx.Sp);
+#endif
+
+    // Get the hidden arg if there is one
+    MethodDesc* pMD = FunctionIdToMethodDesc(pData->functionId);
+
+    if ((pData->hiddenArg == nullptr) && (pMD->RequiresInstArg() || pMD->AcquiresInstMethodTableFromThis()))
+    {
+        if ((pData->flags & PROFILE_ENTER) != 0)
+        {
+            if (pMD->AcquiresInstMethodTableFromThis())
+            {
+                pData->hiddenArg = GetThis();
+            }
+            else
+            {
+                // On ARM64 the generic instantiation parameter comes after the optional "this" pointer.
+                if (m_argIterator.HasThis())
+                {
+                    pData->hiddenArg = (void*)pData->argumentRegisters.x[1];
+                }
+                else
+                {
+                    pData->hiddenArg = (void*)pData->argumentRegisters.x[0];
+                }
+            }
+        }
+        else
+        {
+            EECodeInfo codeInfo((PCODE)pData->Pc);
+
+            // We want to pass the caller SP here.
+            pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo);
+        }
+    }
+}
+
+// Drops the iterator's pointer to the profiler data block; the block itself is not released here.
+ProfileArgIterator::~ProfileArgIterator()
+{
+    LIMITED_METHOD_CONTRACT;
+
+    m_handle = nullptr;
+}
+
+LPVOID ProfileArgIterator::GetNextArgAddr()
+{
+    WRAPPER_NO_CONTRACT;
+
+    _ASSERTE(m_handle != nullptr);
+
+    PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle);
+
+    if ((pData->flags & (PROFILE_LEAVE | PROFILE_TAILCALL)) != 0)
+    {
+        _ASSERTE(!"GetNextArgAddr() - arguments are not available in leave and tailcall probes");
+        return nullptr;
+    }
+
+    int argOffset = m_argIterator.GetNextOffset();
+
+    if (argOffset == TransitionBlock::InvalidOffset)
+    {
+        return nullptr;
+    }
+
+    if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset))
+    {
+        return (LPBYTE)&pData->floatArgumentRegisters + (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters());
+    }
+
+    LPVOID pArg = nullptr;
+
+    if (TransitionBlock::IsArgumentRegisterOffset(argOffset))
+    {
+        pArg = (LPBYTE)&pData->argumentRegisters + (argOffset - TransitionBlock::GetOffsetOfArgumentRegisters());
+    }
+    else
+    {
+        _ASSERTE(TransitionBlock::IsStackArgumentOffset(argOffset));
+
+        pArg = (LPBYTE)pData->profiledSp + (argOffset - TransitionBlock::GetOffsetOfArgs());
+    }
+
+    if (m_argIterator.IsArgPassedByRef())
+    {
+        pArg = *(LPVOID*)pArg;
+    }
+
+    return pArg;
+}
+
+// Returns the hidden argument currently stored in the profiler data block (may be nullptr).
+LPVOID ProfileArgIterator::GetHiddenArgValue(void)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    return reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle)->hiddenArg;
+}
+
+LPVOID ProfileArgIterator::GetThis(void)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+    }
+    CONTRACTL_END;
+
+    PROFILE_PLATFORM_SPECIFIC_DATA* pData = (PROFILE_PLATFORM_SPECIFIC_DATA*)m_handle;
+    MethodDesc* pMD = FunctionIdToMethodDesc(pData->functionId);
+
+    // We guarantee to return the correct "this" pointer in the enter probe.
+    // For the leave and tailcall probes, we only return a valid "this" pointer if it is the generics token.
+    if (pData->hiddenArg != nullptr)
+    {
+        if (pMD->AcquiresInstMethodTableFromThis())
+        {
+            return pData->hiddenArg;
+        }
+    }
+
+    if ((pData->flags & PROFILE_ENTER) != 0)
+    {
+        if (m_argIterator.HasThis())
+        {
+            return (LPVOID)pData->argumentRegisters.x[0];
+        }
+    }
+
+    return nullptr;
+}
+
+LPVOID ProfileArgIterator::GetReturnBufferAddr(void)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+    }
+    CONTRACTL_END;
+
+    PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle);
+
+    if ((pData->flags & PROFILE_TAILCALL) != 0)
+    {
+        _ASSERTE(!"GetReturnBufferAddr() - return buffer address is not available in tailcall probe");
+        return nullptr;
+    }
+
+    if (m_argIterator.HasRetBuffArg())
+    {
+        if ((pData->flags & PROFILE_ENTER) != 0)
+        {
+            return (LPVOID)pData->x8;
+        }
+        else
+        {
+            // On ARM64 there is no requirement for the method to preserve the value stored in x8.
+            // In order to workaround this JIT will explicitly return the return buffer address in x0.
+            _ASSERTE((pData->flags & PROFILE_LEAVE) != 0);
+            return (LPVOID)pData->argumentRegisters.x[0];
+        }
+    }
+
+    if (m_argIterator.GetFPReturnSize() != 0)
+    {
+        return &pData->floatArgumentRegisters.q[0];
+    }
+
+    if (!m_argIterator.GetSig()->IsReturnTypeVoid())
+    {
+        return &pData->argumentRegisters.x[0];
+    }
+
+    return nullptr;
+}
+
+#undef PROFILE_ENTER
+#undef PROFILE_LEAVE
+#undef PROFILE_TAILCALL
+
+#endif // PROFILING_SUPPORTED
index b5dee2e..463a379 100644 (file)
@@ -1070,12 +1070,6 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
 }
 #endif // FEATURE_COMINTEROP
 
-
-void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
-{
-    _ASSERTE(!"ARM64:NYI");
-}
-
 void JIT_TailCall() 
 {
     _ASSERTE(!"ARM64:NYI");
@@ -1114,19 +1108,6 @@ void InitJITHelpers1()
 EXTERN_C void JIT_UpdateWriteBarrierState(bool) {}
 #endif // !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE)
 
-EXTERN_C void __stdcall ProfileEnterNaked(UINT_PTR clientData)
-{
-    _ASSERTE(!"ARM64:NYI");
-}
-EXTERN_C void __stdcall ProfileLeaveNaked(UINT_PTR clientData)
-{
-    _ASSERTE(!"ARM64:NYI");
-}
-EXTERN_C void __stdcall ProfileTailcallNaked(UINT_PTR clientData)
-{
-    _ASSERTE(!"ARM64:NYI");
-}
-
 PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(T_DISPATCHER_CONTEXT * pDispatcherContext)
 {
     LIMITED_METHOD_DAC_CONTRACT;
@@ -1268,56 +1249,6 @@ void UMEntryThunkCode::Poison()
 
 #endif // DACCESS_COMPILE
 
-#ifdef PROFILING_SUPPORTED
-#include "proftoeeinterfaceimpl.h"
-
-extern UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void * handle)
-{
-    _ASSERTE(!"ARM64:NYI");
-    return NULL;
-}
-
-extern void ProfileSetFunctionIDInPlatformSpecificHandle(void * pPlatformSpecificHandle, FunctionID functionID)
-{
-    _ASSERTE(!"ARM64:NYI");
-}
-
-ProfileArgIterator::ProfileArgIterator(MetaSig * pMetaSig, void* platformSpecificHandle)
-    : m_argIterator(pMetaSig)
-{
-    _ASSERTE(!"ARM64:NYI");
-}
-
-ProfileArgIterator::~ProfileArgIterator()
-{
-    _ASSERTE(!"ARM64:NYI");
-}
-
-LPVOID ProfileArgIterator::GetNextArgAddr()
-{
-    _ASSERTE(!"ARM64:NYI");
-    return NULL;
-}
-
-LPVOID ProfileArgIterator::GetHiddenArgValue(void)
-{
-    _ASSERTE(!"ARM64:NYI");
-    return NULL;
-}
-
-LPVOID ProfileArgIterator::GetThis(void)
-{
-    _ASSERTE(!"ARM64:NYI");
-    return NULL;
-}
-
-LPVOID ProfileArgIterator::GetReturnBufferAddr(void)
-{
-    _ASSERTE(!"ARM64:NYI");
-    return NULL;
-}
-#endif
-
 #if !defined(DACCESS_COMPILE)
 VOID ResetCurrentContext()
 {
index 09d0057..0e69794 100644 (file)
@@ -5279,13 +5279,6 @@ HCIMPL0(void, JIT_DbgIsJustMyCode)
 }
 HCIMPLEND
 
-#if !(defined(_TARGET_X86_) || defined(BIT64))
-void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
-{
-    return;
-}
-#endif // !(_TARGET_X86_ || BIT64)
-
 #ifdef PROFILING_SUPPORTED
 
 //---------------------------------------------------------------------------------------