// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// This file contains stub functions for unimplemented features needed to
// run on the ARM platform.
#include "jitinterface.h"
#include "comdelegate.h"
#include "invokeutil.h"
#include "dllimportcallback.h"
#include "dllimport.h"
#include "asmconstants.h"
#include "virtualcallstub.h"
#include "rtlfunctions.h"
#include "threadsuspend.h"
// target write barriers
EXTERN_C void JIT_WriteBarrier(Object **dst, Object *ref);
EXTERN_C void JIT_WriteBarrier_End();
EXTERN_C void JIT_CheckedWriteBarrier(Object **dst, Object *ref);
EXTERN_C void JIT_CheckedWriteBarrier_End();
EXTERN_C void JIT_ByRefWriteBarrier_End();
EXTERN_C void JIT_ByRefWriteBarrier_SP(Object **dst, Object *ref);

// source write barriers
EXTERN_C void JIT_WriteBarrier_SP_Pre(Object **dst, Object *ref);
EXTERN_C void JIT_WriteBarrier_SP_Pre_End();
EXTERN_C void JIT_WriteBarrier_SP_Post(Object **dst, Object *ref);
EXTERN_C void JIT_WriteBarrier_SP_Post_End();
EXTERN_C void JIT_WriteBarrier_MP_Pre(Object **dst, Object *ref);
EXTERN_C void JIT_WriteBarrier_MP_Pre_End();
EXTERN_C void JIT_WriteBarrier_MP_Post(Object **dst, Object *ref);
EXTERN_C void JIT_WriteBarrier_MP_Post_End();

EXTERN_C void JIT_CheckedWriteBarrier_SP_Pre(Object **dst, Object *ref);
EXTERN_C void JIT_CheckedWriteBarrier_SP_Pre_End();
EXTERN_C void JIT_CheckedWriteBarrier_SP_Post(Object **dst, Object *ref);
EXTERN_C void JIT_CheckedWriteBarrier_SP_Post_End();
EXTERN_C void JIT_CheckedWriteBarrier_MP_Pre(Object **dst, Object *ref);
EXTERN_C void JIT_CheckedWriteBarrier_MP_Pre_End();
EXTERN_C void JIT_CheckedWriteBarrier_MP_Post(Object **dst, Object *ref);
EXTERN_C void JIT_CheckedWriteBarrier_MP_Post_End();

EXTERN_C void JIT_ByRefWriteBarrier_SP_Pre();
EXTERN_C void JIT_ByRefWriteBarrier_SP_Pre_End();
EXTERN_C void JIT_ByRefWriteBarrier_SP_Post();
EXTERN_C void JIT_ByRefWriteBarrier_SP_Post_End();
EXTERN_C void JIT_ByRefWriteBarrier_MP_Pre();
EXTERN_C void JIT_ByRefWriteBarrier_MP_Pre_End();
EXTERN_C void JIT_ByRefWriteBarrier_MP_Post(Object **dst, Object *ref);
EXTERN_C void JIT_ByRefWriteBarrier_MP_Post_End();

EXTERN_C void JIT_PatchedWriteBarrierStart();
EXTERN_C void JIT_PatchedWriteBarrierLast();

#ifndef DACCESS_COMPILE
//-----------------------------------------------------------------------
// InstructionFormat for conditional jump.
//-----------------------------------------------------------------------
class ThumbCondJump : public InstructionFormat
ThumbCondJump() : InstructionFormat(InstructionFormat::k16)
LIMITED_METHOD_CONTRACT;
virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
LIMITED_METHOD_CONTRACT
_ASSERTE(refsize == InstructionFormat::k16);
virtual UINT GetHotSpotOffset(UINT refsize, UINT variationCode)
LIMITED_METHOD_CONTRACT
_ASSERTE(refsize == InstructionFormat::k16);
//Encoding 1|0|1|1|op|0|i|1|imm5|Rn
//op = Bit3(variation)
//Rn = Bits2-0(variation)
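//This is the 16-bit Thumb CBZ/CBNZ (compare and branch on zero/non-zero) encoding: op selects
//CBZ vs CBNZ, Rn is the register tested against zero, and the even byte offset is assembled
//from i:imm5, which is why EmitInstruction below only accepts forward targets of 0..126 bytes.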
virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
LIMITED_METHOD_CONTRACT
_ASSERTE(refsize == InstructionFormat::k16);
if(fixedUpReference <0 || fixedUpReference > 126)
COMPlusThrow(kNotSupportedException);
_ASSERTE((fixedUpReference & 0x1) == 0);
pOutBuffer[0] = static_cast<BYTE>(((0x3e & fixedUpReference) << 2) | (0x7 & variationCode));
pOutBuffer[1] = static_cast<BYTE>(0xb1 | (0x8 & variationCode)| ((0x40 & fixedUpReference)>>5));
//-----------------------------------------------------------------------
// InstructionFormat for near Jump and short Jump
//-----------------------------------------------------------------------
class ThumbNearJump : public InstructionFormat
ThumbNearJump() : InstructionFormat(InstructionFormat::k16|InstructionFormat::k32)
LIMITED_METHOD_CONTRACT;
virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
LIMITED_METHOD_CONTRACT
if(refsize == InstructionFormat::k16)
else if(refsize == InstructionFormat::k32)
_ASSERTE(!"Unknown refsize");
virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT cond, BYTE *pDataBuffer)
LIMITED_METHOD_CONTRACT
//offsets must be in multiples of 2
_ASSERTE((fixedUpReference & 0x1) == 0);
if(cond == 0xe) //Always execute
if(fixedUpReference >= -2048 && fixedUpReference <= 2046)
if(refsize != InstructionFormat::k16)
_ASSERTE(!"Expected refSize to be 2");
//Emit T2 encoding of B<c> <label> instruction
pOutBuffer[0] = static_cast<BYTE>((fixedUpReference & 0x1fe)>>1);
pOutBuffer[1] = static_cast<BYTE>(0xe0 | ((fixedUpReference & 0xe00)>>9));
else if(fixedUpReference >= -16777216 && fixedUpReference <= 16777214)
if(refsize != InstructionFormat::k32)
_ASSERTE(!"Expected refSize to be 4");
//Emit T4 encoding of B<c> <label> instruction
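//In the T4 encoding the 25-bit signed offset is split into S:I1:I2:imm10:imm11:0, and the
//second halfword stores J1 = ~(I1 ^ S) and J2 = ~(I2 ^ S); the bit shuffling below reconstructs
//exactly those fields from fixedUpReference.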
int s = (fixedUpReference & 0x1000000) >> 24;
int i1 = (fixedUpReference & 0x800000) >> 23;
int i2 = (fixedUpReference & 0x400000) >> 22;
pOutBuffer[0] = static_cast<BYTE>((fixedUpReference & 0xff000) >> 12);
pOutBuffer[1] = static_cast<BYTE>(0xf0 | (s << 2) |( (fixedUpReference & 0x300000) >>20));
pOutBuffer[2] = static_cast<BYTE>((fixedUpReference & 0x1fe) >> 1);
pOutBuffer[3] = static_cast<BYTE>(0x90 | (~(i1^s)) << 5 | (~(i2^s)) << 3 | (fixedUpReference & 0xe00) >> 9);
COMPlusThrow(kNotSupportedException);
else // conditional branch based on flags
if(fixedUpReference >= -256 && fixedUpReference <= 254)
if(refsize != InstructionFormat::k16)
_ASSERTE(!"Expected refSize to be 2");
//Emit T1 encoding of B<c> <label> instruction
pOutBuffer[0] = static_cast<BYTE>((fixedUpReference & 0x1fe)>>1);
pOutBuffer[1] = static_cast<BYTE>(0xd0 | (cond & 0xf));
else if(fixedUpReference >= -1048576 && fixedUpReference <= 1048574)
if(refsize != InstructionFormat::k32)
_ASSERTE(!"Expected refSize to be 4");
//Emit T3 encoding of B<c> <label> instruction
pOutBuffer[0] = static_cast<BYTE>(((cond & 0x3) << 6) | ((fixedUpReference & 0x3f000) >>12));
pOutBuffer[1] = static_cast<BYTE>(0xf0 | ((fixedUpReference & 0x100000) >>18) | ((cond & 0xc) >> 2));
pOutBuffer[2] = static_cast<BYTE>((fixedUpReference & 0x1fe) >> 1);
pOutBuffer[3] = static_cast<BYTE>(0x80 | ((fixedUpReference & 0x40000) >> 13) | ((fixedUpReference & 0x80000) >> 16) | ((fixedUpReference & 0xe00) >> 9));
COMPlusThrow(kNotSupportedException);
virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
LIMITED_METHOD_CONTRACT
case InstructionFormat::k16:
if(variationCode == 0xe)
return (offset >= -2048 && offset <= 2046 && (offset & 0x1) == 0);
return (offset >= -256 && offset <= 254 && (offset & 0x1) == 0);
case InstructionFormat::k32:
if(variationCode == 0xe)
return ((offset >= -16777216) && (offset <= 16777214) && ((offset & 0x1) == 0));
return ((offset >= -1048576) && (offset <= 1048574) && ((offset & 0x1) == 0));
_ASSERTE(!"Unknown refsize");
virtual UINT GetHotSpotOffset(UINT refsize, UINT variationCode)
LIMITED_METHOD_CONTRACT
_ASSERTE(refsize == InstructionFormat::k16 || refsize == InstructionFormat::k32);
//static conditional jump instruction format object
static BYTE gThumbCondJump[sizeof(ThumbCondJump)];
//static near jump instruction format object
static BYTE gThumbNearJump[sizeof(ThumbNearJump)];
void StubLinkerCPU::Init(void)
//Initialize the object
new (gThumbCondJump) ThumbCondJump();
new (gThumbNearJump) ThumbNearJump();
#ifndef CROSSGEN_COMPILE
// GC write barrier support.
// To optimize our write barriers we code the values of several GC globals (e.g. g_lowest_address) directly
// into the barrier function itself, thus avoiding a double memory indirection. Every time the GC modifies one
// of these globals we need to update all of the write barriers accordingly.
// In order to keep this process non-brittle we don't hard code the offsets of the instructions that need to
// be changed. Instead the code used to create these barriers is implemented using special macros that record
// the necessary offsets in a descriptor table. Search for "GC write barrier support" in vm\arm\asmhelpers.asm
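// The update is therefore a two step process (see UpdateGCWriteBarriers below): first the
// appropriate SP/MP, pre/post-grow flavor of each barrier is copied over the patchable barrier
// that jitted code actually calls, and then the movw/movt pairs recorded in the descriptor table
// are rewritten with the current values of the GC globals.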
// Structure describing the layout of a single write barrier descriptor. This must be kept in sync with the
// code in vm\arm\asmhelpers.asm in the WRITE_BARRIER_END macro. Each offset recorded is for one of the
// supported GC globals (an offset of 0xffff is encoded if that global is not used by the particular barrier
// function). We currently only support one usage of each global by any single barrier function. The offset is
// the byte offset from the start of the function at which a movw,movt instruction pair is used to load the
// value of the global into a register.
struct WriteBarrierDescriptor
BYTE * m_pFuncStart; // Pointer to the start of the barrier function
BYTE * m_pFuncEnd; // Pointer to the end of the barrier function
DWORD m_dw_g_lowest_address_offset; // Offset of the instruction reading g_lowest_address
DWORD m_dw_g_highest_address_offset; // Offset of the instruction reading g_highest_address
DWORD m_dw_g_ephemeral_low_offset; // Offset of the instruction reading g_ephemeral_low
DWORD m_dw_g_ephemeral_high_offset; // Offset of the instruction reading g_ephemeral_high
DWORD m_dw_g_card_table_offset; // Offset of the instruction reading g_card_table
// Infrastructure used for mapping of the source and destination of current WB patching
struct WriteBarrierMapping
PBYTE to; // Pointer to the write-barrier where it was copied over
PBYTE from; // Pointer to write-barrier from which it was copied
const int WriteBarrierIndex = 0;
const int CheckedWriteBarrierIndex = 1;
const int ByRefWriteBarrierIndex = 2;
const int MaxWriteBarrierIndex = 3;
WriteBarrierMapping wbMapping[MaxWriteBarrierIndex] =
{(PBYTE)JIT_WriteBarrier, NULL},
{(PBYTE)JIT_CheckedWriteBarrier, NULL},
{(PBYTE)JIT_ByRefWriteBarrier, NULL}
PBYTE FindWBMapping(PBYTE from)
for(int i = 0; i < MaxWriteBarrierIndex; ++i)
if(wbMapping[i].from == from)
return wbMapping[i].to;
// Pointer to the start of the descriptor table. The end of the table is marked by a sentinel entry
// (m_pFuncStart is NULL).
EXTERN_C WriteBarrierDescriptor g_rgWriteBarrierDescriptors;
// Determine the range of memory containing all the write barrier implementations (these are clustered
// together and should fit in a page or maybe two).
void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength)
DWORD size = (PBYTE)JIT_PatchedWriteBarrierLast - (PBYTE)JIT_PatchedWriteBarrierStart;
*ppbStart = (PBYTE)JIT_PatchedWriteBarrierStart;
*pcbLength = size;
void CopyWriteBarrier(PCODE dstCode, PCODE srcCode, PCODE endCode)
TADDR dst = PCODEToPINSTR(dstCode);
TADDR src = PCODEToPINSTR(srcCode);
TADDR end = PCODEToPINSTR(endCode);
size_t size = (PBYTE)end - (PBYTE)src;
memcpy((PVOID)dst, (PVOID)src, size);
void ValidateWriteBarriers()
// Post-grow WB are bigger than pre-grow so validating that target WB has space to accommodate those
_ASSERTE( ((PBYTE)JIT_WriteBarrier_End - (PBYTE)JIT_WriteBarrier) >= ((PBYTE)JIT_WriteBarrier_MP_Post_End - (PBYTE)JIT_WriteBarrier_MP_Post));
_ASSERTE( ((PBYTE)JIT_WriteBarrier_End - (PBYTE)JIT_WriteBarrier) >= ((PBYTE)JIT_WriteBarrier_SP_Post_End - (PBYTE)JIT_WriteBarrier_SP_Post));
_ASSERTE( ((PBYTE)JIT_CheckedWriteBarrier_End - (PBYTE)JIT_CheckedWriteBarrier) >= ((PBYTE)JIT_CheckedWriteBarrier_MP_Post_End - (PBYTE)JIT_CheckedWriteBarrier_MP_Post));
_ASSERTE( ((PBYTE)JIT_CheckedWriteBarrier_End - (PBYTE)JIT_CheckedWriteBarrier) >= ((PBYTE)JIT_CheckedWriteBarrier_SP_Post_End - (PBYTE)JIT_CheckedWriteBarrier_SP_Post));
_ASSERTE( ((PBYTE)JIT_ByRefWriteBarrier_End - (PBYTE)JIT_ByRefWriteBarrier) >= ((PBYTE)JIT_ByRefWriteBarrier_MP_Post_End - (PBYTE)JIT_ByRefWriteBarrier_MP_Post));
_ASSERTE( ((PBYTE)JIT_ByRefWriteBarrier_End - (PBYTE)JIT_ByRefWriteBarrier) >= ((PBYTE)JIT_ByRefWriteBarrier_SP_Post_End - (PBYTE)JIT_ByRefWriteBarrier_SP_Post));
#define UPDATE_WB(_proc,_grow) \
CopyWriteBarrier((PCODE)JIT_WriteBarrier, (PCODE)JIT_WriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_WriteBarrier_ ## _proc ## _ ## _grow ## _End); \
wbMapping[WriteBarrierIndex].from = (PBYTE)JIT_WriteBarrier_ ## _proc ## _ ## _grow ; \
CopyWriteBarrier((PCODE)JIT_CheckedWriteBarrier, (PCODE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow ## _End); \
wbMapping[CheckedWriteBarrierIndex].from = (PBYTE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow ; \
CopyWriteBarrier((PCODE)JIT_ByRefWriteBarrier, (PCODE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow ## _End); \
wbMapping[ByRefWriteBarrierIndex].from = (PBYTE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow ; \
// Update the instructions in our various write barrier implementations that refer directly to the values
// of GC globals such as g_lowest_address and g_card_table. We don't particularly care which values have
// changed on each of these callbacks, it's pretty cheap to refresh them all.
void UpdateGCWriteBarriers(bool postGrow = false)
// Define a helper macro that abstracts the minutiae of patching the instructions to access the value of a
// particular GC global.
ValidateWriteBarriers();
static bool wbCopyRequired = true; // We begin with a wb copy
static bool wbIsPostGrow = false; // We begin with pre-Grow write barrier
if(postGrow && !wbIsPostGrow)
{
wbIsPostGrow = true;
wbCopyRequired = true;
}
BOOL mp = g_SystemInfo.dwNumberOfProcessors > 1;
wbCopyRequired = false;
#define GWB_PATCH_OFFSET(_global) \
if (pDesc->m_dw_##_global##_offset != 0xffff) \
PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset - 1), (UINT32)(dac_cast<TADDR>(_global)));
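// Note: 'to' is a code pointer taken from wbMapping and so has the Thumb bit set; the '- 1'
// above converts it back to the raw address of the movw/movt pair before PutThumb2Mov32
// rewrites the immediate.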
// Iterate through the write barrier patch table created in the .clrwb section
// (see write barrier asm code)
WriteBarrierDescriptor * pDesc = &g_rgWriteBarrierDescriptors;
while (pDesc->m_pFuncStart)
// If the write barrier is being currently used (as in copied over to the patchable site)
// then read the patch location from the table and use the offset to patch the target asm code
PBYTE to = FindWBMapping(pDesc->m_pFuncStart);
GWB_PATCH_OFFSET(g_lowest_address);
GWB_PATCH_OFFSET(g_highest_address);
GWB_PATCH_OFFSET(g_ephemeral_low);
GWB_PATCH_OFFSET(g_ephemeral_high);
GWB_PATCH_OFFSET(g_card_table);
int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
// The runtime is not always suspended when this is called (unlike StompWriteBarrierEphemeral) but we have
// no way to update the barrier code atomically on ARM since each 32-bit value we change is loaded over
// two instructions. So we have to suspend the EE (which forces code out of the barrier functions) before
// proceeding. Luckily the case where the runtime is not already suspended is relatively rare (allocation
// of a new large object heap segment). Skip the suspend for the case where we're called during runtime startup.
// suspending/resuming the EE under GC stress will trigger a GC and if we're holding the
// GC lock due to allocating a LOH segment it will cause a deadlock so disable it here.
GCStressPolicy::InhibitHolder iholder;
int stompWBCompleteActions = SWB_ICACHE_FLUSH;
if (!isRuntimeSuspended)
ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_OTHER);
stompWBCompleteActions |= SWB_EE_RESTART;
UpdateGCWriteBarriers(bReqUpperBoundsCheck);
return stompWBCompleteActions;
int StompWriteBarrierEphemeral(bool isRuntimeSuspended)
UNREFERENCED_PARAMETER(isRuntimeSuspended);
_ASSERTE(isRuntimeSuspended);
UpdateGCWriteBarriers();
return SWB_ICACHE_FLUSH;
void FlushWriteBarrierInstructionCache()
// We've changed code so we must flush the instruction cache.
BYTE *pbAlteredRange;
DWORD cbAlteredRange;
ComputeWriteBarrierRange(&pbAlteredRange, &cbAlteredRange);
FlushInstructionCache(GetCurrentProcess(), pbAlteredRange, cbAlteredRange);
#endif // CROSSGEN_COMPILE
#endif // !DACCESS_COMPILE
#ifndef CROSSGEN_COMPILE
void LazyMachState::unwindLazyState(LazyMachState* baseState,
MachState* unwoundstate,
HostCallPreference hostCallPreference)
T_KNONVOLATILE_CONTEXT_POINTERS nonVolRegPtrs;
ctx.Pc = baseState->captureIp;
ctx.Sp = baseState->captureSp;
ctx.R4 = unwoundstate->captureR4_R11[0] = baseState->captureR4_R11[0];
ctx.R5 = unwoundstate->captureR4_R11[1] = baseState->captureR4_R11[1];
ctx.R6 = unwoundstate->captureR4_R11[2] = baseState->captureR4_R11[2];
ctx.R7 = unwoundstate->captureR4_R11[3] = baseState->captureR4_R11[3];
ctx.R8 = unwoundstate->captureR4_R11[4] = baseState->captureR4_R11[4];
ctx.R9 = unwoundstate->captureR4_R11[5] = baseState->captureR4_R11[5];
ctx.R10 = unwoundstate->captureR4_R11[6] = baseState->captureR4_R11[6];
ctx.R11 = unwoundstate->captureR4_R11[7] = baseState->captureR4_R11[7];
#if !defined(DACCESS_COMPILE)
// For DAC, if we get here, it means that the LazyMachState is uninitialized and we have to unwind it.
// The API we use to unwind in DAC is StackWalk64(), which does not support the context pointers.
// Restore the integer registers to KNONVOLATILE_CONTEXT_POINTERS to be used for unwinding.
nonVolRegPtrs.R4 = &unwoundstate->captureR4_R11[0];
nonVolRegPtrs.R5 = &unwoundstate->captureR4_R11[1];
nonVolRegPtrs.R6 = &unwoundstate->captureR4_R11[2];
nonVolRegPtrs.R7 = &unwoundstate->captureR4_R11[3];
nonVolRegPtrs.R8 = &unwoundstate->captureR4_R11[4];
nonVolRegPtrs.R9 = &unwoundstate->captureR4_R11[5];
nonVolRegPtrs.R10 = &unwoundstate->captureR4_R11[6];
nonVolRegPtrs.R11 = &unwoundstate->captureR4_R11[7];
#endif // DACCESS_COMPILE
LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK LazyMachState::unwindLazyState(ip:%p,sp:%p)\n", baseState->captureIp, baseState->captureSp));
pvControlPc = Thread::VirtualUnwindCallFrame(&ctx, &nonVolRegPtrs);
#else // !FEATURE_PAL
#ifdef DACCESS_COMPILE
HRESULT hr = DacVirtualUnwind(threadId, &ctx, &nonVolRegPtrs);
#else // DACCESS_COMPILE
BOOL success = PAL_VirtualUnwind(&ctx, &nonVolRegPtrs);
_ASSERTE(!"unwindLazyState: Unwinding failed");
EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE);
#endif // DACCESS_COMPILE
pvControlPc = GetIP(&ctx);
#endif // !FEATURE_PAL
if (funCallDepth > 0)
if (funCallDepth == 0)
// Determine whether given IP resides in JITted code. (It returns nonzero in that case.)
// Use it now to see if we've unwound to managed code yet.
BOOL fFailedReaderLock = FALSE;
BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
if (fFailedReaderLock)
// We don't know if we would have been able to find a JIT
// manager, because we couldn't enter the reader lock without
// yielding (and our caller doesn't want us to yield). So abort
// Invalidate the lazyState we're returning, so the caller knows
// we aborted before we could fully unwind
unwoundstate->_isValid = false;
// Update unwoundState so that HelperMethodFrameRestoreState knows which
// registers have been potentially modified.
unwoundstate->_pc = ctx.Pc;
unwoundstate->_sp = ctx.Sp;
#ifdef DACCESS_COMPILE
// For DAC builds, we update the registers directly since we don't have context pointers
unwoundstate->captureR4_R11[0] = ctx.R4;
unwoundstate->captureR4_R11[1] = ctx.R5;
unwoundstate->captureR4_R11[2] = ctx.R6;
unwoundstate->captureR4_R11[3] = ctx.R7;
unwoundstate->captureR4_R11[4] = ctx.R8;
unwoundstate->captureR4_R11[5] = ctx.R9;
unwoundstate->captureR4_R11[6] = ctx.R10;
unwoundstate->captureR4_R11[7] = ctx.R11;
#else // !DACCESS_COMPILE
// For non-DAC builds, update the register state from context pointers
unwoundstate->_R4_R11[0] = (PDWORD)nonVolRegPtrs.R4;
unwoundstate->_R4_R11[1] = (PDWORD)nonVolRegPtrs.R5;
unwoundstate->_R4_R11[2] = (PDWORD)nonVolRegPtrs.R6;
unwoundstate->_R4_R11[3] = (PDWORD)nonVolRegPtrs.R7;
unwoundstate->_R4_R11[4] = (PDWORD)nonVolRegPtrs.R8;
unwoundstate->_R4_R11[5] = (PDWORD)nonVolRegPtrs.R9;
unwoundstate->_R4_R11[6] = (PDWORD)nonVolRegPtrs.R10;
unwoundstate->_R4_R11[7] = (PDWORD)nonVolRegPtrs.R11;
#endif // DACCESS_COMPILE
unwoundstate->_isValid = true;
void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
pRD->IsCallerContextValid = FALSE;
pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
// Copy the saved state from the frame to the current context.
LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK HelperMethodFrame::UpdateRegDisplay cached ip:%p, sp:%p\n", m_MachState._pc, m_MachState._sp));
#if defined(DACCESS_COMPILE)
// For DAC, we may get here when the HMF is still uninitialized.
// So we may need to unwind here.
if (!m_MachState.isValid())
// This allocation throws on OOM.
MachState* pUnwoundState = (MachState*)DacAllocHostOnlyInstance(sizeof(*pUnwoundState), true);
InsureInit(false, pUnwoundState);
pRD->pCurrentContext->Pc = pRD->ControlPC = pUnwoundState->_pc;
pRD->pCurrentContext->Sp = pRD->SP = pUnwoundState->_sp;
pRD->pCurrentContext->R4 = (DWORD)(pUnwoundState->captureR4_R11[0]);
pRD->pCurrentContext->R5 = (DWORD)(pUnwoundState->captureR4_R11[1]);
pRD->pCurrentContext->R6 = (DWORD)(pUnwoundState->captureR4_R11[2]);
pRD->pCurrentContext->R7 = (DWORD)(pUnwoundState->captureR4_R11[3]);
pRD->pCurrentContext->R8 = (DWORD)(pUnwoundState->captureR4_R11[4]);
pRD->pCurrentContext->R9 = (DWORD)(pUnwoundState->captureR4_R11[5]);
pRD->pCurrentContext->R10 = (DWORD)(pUnwoundState->captureR4_R11[6]);
pRD->pCurrentContext->R11 = (DWORD)(pUnwoundState->captureR4_R11[7]);
#endif // DACCESS_COMPILE
// reset pContext; it's only valid for active (top-most) frame
pRD->pContext = NULL;
pRD->ControlPC = GetReturnAddress();
pRD->SP = (DWORD)(size_t)m_MachState._sp;
pRD->pCurrentContext->Pc = pRD->ControlPC;
pRD->pCurrentContext->Sp = pRD->SP;
pRD->pCurrentContext->R4 = *m_MachState._R4_R11[0];
pRD->pCurrentContext->R5 = *m_MachState._R4_R11[1];
pRD->pCurrentContext->R6 = *m_MachState._R4_R11[2];
pRD->pCurrentContext->R7 = *m_MachState._R4_R11[3];
pRD->pCurrentContext->R8 = *m_MachState._R4_R11[4];
pRD->pCurrentContext->R9 = *m_MachState._R4_R11[5];
pRD->pCurrentContext->R10 = *m_MachState._R4_R11[6];
pRD->pCurrentContext->R11 = *m_MachState._R4_R11[7];
pRD->pCurrentContextPointers->R4 = m_MachState._R4_R11[0];
pRD->pCurrentContextPointers->R5 = m_MachState._R4_R11[1];
pRD->pCurrentContextPointers->R6 = m_MachState._R4_R11[2];
pRD->pCurrentContextPointers->R7 = m_MachState._R4_R11[3];
pRD->pCurrentContextPointers->R8 = m_MachState._R4_R11[4];
pRD->pCurrentContextPointers->R9 = m_MachState._R4_R11[5];
pRD->pCurrentContextPointers->R10 = m_MachState._R4_R11[6];
pRD->pCurrentContextPointers->R11 = m_MachState._R4_R11[7];
pRD->pCurrentContextPointers->Lr = NULL;
#endif // !CROSSGEN_COMPILE
TADDR FixupPrecode::GetMethodDesc()
LIMITED_METHOD_DAC_CONTRACT;
// This lookup is also manually inlined in PrecodeFixupThunk assembly code
TADDR base = *PTR_TADDR(GetBase());
return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT);
#ifdef DACCESS_COMPILE
void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
DacEnumMemoryRegion(dac_cast<TADDR>(this), sizeof(FixupPrecode));
DacEnumMemoryRegion(GetBase(), sizeof(TADDR));
#endif // DACCESS_COMPILE
#ifndef DACCESS_COMPILE
void StubPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
int n = 0;
m_rgCode[n++] = 0xf8df; // ldr r12, [pc, #8]
m_rgCode[n++] = 0xc008;
m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #0]
m_rgCode[n++] = 0xf000;
_ASSERTE(n == _countof(m_rgCode));
m_pTarget = GetPreStubEntryPoint();
m_pMethodDesc = (TADDR)pMD;
#ifdef FEATURE_NATIVE_IMAGE_GENERATION
void StubPrecode::Fixup(DataImage *image)
image->FixupFieldToNode(this, offsetof(StubPrecode, m_pTarget),
image->GetHelperThunk(CORINFO_HELP_EE_PRESTUB),
IMAGE_REL_BASED_PTR);
image->FixupField(this, offsetof(StubPrecode, m_pMethodDesc),
(void*)GetMethodDesc(),
IMAGE_REL_BASED_PTR);
#endif // FEATURE_NATIVE_IMAGE_GENERATION
void NDirectImportPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
int n = 0;
m_rgCode[n++] = 0xf8df; // ldr r12, [pc, #4]
m_rgCode[n++] = 0xc004;
m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #4]
m_rgCode[n++] = 0xf004;
_ASSERTE(n == _countof(m_rgCode));
m_pMethodDesc = (TADDR)pMD;
m_pTarget = GetEEFuncEntryPoint(NDirectImportThunk);
#ifdef FEATURE_NATIVE_IMAGE_GENERATION
void NDirectImportPrecode::Fixup(DataImage *image)
image->FixupField(this, offsetof(NDirectImportPrecode, m_pMethodDesc),
(void*)GetMethodDesc(),
IMAGE_REL_BASED_PTR);
image->FixupFieldToNode(this, offsetof(NDirectImportPrecode, m_pTarget),
image->GetHelperThunk(CORINFO_HELP_EE_PINVOKE_FIXUP),
IMAGE_REL_BASED_PTR);
#endif // FEATURE_NATIVE_IMAGE_GENERATION
void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/)
m_rgCode[0] = 0x46fc; // mov r12, pc
m_rgCode[1] = 0xf8df; // ldr pc, [pc, #4]
m_rgCode[2] = 0xf004;
// Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work.
if (m_PrecodeChunkIndex == 0)
_ASSERTE(FitsInU1(iPrecodeChunkIndex));
m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
if (iMethodDescChunkIndex != -1)
if (m_MethodDescChunkIndex == 0)
_ASSERTE(FitsInU1(iMethodDescChunkIndex));
m_MethodDescChunkIndex = static_cast<BYTE>(iMethodDescChunkIndex);
if (*(void**)GetBase() == NULL)
*(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT);
_ASSERTE(GetMethodDesc() == (TADDR)pMD);
if (pLoaderAllocator != NULL)
m_pTarget = GetEEFuncEntryPoint(PrecodeFixupThunk);
#ifdef FEATURE_NATIVE_IMAGE_GENERATION
// Partial initialization. Used to save regrouped chunks.
void FixupPrecode::InitForSave(int iPrecodeChunkIndex)
STANDARD_VM_CONTRACT;
m_rgCode[0] = 0x46fc; // mov r12, pc
m_rgCode[1] = 0xf8df; // ldr pc, [pc, #4]
m_rgCode[2] = 0xf004;
_ASSERTE(FitsInU1(iPrecodeChunkIndex));
m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
// The rest is initialized in code:FixupPrecode::Fixup
void FixupPrecode::Fixup(DataImage *image, MethodDesc * pMD)
STANDARD_VM_CONTRACT;
// Note that GetMethodDesc() does not return the correct value because of
// regrouping of MethodDescs into hot and cold blocks. That's why the caller
// has to supply the actual MethodDesc
SSIZE_T mdChunkOffset;
ZapNode * pMDChunkNode = image->GetNodeForStructure(pMD, &mdChunkOffset);
ZapNode * pHelperThunk = image->GetHelperThunk(CORINFO_HELP_EE_PRECODE_FIXUP);
image->FixupFieldToNode(this, offsetof(FixupPrecode, m_pTarget), pHelperThunk);
// Set the actual chunk index
FixupPrecode * pNewPrecode = (FixupPrecode *)image->GetImagePointer(this);
size_t mdOffset = mdChunkOffset - sizeof(MethodDescChunk);
size_t chunkIndex = mdOffset / MethodDesc::ALIGNMENT;
_ASSERTE(FitsInU1(chunkIndex));
pNewPrecode->m_MethodDescChunkIndex = (BYTE) chunkIndex;
// Fixup the base of MethodDescChunk
if (m_PrecodeChunkIndex == 0)
image->FixupFieldToNode(this, (BYTE *)GetBase() - (BYTE *)this,
pMDChunkNode, sizeof(MethodDescChunk));
#endif // FEATURE_NATIVE_IMAGE_GENERATION
void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
int n = 0;
m_rgCode[n++] = 0x4684; // mov r12, r0
m_rgCode[n++] = 0x4608; // mov r0, r1
m_rgCode[n++] = 0xea4f; // mov r1, r12
m_rgCode[n++] = 0x010c;
m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #0]
m_rgCode[n++] = 0xf000;
_ASSERTE(n == _countof(m_rgCode));
m_pTarget = GetPreStubEntryPoint();
m_pMethodDesc = (TADDR)pMD;
#ifdef HAS_REMOTING_PRECODE
void RemotingPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
int n = 0;
m_rgCode[n++] = 0xb502; // push {r1,lr}
m_rgCode[n++] = 0x4904; // ldr r1, [pc, #16] ; =m_pPrecodeRemotingThunk
m_rgCode[n++] = 0x4788; // blx r1
m_rgCode[n++] = 0xe8bd; // pop {r1,lr}
m_rgCode[n++] = 0x4002;
m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #12] ; =m_pLocalTarget
m_rgCode[n++] = 0xf00c;
m_rgCode[n++] = 0xbf00; // nop ; padding for alignment
_ASSERTE(n == _countof(m_rgCode));
m_pMethodDesc = (TADDR)pMD;
m_pPrecodeRemotingThunk = GetEEFuncEntryPoint(PrecodeRemotingThunk);
m_pLocalTarget = GetPreStubEntryPoint();
#ifdef FEATURE_NATIVE_IMAGE_GENERATION
void RemotingPrecode::Fixup(DataImage *image, ZapNode *pCodeNode)
image->FixupFieldToNode(this, offsetof(RemotingPrecode, m_pLocalTarget),
IMAGE_REL_BASED_PTR);
image->FixupFieldToNode(this, offsetof(RemotingPrecode, m_pLocalTarget),
image->GetHelperThunk(CORINFO_HELP_EE_PRESTUB),
IMAGE_REL_BASED_PTR);
image->FixupFieldToNode(this, offsetof(RemotingPrecode, m_pPrecodeRemotingThunk),
image->GetHelperThunk(CORINFO_HELP_EE_REMOTING_THUNK),
IMAGE_REL_BASED_PTR);
image->FixupField(this, offsetof(RemotingPrecode, m_pMethodDesc),
(void*)GetMethodDesc(),
IMAGE_REL_BASED_PTR);
#endif // FEATURE_NATIVE_IMAGE_GENERATION
void CTPMethodTable::ActivatePrecodeRemotingThunk()
// Nothing to do for ARM version of remoting precode (we don't burn the TP MethodTable pointer into
// PrecodeRemotingThunk directly).
#endif // HAS_REMOTING_PRECODE
#ifndef CROSSGEN_COMPILE
/*
Rough pseudo-code of interface dispatching:
// jitted code sets r0, r4:
r4 = indirectionCell;
// jitted code calls *indirectionCell
switch (*indirectionCell)
case LookupHolder._stub:
// ResolveWorkerAsmStub:
*indirectionCell = DispatchHolder._stub;
call ResolveWorkerStatic, jump to target method;
case DispatchHolder._stub:
if (r0.methodTable == expectedMethodTable) jump to target method;
// ResolveHolder._stub._failEntryPoint:
jump to case ResolveHolder._stub._resolveEntryPoint;
case ResolveHolder._stub._resolveEntryPoint:
if (r0.methodTable in hashTable) jump to target method;
// ResolveHolder._stub._slowEntryPoint:
// ResolveWorkerChainLookupAsmStub:
// ResolveWorkerAsmStub:
if (_failEntryPoint called too many times) *indirectionCell = ResolveHolder._stub._resolveEntryPoint;
call ResolveWorkerStatic, jump to target method;
Note that ResolveWorkerChainLookupAsmStub currently points directly
to ResolveWorkerAsmStub; in the future, this could be separate.
*/
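// The three stub flavors referenced above are emitted by LookupHolder::Initialize,
// DispatchHolder::Initialize and ResolveHolder::Initialize below; each hand-assembles the
// Thumb-2 instructions it needs directly into its holder.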
void LookupHolder::InitializeStatic()
// Nothing to initialize
void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
// Called directly by JITTED code
// See ResolveWorkerAsmStub
// ldr r12, [pc + 8] ; #_token
_stub._entryPoint[0] = 0xf8df;
_stub._entryPoint[1] = 0xc008;
// ldr pc, [pc] ; #_resolveWorkerTarget
_stub._entryPoint[2] = 0xf8df;
_stub._entryPoint[3] = 0xf000;
_stub._resolveWorkerTarget = resolveWorkerTarget;
_stub._token = dispatchToken;
_ASSERTE(4 == LookupStub::entryPointLen);
void DispatchHolder::InitializeStatic()
// Nothing to initialize
void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT)
// Called directly by JITTED code
// DispatchHolder._stub._entryPoint(r0:object, r1, r2, r3, r4:IndirectionCell)
// if (r0.methodTable == this._expectedMT) (this._implTarget)(r0, r1, r2, r3);
// else (this._failTarget)(r0, r1, r2, r3, r4);
int n = 0;
WORD offset;
// We rely on the stub entry-point being DWORD aligned (so we can tell whether any subsequent WORD is
// DWORD-aligned or not, which matters in the calculation of PC-relative offsets).
_ASSERTE(((UINT_PTR)_stub._entryPoint & 0x3) == 0);
// Compute a PC-relative offset for use in an instruction encoding. Must call this prior to emitting the
// instruction halfword to which it applies. For thumb-2 encodings the offset must be computed before emitting
// the first of the halfwords.
#undef PC_REL_OFFSET
#define PC_REL_OFFSET(_field) (WORD)(offsetof(DispatchStub, _field) - (offsetof(DispatchStub, _entryPoint[n + 2]) & 0xfffffffc))
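// Note on the macro above: Thumb PC-relative loads use Align(PC, 4) as their base, where PC reads
// as the address of the load instruction plus 4. Each entry-point element is a 16-bit WORD, so
// '[n + 2]' is the current emit position plus 4 bytes and the 0xfffffffc mask applies the 4-byte
// alignment, giving the offset relative to the base address the hardware will actually use.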
// r0 : object. It can be null as well.
// When it is null the code causes an AV. This AV is seen by the VM's personality routine
// and it converts it into nullRef. We want the AV to happen before modifying the stack so that we can get the
// call stack in windbg at the point of AV. Therefore "ldr r12, [r0]" should be the first instruction.
// ldr r12, [r0 + #Object.m_pMethTab]
_stub._entryPoint[n++] = DISPATCH_STUB_FIRST_WORD;
_stub._entryPoint[n++] = 0xc000;
// push {r5}
_stub._entryPoint[n++] = 0xb420;
// ldr r5, [pc + #_expectedMT]
offset = PC_REL_OFFSET(_expectedMT);
_ASSERTE((offset & 0x3) == 0);
_stub._entryPoint[n++] = 0x4d00 | (offset >> 2);
// cmp r5, r12
_stub._entryPoint[n++] = 0x4565;
// pop {r5}
_stub._entryPoint[n++] = 0xbc20;
// bne failTarget
_stub._entryPoint[n++] = 0xd101;
// ldr pc, [pc + #_implTarget]
offset = PC_REL_OFFSET(_implTarget);
_stub._entryPoint[n++] = 0xf8df;
_stub._entryPoint[n++] = 0xf000 | offset;
// ldr pc, [pc + #_failTarget]
offset = PC_REL_OFFSET(_failTarget);
_stub._entryPoint[n++] = 0xf8df;
_stub._entryPoint[n++] = 0xf000 | offset;
// nop - insert padding
_stub._entryPoint[n++] = 0xbf00;
_ASSERTE(n == DispatchStub::entryPointLen);
// Make sure that the data members below are aligned
_ASSERTE((n & 1) == 0);
_stub._expectedMT = DWORD(expectedMT);
_stub._failTarget = failTarget;
_stub._implTarget = implTarget;
void ResolveHolder::InitializeStatic()
void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
size_t dispatchToken, UINT32 hashedToken,
void * cacheAddr, INT32 * counterAddr)
// Called directly by JITTED code
// ResolveStub._resolveEntryPoint(r0:Object*, r1, r2, r3, r4:IndirectionCellAndFlags)
// MethodTable mt = r0.m_pMethTab;
// int i = ((mt + mt >> 12) ^ this._hashedToken) & this._cacheMask
// ResolveCacheElem e = this._cacheAddress + i
// if (mt == e.pMT && this._token == e.token) (e.target)(r0, r1, r2, r3);
// } while (e != null)
// (this._slowEntryPoint)(r0, r1, r2, r3, r4);
int n = 0;
WORD offset;
// We rely on the stub entry-point being DWORD aligned (so we can tell whether any subsequent WORD is
// DWORD-aligned or not, which matters in the calculation of PC-relative offsets).
_ASSERTE(((UINT_PTR)_stub._resolveEntryPoint & 0x3) == 0);
// Compute a PC-relative offset for use in an instruction encoding. Must call this prior to emitting the
// instruction halfword to which it applies. For thumb-2 encodings the offset must be computed before emitting
// the first of the halfwords.
#undef PC_REL_OFFSET
#define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - (offsetof(ResolveStub, _resolveEntryPoint[n + 2]) & 0xfffffffc))
// ldr r12, [r0 + #Object.m_pMethTab]
_stub._resolveEntryPoint[n++] = RESOLVE_STUB_FIRST_WORD;
_stub._resolveEntryPoint[n++] = 0xc000;
// ;; We need two scratch registers, r5 and r6
_stub._resolveEntryPoint[n++] = 0xb460;
// ;; Compute i = ((mt + mt >> 12) ^ this._hashedToken) & this._cacheMask
// add r6, r12, r12 lsr #12
_stub._resolveEntryPoint[n++] = 0xeb0c;
_stub._resolveEntryPoint[n++] = 0x361c;
// ldr r5, [pc + #_hashedToken]
offset = PC_REL_OFFSET(_hashedToken);
_ASSERTE((offset & 0x3) == 0);
_stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
// eor r6, r6, r5
_stub._resolveEntryPoint[n++] = 0xea86;
_stub._resolveEntryPoint[n++] = 0x0605;
// ldr r5, [pc + #_cacheMask]
offset = PC_REL_OFFSET(_cacheMask);
_ASSERTE((offset & 0x3) == 0);
_stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
// and r6, r6, r5
_stub._resolveEntryPoint[n++] = 0xea06;
_stub._resolveEntryPoint[n++] = 0x0605;
// ;; ResolveCacheElem e = this._cacheAddress + i
// ldr r5, [pc + #_cacheAddress]
offset = PC_REL_OFFSET(_cacheAddress);
_ASSERTE((offset & 0x3) == 0);
_stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
// ldr r6, [r5 + r6] ;; r6 = e = this._cacheAddress + i
_stub._resolveEntryPoint[n++] = 0x59ae;
// ;; do {
int loop = n;
// ;; Check mt == e.pMT
// ldr r5, [r6 + #ResolveCacheElem.pMT]
offset = offsetof(ResolveCacheElem, pMT);
_ASSERTE(offset <= 124 && (offset & 0x3) == 0);
_stub._resolveEntryPoint[n++] = 0x6835 | (offset<< 4);
// cmp r12, r5
_stub._resolveEntryPoint[n++] = 0x45ac;
// bne nextEntry
_stub._resolveEntryPoint[n++] = 0xd108;
// ;; Check this._token == e.token
// ldr r5, [pc + #_token]
offset = PC_REL_OFFSET(_token);
_ASSERTE((offset & 0x3) == 0);
_stub._resolveEntryPoint[n++] = 0x4d00 | (offset>>2);
// ldr r12, [r6 + #ResolveCacheElem.token]
offset = offsetof(ResolveCacheElem, token);
_stub._resolveEntryPoint[n++] = 0xf8d6;
_stub._resolveEntryPoint[n++] = 0xc000 | offset;
// cmp r12, r5
_stub._resolveEntryPoint[n++] = 0x45ac;
// bne nextEntry
_stub._resolveEntryPoint[n++] = 0xd103;
// ldr r12, [r6 + #ResolveCacheElem.target] ;; r12 : e.target
offset = offsetof(ResolveCacheElem, target);
_stub._resolveEntryPoint[n++] = 0xf8d6;
_stub._resolveEntryPoint[n++] = 0xc000 | offset;
// ;; Restore r5 and r6
_stub._resolveEntryPoint[n++] = 0xbc60;
// ;; Branch to e.target
// bx r12 ;; (e.target)(r0,r1,r2,r3)
_stub._resolveEntryPoint[n++] = 0x4760;
// nextEntry:
// ldr r6, [r6 + #ResolveCacheElem.pNext]
offset = offsetof(ResolveCacheElem, pNext);
_ASSERTE(offset <=124 && (offset & 0x3) == 0);
_stub._resolveEntryPoint[n++] = 0x6836 | (offset << 4);
// ;; } while(e != null);
// cbz r6, slowEntryPoint
_stub._resolveEntryPoint[n++] = 0xb116;
// ldr r12, [r0 + #Object.m_pMethTab]
_stub._resolveEntryPoint[n++] = 0xf8d0;
_stub._resolveEntryPoint[n++] = 0xc000;
// b loop
offset = (WORD)((loop - (n + 2)) * sizeof(WORD));
offset = (offset >> 1) & 0x07ff;
_stub._resolveEntryPoint[n++] = 0xe000 | offset;
// slowEntryPoint:
// pop {r5, r6}
_stub._resolveEntryPoint[n++] = 0xbc60;
// nop for alignment
_stub._resolveEntryPoint[n++] = 0xbf00;
// the slow entry point must be DWORD-aligned (see _ASSERTE below); insert nops if necessary.
// ARMSTUB TODO: promotion
// fall through to slow case
_ASSERTE(_stub._resolveEntryPoint + n == _stub._slowEntryPoint);
_ASSERTE(n == ResolveStub::resolveEntryPointLen);
// ResolveStub._slowEntryPoint(r0:MethodToken, r1, r2, r3, r4:IndirectionCellAndFlags)
// r12 = this._tokenSlow;
// this._resolveWorkerTarget(r0, r1, r2, r3, r4, r12);
// The following macro relies on this entry point being DWORD-aligned. We've already asserted that the
// overall stub is aligned above, just need to check that the preceding stubs occupy an even number of WORDs.
_ASSERTE((n & 1) == 0);
n = 0;
#undef PC_REL_OFFSET
#define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - (offsetof(ResolveStub, _slowEntryPoint[n + 2]) & 0xfffffffc))
// ldr r12, [pc + #_tokenSlow]
offset = PC_REL_OFFSET(_tokenSlow);
_stub._slowEntryPoint[n++] = 0xf8df;
_stub._slowEntryPoint[n++] = 0xc000 | offset;
// ldr pc, [pc + #_resolveWorkerTarget]
offset = PC_REL_OFFSET(_resolveWorkerTarget);
_stub._slowEntryPoint[n++] = 0xf8df;
_stub._slowEntryPoint[n++] = 0xf000 | offset;
_ASSERTE(n == ResolveStub::slowEntryPointLen);
// ResolveStub._failEntryPoint(r0:MethodToken, r1, r2, r3, r4:IndirectionCellAndFlags)
// if(--*(this._pCounter) < 0) r4 = r4 | SDF_ResolveBackPatch;
// this._resolveEntryPoint(r0, r1, r2, r3, r4);
// The following macro relies on this entry point being DWORD-aligned. We've already asserted that the
// overall stub is aligned above, just need to check that the preceding stubs occupy an even number of WORDs.
_ASSERTE((n & 1) == 0);
n = 0;
#undef PC_REL_OFFSET
#define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - (offsetof(ResolveStub, _failEntryPoint[n + 2]) & 0xfffffffc))
// push {r5}
_stub._failEntryPoint[n++] = 0xb420;
// ldr r5, [pc + #_pCounter]
offset = PC_REL_OFFSET(_pCounter);
_ASSERTE((offset & 0x3) == 0);
_stub._failEntryPoint[n++] = 0x4d00 | (offset >>2);
// ldr r12, [r5]
_stub._failEntryPoint[n++] = 0xf8d5;
_stub._failEntryPoint[n++] = 0xc000;
// subs r12, r12, #1
_stub._failEntryPoint[n++] = 0xf1bc;
_stub._failEntryPoint[n++] = 0x0c01;
// str r12, [r5]
_stub._failEntryPoint[n++] = 0xf8c5;
_stub._failEntryPoint[n++] = 0xc000;
// pop {r5}
_stub._failEntryPoint[n++] = 0xbc20;
// bge resolveEntryPoint
_stub._failEntryPoint[n++] = 0xda01;
// orr r4, r4, #SDF_ResolveBackPatch
_ASSERTE(SDF_ResolveBackPatch < 256);
_stub._failEntryPoint[n++] = 0xf044;
_stub._failEntryPoint[n++] = 0x0400 | SDF_ResolveBackPatch;
// resolveEntryPoint:
// b _resolveEntryPoint
offset = (WORD)(offsetof(ResolveStub, _resolveEntryPoint) - offsetof(ResolveStub, _failEntryPoint[n + 2]));
_ASSERTE((offset & 1) == 0);
offset = (offset >> 1) & 0x07ff;
_stub._failEntryPoint[n++] = 0xe000 | offset;
// nop for alignment
_stub._failEntryPoint[n++] = 0xbf00;
_ASSERTE(n == ResolveStub::failEntryPointLen);
_stub._pCounter = counterAddr;
_stub._hashedToken = hashedToken << LOG2_PTRSIZE;
_stub._cacheAddress = (size_t) cacheAddr;
_stub._token = dispatchToken;
_stub._tokenSlow = dispatchToken;
_stub._resolveWorkerTarget = resolveWorkerTarget;
_stub._cacheMask = CALL_STUB_CACHE_MASK * sizeof(void*);
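// Note: _hashedToken is pre-shifted by LOG2_PTRSIZE and _cacheMask is expressed in bytes
// (CALL_STUB_CACHE_MASK * sizeof(void*)), so the value computed in the resolve entry point above
// is already a byte offset that can be used directly by the 'ldr r6, [r5 + r6]' cache lookup.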
_ASSERTE(resolveWorkerTarget == (PCODE)ResolveWorkerChainLookupAsmStub);
_ASSERTE(patcherTarget == NULL);
BOOL DoesSlotCallPrestub(PCODE pCode)
PTR_WORD pInstr = dac_cast<PTR_WORD>(PCODEToPINSTR(pCode));
#ifdef HAS_COMPACT_ENTRYPOINTS
if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL)
#endif // HAS_COMPACT_ENTRYPOINTS
if (pInstr[0] == 0x46fc && // mov r12, pc
pInstr[1] == 0xf8df &&
pInstr[2] == 0xf004)
PCODE pTarget = dac_cast<PTR_FixupPrecode>(pInstr)->m_pTarget;
// Check for jump stub (NGen case)
if (isJump(pTarget))
pTarget = decodeJump(pTarget);
return pTarget == (TADDR)PrecodeFixupThunk;
if (pInstr[0] == 0xf8df && // ldr r12, [pc + 8]
pInstr[1] == 0xc008 &&
pInstr[2] == 0xf8df && // ldr pc, [pc]
pInstr[3] == 0xf000)
PCODE pTarget = dac_cast<PTR_StubPrecode>(pInstr)->m_pTarget;
// Check for jump stub (NGen case)
if (isJump(pTarget))
pTarget = decodeJump(pTarget);
return pTarget == GetPreStubEntryPoint();
Stub *GenerateInitPInvokeFrameHelper()
POSTCONDITION(CheckPointer(RETVAL));
CPUSTUBLINKER sl;
CPUSTUBLINKER *psl = &sl;
CORINFO_EE_INFO::InlinedCallFrameInfo FrameInfo;
InlinedCallFrame::GetEEInfo(&FrameInfo);
// R4 contains address of the frame on stack (the frame ptr, not its neg space)
unsigned negSpace = FrameInfo.offsetOfFrameVptr;
ThumbReg regFrame = ThumbReg(4);
ThumbReg regThread = ThumbReg(5);
ThumbReg regScratch = ThumbReg(6);
// Erect frame to perform call to GetThread
psl->ThumbEmitProlog(1, sizeof(ArgumentRegisters), FALSE); // Save r4 for aligned stack
// Save argument registers around the GetThread call. Don't bother with using ldm/stm since this is an inefficient path anyway.
for (int reg = 0; reg < 4; reg++)
psl->ThumbEmitStoreRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r[reg]));
psl->ThumbEmitGetThread(regThread);
// Restore the argument registers.
for (int reg = 0; reg < 4; reg++)
psl->ThumbEmitLoadRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r[reg]));
// mov [regFrame + FrameInfo.offsetOfGSCookie], GetProcessGSCookie()
psl->ThumbEmitMovConstant(regScratch, GetProcessGSCookie());
psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfGSCookie - negSpace);
// mov [regFrame + FrameInfo.offsetOfFrameVptr], InlinedCallFrame::GetMethodFrameVPtr()
psl->ThumbEmitMovConstant(regScratch, InlinedCallFrame::GetMethodFrameVPtr());
psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfFrameVptr - negSpace);
// ldr regScratch, [regThread + offsetof(Thread, m_pFrame)]
// str regScratch, [regFrame + FrameInfo.offsetOfFrameLink]
psl->ThumbEmitLoadRegIndirect(regScratch, regThread, offsetof(Thread, m_pFrame));
psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfFrameLink - negSpace);
// str FP, [regFrame + FrameInfo.offsetOfCalleeSavedEbp]
psl->ThumbEmitStoreRegIndirect(thumbRegFp, regFrame, FrameInfo.offsetOfCalleeSavedFP - negSpace);
// mov [regFrame + FrameInfo.offsetOfReturnAddress], 0
psl->ThumbEmitMovConstant(regScratch, 0);
psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfReturnAddress - negSpace);
DWORD cbSavedRegs = sizeof(ArgumentRegisters) + 2 * 4; // r0-r3, r4, lr
psl->ThumbEmitAdd(regScratch, thumbRegSp, cbSavedRegs);
psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace);
// str SP, [regFrame + FrameInfo.offsetOfCallSiteSP]
psl->ThumbEmitStoreRegIndirect(thumbRegSp, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace);
// mov [regThread + offsetof(Thread, m_pFrame)], regFrame
psl->ThumbEmitStoreRegIndirect(regFrame, regThread, offsetof(Thread, m_pFrame));
// leave current Thread in R4
psl->ThumbEmitEpilog();
// Return. The return address has been restored into LR at this point.
psl->ThumbEmitJumpRegister(thumbRegLr);
// A single process-wide stub that will never unload
RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetStubHeap());
void StubLinkerCPU::ThumbEmitGetThread(ThumbReg dest)
ThumbEmitMovConstant(ThumbReg(0), (TADDR)GetThread);
ThumbEmitCallRegister(ThumbReg(0));
if (dest != ThumbReg(0))
ThumbEmitMovRegReg(dest, ThumbReg(0));
#else // FEATURE_PAL
// mrc p15, 0, dest, c13, c0, 2
Emit16((WORD)(0x0f50 | (dest << 12)));
ThumbEmitLoadRegIndirect(dest, dest, offsetof(TEB, ThreadLocalStoragePointer));
ThumbEmitLoadRegIndirect(dest, dest, sizeof(void *) * (g_TlsIndex & 0xFFFF));
ThumbEmitLoadRegIndirect(dest, dest, (g_TlsIndex & 0x7FFF0000) >> 16);
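// Note: g_TlsIndex packs two values: the low 16 bits are the TLS slot index used to index
// ThreadLocalStoragePointer, and bits 16-30 are the byte offset of the Thread pointer within
// that slot's block, which is why it is decomposed across the two loads above.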
#endif // FEATURE_PAL
#endif // CROSSGEN_COMPILE
// Emits code to adjust for a static delegate target.
VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
// Scan the shuffle entries to see if there are any stack-to-stack operations. If there aren't we can emit a
// much simpler thunk (simply because we generate code that doesn't require more than one scratch register).
bool fSimpleCase = true;
ShuffleEntry *pEntry = pShuffleEntryArray;
while (pEntry->srcofs != ShuffleEntry::SENTINEL)
// It's enough to check whether we have a destination stack location (there are no register to stack scenarios).
if (!(pEntry->dstofs & ShuffleEntry::REGMASK))
fSimpleCase = false;
// No real prolog for the simple case, we're a tail call so we shouldn't be on the stack for any walk
// On entry r0 holds the delegate instance. Look up the real target address stored in the MethodPtrAux
// field and stash it in r12.
// ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)]
ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
// Emit the instructions to rewrite the argument registers. Most will be register-to-register (e.g.
// move r1 to r0) but one or two of them might move values from the top of the incoming stack
// arguments into registers r2 and r3. Note that the entries are ordered so that we don't need to
// worry about a move overwriting a register we'll need to use as input for the next move (i.e. we get
// move r1 to r0, move r2 to r1 etc.).
pEntry = pShuffleEntryArray;
while (pEntry->srcofs != ShuffleEntry::SENTINEL)
_ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
if (pEntry->srcofs & ShuffleEntry::REGMASK)
// Move from register case.
ThumbEmitMovRegReg(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
ThumbReg(pEntry->srcofs & ShuffleEntry::OFSMASK));
// Move from the stack case.
// ldr <dest>, [sp + #source_offset]
ThumbEmitLoadRegIndirect(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
thumbRegSp,
(pEntry->srcofs & ShuffleEntry::OFSMASK) * 4);
// Tail call to real target.
ThumbEmitJumpRegister(ThumbReg(12));
// In the more complex case we need to re-write at least some of the arguments on the stack as well as
// argument registers. We need some temporary registers to perform stack-to-stack copies and we've
// reserved our one remaining volatile register, r12, to store the eventual target method address. So
// we're going to generate a hybrid-tail call. Using a tail call has the advantage that we don't need to
// erect and link an explicit CLR frame to enable crawling of this thunk. Additionally re-writing the
// stack can be more performant in some scenarios than copying the stack (in the presence of floating point
// or arguments requiring 64-bit alignment we might not have to move some or even most of the values).
// The hybrid nature is that we'll erect a standard native frame (with a proper prolog and epilog) so we
// can save some non-volatile registers to act as temporaries. Once we've performed the stack re-write
// we'll poke the saved LR value (which will become a PC value on the pop in the epilog) to return to the
// target method instead of us, thus atomically removing our frame from the stack and tail-calling the
// real target.
ThumbEmitProlog(3, // Save r4-r6,lr (count doesn't include lr)
0, // No additional space in the stack frame required
FALSE); // Don't push argument registers
// On entry r0 holds the delegate instance. Look up the real target address stored in the MethodPtrAux
// field and stash it in r12.
// ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)]
ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
// As we copy slots from lower in the argument stack to higher we need to keep track of source and
// destination pointers into those arguments (if we just use offsets from SP we get into trouble with
// argument frames larger than 4K). We'll use r4 to track the source (original location of an argument
// from the caller's perspective) and r5 to track the destination (new location of the argument from the
// callee's perspective). Both start at the current value of SP plus the offset created by pushing our
// stack frame in the prolog.
// add r4, sp, #cbSavedRegs
// add r5, sp, #cbSavedRegs
DWORD cbSavedRegs = 4 * 4; // r4, r5, r6, lr
ThumbEmitAdd(ThumbReg(4), thumbRegSp, cbSavedRegs);
ThumbEmitAdd(ThumbReg(5), thumbRegSp, cbSavedRegs);
// Follow the shuffle array instructions to re-write some subset of r0-r3 and the stacked arguments to
// remove the unwanted delegate instance in r0. Arguments only ever move from higher registers to lower
// registers or higher stack addresses to lower stack addresses and are ordered from lowest register to
// highest stack address. As a result we can do all updates in order and in place and we'll never
// overwrite a register or stack location needed as a source value in a later iteration.
DWORD dwLastSrcIndex = (DWORD)-1;
DWORD dwLastDstIndex = (DWORD)-1;
pEntry = pShuffleEntryArray;
while (pEntry->srcofs != ShuffleEntry::SENTINEL)
// If this is a register-to-register move we can do it in one instruction.
if ((pEntry->srcofs & ShuffleEntry::REGMASK) && (pEntry->dstofs & ShuffleEntry::REGMASK))
ThumbEmitMovRegReg(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
ThumbReg(pEntry->srcofs & ShuffleEntry::OFSMASK));
// There is no case where a source argument register is moved into a destination stack slot.
_ASSERTE((pEntry->srcofs & ShuffleEntry::REGMASK) == 0);
// Source or destination stack offsets might not be contiguous (though they often will be).
// Floating point arguments and 64-bit aligned values can cause discontinuities. While we copy
// values we'll use post increment addressing modes to move both source and destination stack
// pointers forward 4 bytes at a time, the common case. But we'll insert additional add
// instructions for any holes we find (we detect these by remembering the last source and
// destination stack offset we used).
// Add any additional offset to the source pointer (r4) to account for holes in the copy.
DWORD dwSrcIndex = pEntry->srcofs & ShuffleEntry::OFSMASK;
if (dwSrcIndex != (dwLastSrcIndex + 1))
_ASSERTE(dwSrcIndex > dwLastSrcIndex);
// add r4, #gap_size
ThumbEmitIncrement(ThumbReg(4), (dwSrcIndex - dwLastSrcIndex - 1) * 4);
dwLastSrcIndex = dwSrcIndex;
// Load the source value from the stack and increment our source pointer (r4) in one instruction.
// If the target is a register we can move the value directly there. Otherwise we move it to the
// r6 temporary register.
if (pEntry->dstofs & ShuffleEntry::REGMASK)
// ldr <regnum>, [r4], #4
ThumbEmitLoadIndirectPostIncrement(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK), ThumbReg(4), 4);
ThumbEmitLoadIndirectPostIncrement(ThumbReg(6), ThumbReg(4), 4);
// Add any additional offset to the destination pointer (r5) to account for holes in the copy.
DWORD dwDstIndex = pEntry->dstofs & ShuffleEntry::OFSMASK;
if (dwDstIndex != (dwLastDstIndex + 1))
_ASSERTE(dwDstIndex > dwLastDstIndex);
// add r5, #gap_size
ThumbEmitIncrement(ThumbReg(5), (dwDstIndex - dwLastDstIndex - 1) * 4);
dwLastDstIndex = dwDstIndex;
// Write the value in r6 to its final home on the stack and increment our destination pointer
ThumbEmitStoreIndirectPostIncrement(ThumbReg(6), ThumbReg(5), 4);
// Arguments are copied. Now we modify the saved value of LR we created in our prolog (which will be
// popped back off into PC in our epilog) so that it points to the real target address in r12 rather than
// our return address. We haven't modified LR ourselves, so the net result is that executing our epilog
// will pop our frame and tail call to the real method.
// str r12, [sp + #(cbSavedRegs-4)]
ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, cbSavedRegs - 4);
1681 #ifndef CROSSGEN_COMPILE
1683 void StubLinkerCPU::ThumbEmitCallManagedMethod(MethodDesc *pMD, bool fTailcall)
1685 bool isRelative = MethodTable::VTableIndir2_t::isRelative
1686 && pMD->IsVtableSlot();
1688 #ifndef FEATURE_NGEN_RELOCS_OPTIMIZATIONS
1689 _ASSERTE(!isRelative);
1692 // Use direct call if possible.
1693 if (pMD->HasStableEntryPoint())
1695 // mov r12, #entry_point
1696 ThumbEmitMovConstant(ThumbReg(12), (TADDR)pMD->GetStableEntryPoint());
1700 // mov r12, #slotaddress
1701 ThumbEmitMovConstant(ThumbReg(12), (TADDR)pMD->GetAddrOfSlot());
1708 ThumbEmitStoreRegIndirect(ThumbReg(4), thumbRegSp, 0);
1712 ThumbEmitMovRegReg(ThumbReg(4), ThumbReg(12));
1716 ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(12), 0);
1721 ThumbEmitAddReg(ThumbReg(12), ThumbReg(4));
1726 ThumbEmitLoadRegIndirect(ThumbReg(4), thumbRegSp, 0);
1736 ThumbEmitJumpRegister(ThumbReg(12));
1740 // Replace LR with R12 on stack: hybrid-tail call, same as for EmitShuffleThunk
1742 ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, 4);
1748 ThumbEmitCallRegister(ThumbReg(12));
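// Rough sketch of the code emitted above for the non-relative cases:
//
//   Stable entry point, tail call:        Vtable slot, normal call:
//     movw/movt r12, #entry_point           movw/movt r12, #slot_address
//     bx  r12                               ldr  r12, [r12]
//                                           blx r12
//
// The relative-slot path additionally computes the target by adding the loaded relative offset
// to the slot address, using r4 as scratch.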
1752 // Common code used to generate either an instantiating method stub or an unboxing stub (in the case where the
1753 // unboxing stub also needs to provide a generic instantiation parameter). The stub needs to add the
1754 // instantiation parameter provided in pHiddenArg and re-arrange the rest of the incoming arguments as a
1755 // result (since on ARM this hidden parameter is inserted before explicit user arguments, we need a kind of
1756 // shuffle thunk working in the reverse direction of the one used for static delegates). If pHiddenArg == NULL it
1757 // indicates that we're in the unboxing case and should add sizeof(MethodTable*) to the incoming this pointer
1758 // before dispatching to the target. In this case the instantiating parameter is always the non-shared
1759 // MethodTable pointer we can deduce directly from the incoming 'this' reference.
1760 void StubLinkerCPU::ThumbEmitCallWithGenericInstantiationParameter(MethodDesc *pMD, void *pHiddenArg)
1762 // There is a simple case and a complex case.
1763 // 1) In the simple case the addition of the hidden arg doesn't push any user args onto the stack. In
1764 // this case we only have to re-arrange/initialize some argument registers and tail call to the
1766 // 2) In the complex case we have to modify the stack by pushing some of the register based user
1767 // arguments. We can't tail call in this case because we've altered the size of the stack and our
1768 // caller doesn't expect this and can't compensate. Instead we'll need to create a stack frame
1769 // (including an explicit Frame to make it crawlable to the runtime) and copy the incoming arguments
1772 // First we need to analyze the signature of the target method both with and without the extra
1773 // instantiation argument. We use ArgIterator to determine the difference in location
1774 // (register or stack offset) for each argument between the two cases. This forms a set of instructions that
1775 // tell us how to copy incoming arguments into outgoing arguments (and if those instructions don't include
1776 // any writes to stack locations in the outgoing case then we know we can generate a simple thunk).
1778 SigTypeContext sTypeContext(pMD, TypeHandle());
1780 // Incoming, source, method signature.
1781 MetaSig sSrcSig(pMD->GetSignature(),
1784 MetaSig::sigMember);
1786 // Outgoing, destination, method signature.
1787 MetaSig sDstSig(pMD->GetSignature(),
1790 MetaSig::sigMember);
1792 sDstSig.SetHasParamTypeArg();
1794 // Wrap calling convention parsers round the source and destination signatures. These will be responsible
1795 // for determining where each argument lives in registers or on the stack.
1796 ArgIterator sSrcArgLocations(&sSrcSig);
1797 ArgIterator sDstArgLocations(&sDstSig);
1799 // Define an argument descriptor type that describes how a single 4 byte portion of an argument is mapped
1800 // in the source and destination signature. We only have to worry about general registers and stack
1801 // locations here; floating point argument registers are left unmodified by this thunk.
1804 int m_idxSrc; // Source register or stack offset
1805 int m_idxDst; // Destination register or stack offset
1806 bool m_fSrcIsReg; // Source index is a register number
1807 bool m_fDstIsReg; // Destination index is a register number
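// For example, an entry of { m_idxSrc = 0, m_idxDst = 1, m_fSrcIsReg = true, m_fDstIsReg = true }
// would describe moving the 4-byte value in r0 into r1 (illustrative values only).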
1810 // The number of argument move descriptors we'll need is a function of the number of 4-byte registers or
1811 // stack slots the arguments occupy. The following calculation will over-estimate in a few side cases, but
1812 // not by much (it assumes all four argument registers are used plus the number of stack slots that
1813 // MetaSig calculates are needed for the rest of the arguments).
1814 DWORD cArgDescriptors = 4 + (sSrcArgLocations.SizeOfArgStack() / 4);
1816 // Allocate the array of argument descriptors.
1817 CQuickArray<ArgDesc> rgArgDescs;
1818 rgArgDescs.AllocThrows(cArgDescriptors);
1820 // We only need to map translations for arguments that could come after the instantiation parameter we're
1821 // inserting. On the ARM the only implicit argument that could follow is a vararg signature cookie, but
1822 // it's disallowed in this case. So we simply walk the user arguments.
1823 _ASSERTE(!sSrcSig.IsVarArg());
1828 DWORD idxCurrentDesc = 0;
1829 while ((srcOffset = sSrcArgLocations.GetNextOffset()) != TransitionBlock::InvalidOffset)
1831 dstOffset = sDstArgLocations.GetNextOffset();
1833 // Get the placement for a single argument in the source and destination signatures (may include
1834 // multiple registers and/or stack locations if the argument is larger than 4 bytes).
1835 ArgLocDesc sSrcArgLoc;
1836 sSrcArgLocations.GetArgLoc(srcOffset, &sSrcArgLoc);
1837 ArgLocDesc sDstArgLoc;
1838 sDstArgLocations.GetArgLoc(dstOffset, &sDstArgLoc);
1840 // Fill in as many single-slot descriptors as the argument needs. Note that we ignore any floating
1841 // point register cases (m_cFloatReg > 0) since these will never change due to the hidden arg insertion.
1843 while (sSrcArgLoc.m_cGenReg || sSrcArgLoc.m_cStack)
1845 _ASSERTE(idxCurrentDesc < cArgDescriptors);
1847 if (sSrcArgLoc.m_cGenReg)
1849 sSrcArgLoc.m_cGenReg--;
1850 rgArgDescs[idxCurrentDesc].m_idxSrc = sSrcArgLoc.m_idxGenReg++;
1851 rgArgDescs[idxCurrentDesc].m_fSrcIsReg = true;
1855 _ASSERTE(sSrcArgLoc.m_cStack > 0);
1856 sSrcArgLoc.m_cStack--;
1857 rgArgDescs[idxCurrentDesc].m_idxSrc = sSrcArgLoc.m_idxStack++;
1858 rgArgDescs[idxCurrentDesc].m_fSrcIsReg = false;
1861 if (sDstArgLoc.m_cGenReg)
1863 sDstArgLoc.m_cGenReg--;
1864 rgArgDescs[idxCurrentDesc].m_idxDst = sDstArgLoc.m_idxGenReg++;
1865 rgArgDescs[idxCurrentDesc].m_fDstIsReg = true;
1869 _ASSERTE(sDstArgLoc.m_cStack > 0);
1870 sDstArgLoc.m_cStack--;
1871 rgArgDescs[idxCurrentDesc].m_idxDst = sDstArgLoc.m_idxStack++;
1872 rgArgDescs[idxCurrentDesc].m_fDstIsReg = false;
1879 bool isRelative = MethodTable::VTableIndir2_t::isRelative
1880 && pMD->IsVtableSlot();
1882 #ifndef FEATURE_NGEN_RELOCS_OPTIMIZATIONS
1883 _ASSERTE(!isRelative);
1886 // Update descriptor count to the actual number used.
1887 cArgDescriptors = idxCurrentDesc;
1889 // Note the position at which we have the first move to a stack location
1890 DWORD idxFirstMoveToStack = -1;
1892 // We have a problem where register to register moves are concerned. Since we're adding an argument the
1893 // moves will be from a lower numbered register to a higher numbered one (e.g. r0 -> r1). But the argument
1894 // descriptors we just produced will order them starting from the lowest registers. If we emit move
1895 // instructions in this order we'll end up copying the value of the lowest register into all of the rest
1896 // (e.g. r0 -> r1, r1 -> r2 etc.). We don't have this problem with stack based arguments since the
1897 // argument stacks don't overlap in the same fashion. To solve this we'll reverse the order of the
1898 // descriptors with register destinations (there will be at most four of these so it's fairly cheap).
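// For example, if the moves required are r0 -> r1, r1 -> r2 and r2 -> r3, they must be emitted as
// r2 -> r3, r1 -> r2, r0 -> r1 so that each source is read before it is overwritten.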
1899 if (cArgDescriptors > 1)
1901 // Start by assuming we have all four register destination descriptors.
1902 DWORD idxLastRegDesc = min(3, cArgDescriptors - 1);
1904 // Adjust that count to match reality.
1905 while (!rgArgDescs[idxLastRegDesc].m_fDstIsReg)
1907 _ASSERTE(idxLastRegDesc > 0);
1911 // First move to stack location happens after the last move to register location
1912 idxFirstMoveToStack = idxLastRegDesc+1;
1914 // Calculate how many descriptors we'll need to swap.
1915 DWORD cSwaps = (idxLastRegDesc + 1) / 2;
1917 // Finally we can swap the descriptors.
1918 DWORD idxFirstRegDesc = 0;
1921 ArgDesc sTempDesc = rgArgDescs[idxLastRegDesc];
1922 rgArgDescs[idxLastRegDesc] = rgArgDescs[idxFirstRegDesc];
1923 rgArgDescs[idxFirstRegDesc] = sTempDesc;
1925 _ASSERTE(idxFirstRegDesc < idxLastRegDesc);
1932 // If we're ever required to write to the destination stack then we can't implement this case with a
1933 // simple tail call stub. (That's not technically true: there are edge cases caused by 64-bit alignment
1934 // requirements that might allow us to use a simple stub since the extra argument fits in a "hole" in the
1935 // arguments, but these are infrequent enough that it's likely not worth the effort of detecting them).
1936 ArgDesc *pLastArg = cArgDescriptors ? &rgArgDescs[cArgDescriptors - 1] : NULL;
1937 if ((pLastArg == NULL) || pLastArg->m_fDstIsReg)
1939 // Simple case where we can just rearrange a few argument registers and tail call.
1941 for (idxCurrentDesc = 0; idxCurrentDesc < cArgDescriptors; idxCurrentDesc++)
1943 // Because we're in the simple case we know we'll never be asked to move a value onto the stack
1944 // and since we're adding a parameter we should never be required to move a value from the stack
1945 // to a register either. So all of the descriptors should be register to register moves.
1946 _ASSERTE(rgArgDescs[idxCurrentDesc].m_fSrcIsReg && rgArgDescs[idxCurrentDesc].m_fDstIsReg);
1947 ThumbEmitMovRegReg(ThumbReg(rgArgDescs[idxCurrentDesc].m_idxDst),
1948 ThumbReg(rgArgDescs[idxCurrentDesc].m_idxSrc));
1951 // Place instantiation parameter into the correct register.
1952 ArgLocDesc sInstArgLoc;
1953 sDstArgLocations.GetParamTypeLoc(&sInstArgLoc);
1954 int regHidden = sInstArgLoc.m_idxGenReg;
1955 _ASSERTE(regHidden != -1);
1958 // mov regHidden, #pHiddenArg
1959 ThumbEmitMovConstant(ThumbReg(regHidden), (TADDR)pHiddenArg);
1963 // Extract MethodTable pointer (the hidden arg) from the object instance.
1964 // ldr regHidden, [r0]
1965 ThumbEmitLoadRegIndirect(ThumbReg(regHidden), ThumbReg(0), 0);
1968 if (pHiddenArg == NULL)
1970 // Unboxing stub case.
1972 // Skip over the MethodTable* to find the address of the unboxed value type.
1973 // add r0, #sizeof(MethodTable*)
1974 ThumbEmitIncrement(ThumbReg(0), sizeof(MethodTable*));
1977 // Emit a tail call to the target method.
1980 ThumbEmitProlog(1, 0, FALSE);
1983 ThumbEmitCallManagedMethod(pMD, true);
1992 // Complex case where we need to emit a new stack frame and copy the arguments.
1994 // Calculate the size of the new stack frame:
//            +------------+
//      SP -> |            | <-- Space for helper arg, if isRelative is true
//            +------------+
//            :            :     Outgoing arguments
//            +------------+
//            |  Padding   | <-- Optional, maybe required so that SP is 64-bit aligned
//            +------------+
//            | GS cookie  |
//            +------------+
//        +-> | vtable ptr |
//        |   +------------+
//        |   :            :     Callee saved registers
// Stub   |   +------------+
// Helper |   | LR/RetAddr |
// Frame  |   +------------+
//        |   :            :     Argument registers
//        +-- +------------+
2027 DWORD cbStackArgs = (pLastArg->m_idxDst + 1) * 4;
2028 DWORD cbStackFrame = cbStackArgs + sizeof(GSCookie) + sizeof(StubHelperFrame);
2029 cbStackFrame = ALIGN_UP(cbStackFrame, 8);
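// Purely illustrative arithmetic (the sizes below are assumptions, not the real struct sizes):
// with 12 bytes of outgoing stack arguments, a 4-byte GSCookie and a 0x24-byte StubHelperFrame
// the running total is 0x34, which ALIGN_UP rounds to 0x38 so SP stays 8-byte aligned.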
2036 DWORD cbStackFrameWithoutSavedRegs = cbStackFrame - (13 * 4); // r0-r11,lr
2039 ThumbEmitProlog(8, // Save r4-r11,lr (count doesn't include lr)
2040 cbStackFrameWithoutSavedRegs, // Additional space in the stack frame required
2041 TRUE); // Push argument registers
2043 DWORD offsetOfFrame = cbStackFrame - sizeof(StubHelperFrame);
2045 // Initialize and link the StubHelperFrame and associated GS cookie.
2046 EmitStubLinkFrame(StubHelperFrame::GetMethodFrameVPtr(), offsetOfFrame, StubHelperFrame::GetOffsetOfTransitionBlock());
2048 // Initialize temporary registers used when copying arguments:
2049 // r6 == pointer to first incoming stack-based argument
2050 // r7 == pointer to first outgoing stack-based argument
2052 // add r6, sp, #cbStackFrame
2053 ThumbEmitAdd(ThumbReg(6), thumbRegSp, cbStackFrame);
2056 ThumbEmitMovRegReg(ThumbReg(7), thumbRegSp);
2058 // Copy incoming to outgoing arguments. Stack arguments are generally written consecutively and as
2059 // such we use post-increment forms of register indirect addressing to keep our input (r6) and output
2060 // (r7) pointers up to date. But sometimes we'll skip four bytes due to 64-bit alignment requirements
2061 // and need to bump one or both of the pointers to compensate.
2063 // At this point, the ArgumentDescriptor array is divided into two parts:
2065 // 1) Reverse sorted register to register moves (see the comment earlier in the method for details)
2066 // 2) Register or Stack to Stack moves (if any) in the original order.
2068 // It's possible that the register to register moves may move to a target register that happens
2069 // to be a source for the register -> stack move. If this happens, and we emit the argument moves
2070 // in the current order, then we can lose the contents of the register involved in register->stack
2071 // move (stack->stack moves are not a problem as the locations don't overlap).
2073 // To address this, we will emit the argument moves in two loops:
2075 // 1) First loop will emit the moves that have stack location as the target
2076 // 2) Second loop will emit moves that have register as the target.
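// For example, given the register move r2 -> r3 and the spill r3 -> <first stack slot>, emitting
// the register move first would overwrite r3 before it is stored; doing the stack store first
// preserves it.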
2077 DWORD idxCurrentLoopBegin = 0, idxCurrentLoopEnd = cArgDescriptors;
2078 if (idxFirstMoveToStack != -1)
2080 _ASSERTE(idxFirstMoveToStack < cArgDescriptors);
2081 idxCurrentLoopBegin = idxFirstMoveToStack;
2083 for (idxCurrentDesc = idxCurrentLoopBegin; idxCurrentDesc < idxCurrentLoopEnd; idxCurrentDesc++)
2085 ArgDesc *pArgDesc = &rgArgDescs[idxCurrentDesc];
2087 if (pArgDesc->m_fSrcIsReg)
2089 // Source value is in a register.
2091 _ASSERTE(!pArgDesc->m_fDstIsReg);
2092 // Register to stack. Calculate delta from last stack write; normally it will be 4 bytes
2093 // and our pointer has already been set up correctly by the post increment of the last
2094 // write. But in some cases we need to skip four bytes due to a 64-bit alignment
2095 // requirement. In those cases we need to emit an extra add to keep the pointer correct.
2096 // Note that the first stack argument is guaranteed to be 64-bit aligned by the ABI and as
2097 // such the first stack slot is never skipped.
2098 if ((pArgDesc->m_idxDst > 0) &&
2099 (pArgDesc->m_idxDst != (rgArgDescs[idxCurrentDesc - 1].m_idxDst + 1)))
2101 _ASSERTE(pArgDesc->m_idxDst == (rgArgDescs[idxCurrentDesc - 1].m_idxDst + 2));
2102 ThumbEmitIncrement(ThumbReg(7), 4);
2105 // str srcReg, [r7], #4
2106 ThumbEmitStoreIndirectPostIncrement(pArgDesc->m_idxSrc, ThumbReg(7), 4);
2110 // Source value is on the stack. We should have no cases where a stack argument moves back to
2111 // a register (because we're adding an argument).
2112 _ASSERTE(!pArgDesc->m_fDstIsReg);
2114 // Stack to stack move. We need to use register (r6) to store the value temporarily between
2115 // the read and the write. See the comments above for why we need to check stack deltas and
2116 // possibly insert extra add instructions in some cases.
2117 if ((pArgDesc->m_idxSrc > 0) &&
2118 (pArgDesc->m_idxSrc != (rgArgDescs[idxCurrentDesc - 1].m_idxSrc + 1)))
2120 _ASSERTE(pArgDesc->m_idxSrc == (rgArgDescs[idxCurrentDesc - 1].m_idxSrc + 2));
2121 ThumbEmitIncrement(ThumbReg(6), 4);
2123 if ((pArgDesc->m_idxDst > 0) &&
2124 (pArgDesc->m_idxDst != (rgArgDescs[idxCurrentDesc - 1].m_idxDst + 1)))
2126 _ASSERTE(pArgDesc->m_idxDst == (rgArgDescs[idxCurrentDesc - 1].m_idxDst + 2));
2127 ThumbEmitIncrement(ThumbReg(7), 4);
2131 ThumbEmitLoadIndirectPostIncrement(ThumbReg(8), ThumbReg(6), 4);
2134 ThumbEmitStoreIndirectPostIncrement(ThumbReg(8), ThumbReg(7), 4);
2138 // Update the indexes to be used for the second loop
2139 idxCurrentLoopEnd = idxCurrentLoopBegin;
2140 idxCurrentLoopBegin = 0;
2143 // Now, perform the register to register moves
2144 for (idxCurrentDesc = idxCurrentLoopBegin; idxCurrentDesc < idxCurrentLoopEnd; idxCurrentDesc++)
2146 ArgDesc *pArgDesc = &rgArgDescs[idxCurrentDesc];
2148 // All moves to stack locations have been done (if applicable).
2149 // Since we are moving to a register destination, the source
2150 // will also be a register and cannot be a stack location (refer to the previous loop).
2151 _ASSERTE(pArgDesc->m_fSrcIsReg && pArgDesc->m_fDstIsReg);
2153 // Register to register case.
2154 ThumbEmitMovRegReg(pArgDesc->m_idxDst, pArgDesc->m_idxSrc);
2158 // Place instantiation parameter into the correct register.
2159 ArgLocDesc sInstArgLoc;
2160 sDstArgLocations.GetParamTypeLoc(&sInstArgLoc);
2161 int regHidden = sInstArgLoc.m_idxGenReg;
2162 _ASSERTE(regHidden != -1);
2165 // mov regHidden, #pHiddenArg
2166 ThumbEmitMovConstant(ThumbReg(regHidden), (TADDR)pHiddenArg);
2170 // Extract MethodTable pointer (the hidden arg) from the object instance.
2171 // ldr regHidden, [r0]
2172 ThumbEmitLoadRegIndirect(ThumbReg(regHidden), ThumbReg(0), 0);
2175 if (pHiddenArg == NULL)
2177 // Unboxing stub case.
2179 // Skip over the MethodTable* to find the address of the unboxed value type.
2180 // add r0, #sizeof(MethodTable*)
2181 ThumbEmitIncrement(ThumbReg(0), sizeof(MethodTable*));
2184 // Emit a regular (non-tail) call to the target method.
2185 ThumbEmitCallManagedMethod(pMD, false);
2187 // Unlink the StubHelperFrame.
2188 EmitStubUnlinkFrame();
2195 #if defined(FEATURE_SHARE_GENERIC_CODE)
2196 // The stub generated by this method passes an extra dictionary argument before jumping to
2197 // shared-instantiation generic code.
2199 // pSharedMD is either
2200 // * An InstantiatedMethodDesc for a generic method whose code is shared across instantiations.
2201 // In this case, the extra argument is the InstantiatedMethodDesc for the instantiation-specific stub itself.
2202 // or * A MethodDesc for a static method in a generic class whose code is shared across instantiations.
2203 // In this case, the extra argument is the MethodTable pointer of the instantiated type.
2204 VOID StubLinkerCPU::EmitInstantiatingMethodStub(MethodDesc* pSharedMD, void* extra)
2210 INJECT_FAULT(COMPlusThrowOM(););
2211 PRECONDITION(pSharedMD->RequiresInstMethodTableArg() || pSharedMD->RequiresInstMethodDescArg());
2215 // Share code with the instantiating version of the unboxing stub (see below).
2216 ThumbEmitCallWithGenericInstantiationParameter(pSharedMD, extra);
2218 #endif // FEATURE_SHARE_GENERIC_CODE
2220 void StubLinkerCPU::EmitUnboxMethodStub(MethodDesc *pMD)
2222 if (pMD->RequiresInstMethodTableArg())
2224 // In this case we also have to add an instantiating parameter (which is always the MethodTable* from
2225 // the instance we're called on). Most of this code is shared with the instantiating method stub
2226 // above, the NULL parameter informs the emitter that we're both an unboxing stub and that the extra
2227 // parameter can be deduced from the 'this' reference.
2228 ThumbEmitCallWithGenericInstantiationParameter(pMD, NULL);
2232 // We assume that we'll never see a case where a boxed value type method will require an instantiated
2233 // method desc as a parameter. The stubs on other platforms make this assumption (and indeed this
2234 // method isn't even passed an additional instantiation parameter). This is trivially true for the
2235 // non-interface call case: the only methods callable directly on the boxed instance are the methods
2236 // of Object, none of which are generic. For the interface dispatch case we're relying on the fact
2237 // that the jit always provides the instantiating argument explicitly.
2238 _ASSERTE(!pMD->RequiresInstMethodDescArg());
2240 // Address of the value type is address of the boxed instance plus four.
2242 ThumbEmitIncrement(ThumbReg(0), 4);
2244 bool isRelative = MethodTable::VTableIndir2_t::isRelative
2245 && pMD->IsVtableSlot();
2247 #ifndef FEATURE_NGEN_RELOCS_OPTIMIZATIONS
2248 _ASSERTE(!isRelative);
2253 ThumbEmitProlog(1, 0, FALSE);
2256 // Tail call the real target.
2257 ThumbEmitCallManagedMethod(pMD, true /* tail call */);
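// When the target has a stable entry point the resulting stub is essentially (a sketch, not
// emitted verbatim):
//   add  r0, #4                  ; step over the MethodTable* to the unboxed data
//   movw/movt r12, #entry_point
//   bx   r12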
2266 #endif // CROSSGEN_COMPILE
2268 #endif // !DACCESS_COMPILE
2270 LONG CLRNoCatchHandler(EXCEPTION_POINTERS* pExceptionInfo, PVOID pv)
2272 return EXCEPTION_CONTINUE_SEARCH;
2275 void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegisters * pRegs)
2277 LIMITED_METHOD_CONTRACT;
2279 T_CONTEXT * pContext = pRD->pCurrentContext;
2280 pContext->R4 = pRegs->r4;
2281 pContext->R5 = pRegs->r5;
2282 pContext->R6 = pRegs->r6;
2283 pContext->R7 = pRegs->r7;
2284 pContext->R8 = pRegs->r8;
2285 pContext->R9 = pRegs->r9;
2286 pContext->R10 = pRegs->r10;
2287 pContext->R11 = pRegs->r11;
2288 pContext->Lr = pRegs->r14;
2290 T_KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers;
2291 pRD->pCurrentContextPointers->R4 = (PDWORD)&pRegs->r4;
2292 pRD->pCurrentContextPointers->R5 = (PDWORD)&pRegs->r5;
2293 pRD->pCurrentContextPointers->R6 = (PDWORD)&pRegs->r6;
2294 pRD->pCurrentContextPointers->R7 = (PDWORD)&pRegs->r7;
2295 pRD->pCurrentContextPointers->R8 = (PDWORD)&pRegs->r8;
2296 pRD->pCurrentContextPointers->R9 = (PDWORD)&pRegs->r9;
2297 pRD->pCurrentContextPointers->R10 = (PDWORD)&pRegs->r10;
2298 pRD->pCurrentContextPointers->R11 = (PDWORD)&pRegs->r11;
2299 pRD->pCurrentContextPointers->Lr = NULL;
2302 #ifndef CROSSGEN_COMPILE
2303 void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
2305 pRD->IsCallerContextValid = FALSE;
2306 pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
2308 // Copy the saved argument registers into the current context
2309 ArgumentRegisters * pArgRegs = GetArgumentRegisters();
2310 pRD->pCurrentContext->R0 = pArgRegs->r[0];
2311 pRD->pCurrentContext->R1 = pArgRegs->r[1];
2312 pRD->pCurrentContext->R2 = pArgRegs->r[2];
2313 pRD->pCurrentContext->R3 = pArgRegs->r[3];
2315 // Next, copy all the callee saved registers
2316 UpdateRegDisplayFromCalleeSavedRegisters(pRD, GetCalleeSavedRegisters());
2318 // Set ControlPC to be the same as the saved "return address"
2319 // value, which is actually a ControlPC in the frameless method (e.g.
2320 // faulting address in case of AV or TAE).
2321 pRD->pCurrentContext->Pc = GetReturnAddress();
2323 // Set the caller SP
2324 pRD->pCurrentContext->Sp = this->GetSP();
2326 // Finally, syncup the regdisplay with the context
2327 SyncRegDisplayToCurrentContext(pRD);
2329 LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TransitionFrame::UpdateRegDisplay(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
2332 void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
2334 pRD->IsCallerContextValid = FALSE;
2335 pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
2337 // Next, copy all the callee saved registers
2338 UpdateRegDisplayFromCalleeSavedRegisters(pRD, &m_calleeSavedRegisters);
2340 // Set ControlPC to be the same as the saved "return address"
2341 // value, which is actually a ControlPC in the frameless method (e.g.
2342 // faulting address in case of AV or TAE).
2343 pRD->pCurrentContext->Pc = m_ReturnAddress;
2345 // Set the caller SP
2346 pRD->pCurrentContext->Sp = dac_cast<TADDR>(this) + sizeof(*this);
2348 // Finally, syncup the regdisplay with the context
2349 SyncRegDisplayToCurrentContext(pRD);
2351 LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TailCallFrame::UpdateRegDisplay(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
2354 #ifndef DACCESS_COMPILE
2356 void TailCallFrame::InitFromContext(T_CONTEXT * pContext)
2358 WRAPPER_NO_CONTRACT;
2366 r10 = pContext->R10;
2367 r11 = pContext->R11;
2368 m_ReturnAddress = pContext->Lr;
2371 #endif // !DACCESS_COMPILE
2373 void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
2375 LIMITED_METHOD_DAC_CONTRACT;
2377 // Copy the context to regdisplay
2378 memcpy(pRD->pCurrentContext, &m_ctx, sizeof(T_CONTEXT));
2380 pRD->ControlPC = ::GetIP(&m_ctx);
2381 pRD->SP = ::GetSP(&m_ctx);
2383 // Update the integer registers in KNONVOLATILE_CONTEXT_POINTERS from
2384 // the exception context we have.
2385 pRD->pCurrentContextPointers->R4 = (PDWORD)&m_ctx.R4;
2386 pRD->pCurrentContextPointers->R5 = (PDWORD)&m_ctx.R5;
2387 pRD->pCurrentContextPointers->R6 = (PDWORD)&m_ctx.R6;
2388 pRD->pCurrentContextPointers->R7 = (PDWORD)&m_ctx.R7;
2389 pRD->pCurrentContextPointers->R8 = (PDWORD)&m_ctx.R8;
2390 pRD->pCurrentContextPointers->R9 = (PDWORD)&m_ctx.R9;
2391 pRD->pCurrentContextPointers->R10 = (PDWORD)&m_ctx.R10;
2392 pRD->pCurrentContextPointers->R11 = (PDWORD)&m_ctx.R11;
2393 pRD->pCurrentContextPointers->Lr = NULL;
2395 pRD->IsCallerContextValid = FALSE;
2396 pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
2399 void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
2405 // We should skip over InlinedCallFrame if it is not active.
2406 // It will be part of a JITed method's frame, and the stack-walker
2407 // can handle such a case.
2408 #ifdef PROFILING_SUPPORTED
2409 PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
2417 // @TODO: Remove this after the debugger is fixed to avoid stack-walks from bad places
2418 // @TODO: This may be still needed for sampling profilers
2419 if (!InlinedCallFrame::FrameHasActiveCall(this))
2421 LOG((LF_CORDB, LL_ERROR, "WARNING: InlinedCallFrame::UpdateRegDisplay called on inactive frame %p\n", this));
2425 // reset pContext; it's only valid for active (top-most) frame
2426 pRD->pContext = NULL;
2428 *(pRD->pPC) = m_pCallerReturnAddress;
2429 pRD->SP = (DWORD) dac_cast<TADDR>(m_pCallSiteSP);
2431 pRD->IsCallerContextValid = FALSE;
2432 pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
2434 pRD->pCurrentContext->Pc = *(pRD->pPC);
2435 pRD->pCurrentContext->Sp = pRD->SP;
2437 // Update the frame pointer in the current context.
2438 pRD->pCurrentContext->R11 = m_pCalleeSavedFP;
2439 pRD->pCurrentContextPointers->R11 = &m_pCalleeSavedFP;
2441 // This is necessary to unwind methods with alloca. This needs to stay
2442 // in sync with definition of REG_SAVED_LOCALLOC_SP in the JIT.
2443 pRD->pCurrentContext->R9 = (DWORD) dac_cast<TADDR>(m_pCallSiteSP);
2444 pRD->pCurrentContextPointers->R9 = (DWORD *)&m_pCallSiteSP;
2449 #ifdef FEATURE_HIJACK
2450 TADDR ResumableFrame::GetReturnAddressPtr(void)
2452 LIMITED_METHOD_DAC_CONTRACT;
2453 return dac_cast<TADDR>(m_Regs) + offsetof(T_CONTEXT, Pc);
2456 void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
2467 CopyMemory(pRD->pCurrentContext, m_Regs, sizeof(T_CONTEXT));
2469 pRD->ControlPC = m_Regs->Pc;
2470 pRD->SP = m_Regs->Sp;
2472 pRD->pCurrentContextPointers->R4 = &m_Regs->R4;
2473 pRD->pCurrentContextPointers->R5 = &m_Regs->R5;
2474 pRD->pCurrentContextPointers->R6 = &m_Regs->R6;
2475 pRD->pCurrentContextPointers->R7 = &m_Regs->R7;
2476 pRD->pCurrentContextPointers->R8 = &m_Regs->R8;
2477 pRD->pCurrentContextPointers->R9 = &m_Regs->R9;
2478 pRD->pCurrentContextPointers->R10 = &m_Regs->R10;
2479 pRD->pCurrentContextPointers->R11 = &m_Regs->R11;
2480 pRD->pCurrentContextPointers->Lr = &m_Regs->Lr;
2482 pRD->volatileCurrContextPointers.R0 = &m_Regs->R0;
2483 pRD->volatileCurrContextPointers.R1 = &m_Regs->R1;
2484 pRD->volatileCurrContextPointers.R2 = &m_Regs->R2;
2485 pRD->volatileCurrContextPointers.R3 = &m_Regs->R3;
2486 pRD->volatileCurrContextPointers.R12 = &m_Regs->R12;
2488 pRD->IsCallerContextValid = FALSE;
2489 pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
2492 void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
2501 pRD->IsCallerContextValid = FALSE;
2502 pRD->IsCallerSPValid = FALSE;
2504 pRD->pCurrentContext->Pc = m_ReturnAddress;
2505 pRD->pCurrentContext->Sp = PTR_TO_TADDR(m_Args) + sizeof(struct HijackArgs);
2507 pRD->pCurrentContext->R0 = m_Args->R0;
2509 pRD->pCurrentContext->R4 = m_Args->R4;
2510 pRD->pCurrentContext->R5 = m_Args->R5;
2511 pRD->pCurrentContext->R6 = m_Args->R6;
2512 pRD->pCurrentContext->R7 = m_Args->R7;
2513 pRD->pCurrentContext->R8 = m_Args->R8;
2514 pRD->pCurrentContext->R9 = m_Args->R9;
2515 pRD->pCurrentContext->R10 = m_Args->R10;
2516 pRD->pCurrentContext->R11 = m_Args->R11;
2518 pRD->pCurrentContextPointers->R4 = &m_Args->R4;
2519 pRD->pCurrentContextPointers->R5 = &m_Args->R5;
2520 pRD->pCurrentContextPointers->R6 = &m_Args->R6;
2521 pRD->pCurrentContextPointers->R7 = &m_Args->R7;
2522 pRD->pCurrentContextPointers->R8 = &m_Args->R8;
2523 pRD->pCurrentContextPointers->R9 = &m_Args->R9;
2524 pRD->pCurrentContextPointers->R10 = &m_Args->R10;
2525 pRD->pCurrentContextPointers->R11 = &m_Args->R11;
2526 pRD->pCurrentContextPointers->Lr = NULL;
2528 SyncRegDisplayToCurrentContext(pRD);
2530 #endif // FEATURE_HIJACK
2531 #endif // !CROSSGEN_COMPILE
2533 class UMEntryThunk * UMEntryThunk::Decode(void *pCallback)
2535 _ASSERTE(offsetof(UMEntryThunkCode, m_code) == 0);
2536 UMEntryThunkCode * pCode = (UMEntryThunkCode*)((ULONG_PTR)pCallback & ~THUMB_CODE);
2538 // We may be called with an unmanaged external code pointer instead. So if it doesn't look like one of our
2539 // stubs (see UMEntryThunkCode::Encode below) then we'll return NULL. Luckily in these scenarios our
2540 // caller will perform a hash lookup on successful return to verify our result in case random unmanaged
2541 // code happens to look like ours.
2542 if ((pCode->m_code[0] == 0xf8df) &&
2543 (pCode->m_code[1] == 0xc008) &&
2544 (pCode->m_code[2] == 0xf8df) &&
2545 (pCode->m_code[3] == 0xf000))
2547 return (UMEntryThunk*)pCode->m_pvSecretParam;
2553 void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam)
2555 // ldr r12, [pc + 8]
2562 m_pTargetCode = (TADDR)pTargetCode;
2563 m_pvSecretParam = (TADDR)pvSecretParam;
2565 FlushInstructionCache(GetCurrentProcess(),&m_code,sizeof(m_code));
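// A sketch of the thunk this encodes (derived from the opcodes recognized in Decode above and
// the field order m_code[4], m_pTargetCode, m_pvSecretParam):
//   0xf8df 0xc008   ldr r12, [pc, #8]   ; r12 <- m_pvSecretParam (the UMEntryThunk*)
//   0xf8df 0xf000   ldr pc,  [pc]       ; jump to m_pTargetCode
//   dcd m_pTargetCode
//   dcd m_pvSecretParam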
2568 #ifndef DACCESS_COMPILE
2570 void UMEntryThunkCode::Poison()
2572 m_pTargetCode = (TADDR)UMEntryThunk::ReportViolation;
2579 ClrFlushInstructionCache(&m_code,sizeof(m_code));
2582 #endif // DACCESS_COMPILE
2584 ///////////////////////////// UNIMPLEMENTED //////////////////////////////////
2586 #ifndef DACCESS_COMPILE
2588 #ifndef CROSSGEN_COMPILE
2590 extern "C" void STDCALL JIT_PatchedCodeStart();
2591 extern "C" void STDCALL JIT_PatchedCodeLast();
2593 void InitJITHelpers1()
2595 STANDARD_VM_CONTRACT;
2597 // Allocation helpers, faster but non-logging.
2598 if (!(TrackAllocationsEnabled()
2599 || LoggingOn(LF_GCALLOC, LL_INFO10)
2601 || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0)
2605 _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
2607 SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
2608 SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
2609 SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
2611 ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
2615 // +64 stack-based arguments here
2616 // -- MulticastFrame end
2617 // +48 r0-r3 argument registers
2618 // +44 lr return address
2619 // +40 fp frame pointer
2620 // +12 r4-r10 callee saved registers
2621 // +8 datum (typically a MethodDesc*)
2623 // +0 the frame vptr
2624 // -- MulticastFrame start
2626 // -... floating point argument registers
2627 void StubLinkerCPU::EmitMulticastInvoke(UINT_PTR hash)
2629 //Decode Multicast Delegate hash
2630 unsigned int numStackBytes = hash >> 8;
2631 _ASSERTE(numStackBytes <= 0x7fff);
2633 unsigned int numFPRegs = (hash & 0xf8) >> 3;
2634 _ASSERTE(numFPRegs <= 16);
2636 unsigned int numGenRegs = hash & 0x7;
2637 _ASSERTE(numGenRegs <= 4);
2639 DWORD offsetOfFPRegs = 0;
2641 DWORD cbStackFrame = numStackBytes;
2644 cbStackFrame = ALIGN_UP(cbStackFrame, 8);
2645 offsetOfFPRegs = cbStackFrame;
2646 cbStackFrame += 4 * numFPRegs;
2648 cbStackFrame += sizeof(GSCookie) + sizeof(MulticastFrame);
2649 cbStackFrame = ALIGN_UP(cbStackFrame, 8);
2650 DWORD cbStackFrameWithoutSavedRegs = cbStackFrame - (13 * 4); // r0-r11,lr
2653 ThumbEmitProlog(8, // Save r4-r11,lr (count doesn't include lr)
2654 cbStackFrameWithoutSavedRegs, // Additional space in the stack frame required
2655 TRUE); // Push argument registers
2657 DWORD offsetOfFrame = cbStackFrame - sizeof(MulticastFrame);
2659 // Move the MethodDesc* we're calling to r12.
2660 // ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)]
2661 ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
2663 // Initialize MulticastFrame::m_pMD to the MethodDesc* we're calling
2664 // str r12, [sp + #(offsetOfFrame + offsetof(MulticastFrame, m_pMD))]
2665 ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, offsetOfFrame + MulticastFrame::GetOffsetOfDatum());
2669 ThumbEmitAdd(ThumbReg(4), thumbRegSp, offsetOfFPRegs);
2671 // save floating point arguments at offsetOfFPRegs
2672 //vstm{IA} R4,{s0-s(numFPRegs -1)}
2674 Emit16(0x0a00 | (WORD)numFPRegs);
2677 // Initialize and link the MulticastFrame and associated GS cookie.
2678 EmitStubLinkFrame(MulticastFrame::GetMethodFrameVPtr(), offsetOfFrame, MulticastFrame::GetOffsetOfTransitionBlock());
2680 //r7 as counter. Initialize it to 0.
2682 ThumbEmitMovConstant(ThumbReg(7), 0);
2684 //initialize r9 to _invocationCount
2685 ThumbEmitLoadRegIndirect(ThumbReg(9), ThumbReg(0), DelegateObject::GetOffsetOfInvocationCount());
2687 CodeLabel *pLoopLabel = NewCodeLabel();
2688 CodeLabel *pEndLoopLabel = NewCodeLabel();
2691 EmitLabel(pLoopLabel);
2694 ThumbEmitCmpReg(ThumbReg(7), ThumbReg(9));
2696 // if equal goto endloop
2698 ThumbEmitCondFlagJump(pEndLoopLabel, 0);
2703 //r1 = pos for stack args in Frame
2704 ThumbEmitAdd(ThumbReg(1), ThumbReg(4), MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs());
2706 //r2 = stack pos for args of calling func
2707 ThumbEmitMovRegReg(ThumbReg(2), thumbRegSp);
2709 // ..move stack args..
2710 _ASSERTE(numStackBytes%4 == 0);
2711 while (count != numStackBytes)
2713 ThumbEmitLoadIndirectPostIncrement(ThumbReg(0), ThumbReg(1), 4);
2714 ThumbEmitStoreIndirectPostIncrement(ThumbReg(0), ThumbReg(2), 4);
2720 while(count < numGenRegs)
2722 ThumbEmitLoadRegIndirect(ThumbReg(count), ThumbReg(4), MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters() + count*4);
2728 ThumbEmitAdd(ThumbReg(0), thumbRegSp, offsetOfFPRegs);
2729 //vldm{IA}.32 R0, s0-s(numFPRegs-1)
2731 Emit16(0x0a00 | (WORD)numFPRegs);
2734 //ldr r0, [r4+0x30] // get the first argument
2735 ThumbEmitLoadRegIndirect(ThumbReg(0),ThumbReg(4), MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters());
2737 // ldr r6, [r0+0x14] //invocationList
2738 ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(0), DelegateObject::GetOffsetOfInvocationList());
2740 // r6 - address of first delegate in invocation list
2742 ThumbEmitAdd(ThumbReg(6), ThumbReg(6), PtrArray::GetDataOffset());
2744 //ldr r8,[r6+r7*4] //get delegate object
2745 ThumbEmitLoadOffsetScaledReg(ThumbReg(8), ThumbReg(6), ThumbReg(7), 2);
2747 // ldr r0, [r8+0x04] //_target from the delegate
2748 ThumbEmitLoadRegIndirect(ThumbReg(0), ThumbReg(8), DelegateObject::GetOffsetOfTarget());
2750 // ldr r8, [r8+0xC] // methodPtr from the delegate
2751 ThumbEmitLoadRegIndirect(ThumbReg(8), ThumbReg(8), DelegateObject::GetOffsetOfMethodPtr());
2754 ThumbEmitCallRegister(ThumbReg(8));
2757 ThumbEmitAdd(ThumbReg(7), ThumbReg(7), 1);
2759 // The debugger may need to stop here, so grab the offset of this code.
2763 ThumbEmitNearJump(pLoopLabel);
2766 EmitLabel(pEndLoopLabel);
2769 //At this point of the stub:
2770 //r4 must point to Frame
2771 //and r5 must be current Thread*
2773 EmitStubUnlinkFrame();
2779 void StubLinkerCPU::EmitSecureDelegateInvoke(UINT_PTR hash)
2781 //Decode Multicast Delegate hash
2782 unsigned int numStackBytes = hash >> 8;
2783 _ASSERTE(numStackBytes <= 0x7fff);
2785 DWORD cbStackFrame = numStackBytes + sizeof(GSCookie) + sizeof(SecureDelegateFrame);
2786 cbStackFrame = ALIGN_UP(cbStackFrame, 8);
2787 DWORD cbStackFrameWithoutSavedRegs = cbStackFrame - (13 * 4); // r0-r11,lr
2790 ThumbEmitProlog(8, // Save r4-r11,lr (count doesn't include lr)
2791 cbStackFrameWithoutSavedRegs, // Additional space in the stack frame required
2792 TRUE); // Push argument registers
2794 DWORD offsetOfFrame = cbStackFrame - sizeof(SecureDelegateFrame);
2796 // Move the MethodDesc* we're calling to r12.
2797 // ldr r12, [r0, #offsetof(DelegateObject, _invocationCount)]
2798 ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfInvocationCount());
2800 // Initialize SecureDelegateFrame::m_pMD to the MethodDesc* we're calling
2801 // str r12, [sp + #(offsetOfFrame + offsetof(SecureDelegateFrame, m_pMD))]
2802 ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, offsetOfFrame + SecureDelegateFrame::GetOffsetOfDatum());
2804 // Initialize and link the SecureDelegateFrame and associated GS cookie.
2805 EmitStubLinkFrame(SecureDelegateFrame::GetMethodFrameVPtr(), offsetOfFrame, SecureDelegateFrame::GetOffsetOfTransitionBlock());
2808 // r0 : secure delegate
2809 // r4 : SecureDelegateFrame *
2814 // Copy stack based arguments from the calling frame into this one. Use the following registers:
2815 // r6 : pointer to source arguments
2816 // r7 : pointer to destination arguments
2817 // r8 : temporary storage during copy operation
2819 // add r6, r4, #MulticastFrame::GetOffsetOfArgs()
2820 ThumbEmitAdd(ThumbReg(6), ThumbReg(4), MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs());
2823 ThumbEmitMovRegReg(ThumbReg(7), thumbRegSp);
2825 // Unrolled loop to copy the stack based arguments. Might want to consider a second path with a loop
2826 // for large argument lists if anyone complains about this.
2827 _ASSERTE((numStackBytes % 4) == 0);
2828 for (unsigned int i = 0; i < numStackBytes; i += 4)
2830 // Read one 4-byte value from the source stack and copy it to the new stack, post-incrementing
2831 // both source and destination as we go.
2834 ThumbEmitLoadIndirectPostIncrement(ThumbReg(8), ThumbReg(6), 4);
2835 ThumbEmitStoreIndirectPostIncrement(ThumbReg(8), ThumbReg(7), 4);
2839 // Stack-based arguments are copied. Floating point argument registers and r1-r3 are all still correct.
2840 // All we need to do now is calculate the real value for r0 and the target address. Secure delegates wrap
2841 // an inner delegate (kept in _invocationList). We retrieve this inner delegate and then perform the usual
2842 // delegate invocation pattern on that.
2844 // Get "real" delegate.
2845 // ldr r0, [r0, #offsetof(DelegateObject, _invocationList)]
2846 ThumbEmitLoadRegIndirect(ThumbReg(0), ThumbReg(0), DelegateObject::GetOffsetOfInvocationList());
2848 // Load the destination address from the inner delegate.
2849 // ldr r12, [r0, #offsetof(DelegateObject, _methodPtr)]
2850 ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtr());
2852 // This is only required for unbound delegates which use VSD stubs, but it does no harm if done unconditionally
2853 // add r4, r0+#offsetof(DelegateObject, _methodPtrAux) ; // r4 now contains indirection cell
2854 ThumbEmitAdd(ThumbReg(4), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
2856 // Replace the delegate reference with the object cached as the delegate's target.
2857 // ldr r0, [r0, #offsetof(DelegateObject, _target)]
2858 ThumbEmitLoadRegIndirect(ThumbReg(0), ThumbReg(0), DelegateObject::GetOffsetOfTarget());
2860 // Perform the call.
2862 ThumbEmitCallRegister(ThumbReg(12));
2864 // restore frame pointer in r4
2865 ThumbEmitAdd(ThumbReg(4), thumbRegSp, offsetOfFrame);
2867 // Unlink SecureDelegateFrame. This requires the frame pointer in r4 and the thread pointer in r5.
2868 EmitStubUnlinkFrame();
2874 //The function expects r4 to point to frame
2875 //and r5 must be current Thread*
2876 void StubLinkerCPU::EmitStubUnlinkFrame()
2879 // EmitStubUnlinkFrame is emitted just before the epilog.
2880 // Thus, at this point, all other callee-saved registers
2881 // could be used since we are going to restore them anyway
2882 // via epilog execution.
2884 // Ensure that GSCookie is valid
2886 // ldr r6, [r4-4]; Load the value of GSCookie
2887 ThumbEmitSub(ThumbReg(6), ThumbReg(4), 4);
2888 ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(6), 0);
2890 // mov r7, s_gsCookie
2891 ThumbEmitMovConstant(ThumbReg(7), GetProcessGSCookie());
2893 // cmp r6, r7 ; Are the GSCookie values in sync?
2894 ThumbEmitCmpReg(ThumbReg(6), ThumbReg(7));
2896 CodeLabel *pAllDoneLabel = NewCodeLabel();
2898 // beq AllDone; yes, GSCookie is good.
2899 ThumbEmitCondFlagJump(pAllDoneLabel, 0);
2901 // If we are here, then GSCookie was bad.
2902 // Call into DoJITFailFast.
2904 // mov r12, DoJITFailFast
2905 ThumbEmitMovConstant(ThumbReg(12), (int)DoJITFailFast);
2907 ThumbEmitCallRegister(ThumbReg(12));
2908 // Emit a breakpoint - we are not expected to come here at all
2909 // if we performed a FailFast.
2910 ThumbEmitBreakpoint();
2913 EmitLabel(pAllDoneLabel);
2916 // Unlink the MulticastFrame.
2917 // ldr r6, [r4 + #offsetof(MulticastFrame, m_Next)]
2918 // str r6, [r5 + #offsetof(Thread, m_pFrame)]
2919 ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(4), Frame::GetOffsetOfNextLink());
2920 ThumbEmitStoreRegIndirect(ThumbReg(6), ThumbReg(5), offsetof(Thread, m_pFrame));
2924 //pFrameVptr = vtable ptr of Frame
2925 //offsetOfFrame = Frame offset in bytes from sp
2926 //After this method: r4 points to the Frame on stack
2927 // and r5 has current Thread*
2928 void StubLinkerCPU::EmitStubLinkFrame(TADDR pFrameVptr, int offsetOfFrame, int offsetOfTransitionBlock)
2930 // Initialize r4 to point to where we start filling the frame.
2931 ThumbEmitAdd(ThumbReg(4), thumbRegSp, offsetOfFrame - sizeof(GSCookie));
2933 // Write the initial GS cookie value
2934 // mov r5, s_gsCookie
2936 ThumbEmitMovConstant(ThumbReg(5), s_gsCookie);
2937 ThumbEmitStoreIndirectPostIncrement(ThumbReg(5), ThumbReg(4), 4);
2939 // Initialize the vtable pointer.
2941 // str r5, [r4 + #offsetof(Frame, _vfptr)]
2942 ThumbEmitMovConstant(ThumbReg(5), pFrameVptr);
2943 ThumbEmitStoreRegIndirect(ThumbReg(5), ThumbReg(4), 0);
2945 // Link the frame to the thread's frame chain.
2946 // r5 <- current Thread*
2947 // ldr r6, [r5 + #offsetof(Thread, m_pFrame)]
2948 // str r6, [r4 + #offsetof(MulticastFrame, m_Next)]
2949 // str r4, [r5 + #offsetof(Thread, m_pFrame)]
2951 ThumbEmitGetThread(ThumbReg(5));
2953 // reload argument registers that could have been corrupted by the call
2954 for (int reg = 0; reg < 4; reg++)
2955 ThumbEmitLoadRegIndirect(ThumbReg(reg), ThumbReg(4),
2956 offsetOfTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, r[reg]));
2959 ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(5), Thread::GetOffsetOfCurrentFrame());
2960 ThumbEmitStoreRegIndirect(ThumbReg(6), ThumbReg(4), Frame::GetOffsetOfNextLink());
2961 ThumbEmitStoreRegIndirect(ThumbReg(4), ThumbReg(5), Thread::GetOffsetOfCurrentFrame());
2964 #endif // CROSSGEN_COMPILE
2966 void StubLinkerCPU::ThumbEmitNearJump(CodeLabel *target)
2968 WRAPPER_NO_CONTRACT;
2969 EmitLabelRef(target, reinterpret_cast<ThumbNearJump&>(gThumbNearJump), 0xe);
2972 void StubLinkerCPU::ThumbEmitCondFlagJump(CodeLabel *target, UINT cond)
2974 WRAPPER_NO_CONTRACT;
2975 EmitLabelRef(target, reinterpret_cast<ThumbNearJump&>(gThumbNearJump), cond);
2978 void StubLinkerCPU::ThumbEmitCondRegJump(CodeLabel *target, BOOL nonzero, ThumbReg reg)
2980 WRAPPER_NO_CONTRACT;
2982 UINT variation = reg;
2984 variation = variation | 0x8;
2985 EmitLabelRef(target, reinterpret_cast<ThumbCondJump&>(gThumbCondJump), variation);
2988 UINT_PTR StubLinkerCPU::HashMulticastInvoke(MetaSig *pSig)
2990 // Generate a hash key as follows:
2991 // Bit0-2 : num of general purpose registers used
2992 // Bit3-7 : num of FP regs used (counting in terms of s0,s1...)
2993 // Bit8-22 : num of stack bytes used
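// For example, a hash of ((8 << 8) | (2 << 3) | 3) == 0x813 describes a call using r0-r2, s0-s1
// and 8 bytes of stack arguments (purely illustrative values).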
2995 ArgIterator delegateCallConv(pSig);
2997 UINT numStackBytes = delegateCallConv.SizeOfArgStack();
2999 if (numStackBytes > 0x7FFF)
3000 COMPlusThrow(kNotSupportedException, W("NotSupported_TooManyArgs"));
3002 int cGenReg = 1; // r0 is always used for this pointer
3005 // if it has a return buffer argument r1 is also used
3006 if(delegateCallConv.HasRetBuffArg())
3010 while ((argOffset = delegateCallConv.GetNextOffset()) != TransitionBlock::InvalidOffset)
3012 ArgLocDesc currArgLoc;
3013 delegateCallConv.GetArgLoc(argOffset, &currArgLoc);
3015 if(currArgLoc.m_idxGenReg != -1)
3016 cGenReg = currArgLoc.m_idxGenReg + currArgLoc.m_cGenReg;
3018 if(currArgLoc.m_idxFloatReg != -1)
3019 cFPReg = currArgLoc.m_idxFloatReg + currArgLoc.m_cFloatReg;
3022 // only r0-r3 can be used for arguments
3023 _ASSERTE(cGenReg <= 4);
3025 // only s0-s15 can be used for arguments
3026 _ASSERTE(cFPReg <= 16);
3028 return (numStackBytes << 8 | cFPReg << 3 | cGenReg);
3031 void StubLinkerCPU::ThumbCopyOneTailCallArg(UINT * pnSrcAlign, const ArgLocDesc * pArgLoc, UINT * pcbStackSpace)
3033 if (pArgLoc->m_fRequires64BitAlignment && (*pnSrcAlign & 1)) {
3035 ThumbEmitIncrement(ThumbReg(0), 4);
3039 // Integer register arguments
3040 if (pArgLoc->m_cGenReg > 0) {
3041 int iReg = pArgLoc->m_idxGenReg;
3042 int maxReg = iReg + pArgLoc->m_cGenReg;
3043 while (iReg + 2 <= maxReg) {
3044 // LDM r0!, {r4,r5} ; Post incremented loads (2 bytes)
3045 ThumbEmitLoadStoreMultiple(ThumbReg(0), true, ThumbReg(4).Mask() | ThumbReg(5).Mask());
3046 // STR r4, [R1, #offset of arg reg] ; (2 bytes)
3047 ThumbEmitStoreRegIndirect(ThumbReg(4), ThumbReg(1), offsetof(T_CONTEXT, R0) + (iReg * sizeof(DWORD)));
3049 // STR r5, [R1, #offset of arg reg] ; (2 bytes)
3050 ThumbEmitStoreRegIndirect(ThumbReg(5), ThumbReg(1), offsetof(T_CONTEXT, R0) + (iReg * sizeof(DWORD)));
3053 if (iReg < maxReg) {
3054 // LDR r3, [R0], #+4 ; Post incremented load (4 bytes)
3055 ThumbEmitLoadIndirectPostIncrement(ThumbReg(3), ThumbReg(0), 4);
3058 // STR r3, [R1, #offset of arg reg] ; (2 bytes)
3059 ThumbEmitStoreRegIndirect(ThumbReg(3), ThumbReg(1), offsetof(T_CONTEXT, R0) + (iReg * sizeof(DWORD)));
3062 if (pArgLoc->m_cFloatReg > 0) {
3063 int iReg = pArgLoc->m_idxFloatReg;
3064 int maxReg = iReg + pArgLoc->m_cFloatReg;
3065 while (iReg + 2 <= maxReg) {
3066 // LDM r0!, {r4,r5} ; Post incremented loads (2 bytes)
3067 ThumbEmitLoadStoreMultiple(ThumbReg(0), true, ThumbReg(4).Mask() | ThumbReg(5).Mask());
3068 // STR r4, [R1, #offset of arg reg] ; (2 bytes)
3069 ThumbEmitStoreRegIndirect(ThumbReg(4), ThumbReg(1), offsetof(T_CONTEXT, S) + (iReg * sizeof(DWORD)));
3071 // STR r5, [R1, #offset of arg reg] ; (2 bytes)
3072 ThumbEmitStoreRegIndirect(ThumbReg(5), ThumbReg(1), offsetof(T_CONTEXT, S) + (iReg * sizeof(DWORD)));
3075 if (iReg < maxReg) {
3076 // LDR r3, [R0], #+4 ; Post incremented load (4 bytes)
3077 ThumbEmitLoadIndirectPostIncrement(ThumbReg(3), ThumbReg(0), 4);
3080 // STR r3, [R1, #offset of arg reg] ; (2 bytes)
3081 ThumbEmitStoreRegIndirect(ThumbReg(3), ThumbReg(1), offsetof(T_CONTEXT, S) + (iReg * sizeof(DWORD)));
3085 if (pArgLoc->m_cStack > 0) {
3086 // Copy to the stack
3087 // Be careful because this can get big and ugly.
3088 _ASSERTE(*pcbStackSpace <= (pArgLoc->m_idxStack * sizeof(DWORD)));
3091 if (*pcbStackSpace < (pArgLoc->m_idxStack * sizeof(DWORD)))
3093 const UINT cbPad = ((pArgLoc->m_idxStack * sizeof(DWORD)) - *pcbStackSpace);
3094 _ASSERTE(cbPad == 4);
3096 ThumbEmitIncrement(ThumbReg(2), cbPad);
3097 *pcbStackSpace += cbPad;
3099 int cStack = pArgLoc->m_cStack;
3100 *pcbStackSpace += (cStack * sizeof(DWORD));
3102 // Now start the copying
3104 // Loop to copy 16-byte chunks per iteration.
3105 // Sacrifice r3 for the loop counter
3106 ThumbEmitMovConstant(ThumbReg(3), pArgLoc->m_cStack & ~3);
3108 CodeLabel *pLoopLabel = NewCodeLabel();
3109 EmitLabel(pLoopLabel);
3110 const WORD mask = ThumbReg(4).Mask() | ThumbReg(5).Mask() | ThumbReg(6).Mask() | ThumbReg(7).Mask();
3111 // LDM r0!, {r4,r5,r6,r7} ; Post incremented loads (2 bytes)
3112 ThumbEmitLoadStoreMultiple(ThumbReg(0), true, mask);
3113 // STM r2!, {r4,r5,r6,r7} ; Post incremented stores (2 bytes)
3114 ThumbEmitLoadStoreMultiple(ThumbReg(2), false, mask);
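// SUBS r3, #4 ; decrement the slot counter and set flags for the conditional branch below (2 bytes)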
3116 Emit16((WORD)(0x3800 | (ThumbReg(3) << 8) | 4));
3118 ThumbEmitCondFlagJump(pLoopLabel, thumbCondNe.cond);
3120 cStack = cStack % 4;
3121 // Now deal with the tail if any
3123 _ASSERTE(cStack <= 8);
3125 while (cStack > 1) {
3126 _ASSERTE(cStack >= 2);
3127 WORD mask = ThumbReg(4).Mask() | ThumbReg(5).Mask();
3130 mask |= ThumbReg(6).Mask();
3132 // Instead of copying 4 slots and leaving a single-slot remainder
3133 // (which would require us to use the bigger opcodes for the tail),
3134 // only copy 3 slots this loop, saving 2 for next time. :)
3135 if (cStack == 1 || cStack > 2) {
3136 mask |= ThumbReg(7).Mask();
3140 // We're reading an odd amount from the stack
3145 // LDM r0!, {r4,r5,r6,r7} ; Post incremented loads (2 bytes)
3146 ThumbEmitLoadStoreMultiple(ThumbReg(0), true, mask);
3147 // STM r2!, {r4,r5,r6,r7} ; Post incremented stores (2 bytes)
3148 ThumbEmitLoadStoreMultiple(ThumbReg(2), false, mask);
3149 _ASSERTE((cStack == 0) || (cStack >= 2));
3152 _ASSERTE(cStack == 1);
3153 // We're reading an odd amount from the stack
3155 // LDR r12, [R0], #+4 ; Post incremented load (4 bytes)
3156 ThumbEmitLoadIndirectPostIncrement(ThumbReg(12), ThumbReg(0), 4);
3157 // STR r12, [R2], #+4 ; Post incremented store (4 bytes)
3158 ThumbEmitStoreIndirectPostIncrement(ThumbReg(12), ThumbReg(2), 4);
3164 Stub * StubLinkerCPU::CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
3166 CorInfoHelperTailCallSpecialHandling flags)
3168 STANDARD_VM_CONTRACT;
3171 CPUSTUBLINKER* pSl = &sl;
3173 // Generates a function that looks like this:
3174 // size_t CopyArguments(va_list args, (R0)
3175 // CONTEXT *pCtx, (R1)
3176 // DWORD *pvStack, (R2)
3177 // size_t cbStack) (R3)
3179 // if (pCtx != NULL) {
3180 // foreach (arg in args) {
3181 // copy into pCtx or pvStack
3184 // return <size of stack needed>;
3188 Module * module = GetModule(pSig->scope);
3189 Instantiation classInst((TypeHandle*)pSig->sigInst.classInst, pSig->sigInst.classInstCount);
3190 Instantiation methodInst((TypeHandle*)pSig->sigInst.methInst, pSig->sigInst.methInstCount);
3191 SigTypeContext typeCtxt(classInst, methodInst);
3193 // The -8 is because R11 points at the pushed {R11, LR} pair, and it is aligned.
3194 // This is the magic distance between the frame pointer and the Frame.
3195 const UINT cbFrameOffset = (sizeof(FrameWithCookie<TailCallFrame>) - 8);
3197 bool fNeedExtraRegs = false;
3198 UINT copyEstimate = 0;
3200 // Do a quick scan of the arguments looking for ones that will probably need extra registers
3201 // and guesstimating the size of the method
3202 if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG)
3205 if (pSig->hasThis())
3208 MetaSig msig(pSig->pSig, pSig->cbSig, module, &typeCtxt);
3209 if (pSig->hasTypeArg())
3210 msig.SetHasParamTypeArg();
3211 ArgIterator argPlacer(&msig);
3213 if (argPlacer.HasRetBuffArg()) {
3217 if (pSig->hasTypeArg() || pSig->isVarArg())
3221 while ((argOffset = argPlacer.GetNextOffset()) != TransitionBlock::InvalidOffset)
3224 argPlacer.GetArgLoc(argOffset, &argLoc);
3226 if (argLoc.m_cStack > 1 || argLoc.m_cGenReg > 1 || argLoc.m_cFloatReg > 1) {
3227 fNeedExtraRegs = true;
3235 if (fNeedExtraRegs) {
3236 // Inject a proper prolog
3238 pSl->ThumbEmitProlog(4, 0, false);
3241 CodeLabel *pNullLabel = pSl->NewCodeLabel();
3243 if (!fNeedExtraRegs && copyEstimate < 100) {
3244 // The real range of CBZ is 0-126, but that's hard to estimate precisely
3245 // and we don't want to do that much work just to save a few bytes
3247 // CBZ R1, NullLabel
3248 pSl->ThumbEmitCondRegJump(pNullLabel, false, ThumbReg(1));
3251 // CMP R1, 0 ; T1 encoding
3252 pSl->Emit16((WORD)(0x2900));
3255 pSl->ThumbEmitCondFlagJump(pNullLabel, thumbCondEq.cond);
3258 UINT cbStackSpace = 0;
3259 UINT cbReturnBufferSpace = 0;
3262 if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG) {
3263 // This is set for stub dispatch or 'thisInSecretRegister'
3264 // The JIT placed an extra argument in the list that needs to
3265 // get shoved into R4, and not counted.
3266 // pCtx->R4 = va_arg(args, DWORD);
3268 // LDR r3, [R0], #+4 ; Post incremented load (4 bytes)
3269 pSl->ThumbEmitLoadIndirectPostIncrement(ThumbReg(3), ThumbReg(0), 4);
3270 // STR r3, [R1, #offset of R4] ; (2 bytes)
3271 pSl->ThumbEmitStoreRegIndirect(ThumbReg(3), ThumbReg(1), offsetof(T_CONTEXT, R4));
3276 MetaSig msig(pSig->pSig, pSig->cbSig, module, &typeCtxt);
3277 if (pSig->hasTypeArg())
3278 msig.SetHasParamTypeArg();
3279 ArgIterator argPlacer(&msig);
3282 // First comes the 'this' pointer
3283 if (argPlacer.HasThis()) {
3284 argPlacer.GetThisLoc(&argLoc);
3285 pSl->ThumbCopyOneTailCallArg(&nSrcAlign, &argLoc, &cbStackSpace);
3288 // Next comes the return buffer
3289 if (argPlacer.HasRetBuffArg()) {
3290 // We always reserve space for the return buffer, but we never zero it out,
3291 // and we never report it. Thus the callee shouldn't do RVO and expect
3292 // to be able to read GC pointers from it.
3293 // If the passed in return buffer is already pointing above the frame,
3294 // then we need to pass it along (so it will get passed out).
3295 // Otherwise we assume the caller is returning void, so we just pass in
3296 // dummy space to be overwritten.
3298 argPlacer.GetRetBuffArgLoc(&argLoc);
3299 _ASSERTE(argLoc.m_cStack == 0);
3300 _ASSERTE(argLoc.m_cFloatReg == 0);
3301 _ASSERTE(argLoc.m_cGenReg == 1);
3303 // Grab some space from the top of the frame and pass that in as a dummy
3304 // buffer if needed. Align to 8-byte boundary (after taking into account the Frame).
3305 // Do this by adding the Frame size, align, then remove the Frame size...
3306 _ASSERTE((pSig->retType == CORINFO_TYPE_REFANY) || (pSig->retType == CORINFO_TYPE_VALUECLASS));
3307 TypeHandle th(pSig->retTypeClass);
3308 UINT cbUsed = ((th.GetSize() + cbFrameOffset + 0x7) & ~0x7) - cbFrameOffset;
3309 _ASSERTE(cbUsed >= th.GetSize());
3310 cbReturnBufferSpace += cbUsed;
3312 // LDR r3, [R0], #+4 ; Post incremented load (4 bytes)
3313 pSl->ThumbEmitLoadIndirectPostIncrement(ThumbReg(3), ThumbReg(0), 4);
3315 // LDR r12, [R1, #offset of R11] ; (2 bytes)
3316 pSl->ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(1), offsetof(T_CONTEXT, R11));
3318 // CMP r3, r12 ; (2 bytes)
3319 pSl->ThumbEmitCmpReg(ThumbReg(3), ThumbReg(12));
3321 CodeLabel *pSkipLabel = pSl->NewCodeLabel();
3322 // BHI SkipLabel ; skip if R3 > R12 unsigned (2 bytes)
3323 pSl->ThumbEmitCondFlagJump(pSkipLabel, thumbCondHi.cond);
3325 // Also check the lower bound of the stack in case the return buffer is on the GC heap
3326 // and the GC heap is below the stack
3327 // CMP r3, sp ; (2 bytes)
3328 pSl->ThumbEmitCmpReg(ThumbReg(3), thumbRegSp);
3329 // BLO SkipLabel ; skip if r3 < sp unsigned (2 bytes)
3330 pSl->ThumbEmitCondFlagJump(pSkipLabel, thumbCondCc.cond);
3332 // If the caller is expecting us to simulate a return buffer for the callee,
3333 // pass that pointer in now by subtracting from R11 the space for the Frame
3334 // and the space for the return buffer.
3335 UINT offset = cbUsed + cbFrameOffset;
3336 if (offset < 4096) {
3337 // SUB r3, r12, #offset ; (4 bytes)
3338 pSl->ThumbEmitSub(ThumbReg(3), ThumbReg(12), offset);
3341 offset = UINT(-int(offset)); // Silence the @#$%^ warning
3342 // MOVW/MOVT (4-8 bytes)
3343 // ADD r3, r12; (2 bytes)
3344 pSl->ThumbEmitAdd(ThumbReg(3), ThumbReg(12), offset);
3347 pSl->EmitLabel(pSkipLabel);
3348 // STR r3, [R1, #offset of arg reg] ; (2 bytes)
3349 pSl->ThumbEmitStoreRegIndirect(ThumbReg(3), ThumbReg(1), offsetof(T_CONTEXT, R0) + (argLoc.m_idxGenReg * sizeof(DWORD)));
3354 // Generics Instantiation Parameter
3355 if (pSig->hasTypeArg()) {
3356 argPlacer.GetParamTypeLoc(&argLoc);
3357 pSl->ThumbCopyOneTailCallArg(&nSrcAlign, &argLoc, &cbStackSpace);
3360 // VarArgs Cookie Parameter
3361 if (pSig->isVarArg()) {
3362 argPlacer.GetVASigCookieLoc(&argLoc);
3363 pSl->ThumbCopyOneTailCallArg(&nSrcAlign, &argLoc, &cbStackSpace);
3366 // Now for *all* the 'real' arguments
3368 while ((argOffset = argPlacer.GetNextOffset()) != TransitionBlock::InvalidOffset)
3370 argPlacer.GetArgLoc(argOffset, &argLoc);
3372 pSl->ThumbCopyOneTailCallArg(&nSrcAlign, &argLoc, &cbStackSpace);
3375 // Now that we are done moving arguments, add back in the stack space we reserved
3376 // for the return buffer.
3377 cbStackSpace += cbReturnBufferSpace;
3379 // Keep the stack space 8-byte aligned
3380 if ((cbStackSpace + cbFrameOffset) & 7) {
3383 _ASSERTE(((cbStackSpace + cbFrameOffset) & 7) == 0);
3385 CodeLabel *pReturnLabel = pSl->NewCodeLabel();
3387 pSl->ThumbEmitNearJump(pReturnLabel);
3390 pSl->EmitLabel(pNullLabel);
3391 // MOVW/MOVT r0, 0 ; No GCLayout info
3392 pSl->ThumbEmitMovConstant(ThumbReg(0), 0);
3394 pSl->ThumbEmitStoreRegIndirect(ThumbReg(0), ThumbReg(3), 0);
3397 pSl->EmitLabel(pReturnLabel);
3399 // MOVW/MOVT r0, #cbStackSpace
3400 pSl->ThumbEmitMovConstant(ThumbReg(0), cbStackSpace);
3402 if (fNeedExtraRegs) {
3403 // Inject a proper epilog
3405 pSl->ThumbEmitEpilog();
3409 pSl->ThumbEmitJumpRegister(thumbRegLr);
3412 LoaderHeap* pHeap = pMD->GetLoaderAllocatorForCode()->GetStubHeap();
3413 return pSl->Link(pHeap);
VOID ResetCurrentContext()
{
    LIMITED_METHOD_CONTRACT;
}
#endif // !DACCESS_COMPILE
#ifdef FEATURE_COMINTEROP
void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
{
    WRAPPER_NO_CONTRACT;

    BYTE *pBuffer = (BYTE*)pCOMMethod - COMMETHOD_CALL_PRESTUB_SIZE;

    memcpy(pBuffer, rgCode, sizeof(rgCode));
    *((PCODE*)(pBuffer + sizeof(rgCode) + 2)) = target;

    // Flush the instruction cache so the freshly written prestub is actually picked up
    ClrFlushInstructionCache(pBuffer, COMMETHOD_CALL_PRESTUB_SIZE);

    _ASSERTE(IS_ALIGNED(pBuffer + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET, sizeof(void*)) &&
             *((PCODE*)(pBuffer + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET)) == target);
}
#endif // FEATURE_COMINTEROP
#ifndef DACCESS_COMPILE

#ifndef CROSSGEN_COMPILE

#ifdef FEATURE_READYTORUN

// Allocation of dynamic helpers

#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)

#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
    SIZE_T cb = size; \
    SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
    BYTE * pStart = (BYTE *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
    BYTE * p = pStart;

#define END_DYNAMIC_HELPER_EMIT() \
    _ASSERTE(pStart + cb == p); \
    while (p < pStart + cbAligned) { *(WORD *)p = 0xdefe; p += 2; } \
    ClrFlushInstructionCache(pStart, cbAligned); \
    return (PCODE)((TADDR)pStart | THUMB_CODE)
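// Each helper below follows the same pattern: BEGIN_DYNAMIC_HELPER_EMIT(n) allocates an
// n-byte chunk (rounded up to TADDR alignment) on the dynamic-helpers heap and points p at
// it, the body writes exactly n bytes of Thumb code through p, and END_DYNAMIC_HELPER_EMIT()
// asserts the byte count, pads the tail of the allocation, flushes the instruction cache,
// and returns the start address with THUMB_CODE set so the result is a valid Thumb entry point.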
static void MovRegImm(BYTE* p, int reg, TADDR imm)
{
    LIMITED_METHOD_CONTRACT;
    *(WORD *)(p + 0) = 0xF240;              // movw reg, #0 (template)
    *(WORD *)(p + 2) = (UINT16)(reg << 8);
    *(WORD *)(p + 4) = 0xF2C0;              // movt reg, #0 (template)
    *(WORD *)(p + 6) = (UINT16)(reg << 8);
    PutThumb2Mov32((UINT16 *)p, imm);       // splice the 32-bit immediate into the movw/movt pair
}
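// For illustration (example values, not from the original source): MovRegImm(p, 0, 0x12345678)
// fills the 8 bytes at p with the equivalent of
//     movw r0, #0x5678
//     movt r0, #0x1234
// which is why every caller below advances p by 8 after the call.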
PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
{
    STANDARD_VM_CONTRACT;

    BEGIN_DYNAMIC_HELPER_EMIT(18);

    MovRegImm(p, 0, arg); p += 8;       // mov r0, arg
    MovRegImm(p, 12, target); p += 8;   // mov r12, target
    *(WORD *)p = 0x4760; p += 2;        // bx r12

    END_DYNAMIC_HELPER_EMIT();
}
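// Illustrative breakdown of the 18 bytes budgeted above: two MovRegImm sequences (8 bytes each)
// plus the 2-byte "bx r12", i.e. the stub is roughly
//     movw/movt r0,  arg      ; 8 bytes
//     movw/movt r12, target   ; 8 bytes
//     bx        r12           ; 2 bytes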
void DynamicHelpers::EmitHelperWithArg(BYTE*& p, LoaderAllocator * pAllocator, TADDR arg, PCODE target)
{
    MovRegImm(p, 1, arg); p += 8;       // mov r1, arg
    MovRegImm(p, 12, target); p += 8;   // mov r12, target
    *(WORD *)p = 0x4760; p += 2;        // bx r12
}

PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
{
    BEGIN_DYNAMIC_HELPER_EMIT(18);

    EmitHelperWithArg(p, pAllocator, arg, target);

    END_DYNAMIC_HELPER_EMIT();
}
PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
{
    BEGIN_DYNAMIC_HELPER_EMIT(26);

    MovRegImm(p, 0, arg); p += 8;       // mov r0, arg
    MovRegImm(p, 1, arg2); p += 8;      // mov r1, arg2
    MovRegImm(p, 12, target); p += 8;   // mov r12, target
    *(WORD *)p = 0x4760; p += 2;        // bx r12

    END_DYNAMIC_HELPER_EMIT();
}
PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
{
    BEGIN_DYNAMIC_HELPER_EMIT(20);

    *(WORD *)p = 0x4601; p += 2;        // mov r1, r0 ; shift the original argument over
    MovRegImm(p, 0, arg); p += 8;       // mov r0, arg
    MovRegImm(p, 12, target); p += 8;   // mov r12, target
    *(WORD *)p = 0x4760; p += 2;        // bx r12

    END_DYNAMIC_HELPER_EMIT();
}
PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator)
{
    BEGIN_DYNAMIC_HELPER_EMIT(2);

    *(WORD *)p = 0x4770; p += 2;        // bx lr

    END_DYNAMIC_HELPER_EMIT();
}
PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg)
{
    BEGIN_DYNAMIC_HELPER_EMIT(10);

    MovRegImm(p, 0, arg); p += 8;       // mov r0, arg
    *(WORD *)p = 0x4770; p += 2;        // bx lr

    END_DYNAMIC_HELPER_EMIT();
}
PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset)
{
    BEGIN_DYNAMIC_HELPER_EMIT((offset != 0) ? 16 : 12);

    MovRegImm(p, 0, arg); p += 8;       // mov r0, arg
    *(WORD *)p = 0x6800; p += 2;        // ldr r0, [r0]

    if (offset != 0) {
        // add r0, r0, <offset>
        *(WORD *)(p + 0) = 0xF100;
        *(WORD *)(p + 2) = offset;
        p += 4;
    }

    *(WORD *)p = 0x4770; p += 2;        // bx lr

    END_DYNAMIC_HELPER_EMIT();
}
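// Illustrative shape of the stub emitted above when offset != 0 (16 bytes; 12 without the add):
//     movw/movt r0, arg       ; 8 bytes
//     ldr   r0, [r0]          ; 2 bytes (0x6800)
//     add.w r0, r0, #offset   ; 4 bytes (0xF100 + imm)
//     bx    lr                ; 2 bytes (0x4770)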
PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
{
    BEGIN_DYNAMIC_HELPER_EMIT(18);

    MovRegImm(p, 2, arg); p += 8;       // mov r2, arg
    MovRegImm(p, 12, target); p += 8;   // mov r12, target
    *(WORD *)p = 0x4760; p += 2;        // bx r12

    END_DYNAMIC_HELPER_EMIT();
}
PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
{
    BEGIN_DYNAMIC_HELPER_EMIT(26);

    MovRegImm(p, 2, arg); p += 8;       // mov r2, arg
    MovRegImm(p, 3, arg2); p += 8;      // mov r3, arg2
    MovRegImm(p, 12, target); p += 8;   // mov r12, target
    *(WORD *)p = 0x4760; p += 2;        // bx r12

    END_DYNAMIC_HELPER_EMIT();
}
PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, CORINFO_RUNTIME_LOOKUP * pLookup, DWORD dictionaryIndexAndSlot, Module * pModule)
{
    STANDARD_VM_CONTRACT;

    PCODE helperAddress = (pLookup->helper == CORINFO_HELP_RUNTIMEHANDLE_METHOD ?
        GetEEFuncEntryPoint(JIT_GenericHandleMethodWithSlotAndModule) :
        GetEEFuncEntryPoint(JIT_GenericHandleClassWithSlotAndModule));

    GenericHandleArgs * pArgs = (GenericHandleArgs *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(sizeof(GenericHandleArgs), DYNAMIC_HELPER_ALIGNMENT);
    pArgs->dictionaryIndexAndSlot = dictionaryIndexAndSlot;
    pArgs->signature = pLookup->signature;
    pArgs->module = (CORINFO_MODULE_HANDLE)pModule;

    // The lookup is available only via the run-time helper function, so just tail-call it.
    if (pLookup->indirections == CORINFO_USEHELPER)
    {
        BEGIN_DYNAMIC_HELPER_EMIT(18);

        EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress);

        END_DYNAMIC_HELPER_EMIT();
    }
    else
    {
        int indirectionsSize = 0;
        for (WORD i = 0; i < pLookup->indirections; i++)
        {
            if ((i == 0 && pLookup->indirectFirstOffset) || (i == 1 && pLookup->indirectSecondOffset)) {
                indirectionsSize += (pLookup->offsets[i] >= 0xFF ? 10 : 2);  // movw/movt + add, or a single adds
                indirectionsSize += 4;                                       // ldr r2, [r0] + add r0, r2
            }
            else {
                indirectionsSize += (pLookup->offsets[i] >= 0xFFF ? 10 : 4); // movw/movt + ldr, or a single ldr.w
            }
        }

        int codeSize = indirectionsSize + (pLookup->testForNull ? 26 : 2);
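        // How the byte budget works (example values are hypothetical): the 26 in the
        // testForNull case is 2 ("mov r3, r0") + 2 ("cbz r0") + 2 ("mov pc, lr") +
        // 2 ("mov r0, r3") + 18 (EmitHelperWithArg). So a lookup with two plain
        // indirections at offsets 0x4 and 0x18 and testForNull set needs
        // codeSize = (4 + 4) + 26 = 34 bytes.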
        BEGIN_DYNAMIC_HELPER_EMIT(codeSize);

        if (pLookup->testForNull) {
            *(WORD *)p = 0x4603; p += 2;        // mov r3, r0 ; save the original argument
        }

        for (WORD i = 0; i < pLookup->indirections; i++)
        {
            if ((i == 0 && pLookup->indirectFirstOffset) || (i == 1 && pLookup->indirectSecondOffset))
            {
                if (pLookup->offsets[i] >= 0xFF) {
                    MovRegImm(p, 2, pLookup->offsets[i]); p += 8;   // mov r2, offset
                    *(WORD *)p = 0x4410; p += 2;                    // add r0, r2
                }
                else {
                    // adds r0, #offset
                    *(WORD *)p = (WORD)((WORD)0x3000 | (WORD)((0x00FF) & pLookup->offsets[i])); p += 2;
                }

                // r0 is pointer + offset[0]
                *(WORD *)p = 0x6802; p += 2;    // ldr r2, [r0]
                *(WORD *)p = 0x4410; p += 2;    // add r0, r2
            }
            else
            {
                if (pLookup->offsets[i] >= 0xFFF) {
                    MovRegImm(p, 2, pLookup->offsets[i]); p += 8;   // mov r2, offset
                    *(WORD *)p = 0x5880; p += 2;                    // ldr r0, [r0, r2]
                }
                else {
                    // ldr r0, [r0 + offset]
                    *(WORD *)p = 0xF8D0; p += 2;
                    *(WORD *)p = (WORD)(0xFFF & pLookup->offsets[i]); p += 2;
                }
            }
        }

        // No null test required
        if (!pLookup->testForNull) {
            *(WORD *)p = 0x46F7; p += 2;        // mov pc, lr
        }
        else {
            *(WORD *)p = 0xB100; p += 2;        // cbz r0, nullvaluelabel
            *(WORD *)p = 0x46F7; p += 2;        // mov pc, lr
            // nullvaluelabel:
            *(WORD *)p = 0x4618; p += 2;        // mov r0, r3 ; restore the original argument

            EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress);
        }

        END_DYNAMIC_HELPER_EMIT();
    }
}
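// Illustrative only (hypothetical lookup, not from the original source): for two plain
// indirections at offsets 0x4 and 0x18 with testForNull set, the stub built above comes
// out roughly as:
//     mov   r3, r0            ; save the generic context argument
//     ldr.w r0, [r0, #0x4]    ; first indirection
//     ldr.w r0, [r0, #0x18]   ; second indirection
//     cbz   r0, slowpath      ; slot not published yet?
//     mov   pc, lr            ; fast path: result is in r0
// slowpath:
//     mov   r0, r3            ; restore the original argument
//     movw/movt r1, pArgs     ; EmitHelperWithArg
//     movw/movt r12, helperAddress
//     bx    r12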
#endif // FEATURE_READYTORUN

#endif // CROSSGEN_COMPILE

#endif // !DACCESS_COMPILE