[Tizen] Implement ASan wrapper for Linux ARM32
[platform/upstream/dotnet/runtime.git] / src / coreclr / vm / arm / stubs.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 //
4 // File: stubs.cpp
5 //
6 // This file contains stub functions for unimplemented features that are
7 // needed to run on the ARM platform.
8
9 #include "common.h"
10 #include "jitinterface.h"
11 #include "comdelegate.h"
12 #include "invokeutil.h"
13 #include "excep.h"
14 #include "class.h"
15 #include "field.h"
16 #include "dllimportcallback.h"
17 #include "dllimport.h"
18 #include "eeconfig.h"
19 #include "cgensys.h"
20 #include "asmconstants.h"
21 #include "virtualcallstub.h"
22 #include "gcdump.h"
23 #include "rtlfunctions.h"
24 #include "codeman.h"
25 #include "ecall.h"
26 #include "threadsuspend.h"
27
28 #if defined(TIZEN_ASAN_ENVIRONMENT) && !defined(CROSS_COMPILE) && !defined(DACCESS_COMPILE)
29 #include <tizenasanenv.h>
30 #endif
31
32 // target write barriers
33 EXTERN_C void JIT_WriteBarrier(Object **dst, Object *ref);
34 EXTERN_C void JIT_WriteBarrier_End();
35 EXTERN_C void JIT_CheckedWriteBarrier(Object **dst, Object *ref);
36 EXTERN_C void JIT_CheckedWriteBarrier_End();
37 EXTERN_C void JIT_ByRefWriteBarrier_End();
38 EXTERN_C void JIT_ByRefWriteBarrier_SP(Object **dst, Object *ref);
39
40 // source write barriers
41 EXTERN_C void JIT_WriteBarrier_SP_Pre(Object **dst, Object *ref);
42 EXTERN_C void JIT_WriteBarrier_SP_Pre_End();
43 EXTERN_C void JIT_WriteBarrier_SP_Post(Object **dst, Object *ref);
44 EXTERN_C void JIT_WriteBarrier_SP_Post_End();
45 EXTERN_C void JIT_WriteBarrier_MP_Pre(Object **dst, Object *ref);
46 EXTERN_C void JIT_WriteBarrier_MP_Pre_End();
47 EXTERN_C void JIT_WriteBarrier_MP_Post(Object **dst, Object *ref);
48 EXTERN_C void JIT_WriteBarrier_MP_Post_End();
49
50 EXTERN_C void JIT_CheckedWriteBarrier_SP_Pre(Object **dst, Object *ref);
51 EXTERN_C void JIT_CheckedWriteBarrier_SP_Pre_End();
52 EXTERN_C void JIT_CheckedWriteBarrier_SP_Post(Object **dst, Object *ref);
53 EXTERN_C void JIT_CheckedWriteBarrier_SP_Post_End();
54 EXTERN_C void JIT_CheckedWriteBarrier_MP_Pre(Object **dst, Object *ref);
55 EXTERN_C void JIT_CheckedWriteBarrier_MP_Pre_End();
56 EXTERN_C void JIT_CheckedWriteBarrier_MP_Post(Object **dst, Object *ref);
57 EXTERN_C void JIT_CheckedWriteBarrier_MP_Post_End();
58
59 EXTERN_C void JIT_ByRefWriteBarrier_SP_Pre();
60 EXTERN_C void JIT_ByRefWriteBarrier_SP_Pre_End();
61 EXTERN_C void JIT_ByRefWriteBarrier_SP_Post();
62 EXTERN_C void JIT_ByRefWriteBarrier_SP_Post_End();
63 EXTERN_C void JIT_ByRefWriteBarrier_MP_Pre();
64 EXTERN_C void JIT_ByRefWriteBarrier_MP_Pre_End();
65 EXTERN_C void JIT_ByRefWriteBarrier_MP_Post(Object **dst, Object *ref);
66 EXTERN_C void JIT_ByRefWriteBarrier_MP_Post_End();
67
68 EXTERN_C void JIT_PatchedWriteBarrierStart();
69 EXTERN_C void JIT_PatchedWriteBarrierLast();
70
71 #ifndef DACCESS_COMPILE
72 //-----------------------------------------------------------------------
73 // InstructionFormat for conditional jump.
74 //-----------------------------------------------------------------------
75 class ThumbCondJump : public InstructionFormat
76 {
77     public:
78         ThumbCondJump() : InstructionFormat(InstructionFormat::k16)
79         {
80             LIMITED_METHOD_CONTRACT;
81         }
82
83         virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
84         {
85             LIMITED_METHOD_CONTRACT
86
87             _ASSERTE(refsize == InstructionFormat::k16);
88
89             return 2;
90         }
91
92         virtual UINT GetHotSpotOffset(UINT refsize, UINT variationCode)
93         {
94             LIMITED_METHOD_CONTRACT
95
96             _ASSERTE(refsize == InstructionFormat::k16);
97
98             return 4;
99         }
100
101         //CB{N}Z Rn, <Label>
102         //Encoding 1|0|1|1|op|0|i|1|imm5|Rn
103         //op = Bit3(variation)
104         //Rn = Bits2-0(variation)
105         virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBufferRX, BYTE *pOutBufferRW, UINT variationCode, BYTE *pDataBuffer)
106         {
107             LIMITED_METHOD_CONTRACT
108
109             _ASSERTE(refsize == InstructionFormat::k16);
110
111             if(fixedUpReference <0 || fixedUpReference > 126)
112                 COMPlusThrow(kNotSupportedException);
113
114             _ASSERTE((fixedUpReference & 0x1) == 0);
115
116             pOutBufferRW[0] = static_cast<BYTE>(((0x3e & fixedUpReference) << 2) | (0x7 & variationCode));
117             pOutBufferRW[1] = static_cast<BYTE>(0xb1 | (0x8 & variationCode)| ((0x40 & fixedUpReference)>>5));
118         }
119 };
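
// Worked example of the encoding above (illustrative, derived from the byte
// arithmetic in EmitInstruction): for a CBZ of r2 to a target 8 bytes past
// the hot spot (fixedUpReference = 8, variationCode = 2 with the op bit
// clear), the emitter produces
//   pOutBufferRW[0] = ((8 & 0x3e) << 2) | 2        = 0x22
//   pOutBufferRW[1] = 0xb1 | 0 | ((8 & 0x40) >> 5) = 0xb1
// i.e. the little-endian halfword 0xb122, which decodes as CBZ r2, <pc+8>
// (imm5 = 00100, Rn = 010). GetHotSpotOffset() returns 4 because Thumb reads
// PC as the address of the current instruction plus 4.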
120
121 //-----------------------------------------------------------------------
122 // InstructionFormat for near Jump and short Jump
123 //-----------------------------------------------------------------------
124 class ThumbNearJump : public InstructionFormat
125 {
126     public:
127         ThumbNearJump() : InstructionFormat(InstructionFormat::k16|InstructionFormat::k32)
128         {
129             LIMITED_METHOD_CONTRACT;
130         }
131
132         virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
133         {
134             LIMITED_METHOD_CONTRACT
135
136             if(refsize == InstructionFormat::k16)
137                 return 2;
138             else if(refsize == InstructionFormat::k32)
139                 return 4;
140             else
141                 _ASSERTE(!"Unknown refsize");
142             return 0;
143         }
144
145         virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBufferRX, BYTE *pOutBufferRW, UINT cond, BYTE *pDataBuffer)
146         {
147             LIMITED_METHOD_CONTRACT
148
149             _ASSERTE(cond <15);
150
151             //offsets must be in multiples of 2
152             _ASSERTE((fixedUpReference & 0x1) == 0);
153
154             if(cond == 0xe) //Always execute
155             {
156                 if(fixedUpReference >= -2048 && fixedUpReference <= 2046)
157                 {
158                     if(refsize != InstructionFormat::k16)
159                         _ASSERTE(!"Expected refSize to be 2");
160
161                     //Emit T2 encoding of B<c> <label> instruction
162                     pOutBufferRW[0] = static_cast<BYTE>((fixedUpReference & 0x1fe)>>1);
163                     pOutBufferRW[1] = static_cast<BYTE>(0xe0 | ((fixedUpReference & 0xe00)>>9));
164                 }
165                 else if(fixedUpReference >= -16777216 && fixedUpReference <= 16777214)
166                 {
167                     if(refsize != InstructionFormat::k32)
168                         _ASSERTE(!"Expected refSize to be 4");
169
170                     //Emit T4 encoding of B<c> <label> instruction
171                     int s = (fixedUpReference & 0x1000000) >> 24;
172                     int i1 = (fixedUpReference & 0x800000) >> 23;
173                     int i2 = (fixedUpReference & 0x400000) >> 22;
174                     pOutBufferRW[0] = static_cast<BYTE>((fixedUpReference & 0xff000) >> 12);
175                     pOutBufferRW[1] = static_cast<BYTE>(0xf0 | (s << 2) |( (fixedUpReference & 0x300000) >>20));
176                     pOutBufferRW[2] = static_cast<BYTE>((fixedUpReference & 0x1fe) >> 1);
177                     pOutBufferRW[3] = static_cast<BYTE>(0x90 | (~(i1^s)) << 5 | (~(i2^s)) << 3 | (fixedUpReference & 0xe00) >> 9);
178                 }
179                 else
180                 {
181                     COMPlusThrow(kNotSupportedException);
182                 }
183             }
184             else // conditional branch based on flags
185             {
186                 if(fixedUpReference >= -256 && fixedUpReference <= 254)
187                 {
188                     if(refsize != InstructionFormat::k16)
189                         _ASSERTE(!"Expected refSize to be 2");
190
191                     //Emit T1 encoding of B<c> <label> instruction
192                     pOutBufferRW[0] = static_cast<BYTE>((fixedUpReference & 0x1fe)>>1);
193                     pOutBufferRW[1] = static_cast<BYTE>(0xd0 | (cond & 0xf));
194                 }
195                 else if(fixedUpReference >= -1048576 && fixedUpReference <= 1048574)
196                 {
197                     if(refsize != InstructionFormat::k32)
198                         _ASSERTE(!"Expected refSize to be 4");
199
200                     //Emit T3 encoding of B<c> <label> instruction
201                     pOutBufferRW[0] = static_cast<BYTE>(((cond & 0x3) << 6) | ((fixedUpReference & 0x3f000) >>12));
202                     pOutBufferRW[1] = static_cast<BYTE>(0xf0 | ((fixedUpReference & 0x100000) >>18) | ((cond & 0xc) >> 2));
203                     pOutBufferRW[2] = static_cast<BYTE>((fixedUpReference & 0x1fe) >> 1);
204                     pOutBufferRW[3] = static_cast<BYTE>(0x80 | ((fixedUpReference & 0x40000) >> 13) | ((fixedUpReference & 0x80000) >> 16) | ((fixedUpReference & 0xe00) >> 9));
205                 }
206                 else
207                 {
208                     COMPlusThrow(kNotSupportedException);
209                 }
210             }
211         }
212
213         virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
214         {
215             LIMITED_METHOD_CONTRACT
216
217             if (fExternal)
218             {
219                 _ASSERTE(0);
220                 return FALSE;
221             }
222             else
223             {
224                 switch (refsize)
225                 {
226                 case InstructionFormat::k16:
227                     if(variationCode == 0xe)
228                         return  (offset >= -2048 && offset <= 2046 && (offset & 0x1) == 0);
229                     else
230                         return (offset >= -256 && offset <= 254 && (offset & 0x1) == 0);
231                 case InstructionFormat::k32:
232                     if(variationCode == 0xe)
233                         return  ((offset >= -16777216) && (offset <= 16777214) && ((offset & 0x1) == 0));
234                     else
235                         return  ((offset >= -1048576) && (offset <= 1048574) && ((offset & 0x1) == 0));
236                 default:
237                     _ASSERTE(!"Unknown refsize");
238                     return FALSE;
239                 }
240              }
241         }
242
243         virtual UINT GetHotSpotOffset(UINT refsize, UINT variationCode)
244         {
245             LIMITED_METHOD_CONTRACT
246
247             _ASSERTE(refsize == InstructionFormat::k16 || refsize == InstructionFormat::k32);
248
249             return 4;
250         }
251 };
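
// Worked examples of the branch encodings above (illustrative, derived from
// the byte arithmetic in EmitInstruction): with cond = 0xe (always execute)
// and fixedUpReference = 100, the k16 path emits bytes 0x32, 0xe0 - the T2
// halfword 0xe032, i.e. B <pc+100> (imm11 = 50, offset = imm11 << 1). With
// cond = 0x0 (EQ) and fixedUpReference = 20, the k16 path emits bytes 0x0a,
// 0xd0 - the T1 halfword 0xd00a, i.e. BEQ <pc+20>. As with ThumbCondJump,
// the hot spot offset of 4 accounts for the Thumb PC bias.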
252
253
254 //static conditional jump instruction format object
255 static BYTE gThumbCondJump[sizeof(ThumbCondJump)];
256
257 //static near jump instruction format object
258 static BYTE gThumbNearJump[sizeof(ThumbNearJump)];
259
260 void StubLinkerCPU::Init(void)
261 {
262     //Initialize the object
263     new (gThumbCondJump) ThumbCondJump();
264     new (gThumbNearJump) ThumbNearJump();
265 }
266
267 #ifndef CROSSGEN_COMPILE
268
269 // GC write barrier support.
270 //
271 // To optimize our write barriers we code the values of several GC globals (e.g. g_lowest_address) directly
272 // into the barrier function itself, thus avoiding a double memory indirection. Every time the GC modifies one
273 // of these globals we need to update all of the write barriers accordingly.
274 //
275 // In order to keep this process non-brittle we don't hard code the offsets of the instructions that need to
276 // be changed. Instead the code used to create these barriers is implemented using special macros that record
277 // the necessary offsets in a descriptor table. Search for "GC write barrier support" in vm\arm\asmhelpers.asm
278 // for more details.
279
280 // Structure describing the layout of a single write barrier descriptor. This must be kept in sync with the
281 // code in vm\arm\asmhelpers.asm in the WRITE_BARRIER_END macro. Each offset recorded is for one of the
282 // supported GC globals (an offset of 0xffff is encoded if that global is not used by the particular barrier
283 // function). We currently only support one usage of each global by any single barrier function. The offset is
284 // the byte offset from the start of the function at which a movw,movt instruction pair is used to load the
285 // value of the global into a register.
286 struct WriteBarrierDescriptor
287 {
288 #ifdef TARGET_UNIX
289     DWORD   m_funcStartOffset;              // Offset to the start of the barrier function relative to this struct address
290     DWORD   m_funcEndOffset;                // Offset to the end of the barrier function relative to this struct address
291 #else // TARGET_UNIX
292     BYTE *  m_pFuncStart;                   // Pointer to the start of the barrier function
293     BYTE *  m_pFuncEnd;                     // Pointer to the end of the barrier function
294 #endif // TARGET_UNIX
295     DWORD   m_dw_g_lowest_address_offset;   // Offset of the instruction reading g_lowest_address
296     DWORD   m_dw_g_highest_address_offset;  // Offset of the instruction reading g_highest_address
297     DWORD   m_dw_g_ephemeral_low_offset;    // Offset of the instruction reading g_ephemeral_low
298     DWORD   m_dw_g_ephemeral_high_offset;   // Offset of the instruction reading g_ephemeral_high
299     DWORD   m_dw_g_card_table_offset;       // Offset of the instruction reading g_card_table
300 };
301
302 // Infrastructure used for mapping of the source and destination of current WB patching
303 struct WriteBarrierMapping
304 {
305     PBYTE to;    // Pointer to the write-barrier where it was copied over
306     PBYTE from;  // Pointer to write-barrier from which it was copied
307 };
308
309 const int WriteBarrierIndex         = 0;
310 const int CheckedWriteBarrierIndex  = 1;
311 const int ByRefWriteBarrierIndex    = 2;
312 const int MaxWriteBarrierIndex      = 3;
313
314 WriteBarrierMapping wbMapping[MaxWriteBarrierIndex] =
315                                     {
316                                         {(PBYTE)JIT_WriteBarrier, NULL},
317                                         {(PBYTE)JIT_CheckedWriteBarrier, NULL},
318                                         {(PBYTE)JIT_ByRefWriteBarrier, NULL}
319                                     };
320
321 PBYTE FindWBMapping(PBYTE from)
322 {
323     for(int i = 0; i < MaxWriteBarrierIndex; ++i)
324     {
325         if(wbMapping[i].from == from)
326             return wbMapping[i].to;
327     }
328     return NULL;
329 }
330
331 // Pointer to the start of the descriptor table. The end of the table is marked by a sentinel entry
332 // (m_pFuncStart is NULL).
333 EXTERN_C WriteBarrierDescriptor g_rgWriteBarrierDescriptors;
334
335 // Determine the range of memory containing all the write barrier implementations (these are clustered
336 // together and should fit in a page or maybe two).
337 void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength)
338 {
339     DWORD size = (PBYTE)JIT_PatchedWriteBarrierLast - (PBYTE)JIT_PatchedWriteBarrierStart;
340     *ppbStart = (PBYTE)JIT_PatchedWriteBarrierStart;
341     if (IsWriteBarrierCopyEnabled())
342     {
343         *ppbStart = GetWriteBarrierCodeLocation(*ppbStart);
344     }
345     *pcbLength = size;
346 }
347
348 void CopyWriteBarrier(PCODE dstCode, PCODE srcCode, PCODE endCode)
349 {
350     TADDR dst = (TADDR)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation((void*)dstCode));
351     TADDR src = PCODEToPINSTR(srcCode);
352     TADDR end = PCODEToPINSTR(endCode);
353
354     size_t size = (PBYTE)end - (PBYTE)src;
355
356     ExecutableWriterHolder<void> writeBarrierWriterHolder;
357     if (IsWriteBarrierCopyEnabled())
358     {
359         writeBarrierWriterHolder = ExecutableWriterHolder<void>((void*)dst, size);
360         dst = (TADDR)writeBarrierWriterHolder.GetRW();
361     }
362
363     memcpy((PVOID)dst, (PVOID)src, size);
364 }
365
366 #if _DEBUG
367 void ValidateWriteBarriers()
368 {
369     // Post-grow write barriers are bigger than pre-grow ones, so validate that the target WB has space to accommodate them
370     _ASSERTE( ((PBYTE)JIT_WriteBarrier_End - (PBYTE)JIT_WriteBarrier) >= ((PBYTE)JIT_WriteBarrier_MP_Post_End - (PBYTE)JIT_WriteBarrier_MP_Post));
371     _ASSERTE( ((PBYTE)JIT_WriteBarrier_End - (PBYTE)JIT_WriteBarrier) >= ((PBYTE)JIT_WriteBarrier_SP_Post_End - (PBYTE)JIT_WriteBarrier_SP_Post));
372
373     _ASSERTE( ((PBYTE)JIT_CheckedWriteBarrier_End - (PBYTE)JIT_CheckedWriteBarrier) >= ((PBYTE)JIT_CheckedWriteBarrier_MP_Post_End - (PBYTE)JIT_CheckedWriteBarrier_MP_Post));
374     _ASSERTE( ((PBYTE)JIT_CheckedWriteBarrier_End - (PBYTE)JIT_CheckedWriteBarrier) >= ((PBYTE)JIT_CheckedWriteBarrier_SP_Post_End - (PBYTE)JIT_CheckedWriteBarrier_SP_Post));
375
376     _ASSERTE( ((PBYTE)JIT_ByRefWriteBarrier_End - (PBYTE)JIT_ByRefWriteBarrier) >= ((PBYTE)JIT_ByRefWriteBarrier_MP_Post_End - (PBYTE)JIT_ByRefWriteBarrier_MP_Post));
377     _ASSERTE( ((PBYTE)JIT_ByRefWriteBarrier_End - (PBYTE)JIT_ByRefWriteBarrier) >= ((PBYTE)JIT_ByRefWriteBarrier_SP_Post_End - (PBYTE)JIT_ByRefWriteBarrier_SP_Post));
378
379 }
380 #endif // _DEBUG
381
382 #define UPDATE_WB(_proc,_grow)   \
383     CopyWriteBarrier((PCODE)JIT_WriteBarrier, (PCODE)JIT_WriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_WriteBarrier_ ## _proc ## _ ## _grow ## _End); \
384     wbMapping[WriteBarrierIndex].from = (PBYTE)JIT_WriteBarrier_ ## _proc ## _ ## _grow ; \
385     \
386     CopyWriteBarrier((PCODE)JIT_CheckedWriteBarrier, (PCODE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow ## _End); \
387     wbMapping[CheckedWriteBarrierIndex].from = (PBYTE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow ; \
388     \
389     CopyWriteBarrier((PCODE)JIT_ByRefWriteBarrier, (PCODE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow ## _End); \
390     wbMapping[ByRefWriteBarrierIndex].from = (PBYTE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow ; \
391
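// For reference, UPDATE_WB(MP,Post) expands (modulo line splicing) to:
//
//   CopyWriteBarrier((PCODE)JIT_WriteBarrier, (PCODE)JIT_WriteBarrier_MP_Post, (PCODE)JIT_WriteBarrier_MP_Post_End);
//   wbMapping[WriteBarrierIndex].from = (PBYTE)JIT_WriteBarrier_MP_Post;
//
//   CopyWriteBarrier((PCODE)JIT_CheckedWriteBarrier, (PCODE)JIT_CheckedWriteBarrier_MP_Post, (PCODE)JIT_CheckedWriteBarrier_MP_Post_End);
//   wbMapping[CheckedWriteBarrierIndex].from = (PBYTE)JIT_CheckedWriteBarrier_MP_Post;
//
//   CopyWriteBarrier((PCODE)JIT_ByRefWriteBarrier, (PCODE)JIT_ByRefWriteBarrier_MP_Post, (PCODE)JIT_ByRefWriteBarrier_MP_Post_End);
//   wbMapping[ByRefWriteBarrierIndex].from = (PBYTE)JIT_ByRefWriteBarrier_MP_Post;
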
392 // Update the instructions in our various write barrier implementations that refer directly to the values
393 // of GC globals such as g_lowest_address and g_card_table. We don't particularly care which values have
394 // changed on each of these callbacks; it's pretty cheap to refresh them all.
395 void UpdateGCWriteBarriers(bool postGrow = false)
396 {
397     // A helper macro (GWB_PATCH_OFFSET, defined below) abstracts the minutiae of patching the
398     // instructions that access the value of a particular GC global.
399
400 #if _DEBUG
401     ValidateWriteBarriers();
402 #endif // _DEBUG
403
404     static bool wbCopyRequired = true; // We begin by needing a write barrier copy
405     static bool wbIsPostGrow = false;  // We begin with the pre-grow write barrier
406
407     if(postGrow && !wbIsPostGrow)
408     {
409         wbIsPostGrow = true;
410         wbCopyRequired = true;
411     }
412
413     if(wbCopyRequired)
414     {
415         BOOL mp = g_SystemInfo.dwNumberOfProcessors > 1;
416         if(mp)
417         {
418             if(wbIsPostGrow)
419             {
420                 UPDATE_WB(MP,Post);
421             }
422             else
423             {
424                 UPDATE_WB(MP,Pre);
425             }
426         }
427         else
428         {
429             if(wbIsPostGrow)
430             {
431                 UPDATE_WB(SP,Post);
432             }
433             else
434             {
435                 UPDATE_WB(SP,Pre);
436             }
437         }
438
439         wbCopyRequired = false;
440     }
441 #define GWB_PATCH_OFFSET(_global)                                       \
442     if (pDesc->m_dw_##_global##_offset != 0xffff)                       \
443         PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset), (UINT32)(dac_cast<TADDR>(_global)));
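// For reference, GWB_PATCH_OFFSET(g_card_table) expands to:
//
//   if (pDesc->m_dw_g_card_table_offset != 0xffff)
//       PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_g_card_table_offset), (UINT32)(dac_cast<TADDR>(g_card_table)));
//
// i.e. it rewrites the immediate encoded by the movw/movt pair at the recorded
// offset, while an offset of 0xffff marks a global the barrier does not use
// (see the WriteBarrierDescriptor comment above).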
444
445     // Iterate through the write barrier patch table created in the .clrwb section
446     // (see write barrier asm code)
447     WriteBarrierDescriptor * pDesc = &g_rgWriteBarrierDescriptors;
448 #ifdef TARGET_UNIX
449     while (pDesc->m_funcStartOffset)
450 #else // TARGET_UNIX
451     while (pDesc->m_pFuncStart)
452 #endif // TARGET_UNIX
453     {
454         // If the write barrier is being currently used (as in copied over to the patchable site)
455         // then read the patch location from the table and use the offset to patch the target asm code
456 #ifdef TARGET_UNIX
457         PBYTE to = FindWBMapping((BYTE *)pDesc + pDesc->m_funcStartOffset);
458         size_t barrierSize = pDesc->m_funcEndOffset - pDesc->m_funcStartOffset;
459 #else // TARGET_UNIX
460         PBYTE to = FindWBMapping(pDesc->m_pFuncStart);
461         size_t barrierSize = pDesc->m_pFuncEnd - pDesc->m_pFuncStart;
462 #endif // TARGET_UNIX
463         if(to)
464         {
465             to = (PBYTE)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation(to));
466             ExecutableWriterHolder<BYTE> barrierWriterHolder;
467             if (IsWriteBarrierCopyEnabled())
468             {
469                 barrierWriterHolder = ExecutableWriterHolder<BYTE>(to, barrierSize);
470                 to = barrierWriterHolder.GetRW();
471             }
472             GWB_PATCH_OFFSET(g_lowest_address);
473             GWB_PATCH_OFFSET(g_highest_address);
474             GWB_PATCH_OFFSET(g_ephemeral_low);
475             GWB_PATCH_OFFSET(g_ephemeral_high);
476             GWB_PATCH_OFFSET(g_card_table);
477         }
478
479         pDesc++;
480     }
481 }
482
483 int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
484 {
485     // The runtime is not always suspended when this is called (unlike StompWriteBarrierEphemeral) but we have
486     // no way to update the barrier code atomically on ARM since each 32-bit value we change is loaded over
487     // two instructions. So we have to suspend the EE (which forces code out of the barrier functions) before
488     // proceeding. Luckily the case where the runtime is not already suspended is relatively rare (allocation
489     // of a new large object heap segment). Skip the suspend for the case where we're called during runtime
490     // startup.
491
492     // suspend/resuming the EE under GC stress will trigger a GC and if we're holding the
493     // GC lock due to allocating a LOH segment it will cause a deadlock so disable it here.
494     GCStressPolicy::InhibitHolder iholder;
495     int stompWBCompleteActions = SWB_ICACHE_FLUSH;
496
497     if (!isRuntimeSuspended)
498     {
499         ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_OTHER);
500         stompWBCompleteActions |= SWB_EE_RESTART;
501     }
502
503     UpdateGCWriteBarriers(bReqUpperBoundsCheck);
504
505     return stompWBCompleteActions;
506 }
507
508 int StompWriteBarrierEphemeral(bool isRuntimeSuspended)
509 {
510     UNREFERENCED_PARAMETER(isRuntimeSuspended);
511     _ASSERTE(isRuntimeSuspended);
512     UpdateGCWriteBarriers();
513     return SWB_ICACHE_FLUSH;
514 }
515
516 void FlushWriteBarrierInstructionCache()
517 {
518     // We've changed code so we must flush the instruction cache.
519     BYTE *pbAlteredRange;
520     DWORD cbAlteredRange;
521     ComputeWriteBarrierRange(&pbAlteredRange, &cbAlteredRange);
522     FlushInstructionCache(GetCurrentProcess(), pbAlteredRange, cbAlteredRange);
523 }
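
// Illustrative sketch of how the SWB_* flags returned by the Stomp* routines
// above are presumably consumed by the caller (hypothetical call site; the
// flag names and RestartEE pairing are the assumptions here):
//
//   int actions = StompWriteBarrierResize(isRuntimeSuspended, bReqUpperBoundsCheck);
//   if (actions & SWB_ICACHE_FLUSH)
//       FlushWriteBarrierInstructionCache();
//   if (actions & SWB_EE_RESTART)
//       ThreadSuspend::RestartEE(FALSE, TRUE);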
524
525 #endif // CROSSGEN_COMPILE
526
527 #endif // !DACCESS_COMPILE
528
529 #ifndef CROSSGEN_COMPILE
530 void LazyMachState::unwindLazyState(LazyMachState* baseState,
531                                     MachState* unwoundstate,
532                                     DWORD threadId,
533                                     int funCallDepth,
534                                     HostCallPreference hostCallPreference)
535 {
536     T_CONTEXT                         ctx;
537     T_KNONVOLATILE_CONTEXT_POINTERS   nonVolRegPtrs;
538
539     ctx.ContextFlags = 0; // Read by PAL_VirtualUnwind.
540
541     ctx.Pc = baseState->captureIp;
542     ctx.Sp = baseState->captureSp;
543
544     ctx.R4 = unwoundstate->captureR4_R11[0] = baseState->captureR4_R11[0];
545     ctx.R5 = unwoundstate->captureR4_R11[1] = baseState->captureR4_R11[1];
546     ctx.R6 = unwoundstate->captureR4_R11[2] = baseState->captureR4_R11[2];
547     ctx.R7 = unwoundstate->captureR4_R11[3] = baseState->captureR4_R11[3];
548     ctx.R8 = unwoundstate->captureR4_R11[4] = baseState->captureR4_R11[4];
549     ctx.R9 = unwoundstate->captureR4_R11[5] = baseState->captureR4_R11[5];
550     ctx.R10 = unwoundstate->captureR4_R11[6] = baseState->captureR4_R11[6];
551     ctx.R11 = unwoundstate->captureR4_R11[7] = baseState->captureR4_R11[7];
552
553 #if !defined(DACCESS_COMPILE)
554     // For DAC, if we get here, it means that the LazyMachState is uninitialized and we have to unwind it.
555     // The API we use to unwind in DAC is StackWalk64(), which does not support the context pointers.
556     //
557     // Restore the integer registers to KNONVOLATILE_CONTEXT_POINTERS to be used for unwinding.
558     nonVolRegPtrs.R4 = &unwoundstate->captureR4_R11[0];
559     nonVolRegPtrs.R5 = &unwoundstate->captureR4_R11[1];
560     nonVolRegPtrs.R6 = &unwoundstate->captureR4_R11[2];
561     nonVolRegPtrs.R7 = &unwoundstate->captureR4_R11[3];
562     nonVolRegPtrs.R8 = &unwoundstate->captureR4_R11[4];
563     nonVolRegPtrs.R9 = &unwoundstate->captureR4_R11[5];
564     nonVolRegPtrs.R10 = &unwoundstate->captureR4_R11[6];
565     nonVolRegPtrs.R11 = &unwoundstate->captureR4_R11[7];
566 #endif // DACCESS_COMPILE
567
568     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    LazyMachState::unwindLazyState(ip:%p,sp:%p)\n", baseState->captureIp, baseState->captureSp));
569
570     PCODE pvControlPc;
571
572     do
573     {
574 #ifndef TARGET_UNIX
575         pvControlPc = Thread::VirtualUnwindCallFrame(&ctx, &nonVolRegPtrs);
576 #else // !TARGET_UNIX
577 #ifdef DACCESS_COMPILE
578         HRESULT hr = DacVirtualUnwind(threadId, &ctx, &nonVolRegPtrs);
579         if (FAILED(hr))
580         {
581             DacError(hr);
582         }
583 #else // DACCESS_COMPILE
584         BOOL success = PAL_VirtualUnwind(&ctx, &nonVolRegPtrs);
585         if (!success)
586         {
587             _ASSERTE(!"unwindLazyState: Unwinding failed");
588             EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE);
589         }
590 #endif // DACCESS_COMPILE
591         pvControlPc = GetIP(&ctx);
592 #endif // !TARGET_UNIX
593         if (funCallDepth > 0)
594         {
595             --funCallDepth;
596             if (funCallDepth == 0)
597                 break;
598         }
599         else
600         {
601             // Determine whether the given IP resides in JITted code (IsManagedCode returns nonzero in that case).
602             // Use it now to see if we've unwound to managed code yet.
603             BOOL fFailedReaderLock = FALSE;
604             BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
605             if (fFailedReaderLock)
606             {
607                 // We don't know if we would have been able to find a JIT
608                 // manager, because we couldn't enter the reader lock without
609                 // yielding (and our caller doesn't want us to yield).  So abort
610                 // now.
611
612                 // Invalidate the lazyState we're returning, so the caller knows
613                 // we aborted before we could fully unwind
614                 unwoundstate->_isValid = false;
615                 return;
616             }
617
618             if (fIsManagedCode)
619                 break;
620         }
621     }
622     while(TRUE);
623
624     //
625     // Update unwoundState so that HelperMethodFrameRestoreState knows which
626     // registers have been potentially modified.
627     //
628
629     unwoundstate->_pc = ctx.Pc;
630     unwoundstate->_sp = ctx.Sp;
631
632 #ifdef DACCESS_COMPILE
633     // For DAC builds, we update the registers directly since we don't have context pointers
634     unwoundstate->captureR4_R11[0] = ctx.R4;
635     unwoundstate->captureR4_R11[1] = ctx.R5;
636     unwoundstate->captureR4_R11[2] = ctx.R6;
637     unwoundstate->captureR4_R11[3] = ctx.R7;
638     unwoundstate->captureR4_R11[4] = ctx.R8;
639     unwoundstate->captureR4_R11[5] = ctx.R9;
640     unwoundstate->captureR4_R11[6] = ctx.R10;
641     unwoundstate->captureR4_R11[7] = ctx.R11;
642 #else // !DACCESS_COMPILE
643     // For non-DAC builds, update the register state from context pointers
644     unwoundstate->_R4_R11[0] = (PDWORD)nonVolRegPtrs.R4;
645     unwoundstate->_R4_R11[1] = (PDWORD)nonVolRegPtrs.R5;
646     unwoundstate->_R4_R11[2] = (PDWORD)nonVolRegPtrs.R6;
647     unwoundstate->_R4_R11[3] = (PDWORD)nonVolRegPtrs.R7;
648     unwoundstate->_R4_R11[4] = (PDWORD)nonVolRegPtrs.R8;
649     unwoundstate->_R4_R11[5] = (PDWORD)nonVolRegPtrs.R9;
650     unwoundstate->_R4_R11[6] = (PDWORD)nonVolRegPtrs.R10;
651     unwoundstate->_R4_R11[7] = (PDWORD)nonVolRegPtrs.R11;
652 #endif // DACCESS_COMPILE
653
654     unwoundstate->_isValid = true;
655 }
656
657 void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
658 {
659     CONTRACTL
660     {
661         NOTHROW;
662         GC_NOTRIGGER;
663         MODE_ANY;
664         SUPPORTS_DAC;
665     }
666     CONTRACTL_END;
667
668     pRD->IsCallerContextValid = FALSE;
669     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
670
671     //
672     // Copy the saved state from the frame to the current context.
673     //
674
675     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    HelperMethodFrame::UpdateRegDisplay cached ip:%p, sp:%p\n", m_MachState._pc, m_MachState._sp));
676
677  #if defined(DACCESS_COMPILE)
678     // For DAC, we may get here when the HMF is still uninitialized.
679     // So we may need to unwind here.
680     if (!m_MachState.isValid())
681     {
682         // This allocation throws on OOM.
683         MachState* pUnwoundState = (MachState*)DacAllocHostOnlyInstance(sizeof(*pUnwoundState), true);
684
685         InsureInit(false, pUnwoundState);
686
687         pRD->pCurrentContext->Pc = pRD->ControlPC = pUnwoundState->_pc;
688         pRD->pCurrentContext->Sp = pRD->SP        = pUnwoundState->_sp;
689
690         pRD->pCurrentContext->R4 = (DWORD)(pUnwoundState->captureR4_R11[0]);
691         pRD->pCurrentContext->R5 = (DWORD)(pUnwoundState->captureR4_R11[1]);
692         pRD->pCurrentContext->R6 = (DWORD)(pUnwoundState->captureR4_R11[2]);
693         pRD->pCurrentContext->R7 = (DWORD)(pUnwoundState->captureR4_R11[3]);
694         pRD->pCurrentContext->R8 = (DWORD)(pUnwoundState->captureR4_R11[4]);
695         pRD->pCurrentContext->R9 = (DWORD)(pUnwoundState->captureR4_R11[5]);
696         pRD->pCurrentContext->R10 = (DWORD)(pUnwoundState->captureR4_R11[6]);
697         pRD->pCurrentContext->R11 = (DWORD)(pUnwoundState->captureR4_R11[7]);
698
699         return;
700     }
701 #endif // DACCESS_COMPILE
702
703     // reset pContext; it's only valid for active (top-most) frame
704     pRD->pContext = NULL;
705     pRD->ControlPC = GetReturnAddress();
706     pRD->SP = (DWORD)(size_t)m_MachState._sp;
707
708     pRD->pCurrentContext->Pc = pRD->ControlPC;
709     pRD->pCurrentContext->Sp = pRD->SP;
710
711     pRD->pCurrentContext->R4 = *m_MachState._R4_R11[0];
712     pRD->pCurrentContext->R5 = *m_MachState._R4_R11[1];
713     pRD->pCurrentContext->R6 = *m_MachState._R4_R11[2];
714     pRD->pCurrentContext->R7 = *m_MachState._R4_R11[3];
715     pRD->pCurrentContext->R8 = *m_MachState._R4_R11[4];
716     pRD->pCurrentContext->R9 = *m_MachState._R4_R11[5];
717     pRD->pCurrentContext->R10 = *m_MachState._R4_R11[6];
718     pRD->pCurrentContext->R11 = *m_MachState._R4_R11[7];
719
720     pRD->pCurrentContextPointers->R4 = m_MachState._R4_R11[0];
721     pRD->pCurrentContextPointers->R5 = m_MachState._R4_R11[1];
722     pRD->pCurrentContextPointers->R6 = m_MachState._R4_R11[2];
723     pRD->pCurrentContextPointers->R7 = m_MachState._R4_R11[3];
724     pRD->pCurrentContextPointers->R8 = m_MachState._R4_R11[4];
725     pRD->pCurrentContextPointers->R9 = m_MachState._R4_R11[5];
726     pRD->pCurrentContextPointers->R10 = m_MachState._R4_R11[6];
727     pRD->pCurrentContextPointers->R11 = m_MachState._R4_R11[7];
728     pRD->pCurrentContextPointers->Lr = NULL;
729 }
730 #endif // !CROSSGEN_COMPILE
731
732 TADDR FixupPrecode::GetMethodDesc()
733 {
734     LIMITED_METHOD_DAC_CONTRACT;
735
736     // This lookup is also manually inlined in PrecodeFixupThunk assembly code
737     TADDR base = *PTR_TADDR(GetBase());
738     if (base == NULL)
739         return NULL;
740     return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT);
741 }
742
743 #ifdef DACCESS_COMPILE
744 void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
745 {
746     SUPPORTS_DAC;
747     DacEnumMemoryRegion(dac_cast<TADDR>(this), sizeof(FixupPrecode));
748
749     DacEnumMemoryRegion(GetBase(), sizeof(TADDR));
750 }
751 #endif // DACCESS_COMPILE
752
753 #ifndef DACCESS_COMPILE
754
755 void StubPrecode::Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
756 {
757     WRAPPER_NO_CONTRACT;
758
759     int n = 0;
760
761     m_rgCode[n++] = 0xf8df; // ldr r12, [pc, #8]
762     m_rgCode[n++] = 0xc008;
763     m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #0]
764     m_rgCode[n++] = 0xf000;
765
766     _ASSERTE(n == _countof(m_rgCode));
767
768     m_pTarget = GetPreStubEntryPoint();
769     m_pMethodDesc = (TADDR)pMD;
770 }
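
// Illustrative layout sketch, assuming the data fields directly follow
// m_rgCode as the PC-relative offsets above imply (Thumb reads PC as the
// instruction address + 4, rounded down to a 4-byte boundary for ldr):
//
//   +0  ldr r12, [pc, #8]   ; r12 <- m_pMethodDesc (at +12)
//   +4  ldr pc,  [pc, #0]   ; pc  <- m_pTarget     (at +8)
//   +8  m_pTarget           ; initially GetPreStubEntryPoint()
//   +12 m_pMethodDesc
//
// The stub materializes the MethodDesc in r12 and tail-jumps to the current
// target; since m_pTarget is data, it can later be repointed at the real
// code without rewriting any instructions.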
771
772 #ifdef FEATURE_NATIVE_IMAGE_GENERATION
773 void StubPrecode::Fixup(DataImage *image)
774 {
775     WRAPPER_NO_CONTRACT;
776
777     image->FixupFieldToNode(this, offsetof(StubPrecode, m_pTarget),
778                             image->GetHelperThunk(CORINFO_HELP_EE_PRESTUB),
779                             0,
780                             IMAGE_REL_BASED_PTR);
781
782     image->FixupField(this, offsetof(StubPrecode, m_pMethodDesc),
783                       (void*)GetMethodDesc(),
784                       0,
785                       IMAGE_REL_BASED_PTR);
786 }
787 #endif // FEATURE_NATIVE_IMAGE_GENERATION
788
789 void NDirectImportPrecode::Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
790 {
791     WRAPPER_NO_CONTRACT;
792
793     int n = 0;
794
795     m_rgCode[n++] = 0xf8df; // ldr r12, [pc, #4]
796     m_rgCode[n++] = 0xc004;
797     m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #4]
798     m_rgCode[n++] = 0xf004;
799
800     _ASSERTE(n == _countof(m_rgCode));
801
802     m_pMethodDesc = (TADDR)pMD;
803     m_pTarget = GetEEFuncEntryPoint(NDirectImportThunk);
804 }
805
806 #ifdef FEATURE_NATIVE_IMAGE_GENERATION
807 void NDirectImportPrecode::Fixup(DataImage *image)
808 {
809     WRAPPER_NO_CONTRACT;
810
811     image->FixupField(this, offsetof(NDirectImportPrecode, m_pMethodDesc),
812                       (void*)GetMethodDesc(),
813                       0,
814                       IMAGE_REL_BASED_PTR);
815
816     image->FixupFieldToNode(this, offsetof(NDirectImportPrecode, m_pTarget),
817                             image->GetHelperThunk(CORINFO_HELP_EE_PINVOKE_FIXUP),
818                             0,
819                             IMAGE_REL_BASED_PTR);
820 }
821 #endif
822
823 void FixupPrecode::Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/)
824 {
825     WRAPPER_NO_CONTRACT;
826
827     m_rgCode[0] = 0x46fc;   // mov r12, pc
828     m_rgCode[1] = 0xf8df;   // ldr pc, [pc, #4]
829     m_rgCode[2] = 0xf004;
830
831     // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work.
832     if (m_PrecodeChunkIndex == 0)
833     {
834         _ASSERTE(FitsInU1(iPrecodeChunkIndex));
835         m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
836     }
837
838     if (iMethodDescChunkIndex != -1)
839     {
840         if (m_MethodDescChunkIndex == 0)
841         {
842             _ASSERTE(FitsInU1(iMethodDescChunkIndex));
843             m_MethodDescChunkIndex = static_cast<BYTE>(iMethodDescChunkIndex);
844         }
845
846         if (*(void**)GetBase() == NULL)
847             *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT);
848     }
849
850     _ASSERTE(GetMethodDesc() == (TADDR)pMD);
851
852     if (pLoaderAllocator != NULL)
853     {
854         m_pTarget = GetEEFuncEntryPoint(PrecodeFixupThunk);
855     }
856 }
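
// Illustrative layout sketch, assuming the field offsets implied by
// GetMethodDesc()/GetBase() above:
//
//   +0  mov r12, pc          ; r12 <- precode + 4 (Thumb PC bias)
//   +2  ldr pc, [pc, #4]     ; pc  <- m_pTarget (at +8)
//   +6  m_MethodDescChunkIndex, m_PrecodeChunkIndex (one byte each)
//   +8  m_pTarget            ; initially PrecodeFixupThunk
//
// PrecodeFixupThunk can recover the precode address from r12 and then
// recompute the MethodDesc exactly as GetMethodDesc() does above, so no
// per-precode MethodDesc pointer is needed.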
857
858 #ifdef FEATURE_NATIVE_IMAGE_GENERATION
859 // Partial initialization. Used to save regrouped chunks.
860 void FixupPrecode::InitForSave(int iPrecodeChunkIndex)
861 {
862     STANDARD_VM_CONTRACT;
863
864     m_rgCode[0] = 0x46fc;   // mov r12, pc
865     m_rgCode[1] = 0xf8df;   // ldr pc, [pc, #4]
866     m_rgCode[2] = 0xf004;
867
868     _ASSERTE(FitsInU1(iPrecodeChunkIndex));
869     m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
870
871     // The rest is initialized in code:FixupPrecode::Fixup
872 }
873
874 void FixupPrecode::Fixup(DataImage *image, MethodDesc * pMD)
875 {
876     STANDARD_VM_CONTRACT;
877
878     // Note that GetMethodDesc() does not return the correct value because of
879     // regrouping of MethodDescs into hot and cold blocks. That's why the caller
880     // has to supply the actual MethodDesc
881
882     SSIZE_T mdChunkOffset;
883     ZapNode * pMDChunkNode = image->GetNodeForStructure(pMD, &mdChunkOffset);
884     ZapNode * pHelperThunk = image->GetHelperThunk(CORINFO_HELP_EE_PRECODE_FIXUP);
885
886     image->FixupFieldToNode(this, offsetof(FixupPrecode, m_pTarget), pHelperThunk);
887
888     // Set the actual chunk index
889     FixupPrecode * pNewPrecode = (FixupPrecode *)image->GetImagePointer(this);
890
891     size_t mdOffset   = mdChunkOffset - sizeof(MethodDescChunk);
892     size_t chunkIndex = mdOffset / MethodDesc::ALIGNMENT;
893     _ASSERTE(FitsInU1(chunkIndex));
894     pNewPrecode->m_MethodDescChunkIndex = (BYTE) chunkIndex;
895
896     // Fixup the base of MethodDescChunk
897     if (m_PrecodeChunkIndex == 0)
898     {
899         image->FixupFieldToNode(this, (BYTE *)GetBase() - (BYTE *)this,
900             pMDChunkNode, sizeof(MethodDescChunk));
901     }
902 }
903 #endif // FEATURE_NATIVE_IMAGE_GENERATION
904
905 void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
906 {
907     WRAPPER_NO_CONTRACT;
908
909     int n = 0;
910
911     m_rgCode[n++] = 0x4684; // mov r12, r0
912     m_rgCode[n++] = 0x4608; // mov r0, r1
913     m_rgCode[n++] = 0xea4f; // mov r1, r12
914     m_rgCode[n++] = 0x010c;
915     m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #0]
916     m_rgCode[n++] = 0xf000;
917
918     _ASSERTE(n == _countof(m_rgCode));
919
920     m_pTarget = GetPreStubEntryPoint();
921     m_pMethodDesc = (TADDR)pMD;
922 }
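
// Note (illustrative): the net effect of the three mov instructions above is
// to swap r0 and r1 through r12 (r12 <- r0; r0 <- r1; r1 <- r12), exchanging
// the this pointer and the return-buffer argument, after which the final ldr
// tail-jumps to m_pTarget.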
923
924
925 #ifndef CROSSGEN_COMPILE
926 /*
927 Rough pseudo-code of interface dispatching:
928
929   // jitted code sets r0, r4:
930   r0 = object;
931   r4 = indirectionCell;
932   // jitted code calls *indirectionCell
933   switch (*indirectionCell)
934   {
935       case LookupHolder._stub:
936           // ResolveWorkerAsmStub:
937           *indirectionCell = DispatchHolder._stub;
938           call ResolveWorkerStatic, jump to target method;
939       case DispatchHolder._stub:
940           if (r0.methodTable == expectedMethodTable) jump to target method;
941           // ResolveHolder._stub._failEntryPoint:
942           jump to case ResolveHolder._stub._resolveEntryPoint;
943       case ResolveHolder._stub._resolveEntryPoint:
944           if (r0.methodTable in hashTable) jump to target method;
945           // ResolveHolder._stub._slowEntryPoint:
946           // ResolveWorkerChainLookupAsmStub:
947           // ResolveWorkerAsmStub:
948           if (_failEntryPoint called too many times) *indirectionCell = ResolveHolder._stub._resolveEntryPoint;
949           call ResolveWorkerStatic, jump to target method;
950   }
951
952 Note that ResolveWorkerChainLookupAsmStub currently points directly
953 to ResolveWorkerAsmStub; in the future, this could be separate.
954 */
955
956 void  LookupHolder::Initialize(LookupHolder* pLookupHolderRX, PCODE resolveWorkerTarget, size_t dispatchToken)
957 {
958     // Called directly by JITTED code
959     // See ResolveWorkerAsmStub
960
961     // ldr r12, [pc + 8]    ; #_token
962     _stub._entryPoint[0] = 0xf8df;
963     _stub._entryPoint[1] = 0xc008;
964     // ldr pc, [pc]         ; #_resolveWorkerTarget
965     _stub._entryPoint[2] = 0xf8df;
966     _stub._entryPoint[3] = 0xf000;
967
968     _stub._resolveWorkerTarget = resolveWorkerTarget;
969     _stub._token               = dispatchToken;
970     _ASSERTE(4 == LookupStub::entryPointLen);
971 }
972
973 void  DispatchHolder::Initialize(DispatchHolder* pDispatchHolderRX, PCODE implTarget, PCODE failTarget, size_t expectedMT)
974 {
975     // Called directly by JITTED code
976     // DispatchHolder._stub._entryPoint(r0:object, r1, r2, r3, r4:IndirectionCell)
977     // {
978     //     if (r0.methodTable == this._expectedMT) (this._implTarget)(r0, r1, r2, r3);
979     //     else (this._failTarget)(r0, r1, r2, r3, r4);
980     // }
981
982     int n = 0;
983     WORD offset;
984
985     // We rely on the stub entry-point being DWORD aligned (so we can tell whether any subsequent WORD is
986     // DWORD-aligned or not, which matters in the calculation of PC-relative offsets).
987     _ASSERTE(((UINT_PTR)_stub._entryPoint & 0x3) == 0);
988
989 // Compute a PC-relative offset for use in an instruction encoding. Must call this prior to emitting the
990 // instruction halfword to which it applies. For thumb-2 encodings the offset must be computed before emitting
991 // the first of the halfwords.
992 #undef PC_REL_OFFSET
993 #define PC_REL_OFFSET(_field) (WORD)(offsetof(DispatchStub, _field) - ((offsetof(DispatchStub, _entryPoint) + sizeof(*DispatchStub::_entryPoint) * (n + 2)) & 0xfffffffc))
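
    // Worked example for the macro above (illustrative): the "(n + 2)" models
    // the Thumb PC bias (PC reads as the current instruction address plus 4,
    // i.e. two halfword slots ahead) and the "& 0xfffffffc" models the
    // ldr-literal rule that the PC base is rounded down to a 4-byte boundary.
    // Assuming _expectedMT sits immediately after the 12-halfword entry point
    // (the "data members below are aligned" assert later in this function),
    // the "ldr r5, [pc + #_expectedMT]" emitted at n == 3 gets
    //   offset = 24 - ((2 * (3 + 2)) & ~3) = 24 - 8 = 16,
    // which is exactly what an ldr at byte 6 needs: aligned PC base 8 + 16 = 24.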
994
995     // r0 : object. It can be null as well. When it is null, the code causes an AV, which the VM's
996     // personality routine sees and converts into a NullReferenceException. We want the AV to happen
997     // before modifying the stack so that we can get the call stack in windbg at the point of the AV.
998     // Therefore "ldr r12, [r0]" must be the first instruction.
999
1000     // ldr r12, [r0 + #Object.m_pMethTab]
1001     _stub._entryPoint[n++] = DISPATCH_STUB_FIRST_WORD;
1002     _stub._entryPoint[n++] = 0xc000;
1003
1004     // push {r5}
1005     _stub._entryPoint[n++] = 0xb420;
1006
1007     // ldr r5, [pc + #_expectedMT]
1008     offset = PC_REL_OFFSET(_expectedMT);
1009     _ASSERTE((offset & 0x3) == 0);
1010     _stub._entryPoint[n++] = 0x4d00 | (offset >> 2);
1011
1012     // cmp r5, r12
1013     _stub._entryPoint[n++] = 0x4565;
1014
1015     // pop {r5}
1016     _stub._entryPoint[n++] = 0xbc20;
1017
1018     // bne failTarget
1019     _stub._entryPoint[n++] = 0xd101;
1020
1021     // ldr pc, [pc + #_implTarget]
1022     offset = PC_REL_OFFSET(_implTarget);
1023     _stub._entryPoint[n++] = 0xf8df;
1024     _stub._entryPoint[n++] = 0xf000 | offset;
1025
1026     // failTarget:
1027     // ldr pc, [pc + #_failTarget]
1028     offset = PC_REL_OFFSET(_failTarget);
1029     _stub._entryPoint[n++] = 0xf8df;
1030     _stub._entryPoint[n++] = 0xf000 | offset;
1031
1032     // nop - insert padding
1033     _stub._entryPoint[n++] = 0xbf00;
1034
1035     _ASSERTE(n == DispatchStub::entryPointLen);
1036
1037     // Make sure that the data members below are aligned
1038     _ASSERTE((n & 1) == 0);
1039
1040     _stub._expectedMT = DWORD(expectedMT);
1041     _stub._failTarget = failTarget;
1042     _stub._implTarget = implTarget;
1043 }
1044
1045 void ResolveHolder::Initialize(ResolveHolder* pResolveHolderRX,
1046                                PCODE resolveWorkerTarget, PCODE patcherTarget,
1047                                 size_t dispatchToken, UINT32 hashedToken,
1048                                 void * cacheAddr, INT32 * counterAddr)
1049 {
1050     // Called directly by JITTED code
1051     // ResolveStub._resolveEntryPoint(r0:Object*, r1, r2, r3, r4:IndirectionCellAndFlags)
1052     // {
1053     //    MethodTable mt = r0.m_pMethTab;
1054     //    int i = ((mt + mt >> 12) ^ this._hashedToken) & this._cacheMask
1055     //    ResolveCacheElem e = this._cacheAddress + i
1056     //    do
1057     //    {
1058     //        if (mt == e.pMT && this._token == e.token) (e.target)(r0, r1, r2, r3);
1059     //        e = e.pNext;
1060     //    } while (e != null)
1061     //    (this._slowEntryPoint)(r0, r1, r2, r3, r4);
1062     // }
1063     //
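    // Illustrative C sketch of the same lookup (hypothetical type and field
    // names; the real ResolveCacheElem is declared elsewhere). Note that both
    // _hashedToken (pre-shifted by LOG2_PTRSIZE, see the end of this function)
    // and _cacheMask (scaled by sizeof(void*)) are stored pre-scaled, so "i"
    // is a byte offset into the bucket array rather than an element index:
#if 0
    struct Elem { void *pMT; size_t token; PCODE target; Elem *pNext; };
    PCODE Lookup(void *pObjMT, size_t token, BYTE *cacheAddress, size_t cacheMask, UINT32 hashedToken)
    {
        size_t mt = (size_t)pObjMT;
        size_t i  = ((mt + (mt >> 12)) ^ hashedToken) & cacheMask;  // byte offset
        for (Elem *e = *(Elem **)(cacheAddress + i); e != NULL; e = e->pNext)
            if (e->pMT == pObjMT && e->token == token)
                return e->target;           // "bx r12" in the stub below
        return NULL;                        // fall through to _slowEntryPoint
    }
#endif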
1064
1065     int n = 0;
1066     WORD offset;
1067
1068     // We rely on the stub entry-point being DWORD aligned (so we can tell whether any subsequent WORD is
1069     // DWORD-aligned or not, which matters in the calculation of PC-relative offsets).
1070     _ASSERTE(((UINT_PTR)_stub._resolveEntryPoint & 0x3) == 0);
1071
1072 // Compute a PC-relative offset for use in an instruction encoding. Must call this prior to emitting the
1073 // instruction halfword to which it applies. For thumb-2 encodings the offset must be computed before emitting
1074 // the first of the halfwords.
1075 #undef PC_REL_OFFSET
1076 #define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - ((offsetof(ResolveStub, _resolveEntryPoint) + sizeof(*ResolveStub::_resolveEntryPoint) * (n + 2)) & 0xfffffffc))
1077
1078     // ldr r12, [r0 + #Object.m_pMethTab]
1079     _stub._resolveEntryPoint[n++] = RESOLVE_STUB_FIRST_WORD;
1080     _stub._resolveEntryPoint[n++] = 0xc000;
1081
1082     // ;; We need two scratch registers, r5 and r6
1083     // push {r5,r6}
1084     _stub._resolveEntryPoint[n++] = 0xb460;
1085
1086     // ;; Compute i = ((mt + mt >> 12) ^ this._hashedToken) & this._cacheMask
1087
1088     // add r6, r12, r12 lsr #12
1089     _stub._resolveEntryPoint[n++] = 0xeb0c;
1090     _stub._resolveEntryPoint[n++] = 0x361c;
1091
1092     // ldr r5, [pc + #_hashedToken]
1093     offset = PC_REL_OFFSET(_hashedToken);
1094     _ASSERTE((offset & 0x3) == 0);
1095     _stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
1096
1097     // eor r6, r6, r5
1098     _stub._resolveEntryPoint[n++] = 0xea86;
1099     _stub._resolveEntryPoint[n++] = 0x0605;
1100
1101     // ldr r5, [pc + #_cacheMask]
1102     offset = PC_REL_OFFSET(_cacheMask);
1103     _ASSERTE((offset & 0x3) == 0);
1104     _stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
1105
1106     // and r6, r6, r5
1107     _stub._resolveEntryPoint[n++] = 0xea06;
1108     _stub._resolveEntryPoint[n++] = 0x0605;
1109
1110     // ;; ResolveCacheElem e = this._cacheAddress + i
1111     // ldr r5, [pc + #_cacheAddress]
1112     offset = PC_REL_OFFSET(_cacheAddress);
1113     _ASSERTE((offset & 0x3) == 0);
1114     _stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
1115
1116     // ldr r6, [r5 + r6] ;; r6 = e = this._cacheAddress + i
1117     _stub._resolveEntryPoint[n++] = 0x59ae;
1118
1119     // ;; do {
1120     int loop = n;
1121
1122     // ;; Check mt == e.pMT
1123     // ldr r5, [r6 + #ResolveCacheElem.pMT]
1124     offset = offsetof(ResolveCacheElem, pMT);
1125     _ASSERTE(offset <= 124 && (offset & 0x3) == 0);
1126     _stub._resolveEntryPoint[n++] = 0x6835 | (offset<< 4);
1127
1128     // cmp r12, r5
1129     _stub._resolveEntryPoint[n++] = 0x45ac;
1130
1131     // bne nextEntry
1132     _stub._resolveEntryPoint[n++] = 0xd108;
1133
1134     // ;; Check this._token == e.token
1135     // ldr r5, [pc + #_token]
1136     offset = PC_REL_OFFSET(_token);
1137     _ASSERTE((offset & 0x3) == 0);
1138     _stub._resolveEntryPoint[n++] = 0x4d00 | (offset>>2);
1139
1140     // ldr r12, [r6 + #ResolveCacheElem.token]
1141     offset = offsetof(ResolveCacheElem, token);
1142     _stub._resolveEntryPoint[n++] = 0xf8d6;
1143     _stub._resolveEntryPoint[n++] = 0xc000 | offset;
1144
1145     // cmp r12, r5
1146     _stub._resolveEntryPoint[n++] = 0x45ac;
1147
1148     // bne nextEntry
1149     _stub._resolveEntryPoint[n++] = 0xd103;
1150
1151     // ldr r12, [r6 + #ResolveCacheElem.target] ;; r12 : e.target
1152     offset = offsetof(ResolveCacheElem, target);
1153     _stub._resolveEntryPoint[n++] = 0xf8d6;
1154     _stub._resolveEntryPoint[n++] = 0xc000 | offset;
1155
1156     // ;; Restore r5 and r6
1157     // pop {r5,r6}
1158     _stub._resolveEntryPoint[n++] = 0xbc60;
1159
1160     // ;; Branch to e.target
1161     // bx       r12 ;; (e.target)(r0,r1,r2,r3)
1162     _stub._resolveEntryPoint[n++] = 0x4760;
1163
1164     // nextEntry:
1165     // ;; e = e.pNext;
1166     // ldr r6, [r6 + #ResolveCacheElem.pNext]
1167     offset = offsetof(ResolveCacheElem, pNext);
1168     _ASSERTE(offset <=124 && (offset & 0x3) == 0);
1169     _stub._resolveEntryPoint[n++] = 0x6836 | (offset << 4);
1170
1171     // ;; } while(e != null);
1172     // cbz r6, slowEntryPoint
1173     _stub._resolveEntryPoint[n++] = 0xb116;
1174
1175     // ldr r12, [r0 + #Object.m_pMethTab]
1176     _stub._resolveEntryPoint[n++] = 0xf8d0;
1177     _stub._resolveEntryPoint[n++] = 0xc000;
1178
1179     // b loop
1180     offset = (WORD)((loop - (n + 2)) * sizeof(WORD));
1181     offset = (offset >> 1) & 0x07ff;
1182     _stub._resolveEntryPoint[n++] = 0xe000 | offset;
1183
1184     // slowEntryPoint:
1185     // pop {r5,r6}
1186     _stub._resolveEntryPoint[n++] = 0xbc60;
1187
1188     // nop for alignment
1189     _stub._resolveEntryPoint[n++] = 0xbf00;
1190
1191     // The slow entry point must be DWORD-aligned (see the _ASSERTE below); insert nops if necessary.
1192
1193     // ARMSTUB TODO: promotion
1194
1195     // fall through to slow case
1196     _ASSERTE(_stub._resolveEntryPoint + n == _stub._slowEntryPoint);
1197     _ASSERTE(n == ResolveStub::resolveEntryPointLen);
1198
1199     // ResolveStub._slowEntryPoint(r0:MethodToken, r1, r2, r3, r4:IndirectionCellAndFlags)
1200     // {
1201     //     r12 = this._tokenSlow;
1202     //     this._resolveWorkerTarget(r0, r1, r2, r3, r4, r12);
1203     // }
1204
1205     // The following macro relies on this entry point being DWORD-aligned. We've already asserted that the
1206     // overall stub is aligned above; we just need to check that the preceding stubs occupy an even number of
1207     // WORD slots.
1208     _ASSERTE((n & 1) == 0);
1209
1210 #undef PC_REL_OFFSET
1211 #define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - ((offsetof(ResolveStub, _slowEntryPoint) + sizeof(*ResolveStub::_slowEntryPoint) * (n + 2)) & 0xfffffffc))
1212
1213     n = 0;
1214
1215     // ldr r12, [pc + #_tokenSlow]
1216     offset = PC_REL_OFFSET(_tokenSlow);
1217     _stub._slowEntryPoint[n++] = 0xf8df;
1218     _stub._slowEntryPoint[n++] = 0xc000 | offset;
1219
1220     // ldr pc, [pc + #_resolveWorkerTarget]
1221     offset = PC_REL_OFFSET(_resolveWorkerTarget);
1222     _stub._slowEntryPoint[n++] = 0xf8df;
1223     _stub._slowEntryPoint[n++] = 0xf000 | offset;
1224
1225     _ASSERTE(n == ResolveStub::slowEntryPointLen);
1226
1227     // ResolveStub._failEntryPoint(r0:MethodToken, r1, r2, r3, r4:IndirectionCellAndFlags)
1228     // {
1229     //     if(--*(this._pCounter) < 0) r4 = r4 | SDF_ResolveBackPatch;
1230     //     this._resolveEntryPoint(r0, r1, r2, r3, r4);
1231     // }
1232
1233     // The following macro relies on this entry point being DWORD-aligned. We've already asserted that the
1234     // overall stub is aligned above; we just need to check that the preceding stubs occupy an even number of
1235     // WORD slots.
1236     _ASSERTE((n & 1) == 0);
1237
1238 #undef PC_REL_OFFSET
1239 #define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - ((offsetof(ResolveStub, _failEntryPoint) + sizeof(*ResolveStub::_failEntryPoint) * (n + 2)) & 0xfffffffc))
1240
1241     n = 0;
1242
1243     // push {r5}
1244     _stub._failEntryPoint[n++] = 0xb420;
1245
1246     // ldr r5, [pc + #_pCounter]
1247     offset = PC_REL_OFFSET(_pCounter);
1248     _ASSERTE((offset & 0x3) == 0);
1249     _stub._failEntryPoint[n++] = 0x4d00 | (offset >>2);
1250
1251     // ldr r12, [r5]
1252     _stub._failEntryPoint[n++] = 0xf8d5;
1253     _stub._failEntryPoint[n++] = 0xc000;
1254
1255     // subs r12, r12, #1
1256     _stub._failEntryPoint[n++] = 0xf1bc;
1257     _stub._failEntryPoint[n++] = 0x0c01;
1258
1259     // str r12, [r5]
1260     _stub._failEntryPoint[n++] = 0xf8c5;
1261     _stub._failEntryPoint[n++] = 0xc000;
1262
1263     // pop {r5}
1264     _stub._failEntryPoint[n++] = 0xbc20;
1265
1266     // bge resolveEntryPoint
1267     _stub._failEntryPoint[n++] = 0xda01;
1268
1269     // or r4, r4, SDF_ResolveBackPatch
1270     _ASSERTE(SDF_ResolveBackPatch < 256);
1271     _stub._failEntryPoint[n++] = 0xf044;
1272     _stub._failEntryPoint[n++] = 0x0400 | SDF_ResolveBackPatch;
1273
1274     // resolveEntryPoint:
1275     // b _resolveEntryPoint
1276     offset = (WORD)(offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, _failEntryPoint) + sizeof(*ResolveStub::_failEntryPoint) * (n + 2)));
1277     _ASSERTE((offset & 1) == 0);
1278     offset = (offset >> 1) & 0x07ff;
1279     _stub._failEntryPoint[n++] = 0xe000 | offset;
1280
1281     // nop for alignment
1282     _stub._failEntryPoint[n++] = 0xbf00;
1283
1284     _ASSERTE(n == ResolveStub::failEntryPointLen);
1285
1286     _stub._pCounter            = counterAddr;
1287     _stub._hashedToken         = hashedToken << LOG2_PTRSIZE;
1288     _stub._cacheAddress        = (size_t) cacheAddr;
1289     _stub._token               = dispatchToken;
1290     _stub._tokenSlow           = dispatchToken;
1291     _stub._resolveWorkerTarget = resolveWorkerTarget;
1292     _stub._cacheMask           = CALL_STUB_CACHE_MASK * sizeof(void*);
1293
1294     _ASSERTE(resolveWorkerTarget == (PCODE)ResolveWorkerChainLookupAsmStub);
1295     _ASSERTE(patcherTarget == NULL);
1296 }
1297
1298 BOOL DoesSlotCallPrestub(PCODE pCode)
1299 {
1300     PTR_WORD pInstr = dac_cast<PTR_WORD>(PCODEToPINSTR(pCode));
1301
1302 #ifdef HAS_COMPACT_ENTRYPOINTS
1303     if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL)
1304     {
1305         return TRUE;
1306     }
1307 #endif // HAS_COMPACT_ENTRYPOINTS
1308
1309     // FixupPrecode
1310     if (pInstr[0] == 0x46fc && // mov r12, pc
1311         pInstr[1] == 0xf8df &&
1312         pInstr[2] == 0xf004)
1313     {
1314         PCODE pTarget = dac_cast<PTR_FixupPrecode>(pInstr)->m_pTarget;
1315
1316         // Check for jump stub (NGen case)
1317         if (isJump(pTarget))
1318         {
1319             pTarget = decodeJump(pTarget);
1320         }
1321
1322         return pTarget == (TADDR)PrecodeFixupThunk;
1323     }
1324
1325     // StubPrecode
1326     if (pInstr[0] == 0xf8df && // ldr r12, [pc + 8]
1327         pInstr[1] == 0xc008 &&
1328         pInstr[2] == 0xf8df && // ldr pc, [pc]
1329         pInstr[3] == 0xf000)
1330     {
1331         PCODE pTarget = dac_cast<PTR_StubPrecode>(pInstr)->m_pTarget;
1332
1333         // Check for jump stub (NGen case)
1334         if (isJump(pTarget))
1335         {
1336             pTarget = decodeJump(pTarget);
1337         }
1338
1339         return pTarget == GetPreStubEntryPoint();
1340     }
1341
1342     return FALSE;
1343 }
1344
1345 Stub *GenerateInitPInvokeFrameHelper()
1346 {
1347     CONTRACT(Stub*)
1348     {
1349         THROWS;
1350         GC_NOTRIGGER;
1351         MODE_ANY;
1352
1353         POSTCONDITION(CheckPointer(RETVAL));
1354     }
1355     CONTRACT_END;
1356
1357     CPUSTUBLINKER sl;
1358     CPUSTUBLINKER *psl = &sl;
1359
1360     CORINFO_EE_INFO::InlinedCallFrameInfo FrameInfo;
1361     InlinedCallFrame::GetEEInfo(&FrameInfo);
1362
1363     // R4 contains address of the frame on stack (the frame ptr, not its neg space)
1364     unsigned negSpace = FrameInfo.offsetOfFrameVptr;
1365
1366     ThumbReg regFrame   = ThumbReg(4);
1367     ThumbReg regThread  = ThumbReg(5);
1368     ThumbReg regScratch = ThumbReg(6);
1369     ThumbReg regR9 = ThumbReg(9);
1370
1371 #ifdef TARGET_UNIX
1372     // Erect frame to perform call to GetThread
1373     psl->ThumbEmitProlog(1, sizeof(ArgumentRegisters), FALSE); // Save r4 for aligned stack
1374
1375     // Save argument registers around the GetThread call. Don't bother using ldm/stm since this is an inefficient path anyway.
1376     for (int reg = 0; reg < 4; reg++)
1377         psl->ThumbEmitStoreRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r) + sizeof(*ArgumentRegisters::r) * reg);
1378 #endif
1379
1380     psl->ThumbEmitGetThread(regThread);
1381
1382 #ifdef TARGET_UNIX
1383     for (int reg = 0; reg < 4; reg++)
1384         psl->ThumbEmitLoadRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r) + sizeof(*ArgumentRegisters::r) * reg);
1385 #endif
1386
1387     // mov [regFrame + FrameInfo.offsetOfGSCookie], GetProcessGSCookie()
1388     psl->ThumbEmitMovConstant(regScratch, GetProcessGSCookie());
1389     psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfGSCookie - negSpace);
1390
1391     // mov [regFrame + FrameInfo.offsetOfFrameVptr], InlinedCallFrame::GetMethodFrameVPtr()
1392     psl->ThumbEmitMovConstant(regScratch, InlinedCallFrame::GetMethodFrameVPtr());
1393     psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfFrameVptr - negSpace);
1394
1395     // ldr regScratch, [regThread + offsetof(Thread, m_pFrame)]
1396     // str regScratch, [regFrame + FrameInfo.offsetOfFrameLink]
1397     psl->ThumbEmitLoadRegIndirect(regScratch, regThread, offsetof(Thread, m_pFrame));
1398     psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfFrameLink - negSpace);
1399
1400     // str FP, [regFrame + FrameInfo.offsetOfCalleeSavedFP]
1401     psl->ThumbEmitStoreRegIndirect(thumbRegFp, regFrame, FrameInfo.offsetOfCalleeSavedFP - negSpace);
1402
1403     // str R9, [regFrame + FrameInfo.offsetOfSPAfterProlog]
1404     psl->ThumbEmitStoreRegIndirect(regR9, regFrame, FrameInfo.offsetOfSPAfterProlog - negSpace);
1405
1406     // mov [regFrame + FrameInfo.offsetOfReturnAddress], 0
1407     psl->ThumbEmitMovConstant(regScratch, 0);
1408     psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfReturnAddress - negSpace);
1409
1410 #ifdef TARGET_UNIX
1411     DWORD cbSavedRegs = sizeof(ArgumentRegisters) + 2 * 4; // r0-r3, r4, lr
1412     psl->ThumbEmitAdd(regScratch, thumbRegSp, cbSavedRegs);
1413     psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace);
1414 #else
1415     // str SP, [regFrame + FrameInfo.offsetOfCallSiteSP]
1416     psl->ThumbEmitStoreRegIndirect(thumbRegSp, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace);
1417 #endif
1418
1419     // mov [regThread + offsetof(Thread, m_pFrame)], regFrame
1420     psl->ThumbEmitStoreRegIndirect(regFrame, regThread, offsetof(Thread, m_pFrame));
1421
1422     // leave the current Thread in R5 (regThread)
1423
1424 #ifdef TARGET_UNIX
1425     psl->ThumbEmitEpilog();
1426 #else
1427     // Return. The return address has been restored into LR at this point.
1428     // bx lr
1429     psl->ThumbEmitJumpRegister(thumbRegLr);
1430 #endif
1431
1432     // A single process-wide stub that will never unload
1433     RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetStubHeap());
1434 }
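
// In C-like pseudo-code the stub body amounts to the following (field names
// are illustrative; the real stores go through FrameInfo offsets relative to
// the frame vptr in r4, as emitted above):
//
//   frame->gsCookie               = GetProcessGSCookie();
//   frame->vptr                   = InlinedCallFrame::GetMethodFrameVPtr();
//   frame->m_Next                 = thread->m_pFrame; // link into the Frame chain
//   frame->m_pCalleeSavedFP       = fp;
//   frame->m_pSPAfterProlog       = r9;
//   frame->m_pCallerReturnAddress = 0;                // frame starts out inactive
//   frame->m_pCallSiteSP          = sp;               // adjusted for saved regs on Unix
//   thread->m_pFrame              = frame;            // publish the frame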
1435
1436 void StubLinkerCPU::ThumbEmitGetThread(ThumbReg dest)
1437 {
1438 #ifdef TARGET_UNIX
1439
1440     ThumbEmitMovConstant(ThumbReg(0), (TADDR)GetThreadHelper);
1441
1442     ThumbEmitCallRegister(ThumbReg(0));
1443
1444     if (dest != ThumbReg(0))
1445     {
1446         ThumbEmitMovRegReg(dest, ThumbReg(0));
1447     }
1448
1449 #else // TARGET_UNIX
1450
1451     // mrc p15, 0, dest, c13, c0, 2
1452     Emit16(0xee1d);
1453     Emit16((WORD)(0x0f50 | (dest << 12)));
1454
1455     ThumbEmitLoadRegIndirect(dest, dest, offsetof(TEB, ThreadLocalStoragePointer));
1456
1457     ThumbEmitLoadRegIndirect(dest, dest, sizeof(void *) * _tls_index);
1458
1459     ThumbEmitLoadRegIndirect(dest, dest, (int)Thread::GetOffsetOfThreadStatic(&gCurrentThreadInfo));
1460
1461 #endif // TARGET_UNIX
1462 }
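
// The Windows path above is the standard implicit-TLS walk: read the TEB from
// CP15 TPIDRURW (c13, c0, 2), follow TEB::ThreadLocalStoragePointer, index by
// this module's _tls_index, then load the Thread* from the thread-static's
// offset. The same walk in C with GCC-style inline asm, purely for
// illustration (the helper name is not part of the runtime):
//
//   static Thread * GetThreadViaImplicitTls()
//   {
//       BYTE * pTeb;
//       __asm__("mrc p15, 0, %0, c13, c0, 2" : "=r"(pTeb)); // TEB
//       BYTE ** pTlsArray = *(BYTE ***)(pTeb + offsetof(TEB, ThreadLocalStoragePointer));
//       BYTE *  pTlsBlock = pTlsArray[_tls_index];
//       return *(Thread **)(pTlsBlock + Thread::GetOffsetOfThreadStatic(&gCurrentThreadInfo));
//   }
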
1463 #endif // CROSSGEN_COMPILE
1464
1465
1466 // Emits code to adjust for a static delegate target.
1467 VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
1468 {
1469     // Scan the shuffle entries to see if there are any stack-to-stack operations. If there aren't, we can
1470     // emit a much simpler thunk (simply because we generate code that doesn't require more than one
1471     // scratch register).
1472     bool fSimpleCase = true;
1473     ShuffleEntry *pEntry = pShuffleEntryArray;
1474     while (pEntry->srcofs != ShuffleEntry::SENTINEL)
1475     {
1476         // It's enough to check whether we have a destination stack location (there are no register-to-stack
1477         // scenarios).
1478         if (!(pEntry->dstofs & ShuffleEntry::REGMASK))
1479         {
1480             fSimpleCase = false;
1481             break;
1482         }
1483         pEntry++;
1484     }
1485
1486     if (fSimpleCase)
1487     {
1488         // No real prolog for the simple case; we're a tail call, so we shouldn't be on the stack for any
1489         // walk or unwind.
1490
1491         // On entry r0 holds the delegate instance. Look up the real target address stored in the MethodPtrAux
1492         // field and stash it in r12.
1493         //  ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)]
1494         ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
1495
1496         // Emit the instructions to rewrite the argument registers. Most will be register-to-register (e.g.
1497         // move r1 to r0) but one or two of them might move values from the top of the incoming stack
1498         // arguments into registers r2 and r3. Note that the entries are ordered so that we don't need to
1499         // worry about a move overwriting a register we'll need to use as input for the next move (i.e. we get
1500         // move r1 to r0, move r2 to r1 etc.).
1501         pEntry = pShuffleEntryArray;
1502         while (pEntry->srcofs != ShuffleEntry::SENTINEL)
1503         {
1504             _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
1505
1506             if (pEntry->srcofs & ShuffleEntry::REGMASK)
1507             {
1508                 // Move from register case.
1509                 ThumbEmitMovRegReg(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
1510                                    ThumbReg(pEntry->srcofs & ShuffleEntry::OFSMASK));
1511             }
1512             else
1513             {
1514                 // Move from the stack case.
1515                 //  ldr <dest>, [sp + #source_offset]
1516                 ThumbEmitLoadRegIndirect(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
1517                                          thumbRegSp,
1518                                          (pEntry->srcofs & ShuffleEntry::OFSMASK) * 4);
1519             }
1520
1521             pEntry++;
1522         }
1523
1524         // Tail call to real target.
1525         //  bx r12
1526         ThumbEmitJumpRegister(ThumbReg(12));
1527
1528         return;
1529     }
1530
1531     // In the more complex case we need to re-write at least some of the arguments on the stack as well as
1532     // argument registers. We need some temporary registers to perform stack-to-stack copies and we've
1533     // reserved our one remaining volatile register, r12, to store the eventual target method address. So
1534     // we're going to generate a hybrid-tail call. Using a tail call has the advantage that we don't need to
1535     // erect and link an explicit CLR frame to enable crawling of this thunk. Additionally re-writing the
1536     // stack in place can be more performant in some scenarios than copying it (in the presence of floating
1537     // point arguments or arguments requiring 64-bit alignment we might not have to move some or even most of the values).
1538     // The hybrid nature is that we'll erect a standard native frame (with a proper prolog and epilog) so we
1539     // can save some non-volatile registers to act as temporaries. Once we've performed the stack re-write
1540     // we'll poke the saved LR value (which will become a PC value on the pop in the epilog) to return to the
1541     // target method instead of us, thus atomically removing our frame from the stack and tail-calling the
1542     // real target.
1543
1544     // Prolog:
1545     ThumbEmitProlog(3,      // Save r4-r6,lr (count doesn't include lr)
1546                     0,      // No additional space in the stack frame required
1547                     FALSE); // Don't push argument registers
1548
1549     // On entry r0 holds the delegate instance. Look up the real target address stored in the MethodPtrAux
1550     // field and stash it in r12.
1551     //  ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)]
1552     ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
1553
1554     // As we copy slots from lower in the argument stack to higher we need to keep track of source and
1555     // destination pointers into those arguments (if we just use offsets from SP we get into trouble with
1556     // argument frames larger than 4K). We'll use r4 to track the source (original location of an argument
1557     // from the caller's perspective) and r5 to track the destination (new location of the argument from the
1558     // callee's perspective). Both start at the current value of SP plus the offset created by pushing our
1559     // stack frame in the prolog.
1560     //  add r4, sp, #cbSavedRegs
1561     //  add r5, sp, #cbSavedRegs
1562     DWORD cbSavedRegs = 4 * 4; // r4, r5, r6, lr
1563     ThumbEmitAdd(ThumbReg(4), thumbRegSp, cbSavedRegs);
1564     ThumbEmitAdd(ThumbReg(5), thumbRegSp, cbSavedRegs);
1565
1566     // Follow the shuffle array instructions to re-write some subset of r0-r3 and the stacked arguments to
1567     // remove the unwanted delegate instance in r0. Arguments only ever move from higher registers to lower
1568     // registers or higher stack addresses to lower stack addresses and are ordered from lowest register to
1569     // highest stack address. As a result we can do all updates in order and in place and we'll never
1570     // overwrite a register or stack location needed as a source value in a later iteration.
1571     DWORD dwLastSrcIndex = (DWORD)-1;
1572     DWORD dwLastDstIndex = (DWORD)-1;
1573     pEntry = pShuffleEntryArray;
1574     while (pEntry->srcofs != ShuffleEntry::SENTINEL)
1575     {
1576         // If this is a register-to-register move we can do it in one instruction.
1577         if ((pEntry->srcofs & ShuffleEntry::REGMASK) && (pEntry->dstofs & ShuffleEntry::REGMASK))
1578         {
1579             ThumbEmitMovRegReg(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
1580                                ThumbReg(pEntry->srcofs & ShuffleEntry::OFSMASK));
1581         }
1582         else
1583         {
1584             // There is no case where a source argument register is moved into a destination stack slot.
1585             _ASSERTE((pEntry->srcofs & ShuffleEntry::REGMASK) == 0);
1586
1587             // Source or destination stack offsets might not be contiguous (though they often will be).
1588             // Floating point arguments and 64-bit aligned values can cause discontinuities. While we copy
1589             // values we'll use post-increment addressing modes to move both source and destination stack
1590             // pointers forward 4 bytes at a time, which is the common case. But we'll insert additional add
1591             // instructions for any holes we find (we detect these by remembering the last source and
1592             // destination stack offset we used).
1593
1594             // Add any additional offset to the source pointer (r4) to account for holes in the copy.
1595             DWORD dwSrcIndex = pEntry->srcofs & ShuffleEntry::OFSMASK;
1596             if (dwSrcIndex != (dwLastSrcIndex + 1))
1597             {
1598                 _ASSERTE(dwSrcIndex > dwLastSrcIndex);
1599
1600                 // add r4, #gap_size
1601                 ThumbEmitIncrement(ThumbReg(4), (dwSrcIndex - dwLastSrcIndex - 1) * 4);
1602             }
1603             dwLastSrcIndex = dwSrcIndex;
1604
1605             // Load the source value from the stack and increment our source pointer (r4) in one instruction.
1606             // If the target is a register we can move the value directly there. Otherwise we move it to the
1607             // r6 temporary register.
1608             if (pEntry->dstofs & ShuffleEntry::REGMASK)
1609             {
1610                 // ldr <regnum>, [r4], #4
1611                 ThumbEmitLoadIndirectPostIncrement(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK), ThumbReg(4), 4);
1612             }
1613             else
1614             {
1615                 // ldr r6, [r4], #4
1616                 ThumbEmitLoadIndirectPostIncrement(ThumbReg(6), ThumbReg(4), 4);
1617
1618                 // Add any additional offset to the destination pointer (r5) to account for holes in the copy.
1619                 DWORD dwDstIndex = pEntry->dstofs & ShuffleEntry::OFSMASK;
1620                 if (dwDstIndex != (dwLastDstIndex + 1))
1621                 {
1622                     _ASSERTE(dwDstIndex > dwLastDstIndex);
1623
1624                     // add r5, #gap_size
1625                     ThumbEmitIncrement(ThumbReg(5), (dwDstIndex - dwLastDstIndex - 1) * 4);
1626                 }
1627                 dwLastDstIndex = dwDstIndex;
1628
1629                 // Write the value in r6 to its final home on the stack and increment our destination pointer
1630                 // (r5).
1631                 //  str r6, [r5], #4
1632                 ThumbEmitStoreIndirectPostIncrement(ThumbReg(6), ThumbReg(5), 4);
1633             }
1634         }
1635
1636         pEntry++;
1637     }
1638
1639     // Arguments are copied. Now we modify the saved value of LR we created in our prolog (which will be
1640     // popped back off into PC in our epilog) so that it points to the real target address in r12 rather than
1641     // our return address. We haven't modified LR ourselves, so the net result is that executing our epilog
1642     // will pop our frame and tail call to the real method.
1643     //  str r12, [sp + #(cbSavedRegs-4)]
1644     ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, cbSavedRegs - 4);
1645
1646     // Epilog:
1647     ThumbEmitEpilog();
1648 }
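
// For a concrete picture, an open static delegate over a four-int-arg method
// hits the simple path and emits roughly this (a sketch; the exact stack
// offsets come from the shuffle array):
//
//   ldr r12, [r0, #_methodPtrAux]   ; fetch the real target from the delegate
//   mov r0, r1                      ; shuffle args down to close the r0 gap
//   mov r1, r2
//   mov r2, r3
//   ldr r3, [sp, #0]                ; first stack arg moves into r3
//   bx  r12                         ; tail call the real target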
1649
1650 #ifndef CROSSGEN_COMPILE
1651
1652 void StubLinkerCPU::ThumbEmitTailCallManagedMethod(MethodDesc *pMD)
1653 {
1654     bool isRelative = MethodTable::VTableIndir2_t::isRelative
1655                       && pMD->IsVtableSlot();
1656
1657 #ifndef FEATURE_NGEN_RELOCS_OPTIMIZATIONS
1658     _ASSERTE(!isRelative);
1659 #endif
1660
1661     // Use direct call if possible.
1662     if (pMD->HasStableEntryPoint())
1663     {
1664         // mov r12, #entry_point
1665         ThumbEmitMovConstant(ThumbReg(12), (TADDR)pMD->GetStableEntryPoint());
1666     }
1667     else
1668     {
1669         // mov r12, #slotaddress
1670         ThumbEmitMovConstant(ThumbReg(12), (TADDR)pMD->GetAddrOfSlot());
1671
1672         if (isRelative)
1673         {
1674             // mov r4, r12
1675             ThumbEmitMovRegReg(ThumbReg(4), ThumbReg(12));
1676         }
1677
1678         // ldr r12, [r12]
1679         ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(12), 0);
1680
1681         if (isRelative)
1682         {
1683             // add r12, r4
1684             ThumbEmitAddReg(ThumbReg(12), ThumbReg(4));
1685         }
1686     }
1687
1688     if (!isRelative)
1689     {
1690         // bx r12
1691         ThumbEmitJumpRegister(ThumbReg(12));
1692     }
1693     else
1694     {
1695         // Replace LR with R12 on stack: hybrid-tail call, same as for EmitShuffleThunk
1696         // str r12, [sp, 4]
1697         ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, 4);
1698     }
1699 }
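
// For the relative-slot case the r4/r12 dance above computes the final target
// as "slot address + stored delta". Expressed directly (a sketch, mirroring
// mov r4, r12 / ldr r12, [r12] / add r12, r4):
//
//   PCODE ResolveRelativeVTableSlot(TADDR pSlot)
//   {
//       return pSlot + *(TADDR *)pSlot;
//   }
//
// The closing "str r12, [sp, #4]" then overwrites the LR slot pushed by the
// caller's prolog, so the subsequent epilog pops r12's value into PC and
// tail-calls the target instead of returning.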
1700
1701 VOID StubLinkerCPU::EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg)
1702 {
1703     STANDARD_VM_CONTRACT;
1704
1705     struct ShuffleEntry *pEntry = pShuffleEntryArray;
1706     while (pEntry->srcofs != ShuffleEntry::SENTINEL)
1707     {
1708         _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
1709         _ASSERTE(pEntry->srcofs & ShuffleEntry::REGMASK);
1710         _ASSERTE(!(pEntry->dstofs & ShuffleEntry::FPREGMASK));
1711         _ASSERTE(!(pEntry->srcofs & ShuffleEntry::FPREGMASK));
1712         _ASSERTE(pEntry->dstofs != ShuffleEntry::HELPERREG);
1713         _ASSERTE(pEntry->srcofs != ShuffleEntry::HELPERREG);
1714
1715         ThumbEmitMovRegReg(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
1716                             ThumbReg(pEntry->srcofs & ShuffleEntry::OFSMASK));
1717
1718         pEntry++;
1719     }
1720
1721     MetaSig msig(pSharedMD);
1722     ArgIterator argit(&msig);
1723
1724     if (argit.HasParamType())
1725     {
1726         // Place instantiation parameter into the correct register.
1727         ArgLocDesc sInstArgLoc;
1728         argit.GetParamTypeLoc(&sInstArgLoc);
1729         int regHidden = sInstArgLoc.m_idxGenReg;
1730         _ASSERTE(regHidden != -1);
1731         if (extraArg == NULL)
1732         {
1733             if (pSharedMD->RequiresInstMethodTableArg())
1734             {
1735                 // Unboxing stub case
1736                 // Extract MethodTable pointer (the hidden arg) from the object instance.
1737                 //  ldr regHidden, [r0]
1738                 ThumbEmitLoadRegIndirect(ThumbReg(regHidden), ThumbReg(0), 0);
1739             }
1740         }
1741         else
1742         {
1743             // mov regHidden, #pHiddenArg
1744             ThumbEmitMovConstant(ThumbReg(regHidden), (TADDR)extraArg);
1745         }
1746     }
1747
1748     if (extraArg == NULL)
1749     {
1750         // Unboxing stub case
1751         // Skip over the MethodTable* to find the address of the unboxed value type.
1752         //  add r0, #sizeof(MethodTable*)
1753         ThumbEmitIncrement(ThumbReg(0), sizeof(MethodTable*));
1754     }
1755
1756     bool isRelative = MethodTable::VTableIndir2_t::isRelative
1757                       && pSharedMD->IsVtableSlot();
1758
1759 #ifndef FEATURE_NGEN_RELOCS_OPTIMIZATIONS
1760     _ASSERTE(!isRelative);
1761 #endif
1762
1763     if (isRelative)
1764     {
1765         ThumbEmitProlog(1, 0, FALSE);
1766     }
1767
1768     ThumbEmitTailCallManagedMethod(pSharedMD);
1769
1770     if (isRelative)
1771     {
1772         ThumbEmitEpilog();
1773     }
1774 }
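
// For the unboxing-stub case (extraArg == NULL) the net effect on the entry
// state is, in C-like pseudo-code:
//
//   if (pSharedMD->RequiresInstMethodTableArg())
//       hiddenArg = *(MethodTable **)r0;     // the MT* is the first word of the box
//   r0 = r0 + sizeof(MethodTable *);         // step over it to the raw value type
//
// before tail-calling the shared method with the unboxed 'this' and the
// instantiation argument in place.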
1775
1776 #endif // CROSSGEN_COMPILE
1777
1778 #endif // !DACCESS_COMPILE
1779
1780 LONG CLRNoCatchHandler(EXCEPTION_POINTERS* pExceptionInfo, PVOID pv)
1781 {
1782     return EXCEPTION_CONTINUE_SEARCH;
1783 }
1784
1785 void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegisters * pRegs)
1786 {
1787     LIMITED_METHOD_CONTRACT;
1788
1789     T_CONTEXT * pContext = pRD->pCurrentContext;
1790     pContext->R4 = pRegs->r4;
1791     pContext->R5 = pRegs->r5;
1792     pContext->R6 = pRegs->r6;
1793     pContext->R7 = pRegs->r7;
1794     pContext->R8 = pRegs->r8;
1795     pContext->R9 = pRegs->r9;
1796     pContext->R10 = pRegs->r10;
1797     pContext->R11 = pRegs->r11;
1798     pContext->Lr = pRegs->r14;
1799
1800     T_KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers;
1801     pContextPointers->R4 = (PDWORD)&pRegs->r4;
1802     pContextPointers->R5 = (PDWORD)&pRegs->r5;
1803     pContextPointers->R6 = (PDWORD)&pRegs->r6;
1804     pContextPointers->R7 = (PDWORD)&pRegs->r7;
1805     pContextPointers->R8 = (PDWORD)&pRegs->r8;
1806     pContextPointers->R9 = (PDWORD)&pRegs->r9;
1807     pContextPointers->R10 = (PDWORD)&pRegs->r10;
1808     pContextPointers->R11 = (PDWORD)&pRegs->r11;
1809     pContextPointers->Lr = NULL;
1810 }
1811
1812 #ifndef CROSSGEN_COMPILE
1813 void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
1814 {
1815     pRD->IsCallerContextValid = FALSE;
1816     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
1817
1818     // Copy the saved argument registers into the current context
1819     ArgumentRegisters * pArgRegs = GetArgumentRegisters();
1820     pRD->pCurrentContext->R0 = pArgRegs->r[0];
1821     pRD->pCurrentContext->R1 = pArgRegs->r[1];
1822     pRD->pCurrentContext->R2 = pArgRegs->r[2];
1823     pRD->pCurrentContext->R3 = pArgRegs->r[3];
1824
1825     // Next, copy all the callee saved registers
1826     UpdateRegDisplayFromCalleeSavedRegisters(pRD, GetCalleeSavedRegisters());
1827
1828     // Set ControlPC to be the same as the saved "return address"
1829     // value, which is actually a ControlPC in the frameless method (e.g.
1830     // the faulting address in case of an AV or TAE).
1831     pRD->pCurrentContext->Pc = GetReturnAddress();
1832
1833     // Set the caller SP
1834     pRD->pCurrentContext->Sp = this->GetSP();
1835
1836     // Finally, sync up the regdisplay with the context
1837     SyncRegDisplayToCurrentContext(pRD);
1838
1839     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    TransitionFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
1840 }
1841
1842 void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
1843 {
1844     LIMITED_METHOD_DAC_CONTRACT;
1845
1846     // Copy the context to regdisplay
1847     memcpy(pRD->pCurrentContext, &m_ctx, sizeof(T_CONTEXT));
1848
1849     pRD->ControlPC = ::GetIP(&m_ctx);
1850     pRD->SP = ::GetSP(&m_ctx);
1851
1852     // Update the integer registers in KNONVOLATILE_CONTEXT_POINTERS from
1853     // the exception context we have.
1854     pRD->pCurrentContextPointers->R4 = (PDWORD)&m_ctx.R4;
1855     pRD->pCurrentContextPointers->R5 = (PDWORD)&m_ctx.R5;
1856     pRD->pCurrentContextPointers->R6 = (PDWORD)&m_ctx.R6;
1857     pRD->pCurrentContextPointers->R7 = (PDWORD)&m_ctx.R7;
1858     pRD->pCurrentContextPointers->R8 = (PDWORD)&m_ctx.R8;
1859     pRD->pCurrentContextPointers->R9 = (PDWORD)&m_ctx.R9;
1860     pRD->pCurrentContextPointers->R10 = (PDWORD)&m_ctx.R10;
1861     pRD->pCurrentContextPointers->R11 = (PDWORD)&m_ctx.R11;
1862     pRD->pCurrentContextPointers->Lr = NULL;
1863
1864     pRD->IsCallerContextValid = FALSE;
1865     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
1866 }
1867
1868 void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
1869 {
1870     CONTRACT_VOID
1871     {
1872         NOTHROW;
1873         GC_NOTRIGGER;
1874         // We should skip over InlinedCallFrame if it is not active.
1875         // It will be part of a JITed method's frame, and the stack-walker
1876         // can handle such a case.
1877 #ifdef PROFILING_SUPPORTED
1878         PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
1879 #endif
1880         HOST_NOCALLS;
1881         MODE_ANY;
1882         SUPPORTS_DAC;
1883     }
1884     CONTRACT_END;
1885
1886     // @TODO: Remove this after the debugger is fixed to avoid stack-walks from bad places
1887     // @TODO: This may be still needed for sampling profilers
1888     if (!InlinedCallFrame::FrameHasActiveCall(this))
1889     {
1890         LOG((LF_CORDB, LL_ERROR, "WARNING: InlinedCallFrame::UpdateRegDisplay called on inactive frame %p\n", this));
1891         return;
1892     }
1893
1894     // reset pContext; it's only valid for the active (top-most) frame
1895     pRD->pContext = NULL;
1896
1897     *(pRD->pPC) = m_pCallerReturnAddress;
1898     pRD->SP = (DWORD) dac_cast<TADDR>(m_pCallSiteSP);
1899
1900     pRD->IsCallerContextValid = FALSE;
1901     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
1902
1903     pRD->pCurrentContext->Pc = *(pRD->pPC);
1904     pRD->pCurrentContext->Sp = pRD->SP;
1905
1906     // Update the frame pointer in the current context.
1907     pRD->pCurrentContext->R11 = m_pCalleeSavedFP;
1908     pRD->pCurrentContextPointers->R11 = &m_pCalleeSavedFP;
1909
1910     // This is necessary to unwind methods with alloca. This needs to stay
1911     // in sync with the definition of REG_SAVED_LOCALLOC_SP in the JIT.
1912     pRD->pCurrentContext->R9 = (DWORD) dac_cast<TADDR>(m_pSPAfterProlog);
1913     pRD->pCurrentContextPointers->R9 = (DWORD *)&m_pSPAfterProlog;
1914
1915     RETURN;
1916 }
1917
1918 #ifdef FEATURE_HIJACK
1919 TADDR ResumableFrame::GetReturnAddressPtr(void)
1920 {
1921     LIMITED_METHOD_DAC_CONTRACT;
1922     return dac_cast<TADDR>(m_Regs) + offsetof(T_CONTEXT, Pc);
1923 }
1924
1925 void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
1926 {
1927     CONTRACT_VOID
1928     {
1929         NOTHROW;
1930         GC_NOTRIGGER;
1931         MODE_ANY;
1932         SUPPORTS_DAC;
1933     }
1934     CONTRACT_END;
1935
1936     CopyMemory(pRD->pCurrentContext, m_Regs, sizeof(T_CONTEXT));
1937
1938     pRD->ControlPC = m_Regs->Pc;
1939     pRD->SP = m_Regs->Sp;
1940
1941     pRD->pCurrentContextPointers->R4 = &m_Regs->R4;
1942     pRD->pCurrentContextPointers->R5 = &m_Regs->R5;
1943     pRD->pCurrentContextPointers->R6 = &m_Regs->R6;
1944     pRD->pCurrentContextPointers->R7 = &m_Regs->R7;
1945     pRD->pCurrentContextPointers->R8 = &m_Regs->R8;
1946     pRD->pCurrentContextPointers->R9 = &m_Regs->R9;
1947     pRD->pCurrentContextPointers->R10 = &m_Regs->R10;
1948     pRD->pCurrentContextPointers->R11 = &m_Regs->R11;
1949     pRD->pCurrentContextPointers->Lr = &m_Regs->Lr;
1950
1951     pRD->volatileCurrContextPointers.R0 = &m_Regs->R0;
1952     pRD->volatileCurrContextPointers.R1 = &m_Regs->R1;
1953     pRD->volatileCurrContextPointers.R2 = &m_Regs->R2;
1954     pRD->volatileCurrContextPointers.R3 = &m_Regs->R3;
1955     pRD->volatileCurrContextPointers.R12 = &m_Regs->R12;
1956
1957     pRD->IsCallerContextValid = FALSE;
1958     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
1959 }
1960
1961 void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
1962 {
1963      CONTRACTL {
1964          NOTHROW;
1965          GC_NOTRIGGER;
1966          SUPPORTS_DAC;
1967      }
1968      CONTRACTL_END;
1969
1970      pRD->IsCallerContextValid = FALSE;
1971      pRD->IsCallerSPValid      = FALSE;
1972
1973      pRD->pCurrentContext->Pc = m_ReturnAddress;
1974      pRD->pCurrentContext->Sp = PTR_TO_TADDR(m_Args) + sizeof(struct HijackArgs);
1975
1976      pRD->pCurrentContext->R0 = m_Args->R0;
1977
1978      pRD->pCurrentContext->R4 = m_Args->R4;
1979      pRD->pCurrentContext->R5 = m_Args->R5;
1980      pRD->pCurrentContext->R6 = m_Args->R6;
1981      pRD->pCurrentContext->R7 = m_Args->R7;
1982      pRD->pCurrentContext->R8 = m_Args->R8;
1983      pRD->pCurrentContext->R9 = m_Args->R9;
1984      pRD->pCurrentContext->R10 = m_Args->R10;
1985      pRD->pCurrentContext->R11 = m_Args->R11;
1986
1987      pRD->pCurrentContextPointers->R4 = &m_Args->R4;
1988      pRD->pCurrentContextPointers->R5 = &m_Args->R5;
1989      pRD->pCurrentContextPointers->R6 = &m_Args->R6;
1990      pRD->pCurrentContextPointers->R7 = &m_Args->R7;
1991      pRD->pCurrentContextPointers->R8 = &m_Args->R8;
1992      pRD->pCurrentContextPointers->R9 = &m_Args->R9;
1993      pRD->pCurrentContextPointers->R10 = &m_Args->R10;
1994      pRD->pCurrentContextPointers->R11 = &m_Args->R11;
1995      pRD->pCurrentContextPointers->Lr = NULL;
1996
1997      SyncRegDisplayToCurrentContext(pRD);
1998 }
1999 #endif // FEATURE_HIJACK
2000 #endif // !CROSSGEN_COMPILE
2001
2002 class UMEntryThunk * UMEntryThunk::Decode(void *pCallback)
2003 {
2004     _ASSERTE(offsetof(UMEntryThunkCode, m_code) == 0);
2005     UMEntryThunkCode * pCode = (UMEntryThunkCode*)((ULONG_PTR)pCallback & ~THUMB_CODE);
2006
2007     // We may be called with an unmanaged external code pointer instead. So if it doesn't look like one of our
2008     // stubs (see UMEntryThunkCode::Encode below) then we'll return NULL. Luckily in these scenarios our
2009     // caller will perform a hash lookup on successful return to verify our result in case random unmanaged
2010     // code happens to look like ours.
2011     if ((pCode->m_code[0] == 0xf8df) &&
2012         (pCode->m_code[1] == 0xc008) &&
2013         (pCode->m_code[2] == 0xf8df) &&
2014         (pCode->m_code[3] == 0xf000))
2015     {
2016         return (UMEntryThunk*)pCode->m_pvSecretParam;
2017     }
2018
2019     return NULL;
2020 }
2021
2022 void UMEntryThunkCode::Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam)
2023 {
2024 #if defined(TIZEN_ASAN_ENVIRONMENT) && !defined(CROSS_COMPILE) && !defined(DACCESS_COMPILE)
2025     pTargetCode = (BYTE *)TizenASanEnv::CreateWrapperILCode((LPVOID)pTargetCode);
2026 #endif
2027
2028     // ldr r12, [pc + 8]
2029     m_code[0] = 0xf8df;
2030     m_code[1] = 0xc008;
2031     // ldr pc, [pc]
2032     m_code[2] = 0xf8df;
2033     m_code[3] = 0xf000;
2034
2035     m_pTargetCode = (TADDR)pTargetCode;
2036     m_pvSecretParam = (TADDR)pvSecretParam;
2037
2038     FlushInstructionCache(GetCurrentProcess(),&pEntryThunkCodeRX->m_code,sizeof(m_code));
2039 }
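
// The finished thunk therefore has this layout (the PC-relative loads account
// for the Thumb PC reading as "instruction address + 4"):
//
//   +0:  f8df c008   ldr r12, [pc, #8]   ; r12 = m_pvSecretParam (the UMEntryThunk*)
//   +4:  f8df f000   ldr pc, [pc]        ; jump to m_pTargetCode
//   +8:  m_pTargetCode
//   +12: m_pvSecretParam
//
// This is exactly the pattern UMEntryThunk::Decode matches above. Under
// TIZEN_ASAN_ENVIRONMENT the stored target is the ASan wrapper around
// pTargetCode rather than pTargetCode itself, but the layout is unchanged.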
2040
2041 #ifndef DACCESS_COMPILE
2042
2043 void UMEntryThunkCode::Poison()
2044 {
2045     ExecutableWriterHolder<UMEntryThunkCode> thunkWriterHolder(this, sizeof(UMEntryThunkCode));
2046     UMEntryThunkCode *pThisRW = thunkWriterHolder.GetRW();
2047
2048     pThisRW->m_pTargetCode = (TADDR)UMEntryThunk::ReportViolation;
2049
2050     // ldr r0, [pc + 8]
2051     pThisRW->m_code[0] = 0x4802;
2052     // nop
2053     pThisRW->m_code[1] = 0xbf00;
2054
2055     ClrFlushInstructionCache(&m_code,sizeof(m_code));
2056 }
2057
2058 #endif // DACCESS_COMPILE
2059
2060 ///////////////////////////// UNIMPLEMENTED //////////////////////////////////
2061
2062 #ifndef DACCESS_COMPILE
2063
2064 #ifndef CROSSGEN_COMPILE
2065
2066 extern "C" void STDCALL JIT_PatchedCodeStart();
2067 extern "C" void STDCALL JIT_PatchedCodeLast();
2068
2069 void InitJITHelpers1()
2070 {
2071     STANDARD_VM_CONTRACT;
2072
2073     // Allocation helpers, faster but non-logging.
2074     if (!(TrackAllocationsEnabled()
2075           || LoggingOn(LF_GCALLOC, LL_INFO10)
2076 #ifdef _DEBUG
2077           || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0)
2078 #endif // _DEBUG
2079         ))
2080     {
2081         _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
2082
2083         SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
2084         SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
2085         SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
2086
2087         ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
2088     }
2089 }
2090
2091 #endif // CROSSGEN_COMPILE
2092
2093 VOID ResetCurrentContext()
2094 {
2095     LIMITED_METHOD_CONTRACT;
2096 }
2097 #endif // !DACCESS_COMPILE
2098
2099
2100 #ifdef FEATURE_COMINTEROP
2101 void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMethodRW, PCODE target)
2102 {
2103     WRAPPER_NO_CONTRACT;
2104
2105     // mov r12, pc
2106     // ldr pc, [pc, #0]
2107     // dcd 0
2108     // dcd target
2109     WORD rgCode[] = {
2110         0x46fc,
2111         0xf8df, 0xf004
2112     };
2113
2114     BYTE *pBufferRX = (BYTE*)pCOMMethodRX - COMMETHOD_CALL_PRESTUB_SIZE;
2115     BYTE *pBufferRW = (BYTE*)pCOMMethodRW - COMMETHOD_CALL_PRESTUB_SIZE;
2116
2117     memcpy(pBufferRW, rgCode, sizeof(rgCode));
2118     *((PCODE*)(pBufferRW + sizeof(rgCode) + 2)) = target;
2119
2120     // Ensure that the updated instructions get actually written
2121     ClrFlushInstructionCache(pBufferRX, COMMETHOD_CALL_PRESTUB_SIZE);
2122
2123     _ASSERTE(IS_ALIGNED(pBufferRX + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET, sizeof(void*)) &&
2124              *((PCODE*)(pBufferRX + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET)) == target);
2125 }
2126 #endif // FEATURE_COMINTEROP
2127
2128 void MovRegImm(BYTE* p, int reg, TADDR imm)
2129 {
2130     LIMITED_METHOD_CONTRACT;
2131     *(WORD *)(p + 0) = 0xF240;
2132     *(WORD *)(p + 2) = (UINT16)(reg << 8);
2133     *(WORD *)(p + 4) = 0xF2C0;
2134     *(WORD *)(p + 6) = (UINT16)(reg << 8);
2135     PutThumb2Mov32((UINT16 *)p, imm);
2136 }
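
// 0xF240/0xF2C0 are the Thumb-2 movw/movt templates with the destination
// register placed in bits 8-11 of the second halfword; PutThumb2Mov32 then
// scatters the 32-bit immediate across the split immediate fields of both
// instructions. A sketch of a call site (the pair always occupies 8 bytes,
// which is why the emitters below advance p by 8):
//
//   BYTE code[8];
//   MovRegImm(code, 0, 0x12345678);   // movw r0, #0x5678
//                                     // movt r0, #0x1234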
2137
2138 #ifndef DACCESS_COMPILE
2139
2140 #ifndef CROSSGEN_COMPILE
2141
2142 #ifdef FEATURE_READYTORUN
2143
2144 //
2145 // Allocation of dynamic helpers
2146 //
2147
2148 #define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)
2149
2150 #define BEGIN_DYNAMIC_HELPER_EMIT(size) \
2151     SIZE_T cb = size; \
2152     SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
2153     BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
2154     ExecutableWriterHolder<BYTE> startWriterHolder(pStartRX, cbAligned); \
2155     BYTE * pStart = startWriterHolder.GetRW(); \
2156     size_t rxOffset = pStartRX - pStart; \
2157     BYTE * p = pStart;
2158
2159 #define END_DYNAMIC_HELPER_EMIT() \
2160     _ASSERTE(pStart + cb == p); \
2161     while (p < pStart + cbAligned) { *(WORD *)p = 0xdefe; p += 2; } \
2162     ClrFlushInstructionCache(pStartRX, cbAligned); \
2163     return (PCODE)((TADDR)pStartRX | THUMB_CODE)
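
// Together these macros allocate an aligned RX/RW pair, let the emitter write
// into the RW mapping, pad the tail with 0xdefe halfwords (a permanently
// undefined Thumb encoding, so a stray fall-through faults), flush the icache
// and hand back the RX address with the THUMB_CODE bit set. A generated
// CreateHelper(pAllocator, arg, target) body is, in effect:
//
//   movw/movt r0,  #arg      ; 8 bytes
//   movw/movt r12, #target   ; 8 bytes
//   bx        r12            ; 2 bytes, tail call into the real helper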
2164
2165 PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
2166 {
2167     STANDARD_VM_CONTRACT;
2168
2169     BEGIN_DYNAMIC_HELPER_EMIT(18);
2170
2171     // mov r0, arg
2172     MovRegImm(p, 0, arg);
2173     p += 8;
2174
2175     // mov r12, target
2176     MovRegImm(p, 12, target);
2177     p += 8;
2178
2179     // bx r12
2180     *(WORD *)p = 0x4760;
2181     p += 2;
2182
2183     END_DYNAMIC_HELPER_EMIT();
2184 }
2185
2186 void DynamicHelpers::EmitHelperWithArg(BYTE*& p, size_t rxOffset, LoaderAllocator * pAllocator, TADDR arg, PCODE target)
2187 {
2188     // mov r1, arg
2189     MovRegImm(p, 1, arg);
2190     p += 8;
2191
2192     // mov r12, target
2193     MovRegImm(p, 12, target);
2194     p += 8;
2195
2196     // bx r12
2197     *(WORD *)p = 0x4760;
2198     p += 2;
2199 }
2200
2201 PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
2202 {
2203     BEGIN_DYNAMIC_HELPER_EMIT(18);
2204
2205     EmitHelperWithArg(p, rxOffset, pAllocator, arg, target);
2206
2207     END_DYNAMIC_HELPER_EMIT();
2208 }
2209
2210 PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
2211 {
2212     BEGIN_DYNAMIC_HELPER_EMIT(26);
2213
2214     // mov r0, arg
2215     MovRegImm(p, 0, arg);
2216     p += 8;
2217
2218     // mov r1, arg2
2219     MovRegImm(p, 1, arg2);
2220     p += 8;
2221
2222     // mov r12, target
2223     MovRegImm(p, 12, target);
2224     p += 8;
2225
2226     // bx r12
2227     *(WORD *)p = 0x4760;
2228     p += 2;
2229
2230     END_DYNAMIC_HELPER_EMIT();
2231 }
2232
2233 PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
2234 {
2235     BEGIN_DYNAMIC_HELPER_EMIT(20);
2236
2237     // mov r1, r0
2238     *(WORD *)p = 0x4601;
2239     p += 2;
2240
2241     // mov r0, arg
2242     MovRegImm(p, 0, arg);
2243     p += 8;
2244
2245     // mov r12, target
2246     MovRegImm(p, 12, target);
2247     p += 8;
2248
2249     // bx r12
2250     *(WORD *)p = 0x4760;
2251     p += 2;
2252
2253     END_DYNAMIC_HELPER_EMIT();
2254 }
2255
2256 PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator)
2257 {
2258     BEGIN_DYNAMIC_HELPER_EMIT(2);
2259
2260     *(WORD *)p = 0x4770; // bx lr
2261     p += 2;
2262
2263     END_DYNAMIC_HELPER_EMIT();
2264 }
2265
2266 PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg)
2267 {
2268     BEGIN_DYNAMIC_HELPER_EMIT(10);
2269
2270     // mov r0, arg
2271     MovRegImm(p, 0, arg);
2272     p += 8;
2273
2274     // bx lr
2275     *(WORD *)p = 0x4770;
2276     p += 2;
2277
2278     END_DYNAMIC_HELPER_EMIT();
2279 }
2280
2281 PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset)
2282 {
2283     BEGIN_DYNAMIC_HELPER_EMIT((offset != 0) ? 16 : 12);
2284
2285     // mov r0, arg
2286     MovRegImm(p, 0, arg);
2287     p += 8;
2288
2289     // ldr r0, [r0]
2290     *(WORD *)p = 0x6800;
2291     p += 2;
2292
2293     if (offset != 0)
2294     {
2295         // add r0, r0, <offset>
2296         *(WORD *)(p + 0) = 0xF100;
2297         *(WORD *)(p + 2) = offset;
2298         p += 4;
2299     }
2300
2301     // bx lr
2302     *(WORD *)p = 0x4770;
2303     p += 2;
2304
2305     END_DYNAMIC_HELPER_EMIT();
2306 }
2307
2308 PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
2309 {
2310     BEGIN_DYNAMIC_HELPER_EMIT(18);
2311
2312     // mov r2, arg
2313     MovRegImm(p, 2, arg);
2314     p += 8;
2315
2316     // mov r12, target
2317     MovRegImm(p, 12, target);
2318     p += 8;
2319
2320     // bx r12
2321     *(WORD *)p = 0x4760;
2322     p += 2;
2323
2324     END_DYNAMIC_HELPER_EMIT();
2325 }
2326
2327 PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
2328 {
2329     BEGIN_DYNAMIC_HELPER_EMIT(26);
2330
2331     // mov r2, arg
2332     MovRegImm(p, 2, arg);
2333     p += 8;
2334
2335     // mov r3, arg2
2336     MovRegImm(p, 3, arg2);
2337     p += 8;
2338
2339     // mov r12, target
2340     MovRegImm(p, 12, target);
2341     p += 8;
2342
2343     // bx r12
2344     *(WORD *)p = 0x4760;
2345     p += 2;
2346
2347     END_DYNAMIC_HELPER_EMIT();
2348 }
2349
2350 PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, CORINFO_RUNTIME_LOOKUP * pLookup, DWORD dictionaryIndexAndSlot, Module * pModule)
2351 {
2352     STANDARD_VM_CONTRACT;
2353
2354     _ASSERTE(!MethodTable::IsPerInstInfoRelative());
2355
2356     PCODE helperAddress = (pLookup->helper == CORINFO_HELP_RUNTIMEHANDLE_METHOD ?
2357         GetEEFuncEntryPoint(JIT_GenericHandleMethodWithSlotAndModule) :
2358         GetEEFuncEntryPoint(JIT_GenericHandleClassWithSlotAndModule));
2359
2360     GenericHandleArgs * pArgs = (GenericHandleArgs *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(sizeof(GenericHandleArgs), DYNAMIC_HELPER_ALIGNMENT);
2361     ExecutableWriterHolder<GenericHandleArgs> argsWriterHolder(pArgs, sizeof(GenericHandleArgs));
2362     argsWriterHolder.GetRW()->dictionaryIndexAndSlot = dictionaryIndexAndSlot;
2363     argsWriterHolder.GetRW()->signature = pLookup->signature;
2364     argsWriterHolder.GetRW()->module = (CORINFO_MODULE_HANDLE)pModule;
2365
2366     WORD slotOffset = (WORD)(dictionaryIndexAndSlot & 0xFFFF) * sizeof(Dictionary*);
2367
2368     // The lookup may only be available via the run-time helper function.
2369
2370     if (pLookup->indirections == CORINFO_USEHELPER)
2371     {
2372         BEGIN_DYNAMIC_HELPER_EMIT(18);
2373
2374         EmitHelperWithArg(p, rxOffset, pAllocator, (TADDR)pArgs, helperAddress);
2375
2376         END_DYNAMIC_HELPER_EMIT();
2377     }
2378     else
2379     {
2380         int indirectionsSize = 0;
2381         if (pLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)
2382         {
2383             indirectionsSize += (pLookup->sizeOffset >= 0xFFF ? 10 : 4);
2384             indirectionsSize += 12;
2385         }
2386         for (WORD i = 0; i < pLookup->indirections; i++)
2387         {
2388             indirectionsSize += (pLookup->offsets[i] >= 0xFFF ? 10 : 4);
2389         }
2390
2391         int codeSize = indirectionsSize + (pLookup->testForNull ? 26 : 2);
2392
2393         BEGIN_DYNAMIC_HELPER_EMIT(codeSize);
2394
2395         if (pLookup->testForNull)
2396         {
2397             // mov r3, r0
2398             *(WORD *)p = 0x4603;
2399             p += 2;
2400         }
2401
2402         BYTE* pBLECall = NULL;
2403
2404         for (WORD i = 0; i < pLookup->indirections; i++)
2405         {
2406             if (i == pLookup->indirections - 1 && pLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)
2407             {
2408                 _ASSERTE(pLookup->testForNull && i > 0);
2409
2410                 if (pLookup->sizeOffset >= 0xFFF)
2411                 {
2412                     // mov r2, offset
2413                     MovRegImm(p, 2, pLookup->sizeOffset); p += 8;
2414                     // ldr r1, [r0, r2]
2415                     *(WORD*)p = 0x5881; p += 2;
2416                 }
2417                 else
2418                 {
2419                     // ldr r1, [r0 + offset]
2420                     *(WORD*)p = 0xF8D0; p += 2;
2421                     *(WORD*)p = (WORD)(0xFFF & pLookup->sizeOffset) | 0x1000; p += 2;
2422                 }
2423
2424                 // mov r2, slotOffset
2425                 MovRegImm(p, 2, slotOffset); p += 8;
2426
2427                 // cmp r1,r2
2428                 *(WORD*)p = 0x4291; p += 2;
2429
2430                 // ble 'CALL HELPER'
2431                 pBLECall = p;       // Offset filled later
2432                 *(WORD*)p = 0xdd00; p += 2;
2433             }
2434             if (pLookup->offsets[i] >= 0xFFF)
2435             {
2436                 // mov r2, offset
2437                 MovRegImm(p, 2, pLookup->offsets[i]);
2438                 p += 8;
2439
2440                 // ldr r0, [r0, r2]
2441                 *(WORD *)p = 0x5880;
2442                 p += 2;
2443             }
2444             else
2445             {
2446                 // ldr r0, [r0 + offset]
2447                 *(WORD *)p = 0xF8D0;
2448                 p += 2;
2449                 *(WORD *)p = (WORD)(0xFFF & pLookup->offsets[i]);
2450                 p += 2;
2451             }
2452         }
2453
2454         // No null test required
2455         if (!pLookup->testForNull)
2456         {
2457             _ASSERTE(pLookup->sizeOffset == CORINFO_NO_SIZE_CHECK);
2458
2459             // mov pc, lr
2460             *(WORD *)p = 0x46F7;
2461             p += 2;
2462         }
2463         else
2464         {
2465             // cbz r0, 'CALL HELPER'
2466             *(WORD *)p = 0xB100;
2467             p += 2;
2468             // mov pc, lr
2469             *(WORD *)p = 0x46F7;
2470             p += 2;
2471
2472             // CALL HELPER:
2473             if (pBLECall != NULL)
2474                 *(WORD*)pBLECall |= (((BYTE)(p - pBLECall) - 4) >> 1);
2475
2476             // mov r0, r3
2477             *(WORD *)p = 0x4618;
2478             p += 2;
2479
2480             EmitHelperWithArg(p, rxOffset, pAllocator, (TADDR)pArgs, helperAddress);
2481         }
2482
2483         END_DYNAMIC_HELPER_EMIT();
2484     }
2485 }
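
// Shape of the generated fast path, in C-like pseudo-code (a sketch; the
// register assignments mirror the emission above: r0 = generic context,
// r3 = saved context, r1/r2 = size-check temporaries):
//
//   void * Lookup(void * ctx)
//   {
//       void * saved = ctx;                            // only if testForNull
//       for (WORD i = 0; i < indirections; i++)
//       {
//           if (i == indirections - 1 && sizeOffset != CORINFO_NO_SIZE_CHECK
//               && *(DWORD *)((BYTE *)ctx + sizeOffset) <= slotOffset)
//               goto CallHelper;                       // dictionary too small
//           ctx = *(void **)((BYTE *)ctx + offsets[i]);
//       }
//       if (!testForNull || ctx != NULL)
//           return ctx;
//   CallHelper:
//       return helper(saved, pArgs);                   // r0 = saved, r1 = pArgs
//   }
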
2486 #endif // FEATURE_READYTORUN
2487
2488 #endif // CROSSGEN_COMPILE
2489
2490 #endif // !DACCESS_COMPILE