; src/vm/amd64/UMThunkStub.asm
;
; Copyright (c) Microsoft. All rights reserved.
; Licensed under the MIT license. See LICENSE file in the project root for full license information.
;

; ==++==
;

;
; ==--==

include <AsmMacros.inc>
include AsmConstants.inc

ifdef FEATURE_MIXEDMODE
IJWNOADThunk__MakeCall                  equ ?MakeCall@IJWNOADThunk@@KAXXZ
IJWNOADThunk__FindThunkTarget           equ ?FindThunkTarget@IJWNOADThunk@@QEAAPEBXXZ
endif
gfHostConfig                            equ ?g_fHostConfig@@3KA
NDirect__IsHostHookEnabled              equ ?IsHostHookEnabled@NDirect@@SAHXZ

extern CreateThreadBlockThrow:proc
extern TheUMEntryPrestubWorker:proc
ifdef FEATURE_MIXEDMODE
extern IJWNOADThunk__FindThunkTarget:proc
endif
extern UMEntryPrestubUnwindFrameChainHandler:proc
extern UMThunkStubUnwindFrameChainHandler:proc
extern g_TrapReturningThreads:dword
extern UM2MDoADCallBack:proc
extern ReverseEnterRuntimeHelper:proc
extern ReverseLeaveRuntimeHelper:proc
ifdef FEATURE_INCLUDE_ALL_INTERFACES
extern gfHostConfig:dword
extern NDirect__IsHostHookEnabled:proc
endif
extern UMThunkStubRareDisableWorker:proc


;
; METHODDESC_REGISTER: UMEntryThunk*
;
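; TheUMEntryPrestub is the one-time entry point for an UMEntryThunk: it homes
; the register arguments, calls TheUMEntryPrestubWorker to obtain the real
; exec stub, then restores the arguments and tail-jumps to that stub.
; Assumed worker signature (the authoritative declaration lives on the C++
; side):
;   PCODE TheUMEntryPrestubWorker(UMEntryThunk* pUMEntryThunk);
;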
NESTED_ENTRY TheUMEntryPrestub, _TEXT, UMEntryPrestubUnwindFrameChainHandler

TheUMEntryPrestub_STACK_FRAME_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES

; XMM save area
TheUMEntryPrestub_XMM_SAVE_OFFSET = TheUMEntryPrestub_STACK_FRAME_SIZE
TheUMEntryPrestub_STACK_FRAME_SIZE = TheUMEntryPrestub_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL

; Ensure that the new rsp will be 16-byte aligned.  Note that the caller has
; already pushed the return address.
if ((TheUMEntryPrestub_STACK_FRAME_SIZE + 8) MOD 16) ne 0
TheUMEntryPrestub_STACK_FRAME_SIZE = TheUMEntryPrestub_STACK_FRAME_SIZE + 8
endif

        alloc_stack     TheUMEntryPrestub_STACK_FRAME_SIZE

        save_reg_postrsp    rcx, TheUMEntryPrestub_STACK_FRAME_SIZE + 8h
        save_reg_postrsp    rdx, TheUMEntryPrestub_STACK_FRAME_SIZE + 10h
        save_reg_postrsp    r8,  TheUMEntryPrestub_STACK_FRAME_SIZE + 18h
        save_reg_postrsp    r9,  TheUMEntryPrestub_STACK_FRAME_SIZE + 20h

        save_xmm128_postrsp xmm0, TheUMEntryPrestub_XMM_SAVE_OFFSET
        save_xmm128_postrsp xmm1, TheUMEntryPrestub_XMM_SAVE_OFFSET + 10h
        save_xmm128_postrsp xmm2, TheUMEntryPrestub_XMM_SAVE_OFFSET + 20h
        save_xmm128_postrsp xmm3, TheUMEntryPrestub_XMM_SAVE_OFFSET + 30h

        END_PROLOGUE

        ;
        ; Do the prestub-specific work: resolve the real exec stub
        ;
        mov             rcx, METHODDESC_REGISTER
        call            TheUMEntryPrestubWorker
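        ; rax = entry point of the exec stub; it is consumed by TAILJMP_RAX below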

        ;
        ; we're going to tail call to the exec stub that we just set up
        ;

        mov             rcx, [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 8h]
        mov             rdx, [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 10h]
        mov             r8,  [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 18h]
        mov             r9,  [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 20h]

        movdqa          xmm0, [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET]
        movdqa          xmm1, [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 10h]
        movdqa          xmm2, [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 20h]
        movdqa          xmm3, [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 30h]

        ;
        ; epilogue
        ;
        add             rsp, TheUMEntryPrestub_STACK_FRAME_SIZE
        TAILJMP_RAX

NESTED_END TheUMEntryPrestub, _TEXT


;
; METHODDESC_REGISTER: UMEntryThunk*
;
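; UMThunkStub is the generic unmanaged-to-managed (reverse P/Invoke) entry
; point.  On the fast path it:
;   1. obtains the current Thread (creating one on first use),
;   2. switches the thread to cooperative GC mode (taking the rare path if a
;      GC is in progress),
;   3. on hosted builds, notifies the host that the runtime is being entered,
;   4. checks that the thunk's AppDomain matches the current one,
;   5. copies any stack arguments and calls the IL marshaling stub.
;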
NESTED_ENTRY UMThunkStub, _TEXT, UMThunkStubUnwindFrameChainHandler

UMThunkStubAMD64_STACK_FRAME_SIZE = 0

; number of integer registers saved in prologue
UMThunkStubAMD64_NUM_REG_PUSHES = 2
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + (UMThunkStubAMD64_NUM_REG_PUSHES * 8)

; rare path spill area
UMThunkStubAMD64_RARE_PATH_SPILL_SIZE = 10h
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_RARE_PATH_SPILL_SIZE
UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE



; HOST_NOTIFY_FLAG
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE

; XMM save area
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL

; Ensure that the offset of the XMM save area will be 16-byte aligned.
if ((UMThunkStubAMD64_STACK_FRAME_SIZE + 8) MOD 16) ne 0        ; +8 for caller-pushed return address
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
endif

UMThunkStubAMD64_XMM_SAVE_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE

; Add in the callee scratch area size.
UMThunkStubAMD64_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE

; Now we have the full size of the stack frame.  The offsets have been computed relative to the
; top, so negate them to make them relative to the post-prologue rsp.
UMThunkStubAMD64_FRAME_OFFSET = UMThunkStubAMD64_CALLEE_SCRATCH_SIZE
UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET
UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET
UMThunkStubAMD64_XMM_SAVE_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_XMM_SAVE_NEGOFFSET
UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 - UMThunkStubAMD64_FRAME_OFFSET    ; +8 for return address
UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE - (UMThunkStubAMD64_NUM_REG_PUSHES * 8)

.errnz UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET, update UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET


;
; [ callee scratch ]            <-- new RSP
; [ callee scratch ]
; [ callee scratch ]
; [ callee scratch ]
; {optional stack args passed to callee}
; xmm0                          <-- RBP
; xmm1
; xmm2
; xmm3
; {optional padding to align xmm regs}
; HOST_NOTIFY_FLAG (set when a ReverseLeaveRuntime call must be made on exit)
; [rare path spill area]
; [rare path spill area]
; rbp save
; r12 save
; return address                <-- entry RSP
; [rcx home]
; [rdx home]
; [r8 home]
; [r9 home]
; stack arg 0
; stack arg 1
; ...

        push_nonvol_reg r12
        push_nonvol_reg rbp                                                                     ; stack_args
        alloc_stack     UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE
        set_frame       rbp, UMThunkStubAMD64_FRAME_OFFSET                                      ; stack_args
        mov             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0            ; hosted
        END_PROLOGUE

        ;
        ; Call GetThread()
        ;
        CALL_GETTHREAD                          ; will not trash r10
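        ; rax = current Thread*, or NULL if the runtime has not yet set up a
        ; Thread for this OS thread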
        test            rax, rax
        jz              DoThreadSetup

HaveThread:

        mov             r12, rax                ; r12 <- Thread*

        ;
        ; disable preemptive GC
        ;
        mov             dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1

        ;
        ; catch returning thread here if a GC is in progress
        ;
        cmp             [g_TrapReturningThreads], 0
        jnz             DoTrapReturningThreadsTHROW

InCooperativeMode:

ifdef FEATURE_INCLUDE_ALL_INTERFACES
        test            [gfHostConfig], ASM_CLRTASKHOSTED    ; inlined NDirect::IsHostHookEnabled       ; hosted
ifdef _DEBUG
        call            IsHostHookEnabledHelper
        test            eax, eax
endif ; _DEBUG
        jnz             NotifyHost_ReverseEnterRuntime                                                  ; hosted
Done_NotifyHost_ReverseEnterRuntime:
endif

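        ; check that the thunk's AppDomain is the one this thread is currently
        ; in; the 32-bit loads below zero-extend, so the 64-bit compare is safe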
        mov             rax, [r12 + OFFSETOF__Thread__m_pDomain]
        mov             eax, [rax + OFFSETOF__AppDomain__m_dwId]

        mov             r11d, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_dwDomainId]

        cmp             rax, r11
        jne             WrongAppDomain

        mov             r11, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]
        mov             eax, [r11 + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize]                      ; stack_args
        test            rax, rax                                                                        ; stack_args
        jnz             CopyStackArgs                                                                   ; stack_args

ArgumentsSetup:

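        ; the IL stub marshals the arguments and invokes the managed target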
        mov             rax, [r11 + OFFSETOF__UMThunkMarshInfo__m_pILStub]                              ; rax <- Stub*
        call            rax

PostCall:
        ;
        ; enable preemptive GC
        ;
        mov             dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 0

ifdef FEATURE_INCLUDE_ALL_INTERFACES
        cmp             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0                    ; hosted
        jnz             NotifyHost_ReverseLeaveRuntime                                                  ; hosted
Done_NotifyHost_ReverseLeaveRuntime:
endif

        ; epilog
        lea             rsp, [rbp - UMThunkStubAMD64_FRAME_OFFSET + UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE]
        pop             rbp                                                                             ; stack_args
        pop             r12
        ret


DoThreadSetup:
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9

        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
        ;            initial measurements indicate that this could be worth about a 5% savings in reverse
        ;            pinvoke overhead.
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3

        mov             [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
        call            CreateThreadBlockThrow
        mov             METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]

        mov             rcx,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
        mov             rdx,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h]
        mov             r8,   [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
        mov             r9,   [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]

        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
        movdqa          xmm0, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]
        movdqa          xmm1, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
        movdqa          xmm2, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
        movdqa          xmm3, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]

        jmp             HaveThread

DoTrapReturningThreadsTHROW:

        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9

        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
        ;            initial measurements indicate that this could be worth about a 5% savings in reverse
        ;            pinvoke overhead.
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3

        mov             [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
        mov             rcx, r12                                                                  ; Thread* pThread
        mov             rdx, METHODDESC_REGISTER                                                  ; UMEntryThunk* pUMEntry
        call            UMThunkStubRareDisableWorker
        mov             METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]

        mov             rcx,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
        mov             rdx,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h]
        mov             r8,   [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
        mov             r9,   [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]

        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
        movdqa          xmm0, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]
        movdqa          xmm1, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
        movdqa          xmm2, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
        movdqa          xmm3, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]

        jmp             InCooperativeMode

CopyStackArgs:
        ; rax = cbStackArgs (with 20h for register args subtracted out already)

        sub             rsp, rax
        and             rsp, -16
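        ; rsp now has room for the outgoing stack arguments and is 16-byte aligned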

        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8

        ; rax = number of bytes

        lea             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES]
        lea             rdx, [rsp + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE]

CopyLoop:
        ; rax = number of bytes
        ; rcx = src
        ; rdx = dest
        ; r8 = scratch

        add             rax, -8
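        ; the movs below do not modify flags, so the jnz re-tests ZF from the
        ; add above (loop until rax reaches zero)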
        mov             r8, [rcx + rax]
        mov             [rdx + rax], r8
        jnz             CopyLoop

        mov             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
        mov             rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h]
        mov             r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]

        jmp             ArgumentsSetup

ifdef FEATURE_INCLUDE_ALL_INTERFACES
NotifyHost_ReverseEnterRuntime:
        mov             [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER

        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9

        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
        ;            initial measurements indicate that this could be worth about a 5% savings in reverse
        ;            pinvoke overhead.
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3

        mov             rcx, r12
        call            ReverseEnterRuntimeHelper
        mov             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 1

        mov             rcx,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
        mov             rdx,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h]
        mov             r8,   [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
        mov             r9,   [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]

        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
        movdqa          xmm0, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]
        movdqa          xmm1, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
        movdqa          xmm2, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
        movdqa          xmm3, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]

        mov             METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]

        jmp             Done_NotifyHost_ReverseEnterRuntime

NotifyHost_ReverseLeaveRuntime:

        ; save rax, xmm0
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rax
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0

        mov             rcx, r12
        call            ReverseLeaveRuntimeHelper
        mov             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0

        ; restore rax, xmm0
        mov             rax, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
        movdqa          xmm0, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]

        jmp             Done_NotifyHost_ReverseLeaveRuntime
endif

WrongAppDomain:
        ;
        ; home register args to the stack
        ;
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9

        ;
        ; save off xmm registers
        ;
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
        movdqa          [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3

        ;
        ; call our helper to perform the AD transition
        ;
        mov             rcx, METHODDESC_REGISTER
        lea             r8,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET]
        mov             rax, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]
        mov             r9d, [rax + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize]
        call            UM2MDoADCallBack

        ; restore return value
        mov             rax,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
        movdqa          xmm0, [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]

        jmp             PostCall

NESTED_END UMThunkStub, _TEXT

;
; EXTERN_C void __stdcall UM2MThunk_WrapperHelper(
;       void *pThunkArgs,               ; rcx
;       int argLen,                     ; rdx
;       void *pAddr,                    ; r8            // not used
;       UMEntryThunk *pEntryThunk,      ; r9
;       Thread *pThread);               ; [entry_sp + 28h]
;
NESTED_ENTRY UM2MThunk_WrapperHelper, _TEXT


UM2MThunk_WrapperHelper_STACK_FRAME_SIZE = 0

; number of integer registers saved in prologue
UM2MThunk_WrapperHelper_NUM_REG_PUSHES = 3
UM2MThunk_WrapperHelper_STACK_FRAME_SIZE = UM2MThunk_WrapperHelper_STACK_FRAME_SIZE + (UM2MThunk_WrapperHelper_NUM_REG_PUSHES * 8)

UM2MThunk_WrapperHelper_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
UM2MThunk_WrapperHelper_STACK_FRAME_SIZE = UM2MThunk_WrapperHelper_STACK_FRAME_SIZE + UM2MThunk_WrapperHelper_CALLEE_SCRATCH_SIZE

; Ensure that rsp remains 16-byte aligned
if ((UM2MThunk_WrapperHelper_STACK_FRAME_SIZE + 8) MOD 16) ne 0        ; +8 for caller-pushed return address
UM2MThunk_WrapperHelper_STACK_FRAME_SIZE = UM2MThunk_WrapperHelper_STACK_FRAME_SIZE + 8
endif

UM2MThunk_WrapperHelper_FRAME_OFFSET = UM2MThunk_WrapperHelper_CALLEE_SCRATCH_SIZE
UM2MThunk_WrapperHelper_FIXED_STACK_ALLOC_SIZE = UM2MThunk_WrapperHelper_STACK_FRAME_SIZE - (UM2MThunk_WrapperHelper_NUM_REG_PUSHES * 8)

        push_nonvol_reg rsi
        push_nonvol_reg rdi
        push_nonvol_reg rbp
        alloc_stack     UM2MThunk_WrapperHelper_FIXED_STACK_ALLOC_SIZE
        set_frame       rbp, UM2MThunk_WrapperHelper_FRAME_OFFSET
        END_PROLOGUE

        ;
        ; We are in cooperative mode and in the correct domain.
        ; The host has also been notified that we've entered the
        ; runtime.  All that is left is to copy the stack arguments,
        ; set up the register arguments, and call the managed target.
        ;

        test            rdx, rdx
        jg              CopyStackArgs

ArgumentsSetup:
        mov             METHODDESC_REGISTER, r9

        mov             rsi, rcx                ; rsi <- pThunkArgs
        mov             rcx, [rsi +  0h]
        mov             rdx, [rsi +  8h]
        mov             r8,  [rsi + 10h]
        mov             r9,  [rsi + 18h]

        movdqa          xmm0, [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
        movdqa          xmm1, [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
        movdqa          xmm2, [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 20h]
        movdqa          xmm3, [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 30h]

        mov             rax, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]      ; rax <- UMThunkMarshInfo*
        mov             rax, [rax + OFFSETOF__UMThunkMarshInfo__m_pILStub]                              ; rax <- Stub*
        call            rax

        ; make sure we don't trash the return value
        mov             [rsi + 0h], rax
        movdqa          [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], xmm0

        lea             rsp, [rbp - UM2MThunk_WrapperHelper_FRAME_OFFSET + UM2MThunk_WrapperHelper_FIXED_STACK_ALLOC_SIZE]
        pop             rbp
        pop             rdi
        pop             rsi
        ret


CopyStackArgs:
        ; rdx = cbStackArgs (with 20h for register args subtracted out already)
        ; rcx = pSrcArgStack

        sub             rsp, rdx
        and             rsp, -16

        mov             r8, rcx

        lea             rsi, [rcx + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES]
        lea             rdi, [rsp + UM2MThunk_WrapperHelper_CALLEE_SCRATCH_SIZE]

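        ; rcx = qword count (byte count / 8); rsi/rdi already point at the
        ; source and destination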
        mov             rcx, rdx
        shr             rcx, 3

        rep movsq

        mov             rcx, r8

        jmp             ArgumentsSetup

NESTED_END UM2MThunk_WrapperHelper, _TEXT

ifdef _DEBUG
ifdef FEATURE_INCLUDE_ALL_INTERFACES

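; Debug-only helper: re-checks NDirect::IsHostHookEnabled in C++ while
; preserving the argument registers, so the inlined g_fHostConfig test in
; UMThunkStub can be verified against the real implementation.  The result is
; returned in eax.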
NESTED_ENTRY IsHostHookEnabledHelper, _TEXT

        push_nonvol_reg rcx
        push_nonvol_reg rdx
        push_nonvol_reg r8
        push_nonvol_reg r9
        push_nonvol_reg r10

IsHostHookEnabledHelper_FIXED_STACK_ALLOC_SIZE = 20h + 40h

        alloc_stack     IsHostHookEnabledHelper_FIXED_STACK_ALLOC_SIZE

        END_PROLOGUE

        movdqa          [rsp + 20h +  0h], xmm0
        movdqa          [rsp + 20h + 10h], xmm1
        movdqa          [rsp + 20h + 20h], xmm2
        movdqa          [rsp + 20h + 30h], xmm3

        call            NDirect__IsHostHookEnabled

        movdqa          xmm0, [rsp + 20h +  0h]
        movdqa          xmm1, [rsp + 20h + 10h]
        movdqa          xmm2, [rsp + 20h + 20h]
        movdqa          xmm3, [rsp + 20h + 30h]

        ; epilog
        add             rsp, IsHostHookEnabledHelper_FIXED_STACK_ALLOC_SIZE
        pop             r10
        pop             r9
        pop             r8
        pop             rdx
        pop             rcx
        ret
NESTED_END IsHostHookEnabledHelper, _TEXT

endif ; FEATURE_INCLUDE_ALL_INTERFACES
endif ; _DEBUG

ifdef FEATURE_MIXEDMODE
NESTED_ENTRY IJWNOADThunk__MakeCall, _TEXT
        ; METHODDESC_REGISTER = IJWNOADThunk*
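        ;
        ; Spill the argument registers, call FindThunkTarget to resolve the
        ; code address for the current AppDomain, restore the arguments, and
        ; tail-jump to the target returned in rax.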

        alloc_stack     68h

        save_reg_postrsp    rcx, 70h
        save_reg_postrsp    rdx, 78h
        save_reg_postrsp    r8,  80h
        save_reg_postrsp    r9,  88h

        save_xmm128_postrsp xmm0, 20h
        save_xmm128_postrsp xmm1, 30h
        save_xmm128_postrsp xmm2, 40h
        save_xmm128_postrsp xmm3, 50h
        END_PROLOGUE

        mov             rcx, METHODDESC_REGISTER
        call            IJWNOADThunk__FindThunkTarget

        movdqa          xmm0, [rsp + 20h]
        movdqa          xmm1, [rsp + 30h]
        movdqa          xmm2, [rsp + 40h]
        movdqa          xmm3, [rsp + 50h]

        mov             rcx, [rsp + 70h]
        mov             rdx, [rsp + 78h]
        mov             r8,  [rsp + 80h]
        mov             r9,  [rsp + 88h]

        ; The target is in rax
        add             rsp, 68h
        TAILJMP_RAX
NESTED_END IJWNOADThunk__MakeCall, _TEXT
endif ; FEATURE_MIXEDMODE

        end