1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 // File: AMD64/VirtualCallStubCpu.hpp
12 // See code:VirtualCallStubManager for details
14 // ============================================================================
16 #ifndef _VIRTUAL_CALL_STUB_AMD64_H
17 #define _VIRTUAL_CALL_STUB_AMD64_H
19 #include "dbginterface.h"
21 //#define STUB_LOGGING
24 // since we are placing code, we want byte packing of the structs
26 #define USES_LOOKUP_STUBS 1
28 /*********************************************************************************************
29 Stubs that contain code are all part of larger structs called Holders. There is a
30 Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder. Holders are
31 essentially an implementation trick that allowed rearranging the code sequences more
32 easily while trying out different alternatives, and for dealing with any alignment
33 issues in a way that was mostly immune to the actual code sequences. These Holders
34 should be revisited when the stub code sequences are fixed, since in many cases they
35 add extra space to a stub that is not really needed.
37 Stubs are placed in cache and hash tables. Since unaligned access of data in memory
38 is very slow, the keys used in those tables should be aligned. The things used as keys
39 typically also occur in the generated code, e.g. a token as an immediate part of an instruction.
40 For now, to avoid alignment computations as different code strategies are tried out, the key
41 fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction
42 streams aligned so that the immediate fields fall on aligned boundaries.
50 /*LookupStub**************************************************************************************
51 Virtual and interface call sites are initially setup to point at LookupStubs.
52 This is because the runtime type of the <this> pointer is not yet known,
53 so the target cannot be resolved. Note: if the jit is able to determine the runtime type
54 of the <this> pointer, it should be generating a direct call, not a virtual or interface call.
55 This stub pushes a lookup token onto the stack to identify the sought after method, and then
56 jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effectuate the lookup and
57 transfer of control to the appropriate target method implementation, perhaps patching the call site
58 along the way to point to a more appropriate stub. Hence callsites that point to LookupStubs
59 get quickly changed to point to another kind of stub.
63 inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
65 inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
66 inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }
69 friend struct LookupHolder;
71 // The lookup entry point starts with a nop in order to allow us to quickly see
72 // if the stub is a lookup stub or a dispatch stub. We can read the first byte
73 // of a stub to find out what kind of a stub we have.
75 BYTE _entryPoint [3]; // 90 nop / 48 B8 mov rax,
77 size_t _token; // xx xx xx xx xx xx xx xx 64-bit address
78 BYTE part2 [3]; // 50 push rax / 48 B8 mov rax,
80 size_t _resolveWorkerAddr; // xx xx xx xx xx xx xx xx 64-bit address
81 BYTE part3 [2]; // FF E0 jmp rax
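// Taken together, the bytes above expand to the following sequence (an illustrative
// reading of the layout only, not additional emitted code):
//
//     nop                           ; marker byte, see predictStubKind below
//     mov  rax, _token
//     push rax                      ; pass the dispatch token on the stack
//     mov  rax, _resolveWorkerAddr
//     jmp  rax                      ; tail-jump into the resolve worker in the EE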
84 /* LookupHolders are the containers for LookupStubs; they provide for any alignment of
85 stubs as necessary. In the case of LookupStubs, alignment is necessary since
86 LookupStubs are placed in a hash table keyed by token. */
89 static void InitializeStatic();
91 void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken);
93 LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
95 static LookupHolder* FromLookupEntry(PCODE lookupEntry);
98 friend struct LookupStub;
103 #endif // USES_LOOKUP_STUBS
106 struct DispatchStubShort;
107 struct DispatchStubLong;
108 struct DispatchHolder;
110 /*DispatchStub**************************************************************************************
111 The structure of a full dispatch stub in memory is a DispatchStub followed contiguously in memory
112 by either a DispatchStubShort or a DispatchStubLong. DispatchStubShort is used when the resolve
113 stub (failTarget()) is reachable by a rel32 (DISPL) jump. We make a pretty good effort to make sure
114 that the stub heaps are set up so that this is the case. If we allocate enough stubs that the heap
115 ends up allocating in a new block that is further away than a DISPL jump can go, then we end up using
116 a DispatchStubLong which is bigger but is a full 64-bit jump. */
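// Which form is used is decided at stub-creation time by checking whether the rel32
// displacement from the short form's jne to the fail target fits in 32 bits (see
// DispatchHolder::CanShortJumpDispatchStubReachFailTarget below). Roughly, as a sketch
// (pFrom stands for the first byte after the DISPL field):
//
//     INT64 displ    = (INT64)failTarget - (INT64)pFrom;
//     BOOL  useShort = FitsInI4(displ);    // rel32 reachable -> DispatchStubShort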
118 /*DispatchStubShort*********************************************************************************
119 This is the logical continuation of DispatchStub for the case when the failure target is within
120 a rel32 jump (DISPL). */
121 struct DispatchStubShort
123 friend struct DispatchHolder;
124 friend struct DispatchStub;
126 static BOOL isShortStub(LPCBYTE pCode);
127 inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _implTarget; }
128 inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; }
131 BYTE part1 [2]; // 0f 85 jne
132 DISPL _failDispl; // xx xx xx xx failEntry ;must be forward jmp for perf reasons
133 BYTE part2 [2]; // 48 B8 mov rax,
134 size_t _implTarget; // xx xx xx xx xx xx xx xx 64-bit address
135 BYTE part3 [2]; // FF E0 jmp rax
137 // DispatchStub + DispatchStubShort is 31 bytes; need 1 byte of padding to 8-byte align.
138 BYTE alignPad [1]; // cc
141 inline BOOL DispatchStubShort::isShortStub(LPCBYTE pCode)
143 LIMITED_METHOD_CONTRACT;
144 return reinterpret_cast<DispatchStubShort const *>(pCode)->part1[0] == 0x0f;
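// 0x0f is the first byte of the two-byte "jne rel32" encoding (0F 85) that only the
// short form starts with; the long form instead starts with the one-byte "jne rel8"
// (0x75), which is what DispatchStubLong::isLongStub checks for below.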
148 /*DispatchStubLong**********************************************************************************
149 This is the logical continuation of DispatchStub for the case when the failure target is not
150 reachable by a rel32 jump (DISPL). */
151 struct DispatchStubLong
153 friend struct DispatchHolder;
154 friend struct DispatchStub;
156 static inline BOOL isLongStub(LPCBYTE pCode);
157 inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _implTarget; }
158 inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _failTarget; }
161 BYTE part1 [1]; // 75 jne
162 BYTE _failDispl; // xx failLabel
163 BYTE part2 [2]; // 48 B8 mov rax,
164 size_t _implTarget; // xx xx xx xx xx xx xx xx 64-bit address
165 BYTE part3 [2]; // FF E0 jmp rax
167 BYTE part4 [2]; // 48 B8 mov rax,
168 size_t _failTarget; // xx xx xx xx xx xx xx xx 64-bit address
169 BYTE part5 [2]; // FF E0 jmp rax
171 // DispatchStub + DispatchStubLong is 39 bytes; need 1 byte of padding to 8-byte align.
172 BYTE alignPad [1]; // cc
175 inline BOOL DispatchStubLong::isLongStub(LPCBYTE pCode)
177 LIMITED_METHOD_CONTRACT;
178 return reinterpret_cast<DispatchStubLong const *>(pCode)->part1[0] == 0x75;
181 /*DispatchStub**************************************************************************************
182 Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs.
183 A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure).
184 If the <this> object in the calling frame is in fact of the expected type, then
185 control is transferred to the target address, the method implementation. If not,
186 then control is transferred to the fail address, a fail stub (see below) where a polymorphic
187 lookup is done to find the correct address to go to.
189 implementation note: Order, choice of instructions, and branch directions
190 should be carefully tuned since it can have an inordinate effect on performance. Particular
191 attention needs to be paid to the effects on the BTB and branch prediction, both in the small
192 and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
193 Note that since this stub is only used for mostly monomorphic callsites (ones that are not, get patched
194 to something else), the conditional jump "jne failure" is mostly not taken, and hence it is important
195 that the branch predictor statically predicts this, which means it must be a forward jump. The alternative
196 is to reverse the order of the jumps and make sure that the resulting conditional jump "je implTarget"
197 is statically predicted as taken, i.e. a backward jump. The current choice was taken since it was easier
198 to control the placement of the stubs than control the placement of the jitted code and the stubs. */
201 friend struct DispatchHolder;
203 enum DispatchStubType
209 inline DispatchStubType type() const
211 LIMITED_METHOD_CONTRACT;
212 CONSISTENCY_CHECK(DispatchStubShort::isShortStub(reinterpret_cast<LPCBYTE>(this + 1))
213 || DispatchStubLong::isLongStub(reinterpret_cast<LPCBYTE>(this + 1)));
214 return DispatchStubShort::isShortStub((BYTE *)(this + 1)) ? e_TYPE_SHORT : e_TYPE_LONG;
217 inline static size_t size(DispatchStubType type)
219 STATIC_CONTRACT_LEAF;
220 return sizeof(DispatchStub) +
221 ((type == e_TYPE_SHORT) ? sizeof(DispatchStubShort) : sizeof(DispatchStubLong));
224 inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
225 inline size_t expectedMT() const { LIMITED_METHOD_CONTRACT; return _expectedMT; }
226 inline size_t size() const { WRAPPER_NO_CONTRACT; return size(type()); }
228 inline PCODE implTarget() const
230 LIMITED_METHOD_CONTRACT;
231 if (type() == e_TYPE_SHORT)
232 return getShortStub()->implTarget();
234 return getLongStub()->implTarget();
237 inline PCODE failTarget() const
239 if (type() == e_TYPE_SHORT)
240 return getShortStub()->failTarget();
242 return getLongStub()->failTarget();
246 inline DispatchStubShort const *getShortStub() const
247 { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubShort const *>(this + 1); }
249 inline DispatchStubLong const *getLongStub() const
250 { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubLong const *>(this + 1); }
252 BYTE _entryPoint [2]; // 48 B8 mov rax,
253 size_t _expectedMT; // xx xx xx xx xx xx xx xx 64-bit address
254 BYTE part1 [3]; // 48 39 XX cmp [THIS_REG], rax
256 // Followed by either DispatchStubShort or DispatchStubLong, depending
257 // on whether we were able to make a rel32 or had to make an abs64 jump
258 // to the resolve stub on failure.
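// A rough C-level sketch of what DispatchStub plus its short/long continuation does
// (illustrative only; THIS_REG is RCX, or RDI under UNIX_AMD64_ABI, and the
// MethodTable* is the first pointer-sized field of the object):
//
//     if (*(size_t*)pThis == _expectedMT)   // mov rax, _expectedMT / cmp [THIS_REG], rax
//         goto implTarget;                  // mov rax, _implTarget  / jmp rax
//     else
//         goto failTarget;                  // jne to the resolve stub's fail entry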
262 /* DispatchHolders are the containers for DispatchStubs; they provide for any alignment of
263 stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both
264 are the pair expectedMT and token. Efficiency of the hash table is not a big issue,
265 since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently
266 (see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify
267 alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field.
268 While the token field can be logically gotten by following the failure target to the failEntryPoint
269 of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here.
270 This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct
271 for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
272 they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid).
275 /* @workaround for ee resolution - Since the EE does not currently have a resolver function that
276 does what we want, see notes in implementation of VirtualCallStubManager::Resolver, we are
277 using dispatch stubs to simulate what we want. That means that inlineTarget, which should be immutable,
278 is in fact written. Hence we have moved target out into the holder and aligned it so we can
279 atomically update it. When we get a resolver function that does what we want, we can drop this field,
280 and live with just the inlineTarget field in the stub itself, since immutability will hold.*/
281 struct DispatchHolder
283 static void InitializeStatic();
285 void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT,
286 DispatchStub::DispatchStubType type);
288 static size_t GetHolderSize(DispatchStub::DispatchStubType type)
289 { STATIC_CONTRACT_WRAPPER; return DispatchStub::size(type); }
291 static BOOL CanShortJumpDispatchStubReachFailTarget(PCODE failTarget, LPCBYTE stubMemory)
293 STATIC_CONTRACT_WRAPPER;
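// The rel32 displacement of the short form's jne is relative to the first byte after
// the DISPL field, i.e. the start of part2; use that address as the jump origin.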
294 LPCBYTE pFrom = stubMemory + sizeof(DispatchStub) + offsetof(DispatchStubShort, part2[0]);
295 size_t cbRelJump = failTarget - (PCODE)pFrom;
296 return FitsInI4(cbRelJump);
299 DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStub *>(this); }
301 static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry);
304 // DispatchStub follows here. It is dynamically sized on allocation
305 // because it could be a DispatchStubLong or a DispatchStubShort
309 struct ResolveHolder;
311 /*ResolveStub**************************************************************************************
312 Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. There is only
313 one resolver stub built for any given token, even though there may be many call sites that
314 use that token and many distinct <this> types that are used in the calling call frames. A resolver stub
315 actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their
316 expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should
317 be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces,
318 even though they are actually allocated as a single contiguous block of memory. These pieces are:
320 A ResolveStub has two entry points:
322 FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does
323 a check to see how often we are actually failing. If failures are frequent, control transfers to the
324 patch piece to cause the call site to be changed from a mostly monomorphic callsite
325 (calls dispatch stub) to a polymorphic callsite (calls resolve stub). If failures are rare, control
326 transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter
327 every time it is entered. The ee at various times will add a large chunk to the counter.
329 ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
330 <this> and the token identifying the (contract,method) pair desired. If found, control is transferred
331 to the method implementation. If not found in the cache, the token is pushed and the ee is entered via
332 the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since
333 there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed.
334 The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used,
335 as well as its speed. It turns out it is very important to make the hash function sensitive to all
336 of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before
337 making any changes to the code sequences here, it is very important to measure and tune them as perf
338 can vary greatly, in unexpected ways, with seemingly minor changes.
340 Implementation note - Order, choice of instructions, and branch directions
341 should be carefully tuned since it can have an inordinate effect on performance. Particular
342 attention needs to be paid to the effects on the BTB and branch prediction, both in the small
343 and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
344 Note that this stub is called in highly polymorphic cases, but the cache should have been sized
345 and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should
346 mostly be going down the cache hit route, and it is important that this be statically predicted as such.
347 Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically
348 gets put in the BTB since all the others typically fall straight thru. Minimizing potential BTB entries is important.
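// In C-like pseudocode the cache probe encoded below is roughly (a sketch only; field
// names follow ResolveCacheElem, and mask is CALL_STUB_CACHE_MASK * sizeof(void*)):
//
//     size_t mt   = *(size_t*)pThis;                                // current MethodTable
//     size_t hash = (((mt >> CALL_STUB_CACHE_NUM_BITS) + mt) ^ _hashedToken) & mask;
//     ResolveCacheElem *e = *(ResolveCacheElem **)(_cacheAddress + hash);
//     if (e->pMT == mt && e->token == _token)
//         goto e->target;                                           // cache hit
//     else
//         goto miss;                                                // push token, enter ResolveWorkerStub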
353 inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; }
354 inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; }
355 inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; }
357 inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; }
358 inline UINT32 hashedToken() { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; }
359 inline size_t cacheAddress() { LIMITED_METHOD_CONTRACT; return _cacheAddress; }
360 inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; }
361 inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }
364 friend struct ResolveHolder;
366 BYTE _resolveEntryPoint[3];// resolveStub: 52 push rdx / 49 BA mov r10,
369 size_t _cacheAddress; // xx xx xx xx xx xx xx xx 64-bit address
370 BYTE part1 [15]; // 48 8B XX mov rax, [THIS_REG] ; Compute hash = ((MT + MT>>12) ^ prehash)
371 // 48 8B D0 mov rdx, rax ; rdx <- current MethodTable
372 // 48 C1 E8 0C shr rax, 12
373 // 48 03 C2 add rax, rdx / 48 35 xor rax,
375 UINT32 _hashedToken; // xx xx xx xx hashedtoken ; xor with pre-hashed token
376 BYTE part2 [2]; // 48 25 and rax,
377 UINT32 mask; // xx xx xx xx cache_mask ; and with cache mask
378 BYTE part3 [6]; // 4A 8B 04 10 mov rax, [r10 + rax] ; get cache entry address / 49 BA mov r10,
380 size_t _token; // xx xx xx xx xx xx xx xx 64-bit address
381 BYTE part4 [3]; // 48 3B 50 cmp rdx, [rax+ ; compare our MT vs. cache MT
382 BYTE mtOffset; // xx ResolverCacheElem.pMT]
383 BYTE part5 [1]; // 75 jne
384 BYTE toMiss1; // xx miss ; must be forward jump, for perf reasons
385 BYTE part6 [3]; // 4C 3B 50 cmp r10, [rax+ ; compare our token vs. cache token
386 BYTE tokenOffset; // xx ResolverCacheElem.token]
387 BYTE part7 [1]; // 75 jne
388 BYTE toMiss2; // xx miss ; must be forward jump, for perf reasons
389 BYTE part8 [3]; // 48 8B 40 mov rax, [rax+ ; setup rax with method impl address
390 BYTE targetOffset; // xx ResolverCacheElem.target]
391 BYTE part9 [3]; // 5A pop rdx / FF E0 jmp rax
394 BYTE _failEntryPoint [2]; // 48 B8 mov rax,
395 INT32* _pCounter; // xx xx xx xx xx xx xx xx 64-bit address
396 BYTE part11 [4]; // 83 00 FF add dword ptr [rax], -1 / 7D jnl
398 BYTE toResolveStub1; // xx resolveStub
399 BYTE part12 [4]; // 49 83 CB 01 or r11, 1
400 BYTE _slowEntryPoint [3]; // 52 slow: push rdx / 49 BA mov r10,
402 size_t _tokenSlow; // xx xx xx xx xx xx xx xx 64-bit address
403 // BYTE miss [5]; // 5A miss: pop rdx ; don't pop rdx
404 // // 41 52 push r10 ; don't push r10 leave it setup with token
405 BYTE miss [3]; // 50 push rax ; push ptr to cache elem / 48 B8 mov rax,
407 size_t _resolveWorker; // xx xx xx xx xx xx xx xx 64-bit address
408 BYTE part10 [2]; // FF E0 jmp rax
411 /* ResolveHolders are the containers for ResolveStubs; they provide
412 for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by
413 the token for which they are built. Efficiency of access requires that this token be aligned.
414 For now, we have copied that field into the ResolveHolder itself; if the resolve stub is arranged such that
415 any of its inlined tokens (non-prehashed) is aligned, then the token field in the ResolveHolder is not needed.
419 static void InitializeStatic();
421 void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
422 size_t dispatchToken, UINT32 hashedToken,
423 void * cacheAddr, INT32* counterAddr);
425 ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }
427 static ResolveHolder* FromFailEntry(PCODE resolveEntry);
428 static ResolveHolder* FromResolveEntry(PCODE resolveEntry);
437 LookupStub lookupInit;
438 DispatchStub dispatchInit;
439 DispatchStubShort dispatchShortInit;
440 DispatchStubLong dispatchLongInit;
441 ResolveStub resolveInit;
443 #define INSTR_INT3 0xcc
444 #define INSTR_NOP 0x90
446 #ifndef DACCESS_COMPILE
448 #include "asmconstants.h"
451 extern size_t g_lookup_inline_counter;
452 extern size_t g_call_inline_counter;
453 extern size_t g_miss_inline_counter;
454 extern size_t g_call_cache_counter;
455 extern size_t g_miss_cache_counter;
458 /* Template used to generate the stub. We generate a stub by allocating a block of
459 memory, copying the template over it, and updating the specific fields that need to be changed. */
463 void LookupHolder::InitializeStatic()
465 static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0);
467 // The first instruction of a LookupStub is a nop
468 // and we use it in order to differentiate the first two bytes
469 // of a LookupStub and a ResolveStub
470 lookupInit._entryPoint [0] = INSTR_NOP;
471 lookupInit._entryPoint [1] = 0x48;
472 lookupInit._entryPoint [2] = 0xB8;
473 lookupInit._token = 0xcccccccccccccccc;
474 lookupInit.part2 [0] = 0x50;
475 lookupInit.part2 [1] = 0x48;
476 lookupInit.part2 [2] = 0xB8;
477 lookupInit._resolveWorkerAddr = 0xcccccccccccccccc;
478 lookupInit.part3 [0] = 0xFF;
479 lookupInit.part3 [1] = 0xE0;
482 void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
486 //fill in the stub specific fields
487 _stub._token = dispatchToken;
488 _stub._resolveWorkerAddr = (size_t) resolveWorkerTarget;
491 /* Template used to generate the stub. We generate a stub by allocating a block of
492 memory, copying the template over it, and updating the specific fields that need to be changed. */
496 void DispatchHolder::InitializeStatic()
498 // Check that _expectedMT is aligned in the DispatchHolder
499 static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubShort)) % sizeof(void*)) == 0);
500 static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubLong)) % sizeof(void*)) == 0);
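// The long form's jne uses the one-byte _failDispl displacement, measured from the
// byte after the jne (part2) to the fail-target sequence (part4); make sure that
// distance fits in a signed byte.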
501 CONSISTENCY_CHECK((offsetof(DispatchStubLong, part4[0]) - offsetof(DispatchStubLong, part2[0])) < INT8_MAX);
503 // Common dispatch stub initialization
504 dispatchInit._entryPoint [0] = 0x48;
505 dispatchInit._entryPoint [1] = 0xB8;
506 dispatchInit._expectedMT = 0xcccccccccccccccc;
507 dispatchInit.part1 [0] = 0x48;
508 dispatchInit.part1 [1] = 0x39;
509 #ifdef UNIX_AMD64_ABI
510 dispatchInit.part1 [2] = 0x07; // RDI
512 dispatchInit.part1 [2] = 0x01; // RCX
515 // Short dispatch stub initialization
516 dispatchShortInit.part1 [0] = 0x0F;
517 dispatchShortInit.part1 [1] = 0x85;
518 dispatchShortInit._failDispl = 0xcccccccc;
519 dispatchShortInit.part2 [0] = 0x48;
520 dispatchShortInit.part2 [1] = 0xb8;
521 dispatchShortInit._implTarget = 0xcccccccccccccccc;
522 dispatchShortInit.part3 [0] = 0xFF;
523 dispatchShortInit.part3 [1] = 0xE0;
524 dispatchShortInit.alignPad [0] = INSTR_INT3;
526 // Long dispatch stub initialization
527 dispatchLongInit.part1 [0] = 0x75;
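// rel8 displacement of the jne: from the byte following the jne (part2[0]) to the
// start of the fail-target sequence (part4[0]).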
528 dispatchLongInit._failDispl = BYTE(&dispatchLongInit.part4[0] - &dispatchLongInit.part2[0]);
529 dispatchLongInit.part2 [0] = 0x48;
530 dispatchLongInit.part2 [1] = 0xb8;
531 dispatchLongInit._implTarget = 0xcccccccccccccccc;
532 dispatchLongInit.part3 [0] = 0xFF;
533 dispatchLongInit.part3 [1] = 0xE0;
535 dispatchLongInit.part4 [0] = 0x48;
536 dispatchLongInit.part4 [1] = 0xb8;
537 dispatchLongInit._failTarget = 0xcccccccccccccccc;
538 dispatchLongInit.part5 [0] = 0xFF;
539 dispatchLongInit.part5 [1] = 0xE0;
540 dispatchLongInit.alignPad [0] = INSTR_INT3;
543 void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT,
544 DispatchStub::DispatchStubType type)
547 // Initialize the common area
550 // initialize the static data
551 *stub() = dispatchInit;
553 // fill in the dynamic data
554 stub()->_expectedMT = expectedMT;
557 // Initialize the short/long areas
559 if (type == DispatchStub::e_TYPE_SHORT)
561 DispatchStubShort *shortStub = const_cast<DispatchStubShort *>(stub()->getShortStub());
563 // initialize the static data
564 *shortStub = dispatchShortInit;
566 // fill in the dynamic data
567 size_t displ = (failTarget - ((PCODE) &shortStub->_failDispl + sizeof(DISPL)));
568 CONSISTENCY_CHECK(FitsInI4(displ));
569 shortStub->_failDispl = (DISPL) displ;
570 shortStub->_implTarget = (size_t) implTarget;
571 CONSISTENCY_CHECK((PCODE)&shortStub->_failDispl + sizeof(DISPL) + shortStub->_failDispl == failTarget);
575 CONSISTENCY_CHECK(type == DispatchStub::e_TYPE_LONG);
576 DispatchStubLong *longStub = const_cast<DispatchStubLong *>(stub()->getLongStub());
578 // initialize the static data
579 *longStub = dispatchLongInit;
581 // fill in the dynamic data
582 longStub->_implTarget = implTarget;
583 longStub->_failTarget = failTarget;
587 /* Template used to generate the stub. We generate a stub by allocating a block of
588 memory, copying the template over it, and updating the specific fields that need to be changed. */
592 void ResolveHolder::InitializeStatic()
594 static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0);
596 resolveInit._resolveEntryPoint [0] = 0x52;
597 resolveInit._resolveEntryPoint [1] = 0x49;
598 resolveInit._resolveEntryPoint [2] = 0xBA;
599 resolveInit._cacheAddress = 0xcccccccccccccccc;
600 resolveInit.part1 [ 0] = 0x48;
601 resolveInit.part1 [ 1] = 0x8B;
602 #ifdef UNIX_AMD64_ABI
603 resolveInit.part1 [ 2] = 0x07; // RDI
605 resolveInit.part1 [ 2] = 0x01; // RCX
607 resolveInit.part1 [ 3] = 0x48;
608 resolveInit.part1 [ 4] = 0x8B;
609 resolveInit.part1 [ 5] = 0xD0;
610 resolveInit.part1 [ 6] = 0x48;
611 resolveInit.part1 [ 7] = 0xC1;
612 resolveInit.part1 [ 8] = 0xE8;
613 resolveInit.part1 [ 9] = CALL_STUB_CACHE_NUM_BITS;
614 resolveInit.part1 [10] = 0x48;
615 resolveInit.part1 [11] = 0x03;
616 resolveInit.part1 [12] = 0xC2;
617 resolveInit.part1 [13] = 0x48;
618 resolveInit.part1 [14] = 0x35;
619 // Review truncation from unsigned __int64 to UINT32 of a constant value.
620 #if defined(_MSC_VER)
621 #pragma warning(push)
622 #pragma warning(disable:4305 4309)
623 #endif // defined(_MSC_VER)
625 resolveInit._hashedToken = 0xcccccccc;
627 #if defined(_MSC_VER)
#pragma warning(pop)
629 #endif // defined(_MSC_VER)
631 resolveInit.part2 [ 0] = 0x48;
632 resolveInit.part2 [ 1] = 0x25;
633 resolveInit.mask = CALL_STUB_CACHE_MASK*sizeof(void *);
634 resolveInit.part3 [0] = 0x4A;
635 resolveInit.part3 [1] = 0x8B;
636 resolveInit.part3 [2] = 0x04;
637 resolveInit.part3 [3] = 0x10;
638 resolveInit.part3 [4] = 0x49;
639 resolveInit.part3 [5] = 0xBA;
640 resolveInit._token = 0xcccccccccccccccc;
641 resolveInit.part4 [0] = 0x48;
642 resolveInit.part4 [1] = 0x3B;
643 resolveInit.part4 [2] = 0x50;
644 resolveInit.mtOffset = offsetof(ResolveCacheElem,pMT) & 0xFF;
645 resolveInit.part5 [0] = 0x75;
646 resolveInit.toMiss1 = (offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1)) & 0xFF;
647 resolveInit.part6 [0] = 0x4C;
648 resolveInit.part6 [1] = 0x3B;
649 resolveInit.part6 [2] = 0x50;
650 resolveInit.tokenOffset = offsetof(ResolveCacheElem,token) & 0xFF;
651 resolveInit.part7 [0] = 0x75;
652 resolveInit.toMiss2 = (offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1)) & 0xFF;
653 resolveInit.part8 [0] = 0x48;
654 resolveInit.part8 [1] = 0x8B;
655 resolveInit.part8 [2] = 0x40;
656 resolveInit.targetOffset = offsetof(ResolveCacheElem,target) & 0xFF;
657 resolveInit.part9 [0] = 0x5A;
658 resolveInit.part9 [1] = 0xFF;
659 resolveInit.part9 [2] = 0xE0;
660 resolveInit._failEntryPoint [0] = 0x48;
661 resolveInit._failEntryPoint [1] = 0xB8;
662 resolveInit._pCounter = (INT32*) (size_t) 0xcccccccccccccccc;
663 resolveInit.part11 [0] = 0x83;
664 resolveInit.part11 [1] = 0x00;
665 resolveInit.part11 [2] = 0xFF;
666 resolveInit.part11 [3] = 0x7D;
667 resolveInit.toResolveStub1 = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub1)+1)) & 0xFF;
668 resolveInit.part12 [0] = 0x49;
669 resolveInit.part12 [1] = 0x83;
670 resolveInit.part12 [2] = 0xCB;
671 resolveInit.part12 [3] = 0x01;
672 resolveInit._slowEntryPoint [0] = 0x52;
673 resolveInit._slowEntryPoint [1] = 0x49;
674 resolveInit._slowEntryPoint [2] = 0xBA;
675 resolveInit._tokenSlow = 0xcccccccccccccccc;
676 resolveInit.miss [0] = 0x50;
677 resolveInit.miss [1] = 0x48;
678 resolveInit.miss [2] = 0xB8;
679 resolveInit._resolveWorker = 0xcccccccccccccccc;
680 resolveInit.part10 [0] = 0xFF;
681 resolveInit.part10 [1] = 0xE0;
684 void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
685 size_t dispatchToken, UINT32 hashedToken,
686 void * cacheAddr, INT32* counterAddr)
690 //fill in the stub specific fields
691 _stub._cacheAddress = (size_t) cacheAddr;
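// Note: the stub ANDs the hash with CALL_STUB_CACHE_MASK * sizeof(void*), a byte-scaled
// mask whose low LOG2_PTRSIZE bits are zero, so the pre-hashed token is stored shifted
// left by LOG2_PTRSIZE to line its bits up with the bits that survive that mask.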
692 _stub._hashedToken = hashedToken << LOG2_PTRSIZE;
693 _stub._token = dispatchToken;
694 _stub._tokenSlow = dispatchToken;
695 _stub._resolveWorker = (size_t) resolveWorkerTarget;
696 _stub._pCounter = counterAddr;
699 ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry)
701 LIMITED_METHOD_CONTRACT;
702 ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) );
703 _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]);
704 return resolveHolder;
707 #endif // DACCESS_COMPILE
709 LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry)
711 LIMITED_METHOD_CONTRACT;
712 LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) );
713 _ASSERTE(lookupHolder->_stub._entryPoint[2] == lookupInit._entryPoint[2]);
return lookupHolder;
718 DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry)
720 LIMITED_METHOD_CONTRACT;
721 DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchStub, _entryPoint) );
722 _ASSERTE(dispatchHolder->stub()->_entryPoint[1] == dispatchInit._entryPoint[1]);
723 return dispatchHolder;
727 ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
729 LIMITED_METHOD_CONTRACT;
730 ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) );
731 _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]);
732 return resolveHolder;
735 VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
737 #ifdef DACCESS_COMPILE
738 return SK_BREAKPOINT; // Dac always uses the slower lookup
740 StubKind stubKind = SK_UNKNOWN;
744 // If stubStartAddress is completely bogus, then this might AV,
745 // so we protect it with SEH. An AV here is OK.
746 AVInRuntimeImplOkayHolder AVOkay;
748 WORD firstWord = *((WORD*) stubStartAddress);
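// firstWord holds the first two code bytes in little-endian order: 0xB848 is the byte
// sequence 48 B8 (REX.W mov rax, imm64) that begins a DispatchStub, 0x4890 is 90 48
// (a LookupStub's leading nop followed by a REX prefix), and 0x4952 is 52 49 (a
// ResolveStub's push rdx followed by a REX prefix).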
750 if (firstWord == 0xB848)
752 stubKind = SK_DISPATCH;
754 else if (firstWord == 0x4890)
756 stubKind = SK_LOOKUP;
758 else if (firstWord == 0x4952)
760 stubKind = SK_RESOLVE;
762 else if (firstWord == 0x48F8)
764 stubKind = SK_LOOKUP;
768 BYTE firstByte = ((BYTE*) stubStartAddress)[0];
769 BYTE secondByte = ((BYTE*) stubStartAddress)[1];
771 if ((firstByte == INSTR_INT3) || (secondByte == INSTR_INT3))
773 stubKind = SK_BREAKPOINT;
779 stubKind = SK_UNKNOWN;
781 EX_END_CATCH(SwallowAllExceptions);
785 #endif // DACCESS_COMPILE
788 #endif //DECLARE_DATA
790 #endif // _VIRTUAL_CALL_STUB_AMD64_H