// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
//
// File: VirtualCallStub.h
//
// See code:VirtualCallStubManager for details
//
// ============================================================================

#ifndef _VIRTUAL_CALL_STUB_H
#define _VIRTUAL_CALL_STUB_H

#ifndef CROSSGEN_COMPILE

#if defined(_TARGET_X86_)
// If this is uncommented, leaves a file "StubLog_<pid>.log" with statistics on the behavior
// of stub-based interface dispatch.
//#define STUB_LOGGING
/////////////////////////////////////////////////////////////////////////////////////
// Forward class declarations
class VirtualCallStubManager;
class VirtualCallStubManagerManager;
struct DispatchHolder;
struct VTableCallHolder;

/////////////////////////////////////////////////////////////////////////////////////
// Forward function declarations
extern "C" void InContextTPQuickDispatchAsmStub();

extern "C" PCODE STDCALL StubDispatchFixupWorker(TransitionBlock * pTransitionBlock,
                                                 TADDR siteAddrForRegisterIndirect,

extern "C" PCODE STDCALL VSD_ResolveWorker(TransitionBlock * pTransitionBlock,
                                           TADDR siteAddrForRegisterIndirect,
/////////////////////////////////////////////////////////////////////////////////////
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)

/////////////////////////////////////////////////////////////////////////////////////
// Represents the struct that is added to the resolve cache
// NOTE: If you change the layout of this struct, you'll need to update various
//       ASM helpers in VirtualCallStubCpu that rely on offsets of members.
struct ResolveCacheElem
    size_t  token;   // DispatchToken

    // These are used for chaining
    ResolveCacheElem *pNext;
    ResolveCacheElem *Next()
    { LIMITED_METHOD_CONTRACT; return VolatileLoad(&pNext); }

    BOOL Equals(size_t token, void *pMT)
    { LIMITED_METHOD_CONTRACT; return (this->pMT == pMT && this->token == token); }

    BOOL Equals(ResolveCacheElem *pElem)
    { WRAPPER_NO_CONTRACT; return Equals(pElem->token, pElem->pMT); }
    e_resolveCacheElem_sizeof_mt     = sizeof(void *),
    e_resolveCacheElem_sizeof_token  = sizeof(size_t),
    e_resolveCacheElem_sizeof_target = sizeof(void *),
    e_resolveCacheElem_sizeof_next   = sizeof(ResolveCacheElem *),

    e_resolveCacheElem_offset_mt     = 0,
    e_resolveCacheElem_offset_token  = e_resolveCacheElem_offset_mt + e_resolveCacheElem_sizeof_mt,
    e_resolveCacheElem_offset_target = e_resolveCacheElem_offset_token + e_resolveCacheElem_sizeof_token,
    e_resolveCacheElem_offset_next   = e_resolveCacheElem_offset_target + e_resolveCacheElem_sizeof_target,
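
// An illustrative consistency check (an assumption, not present in the original
// header; shown only to make the dependency explicit) that would tie the enum
// constants above to the struct layout the ASM helpers rely on:
//
//     static_assert(offsetof(ResolveCacheElem, token) == e_resolveCacheElem_offset_token,
//                   "ASM helpers in VirtualCallStubCpu assume this layout");
//     static_assert(offsetof(ResolveCacheElem, pNext) == e_resolveCacheElem_offset_next,
//                   "ASM helpers in VirtualCallStubCpu assume this layout");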
/////////////////////////////////////////////////////////////////////////////////////
// A utility class to help manipulate a call site
    friend class VirtualCallStubManager;

    // On x86 there are four possible kinds of call sites when you take into account all features:
    //  Relative:                  direct call, e.g. "call addr". Not used currently.
    //  RelativeIndirect (JmpRel): indirect call through a relative address, e.g. "call [addr]"
    //  RegisterIndirect:          indirect call through a register, e.g. "call [eax]"
    //  DelegateCallSite:          anything else, tail called through a register by shuffle thunk, e.g. "jmp [eax]"

    // On all other platforms we always use an indirect call through an indirection cell.
    // In these cases all calls are made by the platform equivalent of "call [addr]".

    // DelegateCallSites are particular in that they can come in a variety of forms:
    // a direct delegate call has a sequence defined by the jit, but multicast and secure delegate
    // calls are defined in a stub and have a different shape.
    PTR_PCODE m_siteAddr;   // Stores the address of an indirection cell

#if defined(_TARGET_X86_)
    StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr);

    PCODE GetCallerAddress();
#else // !defined(_TARGET_X86_)
    // On platforms where we always use an indirection cell things
    // are much simpler - the siteAddr always stores a pointer to a
    // value that in turn points to the indirection cell.

    StubCallSite(TADDR siteAddr, PCODE returnAddr)
    { LIMITED_METHOD_CONTRACT; m_siteAddr = dac_cast<PTR_PCODE>(siteAddr); m_returnAddr = returnAddr; }

    PCODE GetCallerAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; }
#endif // !defined(_TARGET_X86_)

    PCODE      GetSiteTarget()          { WRAPPER_NO_CONTRACT; return *(GetIndirectCell()); }
    void       SetSiteTarget(PCODE newTarget);
    PTR_PCODE  GetIndirectCell()        { LIMITED_METHOD_CONTRACT; return dac_cast<PTR_PCODE>(m_siteAddr); }
    PTR_PCODE *GetIndirectCellAddress() { LIMITED_METHOD_CONTRACT; return &m_siteAddr; }

    PCODE      GetReturnAddress()       { LIMITED_METHOD_CONTRACT; return m_returnAddr; }
#ifdef FEATURE_PREJIT
extern "C" void StubDispatchFixupStub();             // for lazy fixup of ngen call sites

// These are the assembly language entry points that the stubs use when they want to go into the EE
extern "C" void ResolveWorkerAsmStub();              // resolve a token and transfer control to that method
extern "C" void ResolveWorkerChainLookupAsmStub();   // for chaining of entries in the cache

extern "C" void BackPatchWorkerAsmStub();            // backpatch a call site to point to a different stub

extern "C" void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForRegisterIndirect);
#endif // FEATURE_PAL
#endif // _TARGET_X86_
typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;

// VirtualCallStubManager is the heart of the stub dispatch logic. See the book of the runtime entry
//
// file:../../doc/BookOfTheRuntime/ClassLoader/VirtualStubDispatchDesign.doc
//
// The basic idea is that a call to an interface (it could also be used for virtual calls in general, but we
// do not do this) is simply the code
//
//     call [DispatchCell]
//
// where we make sure 'DispatchCell' points at stubs that will do the right thing. DispatchCell is writable
// so we can update the code over time. There are three basic types of stubs that the dispatch cell can point to:
//     * Lookup: The initial stub that has no 'fast path' and simply pushes an ID for the interface being called
//         and calls into the runtime at code:VirtualCallStubManager.ResolveWorkerStatic.
//     * Dispatch: Lookup stubs are patched to this stub, which has a fast path that checks for a particular
//         Method Table and if that fails jumps to code that
//         * Decrements a 'missCount' (starts out as code:STUB_MISS_COUNT_VALUE). If this count goes to zero
//             code:VirtualCallStubManager.BackPatchWorkerStatic is called, which morphs the call site into a
//             resolve stub (however since this decrementing logic is SHARED among all dispatch stubs, it may take
//             multiples of code:STUB_MISS_COUNT_VALUE if multiple call sites are actively polymorphic (this
//         * Calls a resolve stub (whenever a dispatch stub is created, it always has a corresponding resolve
//             stub (but the resolve stubs are shared among many dispatch stubs).
//     * Resolve: see code:ResolveStub. This looks up the Method Table in a process-wide cache (see
//         code:ResolveCacheElem), and if found, jumps to it. This code path is about 17 instructions long (so
//         pretty fast, but certainly much slower than a normal call). If the method table is not found in
//         the cache, it calls into the runtime code:VirtualCallStubManager.ResolveWorkerStatic, which
//
// So the general progression is: call sites' cells
//     * start out life pointing to a lookup stub
//     * on the first call get updated to point to a dispatch stub. When this misses, it calls a resolve stub,
//         which populates the resolve stub's cache, but does not update the call site's cell (thus it is still
//         pointing at the dispatch stub).
//     * after code:STUB_MISS_COUNT_VALUE misses, we update the call site's cell to point directly at the
//         resolve stub (thus avoiding the overhead of the quick check that always seems to be failing and
//         the miss count update).
//
// QUESTION: What are the lifetimes of the various stubs and hash table entries?
//
// QUESTION: There does not seem to be any logic that will change a call site's cell once it becomes a
// Resolve stub. Thus once a particular call site becomes a Resolve stub we live with the Resolve stub's
// (in)efficiency forever.
//
// see code:#StubDispatchNotes for more
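//
// A rough sketch of the logic each stub kind implements (illustrative pseudocode
// only; the real stubs are emitted as raw machine code in virtualcallstubcpu.hpp):
//
//     // Lookup stub: no fast path, always calls the runtime
//     push dispatchToken
//     jmp  ResolveWorkerAsmStub
//
//     // Dispatch stub: monomorphic fast path
//     if (obj->pMT == expectedMT) jmp implTarget
//     else                        jmp failTarget   // resolve stub (or miss-count code)
//
//     // Resolve stub: hashed cache probe, runtime on a miss
//     elem = cache[Hash(obj->pMT, token)]
//     if (elem->pMT == obj->pMT && elem->token == token) jmp elem->target
//     else { push token; jmp ResolveWorkerAsmStub }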
class VirtualCallStubManager : public StubManager
    friend class ClrDataAccess;
    friend class VirtualCallStubManagerManager;
    friend class VirtualCallStubManagerIterator;

    VPTR_VTABLE_CLASS(VirtualCallStubManager, StubManager)

    virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManager"; }

    // The reason for our existence: return a callstub for type id and slot number,
    // where type id = 0 for the class contract (i.e. a virtual call), and type id > 0 for an
    // interface invoke where the id indicates which interface it is.
    //
    // The function is idempotent, i.e.
    // you'll get the same callstub twice if you call it with identical inputs.
    PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD);
    PCODE GetCallStub(TypeHandle ownerType, DWORD slot);

    // Stubs for vtable-based virtual calls with no lookups
    PCODE GetVTableCallStub(DWORD slot);

    // Generate a fresh indirection cell.
    BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE);

    // Set up static data structures - called during EEStartup
    static void InitStatic();
    static void UninitStatic();

    // Per instance initialization - called during AppDomain::Init and ::Uninit and for collectible loader allocators
    void Init(BaseDomain* pDomain, LoaderAllocator *pLoaderAllocator);

    //@TODO: the logging should be tied into the VM's normal logging mechanisms,
    //@TODO: for now we just always write a short log file called "StubLog_<pid>.log"
    static void StartupLogging();
    static void LoggingDump();
    static void FinishLogging();

    static void ResetCache();

    // Reclaim/rearrange any structures that can only be done during a gc sync point.
    // This is the mechanism we are using to avoid synchronization of a lot of our
    // cache and hash table accesses. We are requiring that during a gc sync point we are not
    // executing any stub code at all, hence at this time we are serialized on a single thread (gc)
    // and no other thread is accessing the data structures.
    static void ReclaimAll();
#ifndef DACCESS_COMPILE
    VirtualCallStubManager()
        dispatch_rangeList(),
        cache_entry_rangeList(),
        m_loaderAllocator(NULL),
        m_initialReservedMemForHeaps(NULL),
        m_FreeIndCellList(NULL),
        m_RecycledIndCellList(NULL),
        cache_entry_heap(NULL),
#ifdef _TARGET_AMD64_
        m_fShouldAllocateLongJumpDispatchStubs(FALSE),
        m_cur_counter_block(NULL),
        m_cur_counter_block_for_reclaim(NULL),
        m_cur_counter_block_for_reclaim_index(NULL),
        LIMITED_METHOD_CONTRACT;
        ZeroMemory(&stats, sizeof(stats));

    ~VirtualCallStubManager();
#endif // !DACCESS_COMPILE
        SK_LOOKUP,      // Lookup stubs are SLOW stubs that simply call into the runtime to do all work.
        SK_DISPATCH,    // Dispatch stubs have a fast check for one type, otherwise they jump to the runtime. Works for monomorphic sites.
        SK_RESOLVE,     // Resolve stubs do a hash lookup before falling back to the runtime. Works for polymorphic sites.
        SK_VTABLECALL,  // Stub that jumps to a target method using vtable-based indirections. Works for non-interface calls.

    // peek at the assembly code and predict which kind of a stub we have
    StubKind predictStubKind(PCODE stubStartAddress);
    /* know thine own stubs. It is possible that when multiple
       virtualcallstub managers are built that these may need to become
       non-static, and the callers modified accordingly */
    StubKind getStubKind(PCODE stubStartAddress, BOOL usePredictStubKind = TRUE)
        // This method can be called with stubStartAddress==NULL, e.g. when handling null reference exceptions
        // caused by IP=0. Early out for this case to avoid confusing handled access violations inside predictStubKind.
        if (PCODEToPINSTR(stubStartAddress) == NULL)

        // Rather than calling IsInRange(stubStartAddress) for each possible stub kind
        // we can peek at the assembly code and predict which kind of a stub we have
        StubKind predictedKind = (usePredictStubKind) ? predictStubKind(stubStartAddress) : SK_UNKNOWN;

        if (predictedKind == SK_DISPATCH)
            if (isDispatchingStub(stubStartAddress))
        else if (predictedKind == SK_LOOKUP)
            if (isLookupStub(stubStartAddress))
        else if (predictedKind == SK_RESOLVE)
            if (isResolvingStub(stubStartAddress))
        else if (predictedKind == SK_VTABLECALL)
            if (isVTableCallStub(stubStartAddress))
                return SK_VTABLECALL;

        // This is the slow case. If the predict returned SK_UNKNOWN, SK_BREAKPOINT,
        // or the predict was found to be incorrect when checked against the RangeLists
        // (isXXXStub), then we'll check each stub heap in sequence.
        if (isDispatchingStub(stubStartAddress))
        else if (isLookupStub(stubStartAddress))
        else if (isResolvingStub(stubStartAddress))
        else if (isVTableCallStub(stubStartAddress))
            return SK_VTABLECALL;
    inline BOOL isStub(PCODE stubStartAddress)
        return (getStubKind(stubStartAddress) != SK_UNKNOWN);

    BOOL isDispatchingStub(PCODE stubStartAddress)
        return GetDispatchRangeList()->IsInRange(stubStartAddress);

    BOOL isResolvingStub(PCODE stubStartAddress)
        return GetResolveRangeList()->IsInRange(stubStartAddress);

    BOOL isLookupStub(PCODE stubStartAddress)
        return GetLookupRangeList()->IsInRange(stubStartAddress);

    BOOL isVTableCallStub(PCODE stubStartAddress)
        return GetVTableCallRangeList()->IsInRange(stubStartAddress);

    static BOOL isDispatchingStubStatic(PCODE addr)
        FindStubManager(addr, &stubKind);
        return stubKind == SK_DISPATCH;

    static BOOL isResolvingStubStatic(PCODE addr)
        FindStubManager(addr, &stubKind);
        return stubKind == SK_RESOLVE;

    static BOOL isLookupStubStatic(PCODE addr)
        FindStubManager(addr, &stubKind);
        return stubKind == SK_LOOKUP;

    static BOOL isVtableCallStubStatic(PCODE addr)
        FindStubManager(addr, &stubKind);
        return stubKind == SK_VTABLECALL;
    //use range lists to track the chunks of memory that are part of each heap
    LockedRangeList lookup_rangeList;
    LockedRangeList resolve_rangeList;
    LockedRangeList dispatch_rangeList;
    LockedRangeList cache_entry_rangeList;
    LockedRangeList vtable_rangeList;

    // Get dac-ized pointers to rangelist.
    RangeList* GetLookupRangeList()
        TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, lookup_rangeList);
        return PTR_RangeList(addr);

    RangeList* GetResolveRangeList()
        TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, resolve_rangeList);
        return PTR_RangeList(addr);

    RangeList* GetDispatchRangeList()
        TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, dispatch_rangeList);
        return PTR_RangeList(addr);

    RangeList* GetCacheEntryRangeList()
        TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList);
        return PTR_RangeList(addr);

    RangeList* GetVTableCallRangeList()
        TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, vtable_rangeList);
        return PTR_RangeList(addr);
    //allocate and initialize a stub of the desired kind
    DispatchHolder *GenerateDispatchStub(PCODE addrOfCode,
                                         size_t dispatchToken,
                                         bool *pMayHaveReenteredCooperativeGCMode);

#ifdef _TARGET_AMD64_
    // Used to allocate a long jump dispatch stub. See comment around
    // m_fShouldAllocateLongJumpDispatchStubs for explanation.
    DispatchHolder *GenerateDispatchStubLong(PCODE addrOfCode,
                                             size_t dispatchToken,
                                             bool *pMayHaveReenteredCooperativeGCMode);

    ResolveHolder *GenerateResolveStub(PCODE addrOfResolver,
                                       size_t dispatchToken);

    LookupHolder *GenerateLookupStub(PCODE addrOfResolver,
                                     size_t dispatchToken);

    VTableCallHolder* GenerateVTableCallStub(DWORD slot);
    template <typename STUB_HOLDER>
    void AddToCollectibleVSDRangeList(STUB_HOLDER *holder)
        if (m_loaderAllocator->IsCollectible())
            parentDomain->GetCollectibleVSDRanges()->AddRange(reinterpret_cast<BYTE *>(holder->stub()),
                                                              reinterpret_cast<BYTE *>(holder->stub()) + holder->stub()->size(),

    // The resolve cache is static across all AppDomains
    ResolveCacheElem *GenerateResolveCacheElem(void *addrOfCode,
                                               bool *pMayHaveReenteredCooperativeGCMode);

    ResolveCacheElem *GetResolveCacheElem(void *pMT,

    // Given a dispatch token, an object and a method table, determine the
    // target address to go to. The return value (BOOL) states whether this address
    // is cacheable or not.
    static BOOL Resolver(MethodTable * pMT,
                         OBJECTREF * protectedObj,
                         BOOL throwOnConflict);

    // This can be used to find a target without needing the ability to throw
    static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace);

    // Return the MethodDesc corresponding to this token.
    static MethodDesc *GetRepresentativeMethodDescFromToken(DispatchToken token, MethodTable *pMT);
    static MethodDesc *GetInterfaceMethodDescFromToken(DispatchToken token);
    static MethodTable *GetTypeFromToken(DispatchToken token);

    // This is used to get the token out of a stub
    static size_t GetTokenFromStub(PCODE stub);

    // This is used to get the token out of a stub when we already know the stub manager and stub kind
    static size_t GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubKind kind);

    // General utility functions
    // Quick lookup in the cache. NOTHROW, GC_NOTRIGGER
    static PCODE CacheLookup(size_t token, UINT16 tokenHash, MethodTable *pMT);

    // Full exhaustive lookup. THROWS, GC_TRIGGERS
    static PCODE GetTarget(DispatchToken token, MethodTable *pMT, BOOL throwOnConflict);

    // Given a dispatch token, return true if the token represents an interface, false if just a slot.
    static BOOL IsInterfaceToken(DispatchToken token);

    // Given a dispatch token, return true if the token represents a slot on the target.
    static BOOL IsClassToken(DispatchToken token);

    static ResolveCacheElem* __fastcall PromoteChainEntry(ResolveCacheElem *pElem);
    // Flags used by the non-x86 versions of VSD_ResolveWorker
#define SDF_ResolveBackPatch    (0x01)
#define SDF_ResolvePromoteChain (0x02)
#define SDF_ResolveFlags        (0x03)

    // These methods need to call the instance methods.
    friend PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock,
                                   TADDR siteAddrForRegisterIndirect,

#if defined(_TARGET_X86_) && defined(FEATURE_PAL)
    friend void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForRegisterIndirect);

    // These are the entrypoints that the stubs actually end up calling via the
    // xxxAsmStub methods above
    static void STDCALL BackPatchWorkerStatic(PCODE returnAddr, TADDR siteAddrForRegisterIndirect);

    PCODE ResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token, StubKind stubKind);
    void BackPatchWorker(StubCallSite* pCallSite);

    // Change the callsite to point to stub
    void BackPatchSite(StubCallSite* pCallSite, PCODE stub);
    /* the following two public functions are to support tracing or stepping thru
       stubs via the debugger. */
    virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress);
    virtual BOOL TraceManager(Thread *thread,
                              TraceDestination *trace,

        LIMITED_METHOD_CONTRACT;
        retval += indcell_heap->GetSize();
        retval += cache_entry_heap->GetSize();
        retval += lookup_heap->GetSize();
        retval += dispatch_heap->GetSize();
        retval += resolve_heap->GetSize();

    /* the following two private functions are to support tracing or stepping thru
       stubs via the debugger. */
    virtual BOOL DoTraceStub(PCODE stubStartAddress,
                             TraceDestination *trace);

    // The parent domain of this manager
    PTR_BaseDomain parentDomain;

    PTR_LoaderAllocator m_loaderAllocator;

    BYTE * m_initialReservedMemForHeaps;

    static const UINT32 INDCELLS_PER_BLOCK = 32;   // 32 indirection cells per block.

    CrstExplicitInit m_indCellLock;

    // List of free indirection cells. The cells were directly allocated from the loader heap
    // (code:VirtualCallStubManager::GenerateStubIndirection)
    BYTE * m_FreeIndCellList;

    // List of recycled indirection cells. The cells were recycled from finalized dynamic methods
    // (code:LCGMethodResolver::RecycleIndCells).
    BYTE * m_RecycledIndCellList;
#ifndef DACCESS_COMPILE
    // This method returns a free cell from m_FreeIndCellList. It returns NULL if the list is empty.
    BYTE * GetOneFreeIndCell()
        return GetOneIndCell(&m_FreeIndCellList);

    // This method returns a recycled cell from m_RecycledIndCellList. It returns NULL if the list is empty.
    BYTE * GetOneRecycledIndCell()
        return GetOneIndCell(&m_RecycledIndCellList);

    // This method returns a cell from ppList. It returns NULL if the list is empty.
    BYTE * GetOneIndCell(BYTE ** ppList)
        PRECONDITION(CheckPointer(ppList));
        PRECONDITION(m_indCellLock.OwnedByCurrentThread());

        BYTE * temp = *ppList;
        BYTE * pNext = *((BYTE **)temp);

    // insert a linked list of indirection cells at the beginning of m_FreeIndCellList
    void InsertIntoFreeIndCellList(BYTE * head, BYTE * tail)
        InsertIntoIndCellList(&m_FreeIndCellList, head, tail);

    // insert a linked list of indirection cells at the beginning of ppList
    void InsertIntoIndCellList(BYTE ** ppList, BYTE * head, BYTE * tail)
        PRECONDITION(CheckPointer(ppList));
        PRECONDITION(CheckPointer(head));
        PRECONDITION(CheckPointer(tail));
        PRECONDITION(m_indCellLock.OwnedByCurrentThread());

        BYTE * temphead = *ppList;
        *((BYTE**)tail) = temphead;
#endif // !DACCESS_COMPILE
    PTR_LoaderHeap indcell_heap;       // indirection cells go here
    PTR_LoaderHeap cache_entry_heap;   // resolve cache elem entries go here
    PTR_LoaderHeap lookup_heap;        // lookup stubs go here
    PTR_LoaderHeap dispatch_heap;      // dispatch stubs go here
    PTR_LoaderHeap resolve_heap;       // resolve stubs go here
    PTR_LoaderHeap vtable_heap;        // vtable-based jump stubs go here

#ifdef _TARGET_AMD64_
    // When we layout the stub heaps, we put them close together in a sequential order
    // so that we maximize performance with respect to branch predictions. On AMD64,
    // dispatch stubs use a rel32 jump on failure to the resolve stub. This works for
    // a while because of the ordering, but as soon as we have to start allocating more
    // memory for either the dispatch or resolve heaps we have a chance that we'll be
    // further away than a rel32 jump can reach, because we're in a 64-bit address
    // space. As such, this flag will indicate when we allocate the first dispatch stub
    // that cannot reach a resolve stub, and when this happens we'll switch over to
    // allocating the larger version of the dispatch stub which contains an abs64 jump.
    //@TODO: This is a bit of a workaround, but the limitations of LoaderHeap require that we
    //@TODO: take this approach. Hopefully in Orcas we'll have a chance to rewrite LoaderHeap.
    BOOL m_fShouldAllocateLongJumpDispatchStubs;   // Defaults to FALSE.

    BucketTable * lookups;         // hash table of lookups keyed by tokens
    BucketTable * cache_entries;   // hash table of dispatch token/target structs for dispatch cache
    BucketTable * dispatchers;     // hash table of dispatching stubs keyed by tokens/actualtype
    BucketTable * resolvers;       // hash table of resolvers keyed by tokens/resolverstub
    BucketTable * vtableCallers;   // hash table of vtable call stubs keyed by slot values
    // This structure is used to keep track of the fail counters.
    // We only need one fail counter per ResolveStub,
    // and most programs use fewer than 250 ResolveStubs.
    // We allocate these on the main heap using "new counter block".
        static const UINT32 MAX_COUNTER_ENTRIES = 256-2;   // 254 counters should be enough for most cases.

        counter_block * next;                      // the next block
        UINT32          used;                      // the index of the next free entry
        INT32           block[MAX_COUNTER_ENTRIES];  // the counters

    counter_block *m_counters;                         // linked list of counter blocks of failure counters
    counter_block *m_cur_counter_block;                // current block for updating counts
    counter_block *m_cur_counter_block_for_reclaim;    // current block for reclaiming
    UINT32         m_cur_counter_block_for_reclaim_index;   // index into the current block for reclaiming
    // Used to keep track of all the VCSManager objects in the system.
    PTR_VirtualCallStubManager m_pNext;   // Linked list pointer

    // Given a stub address, find the VCSManager that owns it.
    static VirtualCallStubManager *FindStubManager(PCODE addr,
                                                   StubKind* wbStubKind = NULL,
                                                   BOOL usePredictStubKind = TRUE);

#ifndef DACCESS_COMPILE
    // insert a linked list of indirection cells at the beginning of m_RecycledIndCellList
    void InsertIntoRecycledIndCellList_Locked(BYTE * head, BYTE * tail)
        CrstHolder lh(&m_indCellLock);

        InsertIntoIndCellList(&m_RecycledIndCellList, head, tail);
#endif // !DACCESS_COMPILE

    // These are the counters for keeping statistics
        UINT32 site_counter;            //# of call sites
        UINT32 stub_lookup_counter;     //# of lookup stubs
        UINT32 stub_poly_counter;       //# of resolve stubs
        UINT32 stub_mono_counter;       //# of dispatch stubs
        UINT32 stub_vtable_counter;     //# of vtable call stubs
        UINT32 site_write;              //# of call site backpatch writes
        UINT32 site_write_poly;         //# of call site backpatch writes to point to resolve stubs
        UINT32 site_write_mono;         //# of call site backpatch writes to point to dispatch stubs
        UINT32 worker_call;             //# of calls into ResolveWorker
        UINT32 worker_call_no_patch;    //# of times call_worker resulted in no patch
        UINT32 worker_collide_to_mono;  //# of times we converted a poly stub to a mono stub instead of writing the cache entry
        UINT32 stub_space;              //# of bytes of stubs
        UINT32 cache_entry_counter;     //# of cache structs
        UINT32 cache_entry_space;       //# of bytes used by cache lookup structs
#ifdef DACCESS_COMPILE
    virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags);
    virtual LPCWSTR GetStubManagerName(PCODE addr)
        CONSISTENCY_CHECK(isStub(addr));

        if (isLookupStub(addr))
            return W("VSD_LookupStub");
        else if (isDispatchingStub(addr))
            return W("VSD_DispatchStub");

        CONSISTENCY_CHECK(isResolvingStub(addr));
        return W("VSD_ResolveStub");

/********************************************************************************************************
********************************************************************************************************/
typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager;

class VirtualCallStubManagerIterator;
class VirtualCallStubManagerManager : public StubManager
    VPTR_VTABLE_CLASS(VirtualCallStubManagerManager, StubManager)

    friend class StubManager;
    friend class VirtualCallStubManager;
    friend class VirtualCallStubManagerIterator;
    friend class StubManagerIterator;

    virtual BOOL TraceManager(Thread *thread, TraceDestination *trace,
                              T_CONTEXT *pContext, BYTE **pRetAddr);

    virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress);

    virtual BOOL DoTraceStub(PCODE stubStartAddress, TraceDestination *trace);

    static MethodDesc *Entry2MethodDesc(PCODE stubStartAddress, MethodTable *pMT);

#ifdef DACCESS_COMPILE
    virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags);
    virtual LPCWSTR GetStubManagerName(PCODE addr)
        { WRAPPER_NO_CONTRACT; return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); }

    // Used to keep track of all the VCSManager objects in the system.
    PTR_VirtualCallStubManager m_pManagers;   // Head of the linked list

#ifndef DACCESS_COMPILE
    // Ctor. This is only used by StaticInit.
    VirtualCallStubManagerManager();

    // A cache element to quickly check the last matched manager.
    Volatile<VirtualCallStubManager*> m_pCacheElem;

    // RW lock for reading entries and removing them.
    SimpleRWLock m_RWLock;

    // This will look through all the managers in an intelligent fashion to
    // find the manager that owns the address.
    VirtualCallStubManager *FindVirtualCallStubManager(PCODE stubAddress);

    // Add a VCSManager to the linked list.
    void AddStubManager(VirtualCallStubManager *pMgr);

    // Remove a VCSManager from the linked list.
    void RemoveStubManager(VirtualCallStubManager *pMgr);

    VirtualCallStubManager *FirstManager()
        { WRAPPER_NO_CONTRACT; return m_pManagers; }

#ifndef DACCESS_COMPILE
    static void InitStatic();

    SPTR_DECL(VirtualCallStubManagerManager, g_pManager);

    static VirtualCallStubManagerManager *GlobalManager()
        { LIMITED_METHOD_DAC_CONTRACT; CONSISTENCY_CHECK(CheckPointer(g_pManager)); return g_pManager; }

    VirtualCallStubManagerIterator IterateVirtualCallStubManagers();

    // Debug helper to help identify stub-managers.
    virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManagerManager"; }
/********************************************************************************************************
********************************************************************************************************/
class VirtualCallStubManagerIterator
    friend class VirtualCallStubManagerManager;

    VirtualCallStubManager *Current();

    inline VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it);

    inline VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr);

    VirtualCallStubManager *m_pCurMgr;

/////////////////////////////////////////////////////////////////////////////////////////////

inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr)
    : m_fIsStart(TRUE), m_pCurMgr(pMgr->m_pManagers)
    LIMITED_METHOD_DAC_CONTRACT;
    CONSISTENCY_CHECK(CheckPointer(pMgr));

/////////////////////////////////////////////////////////////////////////////////////////////

inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it)
    : m_fIsStart(it.m_fIsStart), m_pCurMgr(it.m_pCurMgr)
    LIMITED_METHOD_DAC_CONTRACT;
/********************************************************************************************************
#StubDispatchNotes
A note on approach. The cache and hash tables used by the stub and lookup mechanism
are designed with an eye to minimizing interlocking and/or syncing and/or locking operations.
They are intended to run in a highly concurrent environment. Since there is no magic,
some tradeoffs and some implementation constraints are required. The basic notion
is that if all reads and writes are atomic and if all functions and operations operate
correctly in the face of commutative reorderings of the visibility of all reads and writes
across threads, then we don't have to interlock, sync, or serialize. Our approximation of
this is:

1. All reads and all writes to tables must be atomic. This effectively limits the actual entry
size in a table to be a pointer or a pointer sized thing.

2. All functions, like comparisons for equality or computation of hash values, must function
correctly in the face of concurrent updating of the underlying table. This is accomplished
by making the underlying structures/entries effectively immutable, if concurrency is in any way possible.
By effectively immutable, we mean that the stub or token structure is either immutable or that
if it is ever written, all possibly concurrent writes are attempting to write the same value (atomically)
or that the competing (atomic) values do not affect correctness, and that the function operates correctly whether
or not any of the writes have taken place (is visible yet). The constraint we maintain is that all competing
updates (and their visibility or lack thereof) do not alter the correctness of the program.

3. All tables are inexact. The counts they hold (e.g. number of contained entries) may be inaccurate,
but that inaccuracy cannot affect their correctness. Table modifications, such as insertion of
a new entry, may not succeed, but such failures cannot affect correctness. This implies that just
because a stub/entry is not present in a table, e.g. has been removed, that does not mean that
it is not in use. It also implies that internal table structures, such as discarded hash table buckets,
cannot be freely recycled since another concurrent thread may still be walking thru it.

4. Occasionally it is necessary to pick up the pieces that have been dropped on the floor,
so to speak, e.g. actually recycle hash buckets that aren't in use. Since we have a natural
sync point already in the GC, we use that to provide cleanup points. We need to make sure that code that
is walking our structures is not at a GC safe point. Hence if the GC calls back into us inside the GC
sync point, we know that nobody is inside our structures and we can safely rearrange and recycle things.
********************************************************************************************************/
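
// A minimal sketch of the publication pattern rules 1-3 above imply (an assumed
// shape for illustration, not a quote of the real table code, which lives in
// FastTable/BucketTable below): a writer fully initializes an entry before
// publishing it with a single atomic pointer-sized store, and a reader
// tolerates either seeing it or not:
//
//     // writer: initialize while still private, then publish atomically
//     elem->token  = token;
//     elem->target = target;
//     VolatileStore(&slot, elem);
//
//     // reader: a single atomic load; either outcome is correct
//     ResolveCacheElem* e = VolatileLoad(&slot);
//     if (e != NULL && e->Equals(token, pMT)) { /* use e->target */ }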
//initial and increment value for fail stub counters
extern UINT32 STUB_MISS_COUNT_VALUE;
extern UINT32 STUB_COLLIDE_WRITE_PCT;
extern UINT32 STUB_COLLIDE_MONO_PCT;
#else // !STUB_LOGGING
#define STUB_MISS_COUNT_VALUE  100
#define STUB_COLLIDE_WRITE_PCT 100
#define STUB_COLLIDE_MONO_PCT    0
#endif // !STUB_LOGGING

//size and mask of the cache used by resolve stubs
// CALL_STUB_CACHE_SIZE must be equal to 2^CALL_STUB_CACHE_NUM_BITS
#define CALL_STUB_CACHE_NUM_BITS 12 //10
#define CALL_STUB_CACHE_SIZE 4096 //1024
#define CALL_STUB_CACHE_MASK (CALL_STUB_CACHE_SIZE-1)
#define CALL_STUB_CACHE_PROBES 5
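// Illustrative check of the 2^N relationship called out above (an assumption,
// not present in the original source):
//
//     static_assert(CALL_STUB_CACHE_SIZE == (1 << CALL_STUB_CACHE_NUM_BITS),
//                   "CALL_STUB_CACHE_SIZE must equal 2^CALL_STUB_CACHE_NUM_BITS");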
//min sizes for BucketTable and buckets and the growth and hashing constants
#define CALL_STUB_MIN_BUCKETS 32
#define CALL_STUB_MIN_ENTRIES 4
//this is so that the very first growth will jump from 4 to 32 entries, then double from there.
#define CALL_STUB_SECONDARY_ENTRIES 8
#define CALL_STUB_GROWTH_FACTOR 2
#define CALL_STUB_LOAD_FACTOR 90
#define CALL_STUB_HASH_CONST1 1327
#define CALL_STUB_HASH_CONST2 43627
#define LARGE_PRIME 7199369
//internal layout of buckets=size-1,count,entries....
#define CALL_STUB_MASK_INDEX 0
#define CALL_STUB_COUNT_INDEX 1
#define CALL_STUB_DEAD_LINK 2
#define CALL_STUB_FIRST_INDEX 3
//marker entries in cache and hash tables
#define CALL_STUB_EMPTY_ENTRY 0
// number of successes for a chained element before it gets moved to the front
#define CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT (0x100)
/*******************************************************************************************************
Entry is an abstract class. We will make specific subclasses for each kind of
entry. Entries hold references to stubs or tokens. The principal thing they provide
is a virtual Equals function that is used by the caching and hashing tables within which
the stubs and tokens are stored. Entries are typically stack allocated by the routines
that call into the hash and caching functions, and the functions stuff stubs into the entry
to do the comparisons. Essentially specific entry subclasses supply a vtable to a stub
as and when needed. This means we don't have to have vtables attached to stubs.

Summarizing so far, there is a struct for each kind of stub or token of the form XXXXStub.
They provide the actual storage layouts.
There is a struct of the form XXXXHolder in which each stub that has code is contained.
They provide alignment and ancillary storage for the stub code.
There is a subclass of Entry for each kind of stub or token, of the form XXXXEntry.
They provide the specific implementations of the virtual functions declared in Entry. */
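
/* A sketch of the typical usage pattern described above (illustrative only; the
   names match the classes declared later in this file):

       LookupEntry entry;                        // stack-allocated, supplies the vtable
       Prober      probe(&entry);                // prober compares via the entry
       if (table->SetUpProber(token, 0, &probe))
           size_t stub = table->Find(&probe);    // CALL_STUB_EMPTY_ENTRY if absent
*/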
    //access and compare the keys of the entry
    virtual BOOL Equals(size_t keyA, size_t keyB)=0;
    virtual size_t KeyA()=0;
    virtual size_t KeyB()=0;

    //contents is the struct or token that the entry exposes
    virtual void SetContents(size_t contents)=0;

/* define the platform specific Stubs and stub holders */

#include <virtualcallstubcpu.hpp>
#if USES_LOOKUP_STUBS
/**********************************************************************************************
LookupEntry wraps LookupStubs and provides the concrete implementation of the abstract class Entry.
Virtual and interface call sites, when they are first jitted, point to LookupStubs. The hash table
that contains lookup stubs is keyed by token, hence the Equals function uses the embedded token in
the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as
long as they are relatively rare) we do use direct comparison of the tokens rather than extracting
the fields from within the tokens, for perf reasons. */
class LookupEntry : public Entry
    //Creates an entry that wraps lookup stub s
    LookupEntry(size_t s)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)s));
        stub = (LookupStub*) s;

    //default constructor to allow stack and inline allocation of lookup entries
    LookupEntry() {LIMITED_METHOD_CONTRACT; stub = NULL;}

    //implementations of abstract class Entry
    BOOL Equals(size_t keyA, size_t keyB)
        { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); }

    size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); }
    size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; }

    void SetContents(size_t contents)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)contents));
        stub = LookupHolder::FromLookupEntry((PCODE)contents)->stub();

    //extract the token of the underlying lookup stub
    inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; }

    LookupStub* stub;   //the stub the entry is wrapping
#endif // USES_LOOKUP_STUBS
class VTableCallEntry : public Entry
    //Creates an entry that wraps vtable call stub s
    VTableCallEntry(size_t s)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(VirtualCallStubManager::isVtableCallStubStatic((PCODE)s));
        stub = (VTableCallStub*)s;

    //default constructor to allow stack and inline allocation of vtable call entries
    VTableCallEntry() { LIMITED_METHOD_CONTRACT; stub = NULL; }

    //implementations of abstract class Entry
    BOOL Equals(size_t keyA, size_t keyB)
        WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB());

    size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); }
    size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; }

    void SetContents(size_t contents)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(VirtualCallStubManager::isVtableCallStubStatic((PCODE)contents));
        stub = VTableCallHolder::FromVTableCallEntry((PCODE)contents)->stub();

    //extract the token of the underlying vtable call stub
    inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; }

    VTableCallStub* stub;   //the stub the entry is wrapping
/**********************************************************************************************
ResolveCacheEntry wraps a ResolveCacheElem and provides lookup functionality for entries that
were created and that may be added to the ResolveCache. */
class ResolveCacheEntry : public Entry
    ResolveCacheEntry(size_t elem)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(elem != 0);
        pElem = (ResolveCacheElem*) elem;

    //default constructor to allow stack and inline allocation of lookup entries
    ResolveCacheEntry() { LIMITED_METHOD_CONTRACT; pElem = NULL; }

    //access and compare the keys of the entry
    virtual BOOL Equals(size_t keyA, size_t keyB)
        { WRAPPER_NO_CONTRACT; return pElem && (keyA == KeyA()) && (keyB == KeyB()); }
    virtual size_t KeyA()
        { LIMITED_METHOD_CONTRACT; return pElem != NULL ? pElem->token : 0; }
    virtual size_t KeyB()
        { LIMITED_METHOD_CONTRACT; return pElem != NULL ? (size_t) pElem->pMT : 0; }

    //contents is the struct or token that the entry exposes
    virtual void SetContents(size_t contents)
        LIMITED_METHOD_CONTRACT;
        pElem = (ResolveCacheElem*) contents;

    inline const BYTE *Target()
        LIMITED_METHOD_CONTRACT;
        return pElem != NULL ? (const BYTE *)pElem->target : NULL;

    ResolveCacheElem *pElem;
/**********************************************************************************************
ResolveEntry wraps ResolveStubs and provides the concrete implementation of the abstract class Entry.
Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. Resolve stubs
are stored in hash tables keyed by token, hence the Equals function uses the embedded token in
the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as
long as they are relatively rare) we do use direct comparison of the tokens rather than extracting
the fields from within the tokens, for perf reasons. */
class ResolveEntry : public Entry
    //Creates an entry that wraps resolve stub s
    ResolveEntry (size_t s)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)s));
        stub = (ResolveStub*) s;
    //default constructor to allow stack and inline allocation of resolver entries
    ResolveEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; }

    //implementations of abstract class Entry
    inline BOOL Equals(size_t keyA, size_t keyB)
        { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); }
    inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); }
    inline size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; }

    void SetContents(size_t contents)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)contents));
        stub = ResolveHolder::FromResolveEntry((PCODE)contents)->stub();
    //extract the token of the underlying resolve stub
    inline size_t Token() { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->token()) : 0; }

    ResolveStub* stub;   //the stub the entry is wrapping
/**********************************************************************************************
DispatchEntry wraps DispatchStubs and provides the concrete implementation of the abstract class Entry.
Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs. Dispatch stubs
are placed in hash and cache tables keyed by the expected Method Table and token they are built for.
Since we are willing to allow duplicates in the hash table (as long as they are relatively rare)
we do use direct comparison of the tokens rather than extracting the fields from within the tokens,
for perf reasons. */
class DispatchEntry : public Entry
    //Creates an entry that wraps dispatch stub s
    DispatchEntry (size_t s)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)s));
        stub = (DispatchStub*) s;
    //default constructor to allow stack and inline allocation of dispatch entries
    DispatchEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; }

    //implementations of abstract class Entry
    inline BOOL Equals(size_t keyA, size_t keyB)
        { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); }
    inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); }
    inline size_t KeyB() { WRAPPER_NO_CONTRACT; return ExpectedMT();}

    void SetContents(size_t contents)
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)contents));
        stub = DispatchHolder::FromDispatchEntry((PCODE)contents)->stub();

    //extract the fields of the underlying dispatch stub
    inline size_t ExpectedMT()
        { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->expectedMT()) : 0; }

        WRAPPER_NO_CONTRACT;

        ResolveHolder * resolveHolder = ResolveHolder::FromFailEntry(stub->failTarget());
        size_t token = resolveHolder->stub()->token();
        _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub));

    inline PCODE Target()
        { WRAPPER_NO_CONTRACT; return stub ? stub->implTarget() : 0; }
/*************************************************************************************************
DispatchCache is the cache table that the resolve stubs use for inline polymorphic resolution
of a call. The cache entry is logically a triplet of (method table, token, impl address) where method table
is the type of the calling frame's <this>, token identifies the method being invoked,
i.e. is a (type id,slot #) pair, and impl address is the address of the method implementation. */
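
// A sketch of the probe a resolve stub logically performs against this table
// (illustrative only; the real probe is emitted as assembly, see HashMT below):
//
//     UINT16 hash = DispatchCache::HashMT(DispatchCache::HashToken(token), pMT);
//     ResolveCacheElem *e = pCache->Lookup(token, hash, pMT);
//     if (e != NULL) { /* jump to e->target */ }
//     else           { /* fall back to VSD_ResolveWorker */ }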
    static const UINT16 INVALID_HASH = (UINT16)(-1);

    //read and write the cache keyed by (method table,token) pair.
    inline ResolveCacheElem* Lookup(size_t token, void* mt)
        { WRAPPER_NO_CONTRACT; return Lookup(token, INVALID_HASH, mt);}

    ResolveCacheElem* Lookup(size_t token, UINT16 tokenHash, void* mt);

    enum InsertKind {IK_NONE, IK_DISPATCH, IK_RESOLVE, IK_SHARED, IK_EXTERNAL};

    BOOL Insert(ResolveCacheElem* elem, InsertKind insertKind);

    void PromoteChainEntry(ResolveCacheElem* elem);

    // This is the heavyweight hashing algorithm. Use sparingly.
    static UINT16 HashToken(size_t token);

    inline void GetLoadFactor(size_t *total, size_t *used)
        LIMITED_METHOD_CONTRACT;

        *total = CALL_STUB_CACHE_SIZE;
        for (size_t i = 0; i < CALL_STUB_CACHE_SIZE; i++)
            if (cache[i] != empty)

    inline void *GetCacheBaseAddr()
        { LIMITED_METHOD_CONTRACT; return &cache[0]; }
    inline size_t GetCacheCount()
        { LIMITED_METHOD_CONTRACT; return CALL_STUB_CACHE_SIZE; }
    inline ResolveCacheElem *GetCacheEntry(size_t idx)
        { LIMITED_METHOD_CONTRACT; return VolatileLoad(&cache[idx]); }
    inline BOOL IsCacheEntryEmpty(size_t idx)
        { LIMITED_METHOD_CONTRACT; return cache[idx] == empty; }

    inline void SetCacheEntry(size_t idx, ResolveCacheElem *elem)
        LIMITED_METHOD_CONTRACT;
        cacheData[idx].numWrites++;
        CONSISTENCY_CHECK(m_writeLock.OwnedByCurrentThread());

    inline void ClearCacheEntry(size_t idx)
        LIMITED_METHOD_CONTRACT;
        cacheData[idx].numClears++;

        UINT32 insert_cache_external;   //# of times Insert was called for IK_EXTERNAL
        UINT32 insert_cache_shared;     //# of times Insert was called for IK_SHARED
        UINT32 insert_cache_dispatch;   //# of times Insert was called for IK_DISPATCH
        UINT32 insert_cache_resolve;    //# of times Insert was called for IK_RESOLVE
        UINT32 insert_cache_hit;        //# of times Insert found an empty cache entry
        UINT32 insert_cache_miss;       //# of times Insert already had a matching cache entry
        UINT32 insert_cache_collide;    //# of times Insert found a used cache entry
        UINT32 insert_cache_write;      //# of times Insert wrote a cache entry
    // Unlocked iterator of entries. Use only when read/write access to the cache
    // is safe. This would typically be at GC sync points, currently needed during
    // appdomain unloading.
        Iterator(DispatchCache *pCache);
        inline BOOL IsValid()
            { WRAPPER_NO_CONTRACT; return (m_curBucket < (INT32)m_pCache->GetCacheCount()); }

        // Unlink the current entry.
        // **NOTE** Using this method implicitly performs a call to Next to move
        //          past the unlinked entry. Thus, one could accidentally skip
        //          entries unless you take this into consideration.
        ResolveCacheElem *UnlinkEntry();
        inline ResolveCacheElem *Entry()
            { LIMITED_METHOD_CONTRACT; CONSISTENCY_CHECK(IsValid()); return *m_ppCurElem; }

        void NextValidBucket();
        inline void NextBucket()
            { LIMITED_METHOD_CONTRACT; m_curBucket++; m_ppCurElem = &m_pCache->cache[m_curBucket]; }

        DispatchCache *m_pCache;
        ResolveCacheElem **m_ppCurElem;

    //the following hash computation is also inlined in the resolve stub in asm (SO NO TOUCHIE)
    inline static UINT16 HashMT(UINT16 tokenHash, void* mt)
        LIMITED_METHOD_CONTRACT;

        size_t mtHash = (size_t) mt;
        mtHash = (((mtHash >> CALL_STUB_CACHE_NUM_BITS) + mtHash) >> LOG2_PTRSIZE) & CALL_STUB_CACHE_MASK;
        hash = (UINT16) mtHash;

        hash ^= (tokenHash & CALL_STUB_CACHE_MASK);

    ResolveCacheElem* cache[CALL_STUB_CACHE_SIZE];   //must be first
    ResolveCacheElem* empty;                         //empty entry, initialized to fail all comparisons

    struct CacheEntryData {
    CacheEntryData cacheData[CALL_STUB_CACHE_SIZE];
#endif // STUB_LOGGING
/**************************************************************************************************
The hash tables are accessed via instances of the Prober. Prober is a probe into a bucket
of the hash table, and therefore has an index which is the current probe position.
It includes a count of the number of probes done in that bucket so far and a stride
to step thru the bucket with. To do comparisons, it has a reference to an entry with which
it can do comparisons (Equals(...)) of the entries (stubs) inside the hash table. It also has
the key pair (keyA, keyB) that it is looking for.

Typically, an entry of the appropriate type is created on the stack and then the prober is created passing
in a reference to the entry. The prober is used for a complete operation, such as look for and find an
entry (stub), creating and inserting it as necessary.

The initial index and the stride are orthogonal hashes of the key pair, i.e. we are doing a variant of
double hashing. When we initialize the prober (see FormHash below) we set the initial probe based on
one hash. The stride (used as a modulo addition of the probe position) is based on a different hash and
is such that it will visit every location in the bucket before repeating. Hence it is imperative that
the bucket size and the stride be relatively prime wrt each other. We have chosen to make bucket sizes
a power of 2, so we force stride to be odd.

Note -- it must be assumed that multiple probers are walking the same tables and buckets at the same time.
Additionally, the counts may not be accurate, and there may be duplicates in the tables. Since the tables
do not allow concurrent deletion, some of the concurrency issues are ameliorated. */
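
// Sketch of the resulting probe sequence (illustrative; FormHash below supplies
// the actual hash constants, hash1/hash2 here are stand-ins). With a power-of-2
// bucket size and an odd stride, the walk visits every slot once before repeating:
//
//     index  = hash1(keyA, keyB) & mask;         // initial probe point
//     stride = (hash2(keyA, keyB) | 1) & mask;   // forced odd
//     while (!found && !NoMore())
//         index = (index + stride) & mask;       // see Next()/Read()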
    friend class FastTable;
    friend class BucketTable;

    Prober(Entry* e) {LIMITED_METHOD_CONTRACT; comparer = e;}
    //find the requested entry, if not there return CALL_STUB_EMPTY_ENTRY

    //add the entry into the bucket, if it is not already in the bucket.
    //return the entry actually in the bucket (existing or added)
    size_t Add(size_t entry);

    //return the bucket (FastTable*) that the prober is currently walking
    inline size_t* items() {LIMITED_METHOD_CONTRACT; return &base[-CALL_STUB_FIRST_INDEX];}
    //are there more probes possible, or have we probed everything in the bucket
    inline BOOL NoMore() {LIMITED_METHOD_CONTRACT; return probes>mask;} //both probes and mask are (-1)
    //advance the probe to a new place in the bucket
        WRAPPER_NO_CONTRACT;
        index = (index + stride) & mask;

    inline size_t Read()
        LIMITED_METHOD_CONTRACT;
        return VolatileLoad(&base[index]);

    //initialize a prober across a bucket (table) for the specified keys.
    void InitProber(size_t key1, size_t key2, size_t* table);
    //set up the initial index and stride and probe count
    inline void FormHash()
        LIMITED_METHOD_CONTRACT;

        //these two hash functions have not been formally measured for effectiveness
        //but they are at least orthogonal
        size_t a = ((keyA>>16) + keyA);
        size_t b = ((keyB>>16) ^ keyB);
        index  = (((a*CALL_STUB_HASH_CONST1)>>4)+((b*CALL_STUB_HASH_CONST2)>>4)+CALL_STUB_HASH_CONST1) & mask;
        stride = ((a+(b*CALL_STUB_HASH_CONST1)+CALL_STUB_HASH_CONST2) | 1) & mask;

    //atomically grab an empty slot so we can insert a new entry into the bucket
    BOOL GrabEntry(size_t entryValue);
    size_t keyA;     //key pair we are looking for
    size_t* base;    //we have our own pointer to the bucket, so races don't matter.
                     //  We won't care if we do the lookup in an
                     //  outdated bucket (has grown out from under us).
                     //  All that will happen is possibly dropping an entry
                     //  on the floor or adding a duplicate.
    size_t index;    //current probe point in the bucket
    size_t stride;   //amount to step on each successive probe, must be relatively prime wrt the bucket size
    size_t mask;     //size of bucket - 1
    size_t probes;   //number probes - 1
    Entry* comparer; //used to compare an entry against the sought after key pair
/********************************************************************************************************
FastTable is used to implement the buckets of a BucketTable, a bucketized hash table. A FastTable is
an array of entries (contents). The first two slots of contents store the size-1 and count of entries
actually in the FastTable. Note that the count may be inaccurate and there may be duplicates. Careful
attention must be paid to eliminate the need for interlocked or serialized or locked operations in the
face of concurrency. */
#pragma warning(push)
#pragma warning(disable : 4200)     // disable zero-sized array warning
    friend class BucketTable;

    FastTable() { LIMITED_METHOD_CONTRACT; }
    ~FastTable() { LIMITED_METHOD_CONTRACT; }

    //initialize a prober for the specified keys.
    inline BOOL SetUpProber(size_t keyA, size_t keyB, Prober* probe)
        probe->InitProber(keyA, keyB, &contents[0]);

    //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY
    size_t Find(Prober* probe);
    //add the entry, if it is not already there. Probe is used to search.
    //Return the entry actually contained (existing or added)
    size_t Add(size_t entry, Prober* probe);
    void IncrementCount();

    // Create a FastTable with space for numberOfEntries. Please note that this method
    // does not throw on OOM. **YOU MUST CHECK FOR NULL RETURN**
    static FastTable* MakeTable(size_t numberOfEntries)
        INJECT_FAULT(COMPlusThrowOM(););

        size_t size = CALL_STUB_MIN_ENTRIES;
        while (size < numberOfEntries) {size = size<<1;}
//      if (size == CALL_STUB_MIN_ENTRIES)

        size_t* bucket = new size_t[(sizeof(FastTable)/sizeof(size_t))+size+CALL_STUB_FIRST_INDEX];
        FastTable* table = new (bucket) FastTable();
        table->InitializeContents(size);

    //Initialize as empty
    void InitializeContents(size_t size)
        LIMITED_METHOD_CONTRACT;
        memset(&contents[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(BYTE*));
        contents[CALL_STUB_MASK_INDEX] = size-1;

    inline size_t tableMask()  {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_MASK_INDEX]);}
    inline size_t tableSize()  {LIMITED_METHOD_CONTRACT; return tableMask()+1;}
    inline size_t tableCount() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_COUNT_INDEX]);}
    inline BOOL isFull()
        LIMITED_METHOD_CONTRACT;
        return (tableCount()+1) * 100 / CALL_STUB_LOAD_FACTOR >= tableSize();

    //we store (size-1) in bucket[CALL_STUB_MASK_INDEX==0],
    //we store the used count in bucket[CALL_STUB_COUNT_INDEX==1],
    //we have an unused cell to use as a temp at bucket[CALL_STUB_DEAD_LINK==2],
    //and the table starts at bucket[CALL_STUB_FIRST_INDEX==3],
#pragma warning(pop)
/******************************************************************************************************
BucketTable is a bucketized hash table. It uses FastTables for its buckets. The hash tables
used by the VirtualCallStubManager are BucketTables. The number of buckets is fixed at the time
the table is created. The actual buckets are allocated as needed, and grow as necessary. The reason
for using buckets is primarily to reduce the cost of growing, since only a single bucket is actually
grown at any given time. Since the hash tables are accessed infrequently, the load factor that
controls growth is quite high (90%). Since we use hashing to pick the bucket, and we use hashing to
lookup inside the bucket, it is important that the hashing function used here is orthogonal to the ones
used in the buckets themselves (see Prober::FormHash). */
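
// Sketch of the resulting two-level lookup (illustrative only): one hash picks
// the bucket, the Prober's orthogonal hashes then walk within it:
//
//     size_t ix = ComputeBucketIndex(keyA, keyB);   // level 1: pick a bucket
//     FastTable* bucket = (FastTable*)Read(ix);     // level 2: probe inside it
//     bucket->SetUpProber(keyA, keyB, prober);      //   via Prober::FormHash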
    BucketTable(size_t numberOfBuckets)
        WRAPPER_NO_CONTRACT;
        size_t size = CALL_STUB_MIN_BUCKETS;
        while (size < numberOfBuckets) {size = size<<1;}
        buckets = AllocateBuckets(size);
        // Initialize statistics counters
        memset(&stats, 0, sizeof(stats));

        LIMITED_METHOD_CONTRACT;
        size_t size = bucketCount()+CALL_STUB_FIRST_INDEX;
        for (size_t ix = CALL_STUB_FIRST_INDEX; ix < size; ix++) delete (FastTable*)(buckets[ix]);

    //initialize a prober for the specified keys.
    BOOL SetUpProber(size_t keyA, size_t keyB, Prober *prober);
    //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY
    inline size_t Find(Prober* probe) {WRAPPER_NO_CONTRACT; return probe->Find();}
    //add the entry, if it is not already there. Probe is used to search.
    size_t Add(size_t entry, Prober* probe);
    //reclaim abandoned buckets. Buckets are abandoned when they need to grow.
    //needs to be called inside a gc sync point.
    static void Reclaim();
        UINT32 bucket_space;        //# of bytes in caches and tables, not including the stubs themselves
        UINT32 bucket_space_dead;   //# of bytes of abandoned buckets not yet recycled.

    inline size_t bucketMask()  {LIMITED_METHOD_CONTRACT; return (size_t) (buckets[CALL_STUB_MASK_INDEX]);}
    inline size_t bucketCount() {LIMITED_METHOD_CONTRACT; return bucketMask()+1;}
    inline size_t ComputeBucketIndex(size_t keyA, size_t keyB)
        LIMITED_METHOD_CONTRACT;
        size_t a = ((keyA>>16) + keyA);
        size_t b = ((keyB>>16) ^ keyB);
        return CALL_STUB_FIRST_INDEX+(((((a*CALL_STUB_HASH_CONST2)>>5)^((b*CALL_STUB_HASH_CONST1)>>5))+CALL_STUB_HASH_CONST2) & bucketMask());

    //grows the bucket referenced by probe.
    BOOL GetMoreSpace(const Prober* probe);
    //creates storage in which to store references to the buckets
    static size_t* AllocateBuckets(size_t size)
        LIMITED_METHOD_CONTRACT;
        size_t* buckets = new size_t[size+CALL_STUB_FIRST_INDEX];
        if (buckets != NULL)
            memset(&buckets[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(void*));
            buckets[CALL_STUB_MASK_INDEX] = size-1;

    inline size_t Read(size_t index)
        LIMITED_METHOD_CONTRACT;
        CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX);
        return VolatileLoad(&buckets[index]);

#pragma warning(disable: 4267) //work-around for the compiler
    inline void Write(size_t index, size_t value)
        LIMITED_METHOD_CONTRACT;
        CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX);
        VolatileStore(&buckets[index], value);
#pragma warning(default: 4267)

    // We store (#buckets-1) in    bucket[CALL_STUB_MASK_INDEX  ==0]
    // We have two unused cells at bucket[CALL_STUB_COUNT_INDEX ==1]
    //                         and bucket[CALL_STUB_DEAD_LINK   ==2]
    // and the table starts at     bucket[CALL_STUB_FIRST_INDEX ==3]
    // the number of elements is   bucket[CALL_STUB_MASK_INDEX]+CALL_STUB_FIRST_INDEX
    static FastTable* dead;   //linked list head of to-be-deleted (abandoned) buckets

#endif // !CROSSGEN_COMPILE

#endif // !_VIRTUAL_CALL_STUB_H