Port to 3.1 - Fix out of range access in GetRecycleMemoryInfo (#27959)
[platform/upstream/coreclr.git] / src / vm / win32threadpool.h
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5
6 /*++
7
8 Module Name:
9
10     Win32ThreadPool.h
11
12 Abstract:
13
14     This module is the header file for thread pools using Win32 APIs. 
15
16 Revision History:
17  
18
19 --*/
20
21 #ifndef _WIN32THREADPOOL_H
22 #define _WIN32THREADPOOL_H
23
24 #include "delegateinfo.h"
25 #include "util.hpp"
26 #include "nativeoverlapped.h"
27 #include "hillclimbing.h"
28
// Capacity limits. MAX_WAITHANDLES sizes the per-thread wait arrays in ThreadCB.
#define MAX_WAITHANDLES   64
#define MAX_CACHED_EVENTS 40    // upper limit on number of wait events cached

// WAIT_* state bits (presumably kept in WaitInfo::state -- confirm against the .cpp).
#define WAIT_REGISTERED 0x01
#define WAIT_ACTIVE     0x02
#define WAIT_DELETE     0x04

// TIMER_* state bits (presumably kept in TimerInfo::state -- confirm against the .cpp).
#define TIMER_REGISTERED 0x01
#define TIMER_ACTIVE     0x02
#define TIMER_DELETE     0x04

// Wait flag bits. ReleaseAsyncCallback tests the first two against WaitInfo::flag.
#define WAIT_SINGLE_EXECUTION    0x00000001
#define WAIT_FREE_CONTEXT        0x00000002
#define WAIT_INTERNAL_COMPLETION 0x00000004

// Queueing behaviour selectors.
#define QUEUE_ONLY    0x00000000    // do not attempt to call on the thread
#define CALL_OR_QUEUE 0x00000001    // call on the same thread if not too busy, else queue

// Per-CPU limits on completion-port threads.
const int MaxLimitThreadsPerCPU = 250;  // upper limit on number of cp threads per CPU
const int MaxFreeCPThreadsPerCPU = 2;   // upper limit on number of free cp threads per CPU

// CPU utilization thresholds, in percent.
const int CpuUtilizationHigh = 95;      // remove threads when above this
const int CpuUtilizationLow = 80;       // inject more threads if below this
53
54 #ifndef FEATURE_PAL
55 extern HANDLE (WINAPI *g_pufnCreateIoCompletionPort)(HANDLE FileHandle,
56                                                                                           HANDLE ExistingCompletionPort,  
57                                                                                           ULONG_PTR CompletionKey,        
58                                                                                           DWORD NumberOfConcurrentThreads);
59
60 extern int (WINAPI *g_pufnNtQueryInformationThread) (HANDLE ThreadHandle,
61                                                                                           THREADINFOCLASS ThreadInformationClass,
62                                               PVOID ThreadInformation,
63                                               ULONG ThreadInformationLength,
64                                               PULONG ReturnLength);
65
66 extern int (WINAPI * g_pufnNtQuerySystemInformation) (SYSTEM_INFORMATION_CLASS SystemInformationClass,
67                                                       PVOID SystemInformation,
68                                                       ULONG SystemInformationLength,
69                                                       PULONG ReturnLength OPTIONAL);
70 #endif // !FEATURE_PAL
71
// Reinterprets the storage of the lvalue 't' (e.g. a FILETIME) as a 64-bit integer.
#define FILETIME_TO_INT64(t) (*(__int64*)&(t))
// Converts milliseconds to 100-nanosecond units. The argument is parenthesized so
// that a compound expression such as MILLI_TO_100NANO(a + b) is scaled as a whole.
#define MILLI_TO_100NANO(x)  ((x) * 10000)
74
75 /**
76  * This type is supposed to be private to ThreadpoolMgr.
77  * It's at global scope because Strike needs to be able to access its
78  * definition.
79  */
80 struct WorkRequest {
81     WorkRequest*            next;
82     LPTHREAD_START_ROUTINE  Function; 
83     PVOID                   Context;
84
85 };
86
87 typedef struct _IOCompletionContext
88 {
89     DWORD ErrorCode;
90     DWORD numBytesTransferred;
91     LPOVERLAPPED lpOverlapped;
92     size_t key;
93 } IOCompletionContext, *PIOCompletionContext;
94
95 typedef DPTR(WorkRequest) PTR_WorkRequest;
96 class ThreadpoolMgr
97 {
98     friend class ClrDataAccess;
99     friend struct DelegateInfo;
100     friend class ThreadPoolNative;
101     friend class TimerNative;
102     friend class UnManagedPerAppDomainTPCount;
103     friend class ManagedPerAppDomainTPCount;
104     friend class PerAppDomainTPCountList;
105     friend class HillClimbing;
106     friend struct _DacGlobals;
107
108 public:
109     struct ThreadCounter
110     {
111         static const int MaxPossibleCount = 0x7fff;
112
113         // padding to ensure we get our own cache line
114         BYTE padding1[MAX_CACHE_LINE_SIZE];
115
116         union Counts
117         {
118             struct
119             {
120                 //
121                 // Note: these are signed rather than unsigned to allow us to detect under/overflow.
122                 //  
123                 int MaxWorking  : 16;  //Determined by HillClimbing; adjusted elsewhere for timeouts, etc. 
124                 int NumActive  : 16;  //Active means working or waiting on WorkerSemaphore.  These are "warm/hot" threads.
125                 int NumWorking : 16;  //Trying to get work from various queues.  Not waiting on either semaphore.
126                 int NumRetired : 16;  //Not trying to get work; waiting on RetiredWorkerSemaphore.  These are "cold" threads.
127
128                 // Note: the only reason we need "retired" threads at all is that it allows some threads to eventually time out
129                 // even if other threads are getting work.  If we ever make WorkerSemaphore a true LIFO semaphore, we will no longer
130                 // need the concept of "retirement" - instead, the very "coldest" threads will naturally be the first to time out.
131             };
132
133             LONGLONG AsLongLong;
134
135             bool operator==(Counts other) {LIMITED_METHOD_CONTRACT; return AsLongLong == other.AsLongLong;}
136         } counts;
137
138         // padding to ensure we get our own cache line
139         BYTE padding2[MAX_CACHE_LINE_SIZE];
140
141         Counts GetCleanCounts()
142         {
143             LIMITED_METHOD_CONTRACT;
144 #ifdef _WIN64
145             // VolatileLoad x64 bit read is atomic
146             return DangerousGetDirtyCounts();
147 #else // !_WIN64
148             // VolatileLoad may result in torn read
149             Counts result;
150 #ifndef DACCESS_COMPILE
151             result.AsLongLong = FastInterlockCompareExchangeLong(&counts.AsLongLong, 0, 0);
152             ValidateCounts(result);
153 #else
154             result.AsLongLong = 0; //prevents prefast warning for DAC builds
155 #endif
156             return result;
157 #endif // !_WIN64
158         }
159
160         //
161         // This does a non-atomic read of the counts.  The returned value is suitable only
162         // for use inside of a read-compare-exchange loop, where the compare-exhcange must succeed
163         // before any action is taken.  Use GetCleanWorkerCounts for other needs, but keep in mind
164         // it's much slower.
165         //
166         Counts DangerousGetDirtyCounts()
167         {
168             LIMITED_METHOD_CONTRACT;
169             Counts result;
170 #ifndef DACCESS_COMPILE
171             result.AsLongLong = VolatileLoad(&counts.AsLongLong);
172 #else
173             result.AsLongLong = 0; //prevents prefast warning for DAC builds
174 #endif
175             return result;
176         }
177
178
179         Counts CompareExchangeCounts(Counts newCounts, Counts oldCounts)
180         {
181             LIMITED_METHOD_CONTRACT;
182             Counts result;
183 #ifndef DACCESS_COMPILE
184             result.AsLongLong = FastInterlockCompareExchangeLong(&counts.AsLongLong, newCounts.AsLongLong, oldCounts.AsLongLong);
185             if (result == oldCounts)
186             {
187                 // can only do validation on success; if we failed, it may have been due to a previous
188                 // dirty read, which may contain invalid values.
189                 ValidateCounts(result);
190                 ValidateCounts(newCounts);
191             }
192 #else
193             result.AsLongLong = 0; //prevents prefast warning for DAC builds
194 #endif
195             return result;
196         }
197
198     private:
199         static void ValidateCounts(Counts counts)
200         {
201             LIMITED_METHOD_CONTRACT;
202             _ASSERTE(counts.MaxWorking > 0);
203             _ASSERTE(counts.NumActive >= 0);
204             _ASSERTE(counts.NumWorking >= 0);
205             _ASSERTE(counts.NumRetired >= 0);
206             _ASSERTE(counts.NumWorking <= counts.NumActive);
207         }
208     };
209
210 public:
211
212     static void ReportThreadStatus(bool isWorking);
213
214     // enumeration of different kinds of memory blocks that are recycled
215     enum MemType
216     {
217         MEMTYPE_AsyncCallback   = 0,
218         MEMTYPE_DelegateInfo    = 1,
219         MEMTYPE_WorkRequest     = 2,
220         MEMTYPE_COUNT           = 3,
221     };
222
223     typedef struct {
224         INT32 TimerId;
225     } TimerInfoContext;
226
227     static BOOL Initialize();
228
229     static BOOL SetMaxThreadsHelper(DWORD MaxWorkerThreads,
230                                         DWORD MaxIOCompletionThreads);
231
232     static BOOL SetMaxThreads(DWORD MaxWorkerThreads, 
233                               DWORD MaxIOCompletionThreads);
234
235     static BOOL GetMaxThreads(DWORD* MaxWorkerThreads, 
236                               DWORD* MaxIOCompletionThreads);
237     
238     static BOOL SetMinThreads(DWORD MinWorkerThreads, 
239                               DWORD MinIOCompletionThreads);
240     
241     static BOOL GetMinThreads(DWORD* MinWorkerThreads, 
242                               DWORD* MinIOCompletionThreads);
243  
244     static BOOL GetAvailableThreads(DWORD* AvailableWorkerThreads, 
245                                  DWORD* AvailableIOCompletionThreads);
246
247     static INT32 GetThreadCount();
248
249     static BOOL QueueUserWorkItem(LPTHREAD_START_ROUTINE Function, 
250                                   PVOID Context,
251                                   ULONG Flags,
252                                   BOOL UnmanagedTPRequest=TRUE);
253
254     static BOOL PostQueuedCompletionStatus(LPOVERLAPPED lpOverlapped,
255                                   LPOVERLAPPED_COMPLETION_ROUTINE Function);
256
257     inline static BOOL IsCompletionPortInitialized()
258     {
259         LIMITED_METHOD_CONTRACT;
260         return GlobalCompletionPort != NULL;
261     }
262
263     static BOOL DrainCompletionPortQueue();
264
265     static BOOL RegisterWaitForSingleObject(PHANDLE phNewWaitObject,
266                                             HANDLE hWaitObject,
267                                             WAITORTIMERCALLBACK Callback,
268                                             PVOID Context,
269                                             ULONG timeout,
270                                             DWORD dwFlag);
271
272     static BOOL UnregisterWaitEx(HANDLE hWaitObject,HANDLE CompletionEvent);
273     static void WaitHandleCleanup(HANDLE hWaitObject);
274
275     static BOOL WINAPI BindIoCompletionCallback(HANDLE FileHandle,
276                                             LPOVERLAPPED_COMPLETION_ROUTINE Function,
277                                             ULONG Flags,
278                                             DWORD& errorCode);
279
280     static void WINAPI WaitIOCompletionCallback(DWORD dwErrorCode,
281                                             DWORD numBytesTransferred,
282                                             LPOVERLAPPED lpOverlapped);
283
284     static VOID WINAPI CallbackForInitiateDrainageOfCompletionPortQueue(
285         DWORD dwErrorCode,
286         DWORD dwNumberOfBytesTransfered,
287         LPOVERLAPPED lpOverlapped
288     );
289
290     static VOID WINAPI CallbackForContinueDrainageOfCompletionPortQueue(
291         DWORD dwErrorCode,
292         DWORD dwNumberOfBytesTransfered,
293         LPOVERLAPPED lpOverlapped
294     );
295
296     static BOOL SetAppDomainRequestsActive(BOOL UnmanagedTP = FALSE);
297     static void ClearAppDomainRequestsActive(BOOL UnmanagedTP = FALSE,  LONG index = -1);
298
299     static inline void UpdateLastDequeueTime()
300     {
301         LIMITED_METHOD_CONTRACT;
302         VolatileStore(&LastDequeueTime, (unsigned int)GetTickCount());
303     }
304
305     static BOOL CreateTimerQueueTimer(PHANDLE phNewTimer,
306                                         WAITORTIMERCALLBACK Callback,
307                                         PVOID Parameter,
308                                         DWORD DueTime,
309                                         DWORD Period,
310                                         ULONG Flags);
311
312     static BOOL ChangeTimerQueueTimer(HANDLE Timer,
313                                       ULONG DueTime,
314                                       ULONG Period);
315     static BOOL DeleteTimerQueueTimer(HANDLE Timer,
316                                       HANDLE CompletionEvent);
317
318     static void RecycleMemory(LPVOID mem, enum MemType memType);
319
320     static void FlushQueueOfTimerInfos();
321
322     static BOOL HaveTimerInfosToFlush() { return TimerInfosToBeRecycled != NULL; }
323
324 #ifndef FEATURE_PAL    
325     static LPOVERLAPPED CompletionPortDispatchWorkWithinAppDomain(Thread* pThread, DWORD* pErrorCode, DWORD* pNumBytes, size_t* pKey);
326     static void StoreOverlappedInfoInThread(Thread* pThread, DWORD dwErrorCode, DWORD dwNumBytes, size_t key, LPOVERLAPPED lpOverlapped);
327 #endif // !FEATURE_PAL
328
329     // Enable filtering of correlation ETW events for cases handled at a higher abstraction level
330
331 #ifndef DACCESS_COMPILE
332     static FORCEINLINE BOOL AreEtwQueueEventsSpeciallyHandled(LPTHREAD_START_ROUTINE Function)
333     {
334         // Timer events are handled at a higher abstraction level: in the managed Timer class
335         return (Function == ThreadpoolMgr::AsyncTimerCallbackCompletion);
336     }
337
338     static FORCEINLINE BOOL AreEtwIOQueueEventsSpeciallyHandled(LPOVERLAPPED_COMPLETION_ROUTINE Function)
339     {
340         // We ignore drainage events b/c they are uninteresting
341         // We handle registered waits at a higher abstraction level
342         return (Function == ThreadpoolMgr::CallbackForInitiateDrainageOfCompletionPortQueue 
343                 || Function == ThreadpoolMgr::CallbackForContinueDrainageOfCompletionPortQueue
344                 || Function == ThreadpoolMgr::WaitIOCompletionCallback);
345     }
346 #endif
347
348 private:
349
350 #ifndef DACCESS_COMPILE
351
352     inline static void FreeWorkRequest(WorkRequest* workRequest)
353     {
354         RecycleMemory( workRequest, MEMTYPE_WorkRequest ); //delete workRequest;
355     }
356
357     inline static WorkRequest* MakeWorkRequest(LPTHREAD_START_ROUTINE  function, PVOID context)
358     {
359         CONTRACTL
360         {
361             THROWS;     
362             GC_NOTRIGGER;
363             MODE_ANY;
364         }
365         CONTRACTL_END;;
366         
367         WorkRequest* wr = (WorkRequest*) GetRecycledMemory(MEMTYPE_WorkRequest);
368         _ASSERTE(wr);
369                 if (NULL == wr)
370                         return NULL;
371         wr->Function = function;
372         wr->Context = context;
373         wr->next = NULL;
374         return wr;
375     }
376     
377 #endif // #ifndef DACCESS_COMPILE
378
379     typedef struct {
380         DWORD           numBytes;
381         ULONG_PTR      *key;
382         LPOVERLAPPED    pOverlapped;
383         DWORD           errorCode;
384     } QueuedStatus;
385
386     typedef DPTR(struct _LIST_ENTRY)                        PTR_LIST_ENTRY;
387     typedef struct _LIST_ENTRY {
388         struct _LIST_ENTRY *Flink;
389         struct _LIST_ENTRY *Blink;
390     } LIST_ENTRY, *PLIST_ENTRY;    
391     
392     struct WaitInfo;
393
394     typedef struct {
395         HANDLE          threadHandle;
396         DWORD           threadId;
397         CLREvent        startEvent;
398         LONG            NumWaitHandles;                 // number of wait objects registered to the thread <=64
399         LONG            NumActiveWaits;                 // number of objects, thread is actually waiting on (this may be less than
400                                                            // NumWaitHandles since the thread may not have activated some waits
401         HANDLE          waitHandle[MAX_WAITHANDLES];    // array of wait handles (copied from waitInfo since 
402                                                            // we need them to be contiguous)
403         LIST_ENTRY      waitPointer[MAX_WAITHANDLES];   // array of doubly linked list of corresponding waitinfo 
404     } ThreadCB;
405
406
407     typedef struct {
408         ULONG               startTime;          // time at which wait was started
409                                                 // endTime = startTime+timeout
410         ULONG               remainingTime;      // endTime - currentTime
411     } WaitTimerInfo;
412
413     struct  WaitInfo {
414         LIST_ENTRY          link;               // Win9x does not allow duplicate waithandles, so we need to
415                                                 // group all waits on a single waithandle using this linked list
416         HANDLE              waitHandle;
417         WAITORTIMERCALLBACK Callback;
418         PVOID               Context;
419         ULONG               timeout;                
420         WaitTimerInfo       timer;              
421         DWORD               flag;
422         DWORD               state;
423         ThreadCB*           threadCB;
424         LONG                refCount;                // when this reaches 0, the waitInfo can be safely deleted
425         CLREvent            PartialCompletionEvent;  // used to synchronize deactivation of a wait
426         CLREvent            InternalCompletionEvent; // only one of InternalCompletion or ExternalCompletion is used
427                                                      // but I cant make a union since CLREvent has a non-default constructor
428         HANDLE              ExternalCompletionEvent; // they are signalled when all callbacks have completed (refCount=0)
429         OBJECTHANDLE        ExternalEventSafeHandle;
430
431     } ;
432
433     // structure used to maintain global information about wait threads. Protected by WaitThreadsCriticalSection
434     typedef struct WaitThreadTag {
435         LIST_ENTRY      link;
436         ThreadCB*       threadCB;
437     } WaitThreadInfo;
438
439
440     struct AsyncCallback{   
441         WaitInfo*   wait;
442         BOOL        waitTimedOut;
443     } ;
444
445 #ifndef DACCESS_COMPILE
446
447     static VOID
448     AcquireAsyncCallback(AsyncCallback *pAsyncCB)
449     {
450         LIMITED_METHOD_CONTRACT;
451     }
452
453     static VOID
454     ReleaseAsyncCallback(AsyncCallback *pAsyncCB)
455     {
456         CONTRACTL
457         {
458             THROWS;     
459             GC_TRIGGERS;
460             MODE_ANY;
461         }
462         CONTRACTL_END;
463
464             WaitInfo *waitInfo = pAsyncCB->wait;
465             ThreadpoolMgr::RecycleMemory((LPVOID*)pAsyncCB, ThreadpoolMgr::MEMTYPE_AsyncCallback);
466             
467             // if this was a single execution, we now need to stop rooting registeredWaitHandle  
468             // in a GC handle. This will cause the finalizer to pick it up and call the cleanup
469             // routine.
470             if ( (waitInfo->flag & WAIT_SINGLE_EXECUTION)  && (waitInfo->flag & WAIT_FREE_CONTEXT))
471             {
472
473                 DelegateInfo* pDelegate = (DelegateInfo*) waitInfo->Context;
474
475                 _ASSERTE(pDelegate->m_registeredWaitHandle);
476
477                 {
478                     GCX_COOP();
479                     StoreObjectInHandle(pDelegate->m_registeredWaitHandle, NULL);
480                 }
481             }
482
483             if (InterlockedDecrement(&waitInfo->refCount) == 0)
484                 ThreadpoolMgr::DeleteWait(waitInfo);
485
486     }
487
488     typedef Holder<AsyncCallback *, ThreadpoolMgr::AcquireAsyncCallback, ThreadpoolMgr::ReleaseAsyncCallback> AsyncCallbackHolder;
489     inline static AsyncCallback* MakeAsyncCallback()
490     {
491         WRAPPER_NO_CONTRACT;
492         return (AsyncCallback*) GetRecycledMemory(MEMTYPE_AsyncCallback);
493     }
494
495     static VOID ReleaseInfo(OBJECTHANDLE& hndSafeHandle, 
496         HANDLE hndNativeHandle)
497     {
498         CONTRACTL
499         {
500             NOTHROW;
501             MODE_ANY;
502             GC_TRIGGERS;
503         }
504         CONTRACTL_END
505
506 // Use of EX_TRY, GCPROTECT etc in the same function is causing prefast to complain about local variables with
507 // same name masking each other (#246). The error could not be suppressed with "#pragma PREFAST_SUPPRESS"
508 #ifndef _PREFAST_
509
510         if (hndSafeHandle != NULL)
511         {
512
513             SAFEHANDLEREF refSH = NULL;
514
515             GCX_COOP();
516             GCPROTECT_BEGIN(refSH);
517
518             {
519                 EX_TRY
520                 {
521                     // Read the GC handle
522                     refSH = (SAFEHANDLEREF) ObjectToOBJECTREF(ObjectFromHandle(hndSafeHandle));
523
524                     // Destroy the GC handle
525                     DestroyHandle(hndSafeHandle);
526
527                     if (refSH != NULL)
528                     {
529                         SafeHandleHolder h(&refSH);
530
531                         HANDLE hEvent = refSH->GetHandle();
532                         if (hEvent != INVALID_HANDLE_VALUE)
533                         {
534                             SetEvent(hEvent);
535                         }
536                     }
537                 }
538                 EX_CATCH
539                 {
540                 }
541                 EX_END_CATCH(SwallowAllExceptions);
542             }
543
544             GCPROTECT_END();
545             
546             hndSafeHandle = NULL;
547         }
548 #endif
549     }
550
551 #endif // #ifndef DACCESS_COMPILE
552
553     typedef struct {
554         LIST_ENTRY  link;
555         HANDLE      Handle;
556     } WaitEvent ;
557
558     // Timer 
559     typedef struct {
560         LIST_ENTRY  link;           // doubly linked list of timers
561         ULONG FiringTime;           // TickCount of when to fire next
562         WAITORTIMERCALLBACK Function;             // Function to call when timer fires
563         PVOID Context;              // Context to pass to function when timer fires
564         ULONG Period;
565         DWORD flag;                 // How do we deal with the context
566         DWORD state;
567         LONG refCount;
568         HANDLE ExternalCompletionEvent;     // only one of this is used, but cant do a union since CLREvent has a non-default constructor
569         CLREvent InternalCompletionEvent;   // flags indicates which one is being used
570         OBJECTHANDLE    ExternalEventSafeHandle;
571     } TimerInfo;
572
573     static VOID AcquireWaitInfo(WaitInfo *pInfo)
574     {
575     }
576     static VOID ReleaseWaitInfo(WaitInfo *pInfo)
577     {
578         WRAPPER_NO_CONTRACT;
579 #ifndef DACCESS_COMPILE
580         ReleaseInfo(pInfo->ExternalEventSafeHandle, 
581         pInfo->ExternalCompletionEvent);
582 #endif
583     }
584     static VOID AcquireTimerInfo(TimerInfo *pInfo)
585     {
586     }
587     static VOID ReleaseTimerInfo(TimerInfo *pInfo)
588     {
589         WRAPPER_NO_CONTRACT;
590 #ifndef DACCESS_COMPILE
591         ReleaseInfo(pInfo->ExternalEventSafeHandle, 
592         pInfo->ExternalCompletionEvent);
593 #endif
594     }
595
596     typedef Holder<WaitInfo *, ThreadpoolMgr::AcquireWaitInfo, ThreadpoolMgr::ReleaseWaitInfo> WaitInfoHolder;
597     typedef Holder<TimerInfo *, ThreadpoolMgr::AcquireTimerInfo, ThreadpoolMgr::ReleaseTimerInfo> TimerInfoHolder;
598
599     typedef struct {
600         TimerInfo* Timer;           // timer to be updated
601         ULONG DueTime ;             // new due time
602         ULONG Period ;              // new period
603     } TimerUpdateInfo;
604
605     // Definitions and data structures to support recycling of high-frequency 
606     // memory blocks. We use a spin-lock to access the list
607
608     class RecycledListInfo
609     {
610         static const unsigned int MaxCachedEntries = 40;
611
612         struct Entry
613             {
614                 Entry* next;
615             };
616
617         Volatile<LONG> lock;            // this is the spin lock
618         DWORD         count;            // count of number of elements in the list
619         Entry*        root;             // ptr to first element of recycled list
620 #ifndef _WIN64
621                 DWORD         filler;       // Pad the structure to a multiple of the 16.
622 #endif
623
624                 //--//
625
626 public:
627                 RecycledListInfo()
628                 {
629             LIMITED_METHOD_CONTRACT;
630
631             lock  = 0;
632             root  = NULL;
633             count = 0;
634                 }
635
636                 FORCEINLINE bool CanInsert()
637                 {
638             LIMITED_METHOD_CONTRACT;
639
640                         return count < MaxCachedEntries;
641                 }
642
643             FORCEINLINE LPVOID Remove()
644             {
645                 LIMITED_METHOD_CONTRACT;
646
647                         if(root == NULL) return NULL; // No need for acquiring the lock, there's nothing to remove.
648
649                 AcquireLock();
650
651                 Entry* ret = (Entry*)root;
652
653                 if(ret)
654                 {
655                     root   = ret->next;
656                     count -= 1;
657                 }
658
659                 ReleaseLock();
660
661                 return ret;
662             }
663
664             FORCEINLINE void Insert( LPVOID mem )
665             {
666                 LIMITED_METHOD_CONTRACT;
667
668                     AcquireLock();
669
670                 Entry* entry = (Entry*)mem;
671
672                 entry->next = root;
673
674                 root   = entry;
675                 count += 1;
676
677                 ReleaseLock();
678             }
679
680         private:
681             FORCEINLINE void AcquireLock()
682             {
683                 LIMITED_METHOD_CONTRACT;
684
685                 unsigned int rounds = 0;
686
687                 DWORD dwSwitchCount = 0;
688
689                 while(lock != 0 || FastInterlockExchange( &lock, 1 ) != 0)
690                 {
691                 YieldProcessorNormalized(); // indicate to the processor that we are spinning
692
693                     rounds++;
694                     
695                     if((rounds % 32) == 0)
696                     {
697                         __SwitchToThread( 0, ++dwSwitchCount );
698                     }
699                 }
700             }
701
702             FORCEINLINE void ReleaseLock()
703             {
704                 LIMITED_METHOD_CONTRACT;
705
706                 lock = 0;
707             }
708         };
709
710     //
711     // It's critical that we ensure these pointers are allocated by the linker away from
712     // variables that are modified a lot at runtime.
713     //
714     // The use of the CacheGuard is a temporary solution,
715     // the thread pool has to be refactor away from static variable and
716     // toward a single global structure, where we can control the locality of variables.
717     //
718     class RecycledListsWrapper
719     {
720         DWORD                        CacheGuardPre[MAX_CACHE_LINE_SIZE/sizeof(DWORD)];
721         
722         RecycledListInfo            (*pRecycledListPerProcessor)[MEMTYPE_COUNT];  // RecycledListInfo [numProc][MEMTYPE_COUNT]
723
724         DWORD                        CacheGuardPost[MAX_CACHE_LINE_SIZE/sizeof(DWORD)];
725
726     public:
727         void Initialize( unsigned int numProcs );
728
729         FORCEINLINE bool IsInitialized()
730         {
731             LIMITED_METHOD_CONTRACT;
732
733             return pRecycledListPerProcessor != NULL;
734         }
735         
736         FORCEINLINE RecycledListInfo& GetRecycleMemoryInfo( enum MemType memType )
737         {
738             LIMITED_METHOD_CONTRACT;
739
740             DWORD processorNumber = 0;
741
742 #ifndef FEATURE_PAL
743                 if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
744                 processorNumber = CPUGroupInfo::CalculateCurrentProcessorNumber();
745             else
746                 // Turns out GetCurrentProcessorNumber can return a value greater than the number of processors reported by
747                 // GetSystemInfo, if we're running in WOW64 on a machine with >32 processors.
748                     processorNumber = GetCurrentProcessorNumber()%NumberOfProcessors;
749 #else // !FEATURE_PAL
750             if (PAL_HasGetCurrentProcessorNumber())
751             {
752                 // On linux, GetCurrentProcessorNumber which uses sched_getcpu() can return a value greater than the number
753                 // of processors reported by sysconf(_SC_NPROCESSORS_ONLN) when using OpenVZ kernel.
754                 processorNumber = GetCurrentProcessorNumber()%NumberOfProcessors;
755             }
756 #endif // !FEATURE_PAL
757             return pRecycledListPerProcessor[processorNumber][memType];
758         }
759     };
760
// Gate thread status values.
#define GATE_THREAD_STATUS_NOT_RUNNING         0    // no gate thread exists
#define GATE_THREAD_STATUS_REQUESTED           1    // a gate thread exists and was recently asked to stick around
#define GATE_THREAD_STATUS_WAITING_FOR_REQUEST 2    // a gate thread exists but nobody asked it to stay; it may die soon
764
765     // Private methods
766
767     static DWORD WINAPI intermediateThreadProc(PVOID arg);
768
769     typedef struct {
770         LPTHREAD_START_ROUTINE  lpThreadFunction;
771         PVOID                   lpArg;        
772     } intermediateThreadParam;
773
774     static Thread* CreateUnimpersonatedThread(LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpArgs, BOOL *pIsCLRThread);
775
776     static BOOL CreateWorkerThread();
777
778     static void EnqueueWorkRequest(WorkRequest* wr);
779
780     static WorkRequest* DequeueWorkRequest();
781
782     static void ExecuteWorkRequest(bool* foundWork, bool* wasNotRecalled);
783
784     static DWORD WINAPI ExecuteHostRequest(PVOID pArg);
785
786 #ifndef DACCESS_COMPILE
787
788     inline static void AppendWorkRequest(WorkRequest* entry)
789     {
790         CONTRACTL
791         {
792             NOTHROW;         
793             MODE_ANY;
794             GC_NOTRIGGER;
795         }
796         CONTRACTL_END;
797
798         if (WorkRequestTail)
799         {
800             _ASSERTE(WorkRequestHead != NULL);
801             WorkRequestTail->next = entry;
802         }
803         else
804         {
805             _ASSERTE(WorkRequestHead == NULL);
806             WorkRequestHead = entry;
807         }
808
809         WorkRequestTail = entry;
810         _ASSERTE(WorkRequestTail->next == NULL);
811     }
812
813     inline static WorkRequest* RemoveWorkRequest()
814     {
815         CONTRACTL
816         {
817             NOTHROW;         
818             MODE_ANY;
819             GC_NOTRIGGER;
820         }
821         CONTRACTL_END;
822
823         WorkRequest* entry = NULL;
824         if (WorkRequestHead)
825         {
826             entry = WorkRequestHead;
827             WorkRequestHead = entry->next;
828             if (WorkRequestHead == NULL)
829                 WorkRequestTail = NULL;
830         }
831         return entry;
832     }
833     // Initialization entry points; both take no arguments, return nothing, and are defined outside this header.
834     static void EnsureInitialized();
835     static void InitPlatformVariables();
836
837     inline static BOOL IsInitialized()
838     {
839         LIMITED_METHOD_CONTRACT;
840         return Initialization == -1;
841     }
842     // Takes no arguments and returns nothing; the policy it applies is defined outside this header.
843     static void MaybeAddWorkingWorker();
844
845     static void NotifyWorkItemCompleted()
846     {
847         WRAPPER_NO_CONTRACT;
848         Thread::IncrementWorkerThreadPoolCompletionCount(GetThread());
849         UpdateLastDequeueTime();
850     }
851
852     static bool ShouldAdjustMaxWorkersActive()
853     {
854         WRAPPER_NO_CONTRACT;
855
856         DWORD priorTime = PriorCompletedWorkRequestsTime;
857         MemoryBarrier(); // read fresh value for NextCompletedWorkRequestsTime below
858         DWORD requiredInterval = NextCompletedWorkRequestsTime - priorTime;
859         DWORD elapsedInterval = GetTickCount() - priorTime;
860         if (elapsedInterval >= requiredInterval)
861         {
862             ThreadCounter::Counts counts = WorkerCounter.GetCleanCounts();
863             if (counts.NumActive <= counts.MaxWorking)
864                 return !IsHillClimbingDisabled;
865         }
866
867         return false;
868     }
869     // Presumably the action gated by ShouldAdjustMaxWorkersActive() - confirm in the definition.
870     static void AdjustMaxWorkersActive();
871     static bool ShouldWorkerKeepRunning();
872     // Returns a BOOL; defined outside this header.
873     static BOOL SuspendProcessing();
874     // Waits on 'ev' with a DWORD timeout and an alertable flag; returns a DWORD wait result (exact values defined by the implementation).
875     static DWORD SafeWait(CLREvent * ev, DWORD sleepTime, BOOL alertable);
876     // Thread procedure with the (LPVOID) -> DWORD WINAPI shape; defined outside this header.
877     static DWORD WINAPI WorkerThreadStart(LPVOID lpArgs);
878     // Returns a BOOL; takes the handle to wait on together with its WaitInfo.  Defined outside this header.
879     static BOOL AddWaitRequest(HANDLE waitHandle, WaitInfo* waitInfo);
880
881
882     static ThreadCB* FindWaitThread();              // returns a wait thread that can accommodate another wait request
883
884     static BOOL CreateWaitThread();
885     // WINAPI callback taking a WaitInfo*; presumably queued as an APC like DeregisterWait is in QueueDeregisterWait() - confirm.
886     static void WINAPI InsertNewWaitForSelf(WaitInfo* pArg);
887     // Returns an int index for waitHandle within threadCB; the value used for "not found" is not visible here.
888     static int FindWaitIndex(const ThreadCB* threadCB, const HANDLE waitHandle);
889     // Returns a DWORD computed over numWaits entries starting at waitInfo; units are presumably those of a wait timeout - confirm.
890     static DWORD MinimumRemainingWait(LIST_ENTRY* waitInfo, unsigned int numWaits);
891
892     static void ProcessWaitCompletion( WaitInfo* waitInfo,
893                                 unsigned index,      // array index 
894                                 BOOL waitTimedOut);
895     // Thread procedure with the (LPVOID) -> DWORD WINAPI shape; defined outside this header.
896     static DWORD WINAPI WaitThreadStart(LPVOID lpArgs);
897     // Callback with the (PVOID) -> DWORD WINAPI shape; defined outside this header.
898     static DWORD WINAPI AsyncCallbackCompletion(PVOID pArgs);
899
900     static void QueueTimerInfoForRelease(TimerInfo *pTimerInfo);
901     // Two deactivation entry points: one takes only the WaitInfo, the other also takes a DWORD index.
902     static void DeactivateWait(WaitInfo* waitInfo);
903     static void DeactivateNthWait(WaitInfo* waitInfo, DWORD index);
904
905     static void DeleteWait(WaitInfo* waitInfo);
906
907
908     inline static void ShiftWaitArray( ThreadCB* threadCB, 
909                                        ULONG SrcIndex, 
910                                        ULONG DestIndex, 
911                                        ULONG count)
912     {
913         LIMITED_METHOD_CONTRACT;
914         memmove(&threadCB->waitHandle[DestIndex],
915                &threadCB->waitHandle[SrcIndex],
916                count * sizeof(HANDLE));
917         memmove(&threadCB->waitPointer[DestIndex],
918                &threadCB->waitPointer[SrcIndex],
919                count * sizeof(LIST_ENTRY));
920     }
921     // Queued as a user APC by QueueDeregisterWait(), which casts it to PAPCFUNC and passes the WaitInfo* as the APC argument.
922     static void WINAPI DeregisterWait(WaitInfo* pArgs);
923
924 #ifndef FEATURE_PAL
925     // holds the aggregate of system cpu usage of all processors; passed by pointer to GetCPUBusyTime_NT (non-PAL builds)
926     typedef struct _PROCESS_CPU_INFORMATION
927     {
928         LARGE_INTEGER idleTime;     // aggregate idle time (units not visible here)
929         LARGE_INTEGER kernelTime;   // aggregate kernel time
930         LARGE_INTEGER userTime;     // aggregate user time
931         DWORD_PTR affinityMask;     // presumably the process affinity mask - confirm where it is filled in
932         int  numberOfProcessors;
933         SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION* usageBuffer;  // buffer of per-processor performance records
934         int  usageBufferSize;       // size of usageBuffer; bytes vs. entries is not visible here - TODO confirm
935     } PROCESS_CPU_INFORMATION;
936     // Returns an int; pOldInfo is presumably both read and updated (previous sample in, new sample out) - confirm in the definition.
937     static int GetCPUBusyTime_NT(PROCESS_CPU_INFORMATION* pOldInfo);
938     static BOOL CreateCompletionPortThread(LPVOID lpArgs);
939     static DWORD WINAPI CompletionPortThreadStart(LPVOID lpArgs);       // thread procedure; defined outside this header
940 public:
941     inline static bool HaveNativeWork()
942     {
943         LIMITED_METHOD_CONTRACT;
944         return WorkRequestHead != NULL;
945     }
946     // Completion-port growth: the BOOL predicate takes a ThreadCounter::Counts snapshot by value.  Both are defined outside this header.
947     static void GrowCompletionPortThreadpoolIfNeeded();
948     static BOOL ShouldGrowCompletionPortThreadpool(ThreadCounter::Counts counts);
949 #else
950     static int GetCPUBusyTime_NT(PAL_IOCP_CPU_INFORMATION* pOldInfo);   // FEATURE_PAL flavor: same name, takes a PAL_IOCP_CPU_INFORMATION* instead
951
952 #endif // !FEATURE_PAL
953
954 private:
955     static BOOL IsIoPending();      // returns a BOOL; defined outside this header
956     // Gate thread management (see the GATE_THREAD_STATUS_* values and GateThreadStatus); all defined outside this header.
957     static BOOL CreateGateThread();
958     static void EnsureGateThreadRunning();
959     static bool ShouldGateThreadKeepRunning();
960     static DWORD WINAPI GateThreadStart(LPVOID lpArgs);
961     static BOOL SufficientDelaySinceLastSample(unsigned int LastThreadCreationTime, 
962                                                unsigned NumThreads, // total number of threads of that type (worker or CP)
963                                                double   throttleRate=0.0 // the delay is increased by this percentage for each extra thread
964                                                );
965     static BOOL SufficientDelaySinceLastDequeue();
966     // Returns an LPVOID for the requested MemType; presumably served from RecycledLists - confirm in the definition.
967     static LPVOID   GetRecycledMemory(enum MemType memType);
968     // Timer declarations; all are defined outside this header.  The WINAPI ones have callback-style signatures.
969     static DWORD WINAPI TimerThreadStart(LPVOID args);
970     static void TimerThreadFire(); // helper method used by TimerThreadStart
971     static void WINAPI InsertNewTimer(TimerInfo* pArg);
972     static DWORD FireTimers();                                          // returns a DWORD; presumably a time until the next timer is due - confirm
973     static DWORD WINAPI AsyncTimerCallbackCompletion(PVOID pArgs);
974     static void DeactivateTimer(TimerInfo* timerInfo);
975     static DWORD WINAPI AsyncDeleteTimer(PVOID pArgs);
976     static void DeleteTimer(TimerInfo* timerInfo);
977     static void WINAPI UpdateTimer(TimerUpdateInfo* pArgs);
978
979     static void WINAPI DeregisterTimer(TimerInfo* pArgs);
980
981     inline static DWORD QueueDeregisterWait(HANDLE waitThread, WaitInfo* waitInfo)
982     {
983         CONTRACTL
984         {
985             NOTHROW;         
986             MODE_ANY;
987             GC_NOTRIGGER;
988         }
989         CONTRACTL_END;
990
991         DWORD result = QueueUserAPC(reinterpret_cast<PAPCFUNC>(DeregisterWait), waitThread, reinterpret_cast<ULONG_PTR>(waitInfo));
992         SetWaitThreadAPCPending();
993         return result;
994     }
995
996
997     inline static void SetWaitThreadAPCPending() {IsApcPendingOnWaitThread = TRUE;}
998     inline static void ResetWaitThreadAPCPending() {IsApcPendingOnWaitThread = FALSE;}  
999     inline static BOOL IsWaitThreadAPCPending()  {return IsApcPendingOnWaitThread;}
1000
1001 #ifdef _DEBUG
1002     inline static DWORD GetTickCount()
1003     {
1004         LIMITED_METHOD_CONTRACT;
1005         return ::GetTickCount() + TickCountAdjustment;
1006     }
1007 #endif 
1008
1009 #endif // #ifndef DACCESS_COMPILE
1010     // Private variables
1011
1012     static LONG Initialization;                         // indicator of whether the threadpool is initialized; IsInitialized() tests it against -1.
1013
1014     SVAL_DECL(LONG,MinLimitTotalWorkerThreads);         // same as MinLimitTotalCPThreads
1015     SVAL_DECL(LONG,MaxLimitTotalWorkerThreads);         // same as MaxLimitTotalCPThreads
1016         
1017     DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static unsigned int LastDequeueTime;      // used to determine if work items are getting thread starved
1018     
1019     static HillClimbing HillClimbingInstance;
1020     // Sampling state read by ShouldAdjustMaxWorkersActive(): the adjustment is considered once GetTickCount() has moved from Prior...Time by at least (Next...Time - Prior...Time).
1021     DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG PriorCompletedWorkRequests;
1022     static DWORD PriorCompletedWorkRequestsTime;        // tick count at the start of the current interval
1023     static DWORD NextCompletedWorkRequestsTime;         // tick count at which the current interval is due to end
1024
1025     static LARGE_INTEGER CurrentSampleStartTime;
1026
1027     static unsigned int WorkerThreadSpinLimit;
1028     static bool IsHillClimbingDisabled;                 // when true, ShouldAdjustMaxWorkersActive() never returns true
1029     static int ThreadAdjustmentInterval;
1030
1031     SPTR_DECL(WorkRequest,WorkRequestHead);             // Head of work request queue
1032     SPTR_DECL(WorkRequest,WorkRequestTail);             // Tail of work request queue
1033
1034     static unsigned int LastCPThreadCreation;           // last time a completion port thread was created
1035     static unsigned int NumberOfProcessors;             // = NumberOfWorkerThreads - no. of blocked threads.  NOTE(review): this formula does not obviously describe a processor count; the comment may be stale - confirm.
1036
1037     static BOOL IsApcPendingOnWaitThread;               // Indicates if an APC is pending on the wait thread
1038
1039     // This needs to be non-hosted, because worker threads can run prior to EE startup.
1040     static DangerousNonHostedSpinLock ThreadAdjustmentLock;
1041
1042 public:
1043     static CrstStatic WorkerCriticalSection;
1044
1045 private:
1046     static const DWORD WorkerTimeout = 20 * 1000;       // 20000; presumably milliseconds, i.e. 20 seconds - confirm at the wait call site
1047     static const DWORD WorkerTimeoutAppX = 5 * 1000;    // shorter timeout to allow threads to exit prior to app suspension
1048
1049     DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) SVAL_DECL(ThreadCounter,WorkerCounter);
1050
1051     // NOTE(review): the two comments below speak of UnfairSemaphore and CLRSemaphore, but both variables are declared as CLRLifoSemaphore*; the comments may predate a type change - confirm.
1052     // WorkerSemaphore is an UnfairSemaphore because:
1053     // 1) Threads enter and exit this semaphore very frequently, and thus benefit greatly from the spinning done by UnfairSemaphore
1054     // 2) There is no functional reason why any particular thread should be preferred when waking workers.  This only impacts performance, 
1055     //    and un-fairness helps performance in this case.
1056     //
1057     static CLRLifoSemaphore* WorkerSemaphore;
1058
1059     //
1060     // RetiredWorkerSemaphore is a regular CLRSemaphore, not an UnfairSemaphore, because if a thread waits on this semaphore it is almost certainly
1061     // NOT going to be released soon, so the spinning done in UnfairSemaphore only burns valuable CPU time.  However, if UnfairSemaphore is ever 
1062     // implemented in terms of a Win32 IO Completion Port, we should reconsider this.  The IOCP's LIFO unblocking behavior could help keep working set
1063     // down, by constantly re-using the same small set of retired workers rather than round-robining between all of them as CLRSemaphore will do.
1064     // If we go that route, we should add a "no-spin" option to UnfairSemaphore.Wait to avoid wasting CPU.
1065     //
1066     static CLRLifoSemaphore* RetiredWorkerSemaphore;
1067
1068     static CLREvent * RetiredCPWakeupEvent;    
1069     
1070     static CrstStatic WaitThreadsCriticalSection;       // presumably guards WaitThreadsHead - confirm at the use sites
1071     static LIST_ENTRY WaitThreadsHead;                  // queue of wait threads, each thread can handle up to 64 waits
1072
1073     static TimerInfo *TimerInfosToBeRecycled;           // list of delegate infos associated with deleted timers
1074     static CrstStatic TimerQueueCriticalSection;        // critical section to synchronize timer queue access
1075     SVAL_DECL(LIST_ENTRY,TimerQueue);                   // queue of timers
1076     static HANDLE TimerThread;                          // Currently we only have one timer thread
1077     static Thread*  pTimerThread;
1078     DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static DWORD LastTickCount;      // the count just before timer thread goes to sleep
1079
1080     static BOOL InitCompletionPortThreadpool;           // flag indicating whether completion port threadpool has been initialized
1081     static HANDLE GlobalCompletionPort;                 // used for binding io completions on file handles
1082
1083 public:
1084     SVAL_DECL(ThreadCounter,CPThreadCounter);           // completion port counterpart of WorkerCounter (same ThreadCounter type)
1085
1086 private:
1087     SVAL_DECL(LONG,MaxLimitTotalCPThreads);             // = MaxLimitCPThreadsPerCPU * number of CPUS
1088     SVAL_DECL(LONG,MinLimitTotalCPThreads);             
1089     SVAL_DECL(LONG,MaxFreeCPThreads);                   // = MaxFreeCPThreadsPerCPU * Number of CPUS
1090
1091     DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG GateThreadStatus;    // See GateThreadStatus enumeration (the GATE_THREAD_STATUS_* defines)
1092
1093     static Volatile<LONG> NumCPInfrastructureThreads;   // number of threads currently busy handling draining cycle
1094
1095     SVAL_DECL(LONG,cpuUtilization);
1096     static LONG cpuUtilizationAverage;
1097
1098     DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static RecycledListsWrapper RecycledLists;
1099
1100 #ifdef _DEBUG
1101     static DWORD   TickCountAdjustment;                 // add this value to value returned by GetTickCount (applied by the debug-only GetTickCount() wrapper in this class)
1102 #endif
1103     // NOTE(review): the purpose of offset_counter / offset_multiplier is not visible in this part of the file - confirm at the use sites.
1104     DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static int offset_counter;
1105     static const int offset_multiplier = 128;
1106 };
1107
1108
1109
1110
1111 #endif // _WIN32THREADPOOL_H