2 Bullet Continuous Collision Detection and Physics Library
3 Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
16 #if defined(_WIN32) && BT_THREADSAFE
18 #include "LinearMath/btScalar.h"
19 #include "LinearMath/btMinMax.h"
20 #include "LinearMath/btAlignedObjectArray.h"
21 #include "LinearMath/btThreads.h"
22 #include "btThreadSupportInterface.h"
// Summary of the machine's processor topology. It is filled in by
// getProcessorInformation() and queried by getProcessorTeamMask() /
// getProcessorTeamIndex().
// NOTE(review): this listing omits some lines of the struct; other counters that the
// rest of the file uses (numCores, numNumaNodes, numL1Cache, numL2Cache, numL3Cache,
// numTeamMasks) are presumably declared on the lines that are not shown - confirm.
26 struct btProcessorInfo
// sum of countSetBits(ProcessorMask) over all RelationProcessorCore records
28 int numLogicalProcessors;
// incremented once per RelationProcessorPackage record
34 int numPhysicalPackages;
// capacity of processorTeamMasks; L3 cache records beyond this are not stored
35 static const int maxNumTeamMasks = 32;
// one processor bitmask per stored L3 cache record (a "team"); bit N is tested for processor id N
37 UINT64 processorTeamMasks[maxNumTeamMasks];
// Returns the first team mask in procInfo that has the bit for processor procId set.
// The value returned when no team contains procId is not visible in this listing.
// NOTE(review): procId is used as the shift count of a 64-bit value, so it presumably
// has to stay within [0, 63] - confirm against callers.
40 UINT64 getProcessorTeamMask(const btProcessorInfo& procInfo, int procId)
// single-bit mask that identifies the processor
42 UINT64 procMask = UINT64(1) << procId;
43 for (int i = 0; i < procInfo.numTeamMasks; ++i)
45 if (procMask & procInfo.processorTeamMasks[i])
47 return procInfo.processorTeamMasks[i];
// Searches the team masks in procInfo for one that has the bit for processor procId set.
// The return statements are not visible in this listing; presumably the index of the
// first matching team is returned, with some sentinel when nothing matches - confirm.
// NOTE(review): as in getProcessorTeamMask(), procId is used as a 64-bit shift count.
53 int getProcessorTeamIndex(const btProcessorInfo& procInfo, int procId)
// single-bit mask that identifies the processor
55 UINT64 procMask = UINT64(1) << procId;
56 for (int i = 0; i < procInfo.numTeamMasks; ++i)
58 if (procMask & procInfo.processorTeamMasks[i])
// The body is not visible in this listing. Judging by the name and by its callers
// (which add its result for a processor mask to a processor count), it presumably
// returns the number of 1 bits in `bits` - confirm.
66 int countSetBits(ULONG64 bits)
// Function-pointer type used to call kernel32's "GetLogicalProcessorInformation" after
// it has been looked up by name with GetProcAddress in getProcessorInformation().
80 typedef BOOL(WINAPI* Pfn_GetLogicalProcessorInformation)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
// Fills *procInfo with processor topology counts obtained from the Win32
// GetLogicalProcessorInformation API. The struct is zeroed first, so every counter
// that is not updated below stays 0.
// NOTE(review): this listing omits several lines (the retry loop around the query,
// the early returns, the #else/#endif of the UWP guard and at least one `case`
// label); comments below that depend on them are marked as assumptions.
82 void getProcessorInformation(btProcessorInfo* procInfo)
// start from an all-zero result
84 memset(procInfo, 0, sizeof(*procInfo));
85 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
86 // Can't dlopen libraries on UWP.
// resolve the API at run time instead of linking against it directly
89 Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
90 (Pfn_GetLogicalProcessorInformation)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
// API not exported by this kernel32; presumably returns with the zeroed struct - confirm
91 if (getLogicalProcInfo == NULL)
96 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
// query the records; a successful call means buf holds them
100 if (getLogicalProcInfo(buf, &bufSize))
// buffer too small: allocate the size reported in bufSize (presumably the call is then retried - confirm)
106 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
// NOTE(review): the matching free() for this allocation is not visible in this listing - confirm it exists
112 buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(bufSize);
// number of records in the buffer
117 int len = bufSize / sizeof(*buf);
118 for (int i = 0; i < len; ++i)
120 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
// tally each record according to the kind of relationship it describes
121 switch (info->Relationship)
123 case RelationNumaNode:
124 procInfo->numNumaNodes++;
127 case RelationProcessorCore:
128 procInfo->numCores++;
// every set bit in the core's mask is counted as one logical processor
129 procInfo->numLogicalProcessors += countSetBits(info->ProcessorMask);
// cache records (the case label is not visible here; presumably RelationCache), counted per cache level
133 if (info->Cache.Level == 1)
135 procInfo->numL1Cache++;
137 else if (info->Cache.Level == 2)
139 procInfo->numL2Cache++;
141 else if (info->Cache.Level == 3)
143 procInfo->numL3Cache++;
144 // processors that share L3 cache are considered to be on the same team
145 // because they can more easily work together on the same data.
146 // Large performance penalties will occur if 2 or more threads from different
147 // teams attempt to frequently read and modify the same cache lines.
149 // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
150 // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
151 // CCXs are operating on the same data, many cycles will be spent keeping the
152 // two caches coherent.
// store the L3 cache's processor mask as a new team; extra L3 caches are ignored once the table is full
153 if (procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks)
155 procInfo->processorTeamMasks[procInfo->numTeamMasks] = info->ProcessorMask;
156 procInfo->numTeamMasks++;
161 case RelationProcessorPackage:
162 procInfo->numPhysicalPackages++;
170 ///btThreadSupportWin32 implements btThreadSupportInterface using Win32 threads and events
// NOTE(review): this listing omits some lines of the class (access specifiers, some
// members such as m_numThreads and the btThreadStatus fields m_taskId / m_commandId /
// m_status are used elsewhere in the file but their declarations are not visible here).
171 class btThreadSupportWin32 : public btThreadSupportInterface
// per-worker state; a pointer to one of these is handed to each worker thread
174 struct btThreadStatus
// function the worker calls with m_userPtr for every task
180 ThreadFunc m_userThreadFunc;
181 void* m_userPtr; //for taskDesc etc
183 void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
// event the worker waits on; set by runTask() and stopThreads()
185 void* m_eventStartHandle;
// name passed to CreateEventA for the start event
186 char m_eventStartHandleName[32];
// event the worker sets after running a task or when exiting
188 void* m_eventCompleteHandle;
// name passed to CreateEventA for the complete event
189 char m_eventCompleteHandleName[32];
// one entry per worker thread
193 btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
// the workers' complete-event handles, in worker order, for WaitForMultipleObjects
194 btAlignedObjectArray<void*> m_completeHandles;
// bit N is set by runTask(N) and cleared by waitForResponse() when worker N reports completion
196 DWORD_PTR m_startedThreadMask;
197 btProcessorInfo m_processorInfo;
// creates the worker threads and their events
199 void startThreads(const ConstructionInfo& threadInfo);
// blocks until one worker signals completion
201 int waitForResponse();
204 btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo);
205 virtual ~btThreadSupportWin32();
207 virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
// countSetBits() of the first team mask (0 teams recorded leaves that mask zeroed)
208 virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
// NOTE(review): integer division; numCores stays 0 when getProcessorInformation() records no
// RelationProcessorCore entry, which would make this a division by zero - confirm callers/guards
209 virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
// hands userData to worker threadIndex and wakes it
211 virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
// blocks until no started task is outstanding
212 virtual void waitForAllTasks() BT_OVERRIDE;
// critical sections created here must be released with deleteCriticalSection()
214 virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
215 virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
// Constructor: creates the worker threads right away by forwarding the construction
// info to startThreads().
218 btThreadSupportWin32::btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo)
220 startThreads(threadConstructionInfo);
// Destructor. The body is not visible in this listing; presumably it shuts the
// worker threads down (see stopThreads()) - confirm.
223 btThreadSupportWin32::~btThreadSupportWin32()
// Entry point of every worker thread; lpParam is the worker's btThreadStatus (it is
// passed as lpParameter to CreateThread in startThreads()).
// NOTE(review): the loop and the condition that selects between the two paths below
// are not visible in this listing. stopThreads() clears m_userPtr before signalling
// the start event, so presumably a null userPtr selects the exit path - confirm.
228 DWORD WINAPI win32threadStartFunc(LPVOID lpParam)
230 btThreadSupportWin32::btThreadStatus* status = (btThreadSupportWin32::btThreadStatus*)lpParam;
// sleep until runTask() or stopThreads() signals the start event
234 WaitForSingleObject(status->m_eventStartHandle, INFINITE);
235 void* userPtr = status->m_userPtr;
// task path: run the user function, mark the task finished (status 2) and signal completion
239 btAssert(status->m_status);
240 status->m_userThreadFunc(userPtr);
241 status->m_status = 2;
242 SetEvent(status->m_eventCompleteHandle);
// exit path: mark the worker as exiting (status 3), log, and signal completion one last time
247 status->m_status = 3;
248 printf("Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle);
249 SetEvent(status->m_eventCompleteHandle);
253 printf("Thread TERMINATED\n");
// Hands userData to worker threadIndex and wakes it up. The call does not wait for
// the task; completion is collected by waitForResponse() / waitForAllTasks().
257 void btThreadSupportWin32::runTask(int threadIndex, void* userData)
// NOTE(review): the array is indexed here before the two range asserts below have run;
// checking first would be the more conventional order
259 btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex];
260 btAssert(threadIndex >= 0);
261 btAssert(int(threadIndex) < m_activeThreadStatus.size());
// publish the task to the worker: status 1 marks it as started, m_userPtr carries the payload
263 threadStatus.m_commandId = 1;
264 threadStatus.m_status = 1;
265 threadStatus.m_userPtr = userData;
// remember that this worker has an outstanding task
266 m_startedThreadMask |= DWORD_PTR(1) << threadIndex;
268 ///fire event to start new task
269 SetEvent(threadStatus.m_eventStartHandle);
// Blocks until any one worker signals its complete event, then marks that worker as
// idle and clears its bit in m_startedThreadMask. The declaration of `last` and the
// return statement are not visible in this listing; presumably the index of the
// finished worker is returned - confirm.
272 int btThreadSupportWin32::waitForResponse()
274 btAssert(m_activeThreadStatus.size());
// bWaitAll is FALSE: return as soon as a single complete event is signalled.
// NOTE(review): WaitForMultipleObjects accepts at most MAXIMUM_WAIT_OBJECTS handles,
// so the worker count presumably has to stay within that limit - confirm.
277 DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
278 btAssert(res != WAIT_FAILED);
// m_completeHandles is in worker order, so the offset from WAIT_OBJECT_0 is the worker index
279 last = res - WAIT_OBJECT_0;
281 btThreadStatus& threadStatus = m_activeThreadStatus[last];
282 btAssert(threadStatus.m_threadHandle);
283 btAssert(threadStatus.m_eventCompleteHandle);
285 //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
// the worker sets status 2 (task done) or 3 (exiting) before signalling
286 btAssert(threadStatus.m_status > 1);
287 threadStatus.m_status = 0;
289 ///the worker that just finished no longer has an outstanding task
291 m_startedThreadMask &= ~(DWORD_PTR(1) << last);
// Blocks until no worker has an outstanding task, i.e. until m_startedThreadMask is 0.
// The loop body is not visible in this listing; presumably it calls waitForResponse(),
// which clears one worker's bit per call - confirm.
296 void btThreadSupportWin32::waitForAllTasks()
298 while (m_startedThreadMask)
// Queries the processor topology, pins the calling (main) thread to the team of
// processor 0, and creates the worker threads together with their start/complete
// events. Each worker gets processor id (index + 1) as its ideal processor and is
// given the affinity mask of that processor's team.
// NOTE(review): this listing omits some lines (braces, guards such as the check
// around SetThreadAffinityMask for workers, and any update of uniqueId).
304 void btThreadSupportWin32::startThreads(const ConstructionInfo& threadConstructionInfo)
// used as part of the event names below
306 static int uniqueId = 0;
308 btProcessorInfo& procInfo = m_processorInfo;
309 getProcessorInformation(&procInfo);
310 DWORD_PTR dwProcessAffinityMask = 0;
311 DWORD_PTR dwSystemAffinityMask = 0;
// if the process affinity cannot be read, fall back to 0 (every mask computed from it is then 0)
312 if (!GetProcessAffinityMask(GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask))
314 dwProcessAffinityMask = 0;
316 ///The number of threads should be equal to the number of available cores - 1
// NOTE(review): when getProcessorInformation() reports 0 logical processors this evaluates
// to -1, which is then passed to resize() below - confirm this cannot happen or is handled
317 m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
319 m_activeThreadStatus.resize(m_numThreads);
320 m_completeHandles.resize(m_numThreads);
321 m_startedThreadMask = 0;
323 // set main thread affinity
// only applied when processor 0's team and the process affinity have processors in common
324 if (DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask(procInfo, 0))
326 SetThreadAffinityMask(GetCurrentThread(), mask);
327 SetThreadIdealProcessor(GetCurrentThread(), 0);
330 for (int i = 0; i < m_numThreads; i++)
332 printf("starting thread %d\n", i);
334 btThreadStatus& threadStatus = m_activeThreadStatus[i];
// arguments for CreateThread: default security, caller-supplied stack size, no creation
// flags, and no thread id requested
336 LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
337 SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
338 LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
// the worker receives a pointer to its own status entry
339 LPVOID lpParameter = &threadStatus;
340 DWORD dwCreationFlags = 0;
341 LPDWORD lpThreadId = 0;
343 threadStatus.m_userPtr = 0;
// Event names are "es"/"ec" + up to 8 characters of m_uniqueName + uniqueId + worker index.
// NOTE(review): sprintf does not bound the write into the 32-byte name buffers; this fits
// for realistic id values but is not enforced - confirm.
// NOTE(review): CreateEventA with a name returns a handle to an already existing event of
// that name, so the names need to be unique among everything sharing the namespace - confirm.
345 sprintf(threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
// created with bManualReset = false and bInitialState = false
346 threadStatus.m_eventStartHandle = CreateEventA(0, false, false, threadStatus.m_eventStartHandleName);
348 sprintf(threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
349 threadStatus.m_eventCompleteHandle = CreateEventA(0, false, false, threadStatus.m_eventCompleteHandleName);
// keep a parallel array of complete events for WaitForMultipleObjects in waitForResponse()
351 m_completeHandles[i] = threadStatus.m_eventCompleteHandle;
// NOTE(review): the returned handle is not checked for failure in the visible lines
353 HANDLE handle = CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId);
354 //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
355 // highest priority -- can cause erratic performance when numThreads > numCores
356 // we don't want worker threads to be higher priority than the main thread or the main thread could get
357 // totally shut out and unable to tell the workers to stop
358 //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
361 int processorId = i + 1; // leave processor 0 for main thread
362 DWORD_PTR teamMask = getProcessorTeamMask(procInfo, processorId);
365 // bind each thread to only execute on processors of its assigned team
366 // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
367 // - for multi-socket Intel this will keep threads from migrating from one socket to another
368 // - for AMD Ryzen this will keep threads from migrating from one CCX to another
// restrict the team to the processors this process is allowed to run on
369 DWORD_PTR mask = teamMask & dwProcessAffinityMask;
372 SetThreadAffinityMask(handle, mask);
375 SetThreadIdealProcessor(handle, processorId);
// these fields are filled in after CreateThread; the worker blocks on its start event first
378 threadStatus.m_taskId = i;
379 threadStatus.m_commandId = 0;
380 threadStatus.m_status = 0;
381 threadStatus.m_threadHandle = handle;
382 threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
384 printf("started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle);
388 ///shut down all worker threads and release their Win32 handles
// NOTE(review): braces are not visible in this listing, so the exact extent of the
// `if` below is an assumption.
389 void btThreadSupportWin32::stopThreads()
391 for (int i = 0; i < m_activeThreadStatus.size(); i++)
393 btThreadStatus& threadStatus = m_activeThreadStatus[i];
// a non-zero status means a task was started and not yet collected: wait for its completion event
394 if (threadStatus.m_status > 0)
396 WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
// wake the worker with a null user pointer (presumably its signal to exit - see
// win32threadStartFunc) and wait until it signals completion again
399 threadStatus.m_userPtr = NULL;
400 SetEvent(threadStatus.m_eventStartHandle);
401 WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
// release the worker's events and its thread handle
403 CloseHandle(threadStatus.m_eventCompleteHandle);
404 CloseHandle(threadStatus.m_eventStartHandle);
405 CloseHandle(threadStatus.m_threadHandle);
408 m_activeThreadStatus.clear();
409 m_completeHandles.clear();
// btCriticalSection implementation backed by a Win32 CRITICAL_SECTION, which is
// initialized in the constructor and deleted in the destructor.
// NOTE(review): the signatures of the two methods that enter and leave the critical
// section are not visible in this listing (presumably lock() and unlock()).
412 class btWin32CriticalSection : public btCriticalSection
415 CRITICAL_SECTION mCriticalSection;
418 btWin32CriticalSection()
420 InitializeCriticalSection(&mCriticalSection);
423 ~btWin32CriticalSection()
425 DeleteCriticalSection(&mCriticalSection);
// acquire: blocks until the calling thread owns the critical section
430 EnterCriticalSection(&mCriticalSection);
// release
435 LeaveCriticalSection(&mCriticalSection);
// Creates a btWin32CriticalSection in storage obtained from btAlignedAlloc (16-byte
// alignment requested). The return statement is not visible in this listing;
// presumably `cs` is returned. Objects created here are released with
// deleteCriticalSection(), which uses the matching btAlignedFree.
439 btCriticalSection* btThreadSupportWin32::createCriticalSection()
441 unsigned char* mem = (unsigned char*)btAlignedAlloc(sizeof(btWin32CriticalSection), 16);
// placement new: construct the object inside the aligned block
442 btWin32CriticalSection* cs = new (mem) btWin32CriticalSection();
// Destroys a critical section made by createCriticalSection(): the destructor is
// invoked explicitly (the object was placement-constructed) and the storage is
// returned with btAlignedFree.
// NOTE(review): the destructor is called through the base type, which reaches the
// derived destructor only if ~btCriticalSection is virtual - confirm in its header.
446 void btThreadSupportWin32::deleteCriticalSection(btCriticalSection* criticalSection)
448 criticalSection->~btCriticalSection();
449 btAlignedFree(criticalSection);
// Factory for this platform: returns a heap-allocated btThreadSupportWin32 built
// from `info` (its constructor starts the worker threads). The object is created
// with plain `new`; the caller presumably owns it and deletes it - confirm.
452 btThreadSupportInterface* btThreadSupportInterface::create(const ConstructionInfo& info)
454 return new btThreadSupportWin32(info);
457 #endif //defined(_WIN32) && BT_THREADSAFE