3 ***************************************************************************************************
7 ** Real-Time Hierarchical Profiling for Game Programming Gems 3
9 ** by Greg Hjelstrom & Byon Garrabrant
11 ***************************************************************************************************/
13 // Credits: The Clock class was inspired by the Timer classes in
14 // Ogre (www.ogre3d.org).
16 #include "btQuickprof.h"
17 #include "btThreads.h"
20 #include <sys/sys_time.h>
21 #include <sys/time_util.h>
25 #if defined(SUNOS) || defined(__SUNOS__)
29 #include <mach/mach_time.h>
30 #include <TargetConditionals.h>
33 #if defined(WIN32) || defined(_WIN32)
35 #define BT_USE_WINDOWS_TIMERS
36 #define WIN32_LEAN_AND_MEAN
47 #define GetTickCount64 GetTickCount
57 #ifdef BT_LINUX_REALTIME
58 //required linking against rt (librt)
60 #endif //BT_LINUX_REALTIME
/// Evaluates to the smaller of a and b.
/// Both arguments are parenthesized so that expressions expand safely; each argument
/// can still be evaluated twice, so do not pass expressions with side effects.
#define mymin(a, b) (((a) < (b)) ? (a) : (b))
68 #ifdef BT_USE_WINDOWS_TIMERS
69 LARGE_INTEGER mClockFrequency;
71 LARGE_INTEGER mStartTime;
77 uint64_t mStartTimeNano;
79 struct timeval mStartTime;
81 #endif //__CELLOS_LV2__
84 ///The btClock is a portable basic clock that measures accurate time in seconds; it is used for profiling.
87 m_data = new btClockData;
88 #ifdef BT_USE_WINDOWS_TIMERS
89 QueryPerformanceFrequency(&m_data->mClockFrequency);
/// Copy constructor: allocates a separate btClockData for this clock and copies
/// the contents of other's btClockData into it, so the two clocks do not share storage.
99 btClock::btClock(const btClock& other)
101 m_data = new btClockData;
102 *m_data = *other.m_data;
/// Copy assignment: overwrites the contents of this clock's btClockData with a copy of
/// other's data (the pointed-to data is copied, not the m_data pointer itself).
105 btClock& btClock::operator=(const btClock& other)
107 *m_data = *other.m_data;
111 /// Resets the initial reference time.
/// The current reading of the timer source selected at compile time is stored in m_data
/// and becomes the start value the getTime*() functions subtract from.
112 void btClock::reset()
114 #ifdef BT_USE_WINDOWS_TIMERS
// Windows timers: store the performance-counter reading and the tick count.
115 QueryPerformanceCounter(&m_data->mStartTime);
116 m_data->mStartTick = GetTickCount64();
118 #ifdef __CELLOS_LV2__
120 typedef uint64_t ClockSize;
122 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
// Cell: read the timebase through SYS_TIMEBASE_GET and keep it as the start time.
123 SYS_TIMEBASE_GET(newTime);
124 m_data->mStartTime = newTime;
// mach_absolute_time() reading, kept in mStartTimeNano.
127 m_data->mStartTimeNano = mach_absolute_time();
// gettimeofday() reading, kept in the timeval mStartTime.
129 gettimeofday(&m_data->mStartTime, 0);
134 /// Returns the time in ms since the last call to reset or since
135 /// the btClock was created.
/// Each platform branch takes a fresh reading from the same timer source reset() uses
/// and subtracts the start value stored in m_data.
136 unsigned long long int btClock::getTimeMilliseconds()
138 #ifdef BT_USE_WINDOWS_TIMERS
139 LARGE_INTEGER currentTime;
140 QueryPerformanceCounter(&currentTime);
141 LONGLONG elapsedTime = currentTime.QuadPart -
142 m_data->mStartTime.QuadPart;
143 // Compute the number of millisecond ticks elapsed.
// Kept as unsigned long long (the function's return type): unsigned long is only
// 32 bits wide on Windows and would wrap after roughly 49.7 days of milliseconds.
144 unsigned long long msecTicks = (unsigned long long)(1000 * elapsedTime /
145 m_data->mClockFrequency.QuadPart);
150 #ifdef __CELLOS_LV2__
151 uint64_t freq = sys_time_get_timebase_frequency();
// dFreq is the number of timebase ticks per millisecond.
152 double dFreq = ((double)freq) / 1000.0;
153 typedef uint64_t ClockSize;
155 SYS_TIMEBASE_GET(newTime);
156 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
158 return (unsigned long int)((double(newTime - m_data->mStartTime)) / dFreq);
161 struct timeval currentTime;
162 gettimeofday(&currentTime, 0);
// Whole seconds converted to ms, plus the microsecond difference converted to ms.
163 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 +
164 (currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
165 #endif //__CELLOS_LV2__
169 /// Returns the time in us since the last call to reset or since
170 /// the btClock was created.
/// Each platform branch takes a fresh reading from the same timer source reset() uses
/// and subtracts the start value stored in m_data.
171 unsigned long long int btClock::getTimeMicroseconds()
173 #ifdef BT_USE_WINDOWS_TIMERS
174 //see https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx
175 LARGE_INTEGER currentTime, elapsedTime;
177 QueryPerformanceCounter(&currentTime);
178 elapsedTime.QuadPart = currentTime.QuadPart -
179 m_data->mStartTime.QuadPart;
// Scale to microseconds before dividing by the counter frequency so that the
// integer division does not discard sub-tick precision.
180 elapsedTime.QuadPart *= 1000000;
181 elapsedTime.QuadPart /= m_data->mClockFrequency.QuadPart;
183 return (unsigned long long)elapsedTime.QuadPart;
186 #ifdef __CELLOS_LV2__
187 uint64_t freq = sys_time_get_timebase_frequency();
// dFreq is the number of timebase ticks per microsecond.
188 double dFreq = ((double)freq) / 1000000.0;
189 typedef uint64_t ClockSize;
191 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
192 SYS_TIMEBASE_GET(newTime);
194 return (unsigned long int)((double(newTime - m_data->mStartTime)) / dFreq);
197 struct timeval currentTime;
198 gettimeofday(&currentTime, 0);
// Whole seconds converted to us, plus the microsecond difference.
199 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 +
200 (currentTime.tv_usec - m_data->mStartTime.tv_usec);
201 #endif //__CELLOS_LV2__
/// Returns a time value in nanoseconds. The Windows, Cell, mach and gettimeofday branches
/// return the time elapsed since the start value stored by reset().
205 unsigned long long int btClock::getTimeNanoseconds()
207 #ifdef BT_USE_WINDOWS_TIMERS
208 //see https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx
209 LARGE_INTEGER currentTime, elapsedTime;
211 QueryPerformanceCounter(&currentTime);
212 elapsedTime.QuadPart = currentTime.QuadPart -
213 m_data->mStartTime.QuadPart;
// Scale to nanoseconds before dividing by the counter frequency.
214 elapsedTime.QuadPart *= 1000000000;
215 elapsedTime.QuadPart /= m_data->mClockFrequency.QuadPart;
217 return (unsigned long long)elapsedTime.QuadPart;
220 #ifdef __CELLOS_LV2__
221 uint64_t freq = sys_time_get_timebase_frequency();
// dFreq is the number of timebase ticks per nanosecond.
222 double dFreq = ((double)freq) / 1e9;
223 typedef uint64_t ClockSize;
225 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
226 SYS_TIMEBASE_GET(newTime);
228 return (unsigned long int)((double(newTime - m_data->mStartTime)) / dFreq);
231 uint64_t ticks = mach_absolute_time() - m_data->mStartTimeNano;
// Tick-to-nanosecond factor, computed once and cached in a function-local static.
232 static long double conversion = 0.0L;
233 if (0.0L == conversion)
235 // attempt to get conversion to nanoseconds
236 mach_timebase_info_data_t info;
237 int err = mach_timebase_info(&info);
// Divide in floating point: numer and denom are integers, so an integer division
// would truncate the ratio (e.g. 125/3 would become 41, and numer < denom would give 0).
243 conversion = (long double)info.numer / (long double)info.denom;
245 return (ticks * conversion);
249 #ifdef BT_LINUX_REALTIME
// NOTE(review): this branch returns the absolute CLOCK_REALTIME reading; unlike the other
// branches it does not subtract the start time stored by reset() - confirm this is intended.
251 clock_gettime(CLOCK_REALTIME, &ts);
// The ULL literal forces 64-bit arithmetic even where time_t is 32 bits wide.
252 return 1000000000ULL * ts.tv_sec + ts.tv_nsec;
254 struct timeval currentTime;
255 gettimeofday(&currentTime, 0);
// The 1e9 factor makes this expression a double before it is converted to the return type.
256 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1e9 +
257 (currentTime.tv_usec - m_data->mStartTime.tv_usec) * 1000;
258 #endif //BT_LINUX_REALTIME
261 #endif //__CELLOS_LV2__
265 /// Returns the time in s since the last call to reset or since
266 /// the btClock was created.
/// Computed as getTimeMicroseconds() converted to btScalar and multiplied by 1e-6.
267 btScalar btClock::getTimeSeconds()
269 static const btScalar microseconds_to_seconds = btScalar(0.000001);
270 return btScalar(getTimeMicroseconds()) * microseconds_to_seconds;
273 #ifndef BT_NO_PROFILE
275 static btClock gProfileClock;
// Stores the current gProfileClock reading, in microseconds, into *ticks.
// The 64-bit microsecond value is narrowed to unsigned long int by the cast.
277 inline void Profile_Get_Ticks(unsigned long int* ticks)
279 *ticks = (unsigned long int)gProfileClock.getTimeMicroseconds();
282 inline float Profile_Get_Tick_Rate(void)
288 /***************************************************************************************************
292 ***************************************************************************************************/
294 /***********************************************************************************************
296 * name - pointer to a static string which is the name of this profile node *
297 * parent - parent pointer *
300 * The name is assumed to be a static pointer, only the pointer is stored and compared for *
301 * efficiency reasons. *
302 *=============================================================================================*/
303 CProfileNode::CProfileNode(const char* name, CProfileNode* parent) : Name(name),
316 void CProfileNode::CleanupMemory()
324 CProfileNode::~CProfileNode(void)
329 /***********************************************************************************************
331 * name - static string pointer to the name of the node we are searching for *
334 * All profile names are assumed to be static strings so this function uses pointer compares *
335 * to find the named node. *
336 *=============================================================================================*/
// Searches this node's children (the Child / Sibling chain) for one whose Name pointer
// equals name; when none matches, a new CProfileNode(name, this) is created.
337 CProfileNode* CProfileNode::Get_Sub_Node(const char* name)
339 // Try to find this sub node
340 CProfileNode* child = Child;
// Pointer comparison, not a string comparison: the same name pointer must be passed each time.
343 if (child->Name == name)
347 child = child->Sibling;
350 // We didn't find it, so add it
352 CProfileNode* node = new CProfileNode(name, this);
// The new node's Sibling points at the current first child.
353 node->Sibling = Child;
358 void CProfileNode::Reset(void)
// Records an entry into this node. RecursionCounter is incremented on every call; the
// start tick is only sampled when the counter was 0 before the increment, i.e. on the
// outermost entry of a recursive sequence.
373 void CProfileNode::Call(void)
376 if (RecursionCounter++ == 0)
378 Profile_Get_Ticks(&StartTime);
// Records a leave from this node. RecursionCounter is decremented; when it reaches 0 and
// TotalCalls is non-zero, a tick value divided by Profile_Get_Tick_Rate() is added to TotalTime.
// Returns true when RecursionCounter is 0 after the decrement.
382 bool CProfileNode::Return(void)
384 if (--RecursionCounter == 0 && TotalCalls != 0)
386 unsigned long int time;
387 Profile_Get_Ticks(&time);
390 TotalTime += (float)time / Profile_Get_Tick_Rate();
392 return (RecursionCounter == 0);
395 /***************************************************************************************************
399 ***************************************************************************************************/
// Creates an iterator whose current parent is start and whose current child is start's first child.
// start is dereferenced unconditionally, so it must not be NULL.
400 CProfileIterator::CProfileIterator(CProfileNode* start)
402 CurrentParent = start;
403 CurrentChild = CurrentParent->Get_Child();
// Rewinds the iterator to the first child of the current parent.
406 void CProfileIterator::First(void)
408 CurrentChild = CurrentParent->Get_Child();
// Advances to the next sibling of the current child.
// CurrentChild is dereferenced without a NULL check here, so check Is_Done() before calling.
411 void CProfileIterator::Next(void)
413 CurrentChild = CurrentChild->Get_Sibling();
// Returns true when there is no current child, i.e. the sibling chain has been exhausted
// or the current parent has no children.
416 bool CProfileIterator::Is_Done(void)
418 return CurrentChild == NULL;
// Descends into a child of the current parent, selected by index.
// The search starts at the parent's first child and follows sibling links while index is
// non-zero and children remain.
421 void CProfileIterator::Enter_Child(int index)
423 CurrentChild = CurrentParent->Get_Child();
424 while ((CurrentChild != NULL) && (index != 0))
427 CurrentChild = CurrentChild->Get_Sibling();
// The iterator only moves down when a child was found; otherwise the parent is left unchanged.
430 if (CurrentChild != NULL)
432 CurrentParent = CurrentChild;
433 CurrentChild = CurrentParent->Get_Child();
// Moves the iterator one level up, if the current parent itself has a parent, and
// sets the current child to the first child of the current parent.
437 void CProfileIterator::Enter_Parent(void)
439 if (CurrentParent->Get_Parent() != NULL)
441 CurrentParent = CurrentParent->Get_Parent();
443 CurrentChild = CurrentParent->Get_Child();
446 /***************************************************************************************************
450 ***************************************************************************************************/
// Root profile nodes, indexed by thread index. Every entry is named "Root" and has no parent.
// The initializer spells out 64 entries (16 rows of 4).
// NOTE(review): presumably this count has to match BT_QUICKPROF_MAX_THREAD_COUNT - confirm
// against its definition when changing either.
452 CProfileNode gRoots[BT_QUICKPROF_MAX_THREAD_COUNT] = {
453 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
454 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
455 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
456 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
457 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
458 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
459 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
460 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
461 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
462 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
463 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
464 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
465 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
466 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
467 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL),
468 CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL), CProfileNode("Root", NULL)};
470 CProfileNode* gCurrentNodes[BT_QUICKPROF_MAX_THREAD_COUNT] =
// Definitions of CProfileManager's static data members, both starting at 0.
538 int CProfileManager::FrameCounter = 0;
// Written by CProfileManager::Reset() through Profile_Get_Ticks().
539 unsigned long int CProfileManager::ResetTime = 0;
// Returns a newly allocated iterator that starts at the calling thread's root node in gRoots.
// The thread index is range-checked against [0, BT_QUICKPROF_MAX_THREAD_COUNT) before it is
// used as an array index. The iterator is created with new, so the caller owns it.
541 CProfileIterator* CProfileManager::Get_Iterator(void)
543 int threadIndex = btQuickprofGetCurrentThreadIndex2();
544 if ((threadIndex < 0) || threadIndex >= BT_QUICKPROF_MAX_THREAD_COUNT)
547 return new CProfileIterator(&gRoots[threadIndex]);
// Calls CleanupMemory() on every root node in gRoots, i.e. for all thread slots and not
// only for the calling thread.
550 void CProfileManager::CleanupMemory(void)
552 for (int i = 0; i < BT_QUICKPROF_MAX_THREAD_COUNT; i++)
554 gRoots[i].CleanupMemory();
558 /***********************************************************************************************
559 * CProfileManager::Start_Profile -- Begin a named profile *
561 * Steps one level deeper into the tree, if a child already exists with the specified name *
562 * then it accumulates the profiling; otherwise a new child node is added to the profile tree. *
565 * name - name of this profiling record *
568 * The string used is assumed to be a static string; pointer compares are used throughout *
569 * the profiling code for efficiency. *
570 *=============================================================================================*/
571 void CProfileManager::Start_Profile(const char* name)
573 int threadIndex = btQuickprofGetCurrentThreadIndex2();
// The thread index is range-checked before it is used to index gCurrentNodes.
574 if ((threadIndex < 0) || threadIndex >= BT_QUICKPROF_MAX_THREAD_COUNT)
// Pointer comparison: the current node only changes to a sub node when name differs from
// the current node's name, so re-entering the same name stays on the same node.
577 if (name != gCurrentNodes[threadIndex]->Get_Name())
579 gCurrentNodes[threadIndex] = gCurrentNodes[threadIndex]->Get_Sub_Node(name);
// Call() is invoked on whichever node is current after the check above.
582 gCurrentNodes[threadIndex]->Call();
585 /***********************************************************************************************
586 * CProfileManager::Stop_Profile -- Stop timing and record the results. *
587 *=============================================================================================*/
588 void CProfileManager::Stop_Profile(void)
590 int threadIndex = btQuickprofGetCurrentThreadIndex2();
// The thread index is range-checked before it is used to index gCurrentNodes.
591 if ((threadIndex < 0) || threadIndex >= BT_QUICKPROF_MAX_THREAD_COUNT)
594 // Return will indicate whether we should back up to our parent (we may
595 // be profiling a recursive function)
596 if (gCurrentNodes[threadIndex]->Return())
// The calling thread's current node becomes the parent of the node that just returned.
598 gCurrentNodes[threadIndex] = gCurrentNodes[threadIndex]->Get_Parent();
602 /***********************************************************************************************
603 * CProfileManager::Reset -- Reset the contents of the profiling system *
605 * This resets everything except for the tree structure. All of the timing data is reset. *
606 *=============================================================================================*/
607 void CProfileManager::Reset(void)
// gProfileClock is a single file-level clock, so resetting it moves the time origin
// for every user of Profile_Get_Ticks().
609 gProfileClock.reset();
610 int threadIndex = btQuickprofGetCurrentThreadIndex2();
611 if ((threadIndex < 0) || threadIndex >= BT_QUICKPROF_MAX_THREAD_COUNT)
// Only the calling thread's root node is reset and re-entered here.
613 gRoots[threadIndex].Reset();
614 gRoots[threadIndex].Call();
// Store the tick value at reset time in the static ResetTime member.
616 Profile_Get_Ticks(&ResetTime);
619 /***********************************************************************************************
620 * CProfileManager::Increment_Frame_Counter -- Increment the frame counter *
621 *=============================================================================================*/
622 void CProfileManager::Increment_Frame_Counter(void)
627 /***********************************************************************************************
628 * CProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset *
629 *=============================================================================================*/
// Samples the profile clock and returns a tick value divided by Profile_Get_Tick_Rate(), as a float.
630 float CProfileManager::Get_Time_Since_Reset(void)
632 unsigned long int time;
633 Profile_Get_Ticks(&time);
635 return (float)time / Profile_Get_Tick_Rate();
// Prints the profile tree below the iterator's current parent to stdout with printf.
//   profileIterator - iterator positioned on the node whose children are reported
//   spacing         - number of '.' characters printed as indentation on each line
// For the current parent it prints a header line, one line per child, an "Unaccounted:"
// line, and then recurses into every child with spacing + 3.
640 void CProfileManager::dumpRecursive(CProfileIterator* profileIterator, int spacing)
642 profileIterator->First();
643 if (profileIterator->Is_Done())
// The reference time for percentages is the time since reset for the root, and the
// parent's total time otherwise.
646 float accumulated_time = 0, parent_time = profileIterator->Is_Root() ? CProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
648 int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
649 for (i = 0; i < spacing; i++) printf(".");
650 printf("----------------------------------\n");
651 for (i = 0; i < spacing; i++) printf(".");
652 printf("Profiling: %s (total running time: %.3f ms) ---\n", profileIterator->Get_Current_Parent_Name(), parent_time);
653 float totalTime = 0.f;
// One report line per child of the current parent.
657 for (i = 0; !profileIterator->Is_Done(); i++, profileIterator->Next())
660 float current_total_time = profileIterator->Get_Current_Total_Time();
661 accumulated_time += current_total_time;
// Share of the parent's time in percent; 0 when the parent time is not above SIMD_EPSILON.
662 float fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
665 for (i = 0; i < spacing; i++) printf(".");
// NOTE(review): the per-frame value divides by frames_since_reset; a count of 0 would
// divide by zero in floating point - confirm callers increment the frame counter first.
667 printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n", i, profileIterator->Get_Current_Name(), fraction, (current_total_time / (double)frames_since_reset), profileIterator->Get_Current_Total_Calls());
668 totalTime += current_total_time;
669 //recurse into children
672 if (parent_time < accumulated_time)
674 //printf("what's wrong\n");
676 for (i = 0; i < spacing; i++) printf(".");
// Time of the parent that is not covered by any child, as a percentage and as an absolute value.
677 printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:", parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
// Visit each child in turn: step down, dump its subtree, step back up.
679 for (i = 0; i < numChildren; i++)
681 profileIterator->Enter_Child(i);
682 dumpRecursive(profileIterator, spacing + 3);
683 profileIterator->Enter_Parent();
// Dumps the calling thread's whole profile tree: obtains an iterator from Get_Iterator(),
// prints recursively starting with no indentation, then releases the iterator.
687 void CProfileManager::dumpAll()
689 CProfileIterator* profileIterator = 0;
690 profileIterator = CProfileManager::Get_Iterator();
692 dumpRecursive(profileIterator, 0);
694 CProfileManager::Release_Iterator(profileIterator);
698 void btEnterProfileZoneDefault(const char* name)
701 void btLeaveProfileZoneDefault()
706 void btEnterProfileZoneDefault(const char* name)
709 void btLeaveProfileZoneDefault()
712 #endif //BT_NO_PROFILE
716 #if defined(_WIN32) && (defined(__MINGW32__) || defined(__MINGW64__))
717 #define BT_HAVE_TLS 1
718 #elif __APPLE__ && !TARGET_OS_IPHONE
719 // TODO: Modern versions of iOS support TLS now with updated version checking.
720 #define BT_HAVE_TLS 1
722 #define BT_HAVE_TLS 1
723 #elif defined(__FreeBSD__) || defined(__NetBSD__)
724 // TODO: OpenBSD is purposely disabled for now: TLS support exists there but is not fully functional.
725 #define BT_HAVE_TLS 1
728 // __thread is broken on Android clang until r12b. See
729 // https://github.com/android-ndk/ndk/issues/8
730 #if defined(__ANDROID__) && defined(__clang__)
731 #if __has_include(<android/ndk-version.h>)
732 #include <android/ndk-version.h>
733 #endif // __has_include(<android/ndk-version.h>)
734 #if defined(__NDK_MAJOR__) && \
735 ((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1)))
738 #endif // defined(__ANDROID__) && defined(__clang__)
// Returns the index the profiler uses for the calling thread.
// One path forwards to btGetCurrentThreadIndex(). The other keeps an index variable
// (thread-local when __thread or __declspec(thread) is used) that is assigned from a
// shared counter the first time it still holds kNullIndex.
741 unsigned int btQuickprofGetCurrentThreadIndex2()
// Sentinel meaning "no index assigned yet" (all bits set).
743 const unsigned int kNullIndex = ~0U;
746 return btGetCurrentThreadIndex();
748 #if defined(BT_HAVE_TLS)
749 static __thread unsigned int sThreadIndex = kNullIndex;
750 #elif defined(_WIN32)
751 __declspec(thread) static unsigned int sThreadIndex = kNullIndex;
// Variant without thread-local storage: starts at 0, so it never equals kNullIndex.
753 unsigned int sThreadIndex = 0;
// NOTE(review): gThreadCounter is a plain static int and the increment below has no visible
// synchronisation - confirm that first calls from different threads cannot race.
757 static int gThreadCounter = 0;
759 if (sThreadIndex == kNullIndex)
761 sThreadIndex = gThreadCounter++;
764 #endif //BT_THREADSAFE
767 static btEnterProfileZoneFunc* bts_enterFunc = btEnterProfileZoneDefault;
768 static btLeaveProfileZoneFunc* bts_leaveFunc = btLeaveProfileZoneDefault;
// Forwards name to the currently installed enter hook (bts_enterFunc).
// The pointer is called without a NULL check.
770 void btEnterProfileZone(const char* name)
772 (bts_enterFunc)(name);
774 void btLeaveProfileZone()
// Returns the enter hook that btEnterProfileZone() currently calls.
779 btEnterProfileZoneFunc* btGetCurrentEnterProfileZoneFunc()
781 return bts_enterFunc;
// Returns the currently installed leave hook (bts_leaveFunc).
783 btLeaveProfileZoneFunc* btGetCurrentLeaveProfileZoneFunc()
785 return bts_leaveFunc;
// Installs enterFunc as the enter hook, replacing the previous one.
// The pointer is stored as given; btEnterProfileZone() calls it without a NULL check,
// so enterFunc must not be NULL.
788 void btSetCustomEnterProfileZoneFunc(btEnterProfileZoneFunc* enterFunc)
790 bts_enterFunc = enterFunc;
// Installs leaveFunc as the leave hook, replacing the previous one.
// The pointer is stored as given, without validation.
792 void btSetCustomLeaveProfileZoneFunc(btLeaveProfileZoneFunc* leaveFunc)
794 bts_leaveFunc = leaveFunc;
// Enters the profile zone called name when the sample object is constructed.
797 CProfileSample::CProfileSample(const char* name)
799 btEnterProfileZone(name);
// Leaves the profile zone when the sample object is destroyed.
802 CProfileSample::~CProfileSample(void)
804 btLeaveProfileZone();