3 ***************************************************************************************************
7 ** Real-Time Hierarchical Profiling for Game Programming Gems 3
9 ** by Greg Hjelstrom & Byon Garrabrant
11 ***************************************************************************************************/
13 // Credits: The Clock class was inspired by the Timer classes in
14 // Ogre (www.ogre3d.org).
16 #include "btQuickprof.h"
21 static btClock gProfileClock;
25 #include <sys/sys_time.h>
26 #include <sys/time_util.h>
30 #if defined (SUNOS) || defined (__SUNOS__)
34 #if defined(WIN32) || defined(_WIN32)
36 #define BT_USE_WINDOWS_TIMERS
37 #define WIN32_LEAN_AND_MEAN
55 #define mymin(a,b) (a > b ? a : b)
60 #ifdef BT_USE_WINDOWS_TIMERS
61 LARGE_INTEGER mClockFrequency;
63 LONGLONG mPrevElapsedTime;
64 LARGE_INTEGER mStartTime;
69 struct timeval mStartTime;
71 #endif //__CELLOS_LV2__
75 ///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling.
78 m_data = new btClockData;
79 #ifdef BT_USE_WINDOWS_TIMERS
80 QueryPerformanceFrequency(&m_data->mClockFrequency);
90 btClock::btClock(const btClock& other)
92 m_data = new btClockData;
93 *m_data = *other.m_data;
96 btClock& btClock::operator=(const btClock& other)
98 *m_data = *other.m_data;
103 /// Resets the initial reference time.
104 void btClock::reset()
106 #ifdef BT_USE_WINDOWS_TIMERS
107 QueryPerformanceCounter(&m_data->mStartTime);
108 m_data->mStartTick = GetTickCount();
109 m_data->mPrevElapsedTime = 0;
111 #ifdef __CELLOS_LV2__
113 typedef uint64_t ClockSize;
115 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
116 SYS_TIMEBASE_GET( newTime );
117 m_data->mStartTime = newTime;
119 gettimeofday(&m_data->mStartTime, 0);
124 /// Returns the time in ms since the last call to reset or since
125 /// the btClock was created.
126 unsigned long int btClock::getTimeMilliseconds()
128 #ifdef BT_USE_WINDOWS_TIMERS
129 LARGE_INTEGER currentTime;
130 QueryPerformanceCounter(¤tTime);
131 LONGLONG elapsedTime = currentTime.QuadPart -
132 m_data->mStartTime.QuadPart;
133 // Compute the number of millisecond ticks elapsed.
134 unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
135 m_data->mClockFrequency.QuadPart);
136 // Check for unexpected leaps in the Win32 performance counter.
137 // (This is caused by unexpected data across the PCI to ISA
138 // bridge, aka south bridge. See Microsoft KB274323.)
139 unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
140 signed long msecOff = (signed long)(msecTicks - elapsedTicks);
141 if (msecOff < -100 || msecOff > 100)
143 // Adjust the starting time forwards.
144 LONGLONG msecAdjustment = mymin(msecOff *
145 m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
146 m_data->mPrevElapsedTime);
147 m_data->mStartTime.QuadPart += msecAdjustment;
148 elapsedTime -= msecAdjustment;
150 // Recompute the number of millisecond ticks elapsed.
151 msecTicks = (unsigned long)(1000 * elapsedTime /
152 m_data->mClockFrequency.QuadPart);
155 // Store the current elapsed time for adjustments next time.
156 m_data->mPrevElapsedTime = elapsedTime;
161 #ifdef __CELLOS_LV2__
162 uint64_t freq=sys_time_get_timebase_frequency();
163 double dFreq=((double) freq) / 1000.0;
164 typedef uint64_t ClockSize;
166 SYS_TIMEBASE_GET( newTime );
167 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
169 return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
172 struct timeval currentTime;
173 gettimeofday(¤tTime, 0);
174 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 +
175 (currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
176 #endif //__CELLOS_LV2__
180 /// Returns the time in us since the last call to reset or since
181 /// the Clock was created.
182 unsigned long int btClock::getTimeMicroseconds()
184 #ifdef BT_USE_WINDOWS_TIMERS
185 LARGE_INTEGER currentTime;
186 QueryPerformanceCounter(¤tTime);
187 LONGLONG elapsedTime = currentTime.QuadPart -
188 m_data->mStartTime.QuadPart;
190 // Compute the number of millisecond ticks elapsed.
191 unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
192 m_data->mClockFrequency.QuadPart);
194 // Check for unexpected leaps in the Win32 performance counter.
195 // (This is caused by unexpected data across the PCI to ISA
196 // bridge, aka south bridge. See Microsoft KB274323.)
197 unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
198 signed long msecOff = (signed long)(msecTicks - elapsedTicks);
199 if (msecOff < -100 || msecOff > 100)
201 // Adjust the starting time forwards.
202 LONGLONG msecAdjustment = mymin(msecOff *
203 m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
204 m_data->mPrevElapsedTime);
205 m_data->mStartTime.QuadPart += msecAdjustment;
206 elapsedTime -= msecAdjustment;
209 // Store the current elapsed time for adjustments next time.
210 m_data->mPrevElapsedTime = elapsedTime;
212 // Convert to microseconds.
213 unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime /
214 m_data->mClockFrequency.QuadPart);
219 #ifdef __CELLOS_LV2__
220 uint64_t freq=sys_time_get_timebase_frequency();
221 double dFreq=((double) freq)/ 1000000.0;
222 typedef uint64_t ClockSize;
224 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
225 SYS_TIMEBASE_GET( newTime );
227 return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
230 struct timeval currentTime;
231 gettimeofday(¤tTime, 0);
232 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 +
233 (currentTime.tv_usec - m_data->mStartTime.tv_usec);
234 #endif//__CELLOS_LV2__
242 inline void Profile_Get_Ticks(unsigned long int * ticks)
244 *ticks = gProfileClock.getTimeMicroseconds();
247 inline float Profile_Get_Tick_Rate(void)
256 /***************************************************************************************************
260 ***************************************************************************************************/
262 /***********************************************************************************************
264 * name - pointer to a static string which is the name of this profile node *
265 * parent - parent pointer *
268 * The name is assumed to be a static pointer, only the pointer is stored and compared for *
269 * efficiency reasons. *
270 *=============================================================================================*/
271 CProfileNode::CProfileNode( const char * name, CProfileNode * parent ) :
276 RecursionCounter( 0 ),
286 void CProfileNode::CleanupMemory()
294 CProfileNode::~CProfileNode( void )
301 /***********************************************************************************************
303 * name - static string pointer to the name of the node we are searching for *
306 * All profile names are assumed to be static strings so this function uses pointer compares *
307 * to find the named node. *
308 *=============================================================================================*/
309 CProfileNode * CProfileNode::Get_Sub_Node( const char * name )
311 // Try to find this sub node
312 CProfileNode * child = Child;
314 if ( child->Name == name ) {
317 child = child->Sibling;
320 // We didn't find it, so add it
322 CProfileNode * node = new CProfileNode( name, this );
323 node->Sibling = Child;
329 void CProfileNode::Reset( void )
344 void CProfileNode::Call( void )
347 if (RecursionCounter++ == 0) {
348 Profile_Get_Ticks(&StartTime);
353 bool CProfileNode::Return( void )
355 if ( --RecursionCounter == 0 && TotalCalls != 0 ) {
356 unsigned long int time;
357 Profile_Get_Ticks(&time);
359 TotalTime += (float)time / Profile_Get_Tick_Rate();
361 return ( RecursionCounter == 0 );
365 /***************************************************************************************************
369 ***************************************************************************************************/
370 CProfileIterator::CProfileIterator( CProfileNode * start )
372 CurrentParent = start;
373 CurrentChild = CurrentParent->Get_Child();
377 void CProfileIterator::First(void)
379 CurrentChild = CurrentParent->Get_Child();
383 void CProfileIterator::Next(void)
385 CurrentChild = CurrentChild->Get_Sibling();
389 bool CProfileIterator::Is_Done(void)
391 return CurrentChild == NULL;
395 void CProfileIterator::Enter_Child( int index )
397 CurrentChild = CurrentParent->Get_Child();
398 while ( (CurrentChild != NULL) && (index != 0) ) {
400 CurrentChild = CurrentChild->Get_Sibling();
403 if ( CurrentChild != NULL ) {
404 CurrentParent = CurrentChild;
405 CurrentChild = CurrentParent->Get_Child();
410 void CProfileIterator::Enter_Parent( void )
412 if ( CurrentParent->Get_Parent() != NULL ) {
413 CurrentParent = CurrentParent->Get_Parent();
415 CurrentChild = CurrentParent->Get_Child();
419 /***************************************************************************************************
423 ***************************************************************************************************/
425 CProfileNode CProfileManager::Root( "Root", NULL );
426 CProfileNode * CProfileManager::CurrentNode = &CProfileManager::Root;
427 int CProfileManager::FrameCounter = 0;
428 unsigned long int CProfileManager::ResetTime = 0;
431 /***********************************************************************************************
432 * CProfileManager::Start_Profile -- Begin a named profile *
434 * Steps one level deeper into the tree, if a child already exists with the specified name *
435 * then it accumulates the profiling; otherwise a new child node is added to the profile tree. *
438 * name - name of this profiling record *
441 * The string used is assumed to be a static string; pointer compares are used throughout *
442 * the profiling code for efficiency. *
443 *=============================================================================================*/
444 void CProfileManager::Start_Profile( const char * name )
446 if (name != CurrentNode->Get_Name()) {
447 CurrentNode = CurrentNode->Get_Sub_Node( name );
454 /***********************************************************************************************
455 * CProfileManager::Stop_Profile -- Stop timing and record the results. *
456 *=============================================================================================*/
457 void CProfileManager::Stop_Profile( void )
459 // Return will indicate whether we should back up to our parent (we may
460 // be profiling a recursive function)
461 if (CurrentNode->Return()) {
462 CurrentNode = CurrentNode->Get_Parent();
467 /***********************************************************************************************
468 * CProfileManager::Reset -- Reset the contents of the profiling system *
470 * This resets everything except for the tree structure. All of the timing data is reset. *
471 *=============================================================================================*/
472 void CProfileManager::Reset( void )
474 gProfileClock.reset();
478 Profile_Get_Ticks(&ResetTime);
482 /***********************************************************************************************
483 * CProfileManager::Increment_Frame_Counter -- Increment the frame counter *
484 *=============================================================================================*/
485 void CProfileManager::Increment_Frame_Counter( void )
491 /***********************************************************************************************
492 * CProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset *
493 *=============================================================================================*/
494 float CProfileManager::Get_Time_Since_Reset( void )
496 unsigned long int time;
497 Profile_Get_Ticks(&time);
499 return (float)time / Profile_Get_Tick_Rate();
504 void CProfileManager::dumpRecursive(CProfileIterator* profileIterator, int spacing)
506 profileIterator->First();
507 if (profileIterator->Is_Done())
510 float accumulated_time=0,parent_time = profileIterator->Is_Root() ? CProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
512 int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
513 for (i=0;i<spacing;i++) printf(".");
514 printf("----------------------------------\n");
515 for (i=0;i<spacing;i++) printf(".");
516 printf("Profiling: %s (total running time: %.3f ms) ---\n", profileIterator->Get_Current_Parent_Name(), parent_time );
517 float totalTime = 0.f;
522 for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next())
525 float current_total_time = profileIterator->Get_Current_Total_Time();
526 accumulated_time += current_total_time;
527 float fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
529 int i; for (i=0;i<spacing;i++) printf(".");
531 printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls());
532 totalTime += current_total_time;
533 //recurse into children
536 if (parent_time < accumulated_time)
538 printf("what's wrong\n");
540 for (i=0;i<spacing;i++) printf(".");
541 printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
543 for (i=0;i<numChildren;i++)
545 profileIterator->Enter_Child(i);
546 dumpRecursive(profileIterator,spacing+3);
547 profileIterator->Enter_Parent();
553 void CProfileManager::dumpAll()
555 CProfileIterator* profileIterator = 0;
556 profileIterator = CProfileManager::Get_Iterator();
558 dumpRecursive(profileIterator,0);
560 CProfileManager::Release_Iterator(profileIterator);
566 #endif //BT_NO_PROFILE