From 5375fe820cff7ae7f3c5c771f28c6f5518f2ee60 Mon Sep 17 00:00:00 2001 From: Jonathan Peyton Date: Mon, 14 Nov 2016 21:13:44 +0000 Subject: [PATCH] Update stats-gathering code Have developer timers use partitioning scheme which also required that some redundant developer timers be removed in favor of the already existing normal timers. Move per thread stats initialization to just after global thread id assignment which is as early as possible. Also put all global stats initialization code in __kmp_stats_init() and all global stats destruction code in __kmp_stats_fini(). Differential Revision: https://reviews.llvm.org/D26361 llvm-svn: 286892 --- openmp/runtime/src/kmp_barrier.cpp | 36 +++++++------- openmp/runtime/src/kmp_global.c | 4 +- openmp/runtime/src/kmp_runtime.c | 22 +++++---- openmp/runtime/src/kmp_stats.cpp | 38 ++++++++++----- openmp/runtime/src/kmp_stats.h | 99 ++++++++++++++------------------------ openmp/runtime/src/z_Linux_util.c | 20 ++++---- 6 files changed, 102 insertions(+), 117 deletions(-) diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index 7843b68..5e77614 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -50,7 +50,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void * itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather); register kmp_team_t *team = this_thr->th.th_team; register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb; register kmp_info_t **other_threads = team->t.t_threads; @@ -130,7 +130,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release); register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_team_t *team; @@ -149,7 +149,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti if (nproc > 1) { #if KMP_BARRIER_ICV_PUSH { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); if (propagate_icvs) { ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); for (i=1; ith.th_team; register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_info_t **other_threads = team->t.t_threads; @@ -323,7 +323,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release); register kmp_team_t *team; register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_uint32 nproc; @@ -393,7 +393,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, #if KMP_BARRIER_ICV_PUSH { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); if (propagate_icvs) { __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid], team, child_tid, FALSE); @@ -426,7 +426,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather); register kmp_team_t *team = this_thr->th.th_team; register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_info_t **other_threads = team->t.t_threads; @@ -535,7 +535,7 @@ __kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release); register kmp_team_t *team; register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb; register kmp_info_t **other_threads; @@ -742,7 +742,7 @@ __kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, void (*reduce) (void *, void *) USE_ITT_BUILD_ARG(void * itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather); register kmp_team_t *team = this_thr->th.th_team; register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb; register kmp_uint32 nproc = this_thr->th.th_team_nproc; @@ -883,7 +883,7 @@ __kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, i int propagate_icvs USE_ITT_BUILD_ARG(void * itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release); register kmp_team_t *team; register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_uint32 nproc; @@ -1067,9 +1067,8 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *)) { - KMP_TIME_DEVELOPER_BLOCK(KMP_barrier); - KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier); + KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); register int tid = __kmp_tid_from_gtid(gtid); register kmp_info_t *this_thr = __kmp_threads[gtid]; register kmp_team_t *team = this_thr->th.th_team; @@ -1333,7 +1332,8 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, void __kmp_end_split_barrier(enum barrier_type bt, int gtid) { - KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier); + KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); int tid = __kmp_tid_from_gtid(gtid); kmp_info_t *this_thr = __kmp_threads[gtid]; kmp_team_t *team = this_thr->th.th_team; @@ -1376,9 +1376,8 @@ __kmp_end_split_barrier(enum barrier_type bt, int gtid) void __kmp_join_barrier(int gtid) { - KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier); + KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier); KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); - KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier); register kmp_info_t *this_thr = __kmp_threads[gtid]; register kmp_team_t *team; register kmp_uint nproc; @@ -1592,9 +1591,8 @@ __kmp_join_barrier(int gtid) void __kmp_fork_barrier(int gtid, int tid) { - KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier); + KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier); KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); - KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier); kmp_info_t *this_thr = __kmp_threads[gtid]; kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL; #if USE_ITT_BUILD @@ -1707,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid) the fixed ICVs in the master's thread struct, because it is not always the case that the threads arrays have been allocated when __kmp_fork_call() is executed. */ { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs // Copy the initial ICVs from the master's thread struct to the implicit task for this tid. KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid)); @@ -1762,7 +1760,7 @@ __kmp_fork_barrier(int gtid, int tid) void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy); KMP_DEBUG_ASSERT(team && new_nproc && new_icvs); KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); diff --git a/openmp/runtime/src/kmp_global.c b/openmp/runtime/src/kmp_global.c index 5519696..86cd3d2 100644 --- a/openmp/runtime/src/kmp_global.c +++ b/openmp/runtime/src/kmp_global.c @@ -28,10 +28,10 @@ kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized kmp_tas_lock_t __kmp_stats_lock; // global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called. -kmp_stats_list __kmp_stats_list; +kmp_stats_list* __kmp_stats_list; // thread local pointer to stats node within list -__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list; +__thread kmp_stats_list* __kmp_stats_thread_ptr = NULL; // gives reference tick for all events (considered the 0 tick) tsc_tick_count __kmp_stats_start_time; diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c index e439150..c875f58 100644 --- a/openmp/runtime/src/kmp_runtime.c +++ b/openmp/runtime/src/kmp_runtime.c @@ -1417,7 +1417,7 @@ __kmp_fork_call( kmp_hot_team_ptr_t **p_hot_teams; #endif { // KMP_TIME_BLOCK - KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid )); @@ -2199,7 +2199,6 @@ __kmp_fork_call( { KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke); if (! team->t.t_invoke( gtid )) { KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); } @@ -2258,7 +2257,7 @@ __kmp_join_call(ident_t *loc, int gtid #endif /* OMP_40_ENABLED */ ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_join_call); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); kmp_team_t *team; kmp_team_t *parent_team; kmp_info_t *master_th; @@ -3681,6 +3680,13 @@ __kmp_register_root( int initial_thread ) KMP_DEBUG_ASSERT( ! root->r.r_root_team ); } +#if KMP_STATS_ENABLED + // Initialize stats as soon as possible (right after gtid assignment). + __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); + KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); + KMP_SET_THREAD_STATE(SERIAL_REGION); + KMP_INIT_PARTITIONED_TIMERS(OMP_serial); +#endif __kmp_initialize_root( root ); /* setup new root thread structure */ @@ -4748,7 +4754,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, kmp_internal_control_t *new_icvs, int argc USE_NESTED_HOT_ARG(kmp_info_t *master) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); int f; kmp_team_t *team; int use_hot_team = ! root->r.r_active; @@ -5504,14 +5510,11 @@ __kmp_launch_thread( kmp_info_t *this_thr ) } #endif - KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop); { - KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke); KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); rc = (*pteam)->t.t_invoke( gtid ); } - KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop); KMP_ASSERT( rc ); #if OMPT_SUPPORT @@ -6332,7 +6335,7 @@ __kmp_do_serial_initialize( void ) #endif #endif #if KMP_STATS_ENABLED - __kmp_init_tas_lock( & __kmp_stats_lock ); + __kmp_stats_init(); #endif __kmp_init_lock( & __kmp_global_lock ); __kmp_init_queuing_lock( & __kmp_dispatch_lock ); @@ -7293,8 +7296,7 @@ __kmp_cleanup( void ) __kmp_i18n_catclose(); #if KMP_STATS_ENABLED - __kmp_accumulate_stats_at_exit(); - __kmp_stats_list.deallocate(); + __kmp_stats_fini(); #endif KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) ); diff --git a/openmp/runtime/src/kmp_stats.cpp b/openmp/runtime/src/kmp_stats.cpp index a65a481..3ae25d5 100644 --- a/openmp/runtime/src/kmp_stats.cpp +++ b/openmp/runtime/src/kmp_stats.cpp @@ -29,11 +29,11 @@ #define expandName(name,flags,ignore) {STRINGIZE(name),flags}, statInfo timeStat::timerInfo[] = { KMP_FOREACH_TIMER(expandName,0) - {0,0} + {"TIMER_LAST", 0} }; const statInfo counter::counterInfo[] = { KMP_FOREACH_COUNTER(expandName,0) - {0,0} + {"COUNTER_LAST", 0} }; #undef expandName @@ -71,7 +71,7 @@ const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArr static uint32_t statsPrinted = 0; // output interface -static kmp_stats_output_module __kmp_stats_global_output; +static kmp_stats_output_module* __kmp_stats_global_output = NULL; /* ****************************************************** */ /* ************* statistic member functions ************* */ @@ -164,7 +164,7 @@ void explicitTimer::start(timer_e timerEnumValue) { return; } -void explicitTimer::stop(timer_e timerEnumValue) { +void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) { if (startTime.getValue() == 0) return; @@ -174,8 +174,10 @@ void explicitTimer::stop(timer_e timerEnumValue) { stat->addSample(((finishTime - startTime) - totalPauseTime).ticks()); if(timeStat::logEvent(timerEnumValue)) { - __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); - __kmp_stats_thread_ptr->decrementNestValue(); + if(!stats_ptr) + stats_ptr = __kmp_stats_thread_ptr; + stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); + stats_ptr->decrementNestValue(); } /* We accept the risk that we drop a sample because it really did start at t==0. */ @@ -481,18 +483,18 @@ void kmp_stats_output_module::windupExplicitTimers() // and say "it's over". // If the timer wasn't running, this won't record anything anyway. kmp_stats_list::iterator it; - for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { + for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { kmp_stats_list* ptr = *it; ptr->getPartitionedTimers()->windup(); for (int timer=0; timergetExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer); + ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr); } } } void kmp_stats_output_module::printPloticusFile() { int i; - int size = __kmp_stats_list.size(); + int size = __kmp_stats_list->size(); FILE* plotOut = fopen(plotFileName, "w+"); fprintf(plotOut, "#proc page\n" @@ -602,7 +604,7 @@ void kmp_stats_output_module::outputStats(const char* heading) fprintf(statsOut, "%s\n",heading); // Accumulate across threads. kmp_stats_list::iterator it; - for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { + for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { int t = (*it)->getGtid(); // Output per thread stats if requested. if (printPerThreadFlag) { @@ -666,7 +668,7 @@ extern "C" { void __kmp_reset_stats() { kmp_stats_list::iterator it; - for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { + for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { timeStat * timers = (*it)->getTimers(); counter * counters = (*it)->getCounters(); explicitTimer * eTimers = (*it)->getExplicitTimers(); @@ -688,7 +690,7 @@ void __kmp_reset_stats() // This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already. void __kmp_output_stats(const char * heading) { - __kmp_stats_global_output.outputStats(heading); + __kmp_stats_global_output->outputStats(heading); __kmp_reset_stats(); } @@ -703,6 +705,18 @@ void __kmp_accumulate_stats_at_exit(void) void __kmp_stats_init(void) { + __kmp_init_tas_lock( & __kmp_stats_lock ); + __kmp_stats_start_time = tsc_tick_count::now(); + __kmp_stats_global_output = new kmp_stats_output_module(); + __kmp_stats_list = new kmp_stats_list(); +} + +void __kmp_stats_fini(void) +{ + __kmp_accumulate_stats_at_exit(); + __kmp_stats_list->deallocate(); + delete __kmp_stats_global_output; + delete __kmp_stats_list; } } // extern "C" diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h index b767fdf..1521d21 100644 --- a/openmp/runtime/src/kmp_stats.h +++ b/openmp/runtime/src/kmp_stats.h @@ -104,8 +104,7 @@ enum stats_state_e { macro (OMP_TASKLOOP, 0, arg) \ macro (TASK_executed, 0, arg) \ macro (TASK_cancelled, 0, arg) \ - macro (TASK_stolen, 0, arg) \ - macro (LAST,0,arg) + macro (TASK_stolen, 0, arg) /*! * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h @@ -123,31 +122,31 @@ enum stats_state_e { * @ingroup STATS_GATHERING2 */ #define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_worker_thread_life, 0, arg) \ + macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ macro (FOR_static_scheduling, 0, arg) \ macro (FOR_dynamic_scheduling, 0, arg) \ macro (OMP_critical, 0, arg) \ macro (OMP_critical_wait, 0, arg) \ macro (OMP_single, 0, arg) \ macro (OMP_master, 0, arg) \ - macro (OMP_idle, 0, arg) \ - macro (OMP_plain_barrier, 0, arg) \ - macro (OMP_fork_join_barrier, 0, arg) \ - macro (OMP_parallel, 0, arg) \ + macro (OMP_idle, stats_flags_e::logEvent, arg) \ + macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ + macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \ + macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \ + macro (OMP_parallel, stats_flags_e::logEvent, arg) \ macro (OMP_task_immediate, 0, arg) \ macro (OMP_task_taskwait, 0, arg) \ macro (OMP_task_taskyield, 0, arg) \ macro (OMP_task_taskgroup, 0, arg) \ macro (OMP_task_join_bar, 0, arg) \ macro (OMP_task_plain_bar, 0, arg) \ - macro (OMP_serial, 0, arg) \ + macro (OMP_serial, stats_flags_e::logEvent, arg) \ macro (OMP_taskloop_scheduling, 0, arg) \ macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (LAST,0, arg) + KMP_FOREACH_DEVELOPER_TIMER(macro, arg) // OMP_start_end -- Time from when OpenMP is initialized until the stats are printed at exit @@ -190,28 +189,22 @@ enum stats_state_e { // KMP_tree_release -- time in __kmp_tree_barrier_release // KMP_hyper_gather -- time in __kmp_hyper_barrier_gather // KMP_hyper_release -- time in __kmp_hyper_barrier_release -# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (KMP_fork_call, 0, arg) \ - macro (KMP_join_call, 0, arg) \ - macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \ - macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \ - macro (KMP_barrier, 0, arg) \ - macro (KMP_end_split_barrier, 0, arg) \ - macro (KMP_hier_gather, 0, arg) \ - macro (KMP_hier_release, 0, arg) \ - macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \ - macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \ - macro (KMP_linear_gather, 0, arg) \ - macro (KMP_linear_release, 0, arg) \ - macro (KMP_tree_gather, 0, arg) \ - macro (KMP_tree_release, 0, arg) \ - macro (USER_master_invoke, stats_flags_e::logEvent, arg) \ - macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \ - macro (USER_resume, stats_flags_e::logEvent, arg) \ - macro (USER_suspend, stats_flags_e::logEvent, arg) \ - macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \ - macro (KMP_allocate_team, 0, arg) \ - macro (KMP_setup_icv_copy, 0, arg) \ +# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ + macro (KMP_fork_call, 0, arg) \ + macro (KMP_join_call, 0, arg) \ + macro (KMP_end_split_barrier, 0, arg) \ + macro (KMP_hier_gather, 0, arg) \ + macro (KMP_hier_release, 0, arg) \ + macro (KMP_hyper_gather, 0, arg) \ + macro (KMP_hyper_release, 0, arg) \ + macro (KMP_linear_gather, 0, arg) \ + macro (KMP_linear_release, 0, arg) \ + macro (KMP_tree_gather, 0, arg) \ + macro (KMP_tree_release, 0, arg) \ + macro (USER_resume, 0, arg) \ + macro (USER_suspend, 0, arg) \ + macro (KMP_allocate_team, 0, arg) \ + macro (KMP_setup_icv_copy, 0, arg) \ macro (USER_icv_copy, 0, arg) #else # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) @@ -233,47 +226,23 @@ enum stats_state_e { * * @ingroup STATS_GATHERING */ -#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ - macro(OMP_worker_thread_life, 0, arg) \ - macro(FOR_static_scheduling, 0, arg) \ - macro(FOR_dynamic_scheduling, 0, arg) \ - macro(OMP_critical, 0, arg) \ - macro(OMP_critical_wait, 0, arg) \ - macro(OMP_single, 0, arg) \ - macro(OMP_master, 0, arg) \ - macro(OMP_idle, 0, arg) \ - macro(OMP_plain_barrier, 0, arg) \ - macro(OMP_fork_join_barrier, 0, arg) \ - macro(OMP_parallel, 0, arg) \ - macro(OMP_task_immediate, 0, arg) \ - macro(OMP_task_taskwait, 0, arg) \ - macro(OMP_task_taskyield, 0, arg) \ - macro(OMP_task_taskgroup, 0, arg) \ - macro(OMP_task_join_bar, 0, arg) \ - macro(OMP_task_plain_bar, 0, arg) \ - macro(OMP_serial, 0, arg) \ - macro(OMP_taskloop_scheduling, 0, arg) \ - KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ - macro(LAST, 0, arg) - -#if (KMP_DEVELOPER_STATS) -# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \ - macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) -#else -# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) -#endif +#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ + KMP_FOREACH_TIMER(macro, arg) #define ENUMERATE(name,ignore,prefix) prefix##name, enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) + TIMER_LAST }; enum explicit_timer_e { KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) + EXPLICIT_TIMER_LAST }; enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) + COUNTER_LAST }; #undef ENUMERATE @@ -370,7 +339,7 @@ class explicitTimer void start(timer_e timerEnumValue); void pause() { pauseStartTime = tsc_tick_count::now(); } void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); } - void stop(timer_e timerEnumValue); + void stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr = nullptr); void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; } }; @@ -716,13 +685,14 @@ class kmp_stats_output_module { extern "C" { #endif void __kmp_stats_init(); +void __kmp_stats_fini(); void __kmp_reset_stats(); void __kmp_output_stats(const char *); void __kmp_accumulate_stats_at_exit(void); // thread local pointer to stats node within list extern __thread kmp_stats_list* __kmp_stats_thread_ptr; // head to stats list. -extern kmp_stats_list __kmp_stats_list; +extern kmp_stats_list* __kmp_stats_list; // lock for __kmp_stats_list extern kmp_tas_lock_t __kmp_stats_lock; // reference start time @@ -866,6 +836,7 @@ extern kmp_stats_output_module __kmp_stats_output; # define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) +# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) #else // Null definitions # define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) @@ -873,6 +844,7 @@ extern kmp_stats_output_module __kmp_stats_output; # define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) #endif #else // KMP_STATS_ENABLED @@ -894,6 +866,7 @@ extern kmp_stats_output_module __kmp_stats_output; #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) #define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) #define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) +#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) #define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) #define KMP_POP_PARTITIONED_TIMER() ((void)0) #define KMP_SET_THREAD_STATE(state_name) ((void)0) diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c index 389f4d5..4a26d4b 100644 --- a/openmp/runtime/src/z_Linux_util.c +++ b/openmp/runtime/src/z_Linux_util.c @@ -866,14 +866,12 @@ __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ) // th->th.th_stats is used to transfer thread specific stats-pointer to __kmp_launch_worker // So when thread is created (goes into __kmp_launch_worker) it will // set it's __thread local pointer to th->th.th_stats - th->th.th_stats = __kmp_stats_list.push_back(gtid); - if(KMP_UBER_GTID(gtid)) { - __kmp_stats_start_time = tsc_tick_count::now(); - __kmp_stats_thread_ptr = th->th.th_stats; - __kmp_stats_init(); - KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); - KMP_SET_THREAD_STATE(SERIAL_REGION); - KMP_INIT_PARTITIONED_TIMERS(OMP_serial); + if(!KMP_UBER_GTID(gtid)) { + th->th.th_stats = __kmp_stats_list->push_back(gtid); + } else { + // For root threads, the __kmp_stats_thread_ptr is set in __kmp_register_root(), so + // set the th->th.th_stats field to it. + th->th.th_stats = __kmp_stats_thread_ptr; } __kmp_release_tas_lock(&__kmp_stats_lock, gtid); @@ -1541,7 +1539,7 @@ __kmp_suspend_uninitialize_thread( kmp_info_t *th ) template static inline void __kmp_suspend_template( int th_gtid, C *flag ) { - KMP_TIME_DEVELOPER_BLOCK(USER_suspend); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend); kmp_info_t *th = __kmp_threads[th_gtid]; int status; typename C::flag_t old_spin; @@ -1675,7 +1673,7 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { template static inline void __kmp_resume_template( int target_gtid, C *flag ) { - KMP_TIME_DEVELOPER_BLOCK(USER_resume); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); kmp_info_t *th = __kmp_threads[target_gtid]; int status; @@ -1750,7 +1748,7 @@ void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { void __kmp_resume_monitor() { - KMP_TIME_DEVELOPER_BLOCK(USER_resume); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); int status; #ifdef KMP_DEBUG int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; -- 2.7.4