int gtid = __kmp_entry_gtid();
#if (KMP_STATS_ENABLED)
+ // If we were in a serial region, then stop the serial timer, record
+ // the event, and start parallel region timer
+ stats_state_e previous_state = KMP_GET_THREAD_STATE();
+ if (previous_state == stats_state_e::SERIAL_REGION) {
+ KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
+ } else {
+ KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
+ }
int inParallel = __kmpc_in_parallel(loc);
if (inParallel) {
KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
va_end(ap);
}
+
+#if KMP_STATS_ENABLED
+ if (previous_state == stats_state_e::SERIAL_REGION) {
+ KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
+ } else {
+ KMP_POP_PARTITIONED_TIMER();
+ }
+#endif // KMP_STATS_ENABLED
}
#if OMP_40_ENABLED
__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
KMP_COUNT_BLOCK(OMP_CRITICAL);
- KMP_TIME_PARTITIONED_BLOCK(
- OMP_critical_wait); /* Time spent waiting to enter the critical section */
#if OMPT_SUPPORT && OMPT_OPTIONAL
omp_state_t prev_state = omp_state_undefined;
ompt_thread_info_t ti;
// TODO: add THR_OVHD_STATE
+ KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
KMP_CHECK_USER_LOCK_INIT();
if ((__kmp_user_lock_kind == lk_tas) &&
}
}
#endif
+ KMP_POP_PARTITIONED_TIMER();
- KMP_START_EXPLICIT_TIMER(OMP_critical);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
// Check if it is initialized.
+ KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
if (*lk == 0) {
kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
if (KMP_IS_D_LOCK(lckseq)) {
#endif
KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
}
+ KMP_POP_PARTITIONED_TIMER();
#if USE_ITT_BUILD
__kmp_itt_critical_acquired(lck);
Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
+ KMP_POP_PARTITIONED_TIMER();
KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
-
if (__kmp_env_consistency_check)
__kmp_pop_workshare(global_tid, ct_pdo, loc);
}
ntc = (tc % chunk ? 1 : 0) + tc / chunk;
if (nproc > 1 && ntc >= nproc) {
- KMP_COUNT_BLOCK(OMP_FOR_static_steal);
+ KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL);
T id = tid;
T small_chunk, extras;
active = !team->t.t_serialized;
th->th.th_ident = loc;
+ // Any half-decent optimizer will remove this test when the blocks are empty
+ // since the macros expand to nothing
+ // when statistics are disabled.
+ if (schedule == __kmp_static) {
+ KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
+ } else {
+ KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC);
+ }
+
#if KMP_USE_HIER_SCHED
// Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable
// Hierarchical scheduling does not work with ordered, so if ordered is
}
}
- // Any half-decent optimizer will remove this test when the blocks are empty
- // since the macros expand to nothing
- // when statistics are disabled.
- if (schedule == __kmp_static) {
- KMP_COUNT_BLOCK(OMP_FOR_static);
- KMP_COUNT_VALUE(FOR_static_iterations, pr->u.p.tc);
- } else {
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
- KMP_COUNT_VALUE(FOR_dynamic_iterations, pr->u.p.tc);
- }
-
if (active) {
/* The name of this buffer should be my_buffer_index when it's free to use
* it */
&(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
+ KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
}
/* For ordered loops, either __kmp_dispatch_finish() should be called after
// by 1
if (remaining > 3) {
// steal 1/4 of remaining
- KMP_COUNT_VALUE(FOR_static_steal_stolen, remaining >> 2);
+ KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2);
init = (victim->u.p.ub -= (remaining >> 2));
} else {
// steal 1 chunk of 2 or 3 remaining
- KMP_COUNT_VALUE(FOR_static_steal_stolen, 1);
+ KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1);
init = (victim->u.p.ub -= 1);
}
__kmp_release_lock(lck, gtid);
*VOLATILE_CAST(kmp_int64 *) & vold.b,
*VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
// stealing succedded
- KMP_COUNT_VALUE(FOR_static_steal_stolen, vold.p.ub - vnew.p.ub);
+ KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
+ vold.p.ub - vnew.p.ub);
status = 1;
while_index = 0;
// now update own count and ub
init *= chunk;
limit = chunk + init - 1;
incr = pr->u.p.st;
- KMP_COUNT_VALUE(FOR_static_steal_chunks, 1);
+ KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1);
KMP_DEBUG_ASSERT(init <= trip);
if ((last = (limit >= trip)) != 0)
#define OMPT_LOOP_END // no-op
#endif
+#if KMP_STATS_ENABLED
+#define KMP_STATS_LOOP_END \
+ { \
+ kmp_int64 u, l, t, i; \
+ l = (kmp_int64)(*p_lb); \
+ u = (kmp_int64)(*p_ub); \
+ i = (kmp_int64)(pr->u.p.st); \
+ if (status == 0) { \
+ t = 0; \
+ KMP_POP_PARTITIONED_TIMER(); \
+ } else if (i == 1) { \
+ if (u >= l) \
+ t = u - l + 1; \
+ else \
+ t = 0; \
+ } else if (i < 0) { \
+ if (l >= u) \
+ t = (l - u) / (-i) + 1; \
+ else \
+ t = 0; \
+ } else { \
+ if (u >= l) \
+ t = (u - l) / i + 1; \
+ else \
+ t = 0; \
+ } \
+ KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t); \
+ }
+#else
+#define KMP_STATS_LOOP_END /* Nothing */
+#endif
+
template <typename T>
static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
T *p_lb, T *p_ub,
// even if the actual runtme schedule is static. (Which points out a
// disadavantage of schedule(runtime): even when static scheduling is used it
// costs more than a compile time choice to use static scheduling would.)
- KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);
int status;
dispatch_private_info_template<T> *pr;
SSC_MARK_DISPATCH_NEXT();
#endif
OMPT_LOOP_END;
+ KMP_STATS_LOOP_END;
return status;
} else {
kmp_int32 last = 0;
SSC_MARK_DISPATCH_NEXT();
#endif
OMPT_LOOP_END;
+ KMP_STATS_LOOP_END;
return status;
}
KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
parent_team->t.t_id, parent_team->t.t_pkfn));
- {
- KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
- KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- if (!parent_team->t.t_invoke(gtid)) {
- KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
- }
+ if (!parent_team->t.t_invoke(gtid)) {
+ KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
}
KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
parent_team->t.t_id, parent_team->t.t_pkfn));
// because initial code in teams should have level=0
team->t.t_level--;
// AC: call special invoker for outer "parallel" of teams construct
- {
- KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
- KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- invoker(gtid);
- }
+ invoker(gtid);
} else {
#endif /* OMP_40_ENABLED */
argv = args;
team->t.t_id, team->t.t_pkfn));
} // END of timer KMP_fork_call block
- {
- KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
- KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- if (!team->t.t_invoke(gtid)) {
- KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
- }
+ if (!team->t.t_invoke(gtid)) {
+ KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
}
KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
team->t.t_id, team->t.t_pkfn));
#if KMP_STATS_ENABLED
// Initialize stats as soon as possible (right after gtid assignment).
__kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
- KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
+ __kmp_stats_thread_ptr->startLife();
KMP_SET_THREAD_STATE(SERIAL_REGION);
KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
#endif
}
#endif
- {
- KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
- KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- rc = (*pteam)->t.t_invoke(gtid);
- }
+ rc = (*pteam)->t.t_invoke(gtid);
KMP_ASSERT(rc);
KMP_MB();
void *codeptr
#endif
) {
- KMP_COUNT_BLOCK(OMP_FOR_static);
- KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);
+ KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
typedef typename traits_t<T>::unsigned_t UT;
typedef typename traits_t<T>::signed_t ST;
&(task_info->task_data), 0, codeptr);
}
#endif
- KMP_COUNT_VALUE(FOR_static_iterations, 0);
return;
}
loc);
}
}
- KMP_COUNT_VALUE(FOR_static_iterations, trip_count);
/* compute remaining parameters */
switch (schedtype) {
}
#endif
+#if KMP_STATS_ENABLED
+ {
+ kmp_int64 t;
+ kmp_int64 u = (kmp_int64)(*pupper);
+ kmp_int64 l = (kmp_int64)(*plower);
+ kmp_int64 i = (kmp_int64)incr;
+ /* compute trip count */
+ if (i == 1) {
+ t = u - l + 1;
+ } else if (i == -1) {
+ t = l - u + 1;
+ } else if (i > 0) {
+ t = (u - l) / i + 1;
+ } else {
+ t = (l - u) / (-i) + 1;
+ }
+ KMP_COUNT_VALUE(OMP_loop_static_iterations, t);
+ KMP_POP_PARTITIONED_TIMER();
+ }
+#endif
return;
}
#include <iomanip>
#include <sstream>
#include <stdlib.h> // for atexit
+#include <cmath>
#define STRINGIZE2(x) #x
#define STRINGIZE(x) STRINGIZE2(x)
// output interface
static kmp_stats_output_module *__kmp_stats_global_output = NULL;
+double logHistogram::binMax[] = {
+ 1.e1l, 1.e2l, 1.e3l, 1.e4l, 1.e5l, 1.e6l, 1.e7l, 1.e8l,
+ 1.e9l, 1.e10l, 1.e11l, 1.e12l, 1.e13l, 1.e14l, 1.e15l, 1.e16l,
+ 1.e17l, 1.e18l, 1.e19l, 1.e20l, 1.e21l, 1.e22l, 1.e23l, 1.e24l,
+ 1.e25l, 1.e26l, 1.e27l, 1.e28l, 1.e29l, 1.e30l};
+
/* ************* statistic member functions ************* */
void statistic::addSample(double sample) {
+ sample -= offset;
+ KMP_DEBUG_ASSERT(std::isfinite(sample));
+
double delta = sample - meanVal;
sampleCount = sampleCount + 1;
minVal = std::min(minVal, sample);
maxVal = std::max(maxVal, sample);
+ if (collectingHist)
+ hist.addSample(sample);
}
statistic &statistic::operator+=(const statistic &other) {
+ if (other.sampleCount == 0)
+ return *this;
+
if (sampleCount == 0) {
*this = other;
return *this;
minVal = std::min(minVal, other.minVal);
maxVal = std::max(maxVal, other.maxVal);
sampleCount = newSampleCount;
+ if (collectingHist)
+ hist += other.hist;
return *this;
}
return result;
}
+/* ************* histogram member functions ************* */
+
+// Lowest bin that has anything in it
+int logHistogram::minBin() const {
+ for (int i = 0; i < numBins; i++) {
+ if (bins[i].count != 0)
+ return i - logOffset;
+ }
+ return -logOffset;
+}
+
+// Highest bin that has anything in it
+int logHistogram::maxBin() const {
+ for (int i = numBins - 1; i >= 0; i--) {
+ if (bins[i].count != 0)
+ return i - logOffset;
+ }
+ return -logOffset;
+}
+
+// Which bin does this sample belong in ?
+uint32_t logHistogram::findBin(double sample) {
+ double v = std::fabs(sample);
+ // Simply loop up looking which bin to put it in.
+ // According to a micro-architect this is likely to be faster than a binary
+ // search, since
+ // it will only have one branch mis-predict
+ for (int b = 0; b < numBins; b++)
+ if (binMax[b] > v)
+ return b;
+ fprintf(stderr,
+ "Trying to add a sample that is too large into a histogram\n");
+ KMP_ASSERT(0);
+ return -1;
+}
+
+void logHistogram::addSample(double sample) {
+ if (sample == 0.0) {
+ zeroCount += 1;
+#ifdef KMP_DEBUG
+ _total++;
+ check();
+#endif
+ return;
+ }
+ KMP_DEBUG_ASSERT(std::isfinite(sample));
+ uint32_t bin = findBin(sample);
+ KMP_DEBUG_ASSERT(0 <= bin && bin < numBins);
+
+ bins[bin].count += 1;
+ bins[bin].total += sample;
+#ifdef KMP_DEBUG
+ _total++;
+ check();
+#endif
+}
+
+// This may not be the format we want, but it'll do for now
+std::string logHistogram::format(char unit) const {
+ std::stringstream result;
+
+ result << "Bin, Count, Total\n";
+ if (zeroCount) {
+ result << "0, " << formatSI(zeroCount, 9, ' ') << ", ",
+ formatSI(0.0, 9, unit);
+ if (count(minBin()) == 0)
+ return result.str();
+ result << "\n";
+ }
+ for (int i = minBin(); i <= maxBin(); i++) {
+ result << "10**" << i << "<=v<10**" << (i + 1) << ", "
+ << formatSI(count(i), 9, ' ') << ", " << formatSI(total(i), 9, unit);
+ if (i != maxBin())
+ result << "\n";
+ }
+
+ return result.str();
+}
+
/* ************* explicitTimer member functions ************* */
-void explicitTimer::start(timer_e timerEnumValue) {
- startTime = tsc_tick_count::now();
+void explicitTimer::start(tsc_tick_count tick) {
+ startTime = tick;
totalPauseTime = 0;
if (timeStat::logEvent(timerEnumValue)) {
__kmp_stats_thread_ptr->incrementNestValue();
return;
}
-void explicitTimer::stop(timer_e timerEnumValue,
+void explicitTimer::stop(tsc_tick_count tick,
kmp_stats_list *stats_ptr /* = nullptr */) {
if (startTime.getValue() == 0)
return;
- tsc_tick_count finishTime = tsc_tick_count::now();
-
- // stat->addSample ((tsc_tick_count::now() - startTime).ticks());
- stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
+ stat->addSample(((tick - startTime) - totalPauseTime).ticks());
if (timeStat::logEvent(timerEnumValue)) {
if (!stats_ptr)
stats_ptr = __kmp_stats_thread_ptr;
stats_ptr->push_event(
startTime.getValue() - __kmp_stats_start_time.getValue(),
- finishTime.getValue() - __kmp_stats_start_time.getValue(),
+ tick.getValue() - __kmp_stats_start_time.getValue(),
__kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
stats_ptr->decrementNestValue();
}
/* ************* partitionedTimers member functions ************* */
partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
-// add a timer to this collection of partitioned timers.
-void partitionedTimers::add_timer(explicit_timer_e timer_index,
- explicitTimer *timer_pointer) {
- KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST + 1);
- timers[timer_index] = timer_pointer;
-}
-
// initialize the paritioned timers to an initial timer
-void partitionedTimers::init(timerPair init_timer_pair) {
+void partitionedTimers::init(explicitTimer timer) {
KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
- timer_stack.push_back(init_timer_pair);
- timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
+ timer_stack.push_back(timer);
+ timer_stack.back().start(tsc_tick_count::now());
}
// stop/save the current timer, and start the new timer (timer_pair)
// There is a special condition where if the current timer is equal to
// the one you are trying to push, then it only manipulates the stack,
// and it won't stop/start the currently running timer.
-void partitionedTimers::push(timerPair timer_pair) {
+void partitionedTimers::push(explicitTimer timer) {
// get the current timer
- // stop current timer
+ // pause current timer
// push new timer
// start the new timer
+ explicitTimer *current_timer, *new_timer;
+ size_t stack_size;
KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
- timerPair current_timer = timer_stack.back();
- timer_stack.push_back(timer_pair);
- if (current_timer != timer_pair) {
- timers[current_timer.get_index()]->pause();
- timers[timer_pair.get_index()]->start(timer_pair.get_timer());
- }
+ timer_stack.push_back(timer);
+ stack_size = timer_stack.size();
+ current_timer = &(timer_stack[stack_size - 2]);
+ new_timer = &(timer_stack[stack_size - 1]);
+ tsc_tick_count tick = tsc_tick_count::now();
+ current_timer->pause(tick);
+ new_timer->start(tick);
}
// stop/discard the current timer, and start the previously saved timer
void partitionedTimers::pop() {
// get the current timer
- // stop current timer
+ // stop current timer (record event/sample)
// pop current timer
- // get the new current timer and start it back up
- KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
- timerPair current_timer = timer_stack.back();
+ // get the new current timer and resume
+ explicitTimer *old_timer, *new_timer;
+ size_t stack_size = timer_stack.size();
+ KMP_DEBUG_ASSERT(stack_size > 1);
+ old_timer = &(timer_stack[stack_size - 1]);
+ new_timer = &(timer_stack[stack_size - 2]);
+ tsc_tick_count tick = tsc_tick_count::now();
+ old_timer->stop(tick);
+ new_timer->resume(tick);
timer_stack.pop_back();
- timerPair new_timer = timer_stack.back();
- if (current_timer != new_timer) {
- timers[current_timer.get_index()]->stop(current_timer.get_timer());
- timers[new_timer.get_index()]->resume();
- }
+}
+
+void partitionedTimers::exchange(explicitTimer timer) {
+ // get the current timer
+ // stop current timer (record event/sample)
+ // push new timer
+ // start the new timer
+ explicitTimer *current_timer, *new_timer;
+ size_t stack_size;
+ KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
+ tsc_tick_count tick = tsc_tick_count::now();
+ stack_size = timer_stack.size();
+ current_timer = &(timer_stack[stack_size - 1]);
+ current_timer->stop(tick);
+ timer_stack.pop_back();
+ timer_stack.push_back(timer);
+ new_timer = &(timer_stack[stack_size - 1]);
+ new_timer->start(tick);
}
// Wind up all the currently running timers.
while (timer_stack.size() > 1) {
this->pop();
}
+ // Pop the timer from the init() call
if (timer_stack.size() > 0) {
- timerPair last_timer = timer_stack.back();
+ timer_stack.back().stop(tsc_tick_count::now());
timer_stack.pop_back();
- timers[last_timer.get_index()]->stop(last_timer.get_timer());
}
}
// event2 or zero if event1 == event2. This sorts by start time (lowest to
// highest).
int compare_two_events(const void *event1, const void *event2) {
- kmp_stats_event *ev1 = (kmp_stats_event *)event1;
- kmp_stats_event *ev2 = (kmp_stats_event *)event2;
+ const kmp_stats_event *ev1 = RCAST(const kmp_stats_event *, event1);
+ const kmp_stats_event *ev2 = RCAST(const kmp_stats_event *, event2);
if (ev1->getStart() < ev2->getStart())
return -1;
int kmp_stats_output_module::printPerThreadFlag = 0;
int kmp_stats_output_module::printPerThreadEventsFlag = 0;
+static char const *lastName(char *name) {
+ int l = strlen(name);
+ for (int i = l - 1; i >= 0; --i) {
+ if (name[i] == '.')
+ name[i] = '_';
+ if (name[i] == '/')
+ return name + i + 1;
+ }
+ return name;
+}
+
+/* Read the name of the executable from /proc/self/cmdline */
+static char const *getImageName(char *buffer, size_t buflen) {
+ FILE *f = fopen("/proc/self/cmdline", "r");
+ buffer[0] = char(0);
+ if (!f)
+ return buffer;
+
+ // The file contains char(0) delimited words from the commandline.
+ // This just returns the last filename component of the first word on the
+ // line.
+ size_t n = fread(buffer, 1, buflen, f);
+ if (n == 0) {
+ fclose(f);
+ KMP_CHECK_SYSFAIL("fread", 1)
+ }
+ fclose(f);
+ buffer[buflen - 1] = char(0);
+ return lastName(buffer);
+}
+
+static void getTime(char *buffer, size_t buflen, bool underscores = false) {
+ time_t timer;
+
+ time(&timer);
+
+ struct tm *tm_info = localtime(&timer);
+ if (underscores)
+ strftime(buffer, buflen, "%Y-%m-%d_%H%M%S", tm_info);
+ else
+ strftime(buffer, buflen, "%Y-%m-%d %H%M%S", tm_info);
+}
+
+/* Generate a stats file name, expanding prototypes */
+static std::string generateFilename(char const *prototype,
+ char const *imageName) {
+ std::string res;
+
+ for (int i = 0; prototype[i] != char(0); i++) {
+ char ch = prototype[i];
+
+ if (ch == '%') {
+ i++;
+ if (prototype[i] == char(0))
+ break;
+
+ switch (prototype[i]) {
+ case 't': // Insert time and date
+ {
+ char date[26];
+ getTime(date, sizeof(date), true);
+ res += date;
+ } break;
+ case 'e': // Insert executable name
+ res += imageName;
+ break;
+ case 'p': // Insert pid
+ {
+ std::stringstream ss;
+ ss << getpid();
+ res += ss.str();
+ } break;
+ default:
+ res += prototype[i];
+ break;
+ }
+ } else
+ res += ch;
+ }
+ return res;
+}
+
// init() is called very near the beginning of execution time in the constructor
// of __kmp_stats_global_output
void kmp_stats_output_module::init() {
+
+ fprintf(stderr, "*** Stats enabled OpenMP* runtime ***\n");
char *statsFileName = getenv("KMP_STATS_FILE");
eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
plotFileName = getenv("KMP_STATS_PLOT_FILE");
// set the stats output filenames based on environment variables and defaults
if (statsFileName) {
- // append the process id to the output filename
- // events.csv --> events-pid.csv
- size_t index;
- std::string baseFileName, pid, suffix;
- std::stringstream ss;
- outputFileName = std::string(statsFileName);
- index = outputFileName.find_last_of('.');
- if (index == std::string::npos) {
- baseFileName = outputFileName;
- } else {
- baseFileName = outputFileName.substr(0, index);
- suffix = outputFileName.substr(index);
- }
- ss << getpid();
- pid = ss.str();
- outputFileName = baseFileName + "-" + pid + suffix;
+ char imageName[1024];
+ // Process any escapes (e.g., %p, %e, %t) in the name
+ outputFileName = generateFilename(
+ statsFileName, getImageName(&imageName[0], sizeof(imageName)));
}
eventsFileName = eventsFileName ? eventsFileName : "events.dat";
plotFileName = plotFileName ? plotFileName : "events.plt";
// will clear flag so that no event will be logged
timeStat::clearEventFlags();
}
-
- return;
}
void kmp_stats_output_module::setupEventColors() {
globalColorIndex = (globalColorIndex + 1) % numGlobalColors;
}
}
- return;
}
void kmp_stats_output_module::printTimerStats(FILE *statsOut,
statistic const *theStats,
statistic const *totalStats) {
- fprintf(statsOut, "Timer, SampleCount, Min, "
- "Mean, Max, Total, SD\n");
+ fprintf(statsOut,
+ "Timer, SampleCount, Min, "
+ "Mean, Max, Total, SD\n");
for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
statistic const *stat = &theStats[s];
char tag = timeStat::noUnits(s) ? ' ' : 'T';
- fprintf(statsOut, "%-28s, %s\n", timeStat::name(s),
+ fprintf(statsOut, "%-35s, %s\n", timeStat::name(s),
stat->format(tag, true).c_str());
}
// Also print the Total_ versions of times.
for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
char tag = timeStat::noUnits(s) ? ' ' : 'T';
if (totalStats && !timeStat::noTotal(s))
- fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(s),
+ fprintf(statsOut, "Total_%-29s, %s\n", timeStat::name(s),
totalStats[s].format(tag, true).c_str());
}
+
+ // Print historgram of statistics
+ if (theStats[0].haveHist()) {
+ fprintf(statsOut, "\nTimer distributions\n");
+ for (int s = 0; s < TIMER_LAST; s++) {
+ statistic const *stat = &theStats[s];
+
+ if (stat->getCount() != 0) {
+ char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T';
+
+ fprintf(statsOut, "%s\n", timeStat::name(timer_e(s)));
+ fprintf(statsOut, "%s\n", stat->getHist()->format(tag).c_str());
+ }
+ }
+ }
}
void kmp_stats_output_module::printCounterStats(FILE *statsOut,
fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(s)),
stat->format(' ', true).c_str());
}
+ // Print histogram of counters
+ if (theStats[0].haveHist()) {
+ fprintf(statsOut, "\nCounter distributions\n");
+ for (int s = 0; s < COUNTER_LAST; s++) {
+ statistic const *stat = &theStats[s];
+
+ if (stat->getCount() != 0) {
+ fprintf(statsOut, "%s\n", counter::name(counter_e(s)));
+ fprintf(statsOut, "%s\n", stat->getHist()->format(' ').c_str());
+ }
+ }
+ }
}
void kmp_stats_output_module::printCounters(FILE *statsOut,
for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
kmp_stats_list *ptr = *it;
ptr->getPartitionedTimers()->windup();
- for (int timer = 0; timer < EXPLICIT_TIMER_LAST; timer++) {
- ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
- }
+ ptr->endLife();
}
}
return;
}
+static void outputEnvVariable(FILE *statsOut, char const *name) {
+ char const *value = getenv(name);
+ fprintf(statsOut, "# %s = %s\n", name, value ? value : "*unspecified*");
+}
+
/* Print some useful information about
* the date and time this experiment ran.
* the machine on which it ran.
else
fprintf(statsOut, "# Nominal frequency: %sz\n",
formatSI(double(__kmp_cpuinfo.frequency), 9, 'H').c_str());
+ outputEnvVariable(statsOut, "KMP_HW_SUBSET");
+ outputEnvVariable(statsOut, "KMP_AFFINITY");
+ outputEnvVariable(statsOut, "KMP_BLOCKTIME");
+ outputEnvVariable(statsOut, "KMP_LIBRARY");
+ fprintf(statsOut, "# Production runtime built " __DATE__ " " __TIME__ "\n");
#endif
}
for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
timeStat *timers = (*it)->getTimers();
counter *counters = (*it)->getCounters();
- explicitTimer *eTimers = (*it)->getExplicitTimers();
for (int t = 0; t < TIMER_LAST; t++)
timers[t].reset();
for (int c = 0; c < COUNTER_LAST; c++)
counters[c].reset();
- for (int t = 0; t < EXPLICIT_TIMER_LAST; t++)
- eTimers[t].reset();
-
// reset the event vector so all previous events are "erased"
(*it)->resetEventVector();
}
//===----------------------------------------------------------------------===//
#include "kmp_config.h"
+#include "kmp_debug.h"
#if KMP_STATS_ENABLED
/* Statistics accumulator.
/* Enable developer statistics here if you want them. They are more detailed
than is useful for application characterisation and are intended for the
runtime library developer. */
-// #define KMP_DEVELOPER_STATS 1
+#define KMP_DEVELOPER_STATS 0
+
+/* Enable/Disable histogram output */
+#define KMP_STATS_HIST 0
/*!
* @ingroup STATS_GATHERING
#define KMP_FOREACH_COUNTER(macro, arg) \
macro(OMP_PARALLEL,stats_flags_e::onlyInMaster|stats_flags_e::noTotal,arg) \
macro(OMP_NESTED_PARALLEL, 0, arg) \
- macro(OMP_FOR_static, 0, arg) \
- macro(OMP_FOR_static_steal, 0, arg) \
- macro(OMP_FOR_dynamic, 0, arg) \
+ macro(OMP_LOOP_STATIC, 0, arg) \
+ macro(OMP_LOOP_STATIC_STEAL, 0, arg) \
+ macro(OMP_LOOP_DYNAMIC, 0, arg) \
macro(OMP_DISTRIBUTE, 0, arg) \
macro(OMP_BARRIER, 0, arg) \
macro(OMP_CRITICAL, 0, arg) \
* @param arg a user defined argument to send to the user defined macro
*
* \details A timer collects multiple samples of some count in each thread and
- * then finally aggregates alll of the samples from all of the threads. For most
+ * then finally aggregates all of the samples from all of the threads. For most
* timers the printing code also provides an aggregation over the thread totals.
* These are printed as TOTAL_foo. The count is normally a time (in ticks),
* hence the name "timer". (But can be any value, so we use this for "number of
*/
// clang-format off
#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \
- macro (FOR_static_scheduling, 0, arg) \
- macro (FOR_dynamic_scheduling, 0, arg) \
- macro (OMP_critical, 0, arg) \
- macro (OMP_critical_wait, 0, arg) \
- macro (OMP_single, 0, arg) \
- macro (OMP_master, 0, arg) \
- macro (OMP_idle, stats_flags_e::logEvent, arg) \
- macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \
- macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \
- macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \
- macro (OMP_parallel, stats_flags_e::logEvent, arg) \
- macro (OMP_task_immediate, 0, arg) \
- macro (OMP_task_taskwait, 0, arg) \
- macro (OMP_task_taskyield, 0, arg) \
- macro (OMP_task_taskgroup, 0, arg) \
- macro (OMP_task_join_bar, 0, arg) \
- macro (OMP_task_plain_bar, 0, arg) \
- macro (OMP_serial, stats_flags_e::logEvent, arg) \
- macro (OMP_taskloop_scheduling, 0, arg) \
- macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,\
- arg) \
- macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \
- arg) \
- macro (FOR_static_iterations, \
- stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- macro (FOR_dynamic_iterations, \
- stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- macro (FOR_static_steal_stolen, \
- stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- macro (FOR_static_steal_chunks, \
- stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+ macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \
+ macro (OMP_parallel, stats_flags_e::logEvent, arg) \
+ macro (OMP_parallel_overhead, stats_flags_e::logEvent, arg) \
+ macro (OMP_loop_static, 0, arg) \
+ macro (OMP_loop_static_scheduling, 0, arg) \
+ macro (OMP_loop_dynamic, 0, arg) \
+ macro (OMP_loop_dynamic_scheduling, 0, arg) \
+ macro (OMP_critical, 0, arg) \
+ macro (OMP_critical_wait, 0, arg) \
+ macro (OMP_single, 0, arg) \
+ macro (OMP_master, 0, arg) \
+ macro (OMP_task_immediate, 0, arg) \
+ macro (OMP_task_taskwait, 0, arg) \
+ macro (OMP_task_taskyield, 0, arg) \
+ macro (OMP_task_taskgroup, 0, arg) \
+ macro (OMP_task_join_bar, 0, arg) \
+ macro (OMP_task_plain_bar, 0, arg) \
+ macro (OMP_taskloop_scheduling, 0, arg) \
+ macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_idle, stats_flags_e::logEvent, arg) \
+ macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_serial, stats_flags_e::logEvent, arg) \
+ macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, \
+ arg) \
+ macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \
+ arg) \
+ macro (OMP_loop_static_iterations, \
+ stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
+ macro (OMP_loop_dynamic_iterations, \
+ stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
+ KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
// clang-format on
-// OMP_start_end -- Time from when OpenMP is initialized until the
-// stats are printed at exit
-// OMP_serial -- Thread zero time executing serial code
-// OMP_work -- Elapsed time in code dispatched by a fork (measured
-// in the thread)
-// OMP_barrier -- Time at "real" barriers (includes task time)
-// FOR_static_scheduling -- Time spent doing scheduling for a static "for"
-// FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
-// OMP_idle -- Worker threads time spent waiting for inclusion in
-// a parallel region
-// OMP_plain_barrier -- Time spent in a barrier construct
-// OMP_fork_join_barrier -- Time spent in a the fork-join barrier surrounding a
-// parallel region
-// OMP_parallel -- Time spent inside a parallel construct
+// OMP_worker_thread_life -- Time from thread becoming an OpenMP thread (either
+// initializing OpenMP or being created by a master)
+// until the thread is destroyed
+// OMP_parallel -- Time thread spends executing work directly
+// within a #pragma omp parallel
+// OMP_parallel_overhead -- Time thread spends setting up a parallel region
+// OMP_loop_static -- Time thread spends executing loop iterations from
+// a statically scheduled loop
+// OMP_loop_static_scheduling -- Time thread spends scheduling loop iterations
+// from a statically scheduled loop
+// OMP_loop_dynamic -- Time thread spends executing loop iterations from
+// a dynamically scheduled loop
+// OMP_loop_dynamic_scheduling -- Time thread spends scheduling loop iterations
+// from a dynamically scheduled loop
+// OMP_critical -- Time thread spends executing critical section
+// OMP_critical_wait -- Time thread spends waiting to enter
+// a critcal seciton
+// OMP_single -- Time spent executing a "single" region
+// OMP_master -- Time spent executing a "master" region
// OMP_task_immediate -- Time spent executing non-deferred tasks
// OMP_task_taskwait -- Time spent executing tasks inside a taskwait
// construct
// OMP_task_join_bar -- Time spent executing tasks inside a join barrier
// OMP_task_plain_bar -- Time spent executing tasks inside a barrier
// construct
-// OMP_single -- Time spent executing a "single" region
-// OMP_master -- Time spent executing a "master" region
+// OMP_taskloop_scheduling -- Time spent scheduling tasks inside a taskloop
+// construct
+// OMP_plain_barrier -- Time spent in a #pragma omp barrier construct or
+// inside implicit barrier at end of worksharing
+// construct
+// OMP_idle -- Time worker threads spend waiting for next
+// parallel region
+// OMP_fork_barrier -- Time spent in a the fork barrier surrounding a
+// parallel region
+// OMP_join_barrier -- Time spent in a the join barrier surrounding a
+// parallel region
+// OMP_serial -- Time thread zero spends executing serial code
// OMP_set_numthreads -- Values passed to omp_set_num_threads
// OMP_PARALLEL_args -- Number of arguments passed to a parallel region
-// FOR_static_iterations -- Number of available parallel chunks of work in a
-// static for
-// FOR_dynamic_iterations -- Number of available parallel chunks of work in a
-// dynamic for
-// Both adjust for any chunking, so if there were an
-// iteration count of 20 but a chunk size of 10, we'd
-// record 2.
+// OMP_loop_static_iterations -- Number of iterations thread is assigned for
+// statically scheduled loops
+// OMP_loop_dynamic_iterations -- Number of iterations thread is assigned for
+// dynamically scheduled loops
#if (KMP_DEVELOPER_STATS)
// Timers which are of interest to runtime library developers, not end users.
macro(USER_suspend, 0, arg) \
macro(KMP_allocate_team, 0, arg) \
macro(KMP_setup_icv_copy, 0, arg) \
- macro(USER_icv_copy, 0, arg)
+ macro(USER_icv_copy, 0, arg) \
+ macro (FOR_static_steal_stolen, \
+ stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
+ macro (FOR_static_steal_chunks, \
+ stats_flags_e::noUnits | stats_flags_e::noTotal, arg)
#else
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST };
#undef ENUMERATE
-class timerPair {
- explicit_timer_e timer_index;
- timer_e timer;
+/*
+ * A logarithmic histogram. It accumulates the number of values in each power of
+ * ten bin. So 1<=x<10, 10<=x<100, ...
+ * Mostly useful where we have some big outliers and want to see information
+ * about them.
+ */
+class logHistogram {
+ enum {
+ numBins = 31, /* Number of powers of 10. If this changes you need to change
+ * the initializer for binMax */
+
+ /*
+ * If you want to use this to analyse values that may be less than 1, (for
+ * instance times in s), then the logOffset gives you negative powers.
+ * In our case here, we're just looking at times in ticks, or counts, so we
+ * can never see values with magnitude < 1 (other than zero), so we can set
+ * it to 0. As above change the initializer if you change this.
+ */
+ logOffset = 0
+ };
+ uint32_t KMP_ALIGN_CACHE zeroCount;
+ struct {
+ uint32_t count;
+ double total;
+ } bins[numBins];
+
+ static double binMax[numBins];
+
+#ifdef KMP_DEBUG
+ uint64_t _total;
+
+ void check() const {
+ uint64_t t = zeroCount;
+ for (int i = 0; i < numBins; i++)
+ t += bins[i].count;
+ KMP_DEBUG_ASSERT(t == _total);
+ }
+#else
+ void check() const {}
+#endif
public:
- timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
- inline explicit_timer_e get_index() const { return timer_index; }
- inline timer_e get_timer() const { return timer; }
- bool operator==(const timerPair &rhs) {
- return this->get_index() == rhs.get_index();
+ logHistogram() { reset(); }
+
+ logHistogram(logHistogram const &o) {
+ for (int i = 0; i < numBins; i++)
+ bins[i] = o.bins[i];
+#ifdef KMP_DEBUG
+ _total = o._total;
+#endif
}
- bool operator!=(const timerPair &rhs) { return !(*this == rhs); }
+
+ void reset() {
+ zeroCount = 0;
+ for (int i = 0; i < numBins; i++) {
+ bins[i].count = 0;
+ bins[i].total = 0;
+ }
+
+#ifdef KMP_DEBUG
+ _total = 0;
+#endif
+ }
+ uint32_t count(int b) const { return bins[b + logOffset].count; }
+ double total(int b) const { return bins[b + logOffset].total; }
+ static uint32_t findBin(double sample);
+
+ logHistogram &operator+=(logHistogram const &o) {
+ zeroCount += o.zeroCount;
+ for (int i = 0; i < numBins; i++) {
+ bins[i].count += o.bins[i].count;
+ bins[i].total += o.bins[i].total;
+ }
+#ifdef KMP_DEBUG
+ _total += o._total;
+ check();
+#endif
+
+ return *this;
+ }
+
+ void addSample(double sample);
+ int minBin() const;
+ int maxBin() const;
+
+ std::string format(char) const;
};
class statistic {
- double minVal;
+ double KMP_ALIGN_CACHE minVal;
double maxVal;
double meanVal;
double m2;
uint64_t sampleCount;
+ double offset;
+ bool collectingHist;
+ logHistogram hist;
public:
- statistic() { reset(); }
+ statistic(bool doHist = bool(KMP_STATS_HIST)) {
+ reset();
+ collectingHist = doHist;
+ }
statistic(statistic const &o)
: minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
- sampleCount(o.sampleCount) {}
-
+ sampleCount(o.sampleCount), offset(o.offset),
+ collectingHist(o.collectingHist), hist(o.hist) {}
+ statistic(double minv, double maxv, double meanv, uint64_t sc, double sd)
+ : minVal(minv), maxVal(maxv), meanVal(meanv), m2(sd * sd * sc),
+ sampleCount(sc), offset(0.0), collectingHist(false) {}
+ bool haveHist() const { return collectingHist; }
double getMin() const { return minVal; }
double getMean() const { return meanVal; }
double getMax() const { return maxVal; }
uint64_t getCount() const { return sampleCount; }
double getSD() const { return sqrt(m2 / sampleCount); }
double getTotal() const { return sampleCount * meanVal; }
+ logHistogram const *getHist() const { return &hist; }
+ void setOffset(double d) { offset = d; }
void reset() {
minVal = std::numeric_limits<double>::max();
- maxVal = -std::numeric_limits<double>::max();
+ maxVal = -minVal;
meanVal = 0.0;
m2 = 0.0;
sampleCount = 0;
+ offset = 0.0;
+ hist.reset();
}
void addSample(double sample);
void scale(double factor);
void scaleDown(double f) { scale(1. / f); }
+ void forceCount(uint64_t count) { sampleCount = count; }
statistic &operator+=(statistic const &other);
std::string format(char unit, bool total = false) const;
+ std::string formatHist(char unit) const { return hist.format(unit); }
};
struct statInfo {
// to live on the stack of the thread, they're more work to use.
class explicitTimer {
timeStat *stat;
+ timer_e timerEnumValue;
tsc_tick_count startTime;
tsc_tick_count pauseStartTime;
tsc_tick_count::tsc_interval_t totalPauseTime;
public:
- explicitTimer()
- : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() {}
- explicitTimer(timeStat *s)
- : stat(s), startTime(), pauseStartTime(0), totalPauseTime() {}
-
- void setStat(timeStat *s) { stat = s; }
- void start(timer_e timerEnumValue);
- void pause() { pauseStartTime = tsc_tick_count::now(); }
- void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
- void stop(timer_e timerEnumValue, kmp_stats_list *stats_ptr = nullptr);
+ explicitTimer(timeStat *s, timer_e te)
+ : stat(s), timerEnumValue(te), startTime(), pauseStartTime(0),
+ totalPauseTime() {}
+
+ // void setStat(timeStat *s) { stat = s; }
+ void start(tsc_tick_count tick);
+ void pause(tsc_tick_count tick) { pauseStartTime = tick; }
+ void resume(tsc_tick_count tick) {
+ totalPauseTime += (tick - pauseStartTime);
+ }
+ void stop(tsc_tick_count tick, kmp_stats_list *stats_ptr = nullptr);
void reset() {
startTime = 0;
pauseStartTime = 0;
totalPauseTime = 0;
}
-};
-
-// Where all you need is to time a block, this is enough.
-// (It avoids the need to have an explicit end, leaving the scope suffices.)
-class blockTimer : public explicitTimer {
- timer_e timerEnumValue;
-
-public:
- blockTimer(timeStat *s, timer_e newTimerEnumValue)
- : timerEnumValue(newTimerEnumValue), explicitTimer(s) {
- start(timerEnumValue);
- }
- ~blockTimer() { stop(timerEnumValue); }
+ timer_e get_type() const { return timerEnumValue; }
};
// Where you need to partition a threads clock ticks into separate states
// versa
class partitionedTimers {
private:
- explicitTimer *timers[EXPLICIT_TIMER_LAST + 1];
- std::vector<timerPair> timer_stack;
+ std::vector<explicitTimer> timer_stack;
public:
partitionedTimers();
- void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer);
- void init(timerPair timer_index);
- void push(timerPair timer_index);
+ void init(explicitTimer timer);
+ void exchange(explicitTimer timer);
+ void push(explicitTimer timer);
void pop();
void windup();
};
// It avoids the need to have an explicit end, leaving the scope suffices.
class blockPartitionedTimer {
partitionedTimers *part_timers;
- timerPair timer_pair;
public:
- blockPartitionedTimer(partitionedTimers *pt, timerPair tp)
- : part_timers(pt), timer_pair(tp) {
- part_timers->push(timer_pair);
+ blockPartitionedTimer(partitionedTimers *pt, explicitTimer timer)
+ : part_timers(pt) {
+ part_timers->push(timer);
}
~blockPartitionedTimer() { part_timers->pop(); }
};
int gtid;
timeStat _timers[TIMER_LAST + 1];
counter _counters[COUNTER_LAST + 1];
- explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1];
+ explicitTimer thread_life_timer;
partitionedTimers _partitionedTimers;
int _nestLevel; // one per thread
kmp_stats_event_vector _event_vector;
public:
kmp_stats_list()
- : _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
- thread_is_idle_flag(0) {
-#define doInit(name, ignore1, ignore2) \
- getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
- _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, \
- getExplicitTimer(EXPLICIT_TIMER_##name));
- KMP_FOREACH_EXPLICIT_TIMER(doInit, 0);
-#undef doInit
- }
+ : thread_life_timer(&_timers[TIMER_OMP_worker_thread_life],
+ TIMER_OMP_worker_thread_life),
+ _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
+ thread_is_idle_flag(0) {}
~kmp_stats_list() {}
inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; }
inline counter *getCounter(counter_e idx) { return &_counters[idx]; }
- inline explicitTimer *getExplicitTimer(explicit_timer_e idx) {
- return &_explicitTimers[idx];
- }
inline partitionedTimers *getPartitionedTimers() {
return &_partitionedTimers;
}
inline timeStat *getTimers() { return _timers; }
inline counter *getCounters() { return _counters; }
- inline explicitTimer *getExplicitTimers() { return _explicitTimers; }
inline kmp_stats_event_vector &getEventVector() { return _event_vector; }
+ inline void startLife() { thread_life_timer.start(tsc_tick_count::now()); }
+ inline void endLife() { thread_life_timer.stop(tsc_tick_count::now(), this); }
inline void resetEventVector() { _event_vector.reset(); }
inline void incrementNestValue() { _nestLevel++; }
inline int getNestValue() { return _nestLevel; }
// Simple, standard interfaces that drop out completely if stats aren't enabled
/*!
- * \brief Uses specified timer (name) to time code block.
- *
- * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
- *
- * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will
- * record the time taken in the block and use the destructor to stop the timer.
- * Convenient! With this definition you can't have more than one KMP_TIME_BLOCK
- * in the same code block. I don't think that's a problem.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_TIME_BLOCK(name) \
- blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \
- TIMER_##name)
-
-/*!
* \brief Adds value to specified timer (name).
*
* @param name timer name as specified under the KMP_FOREACH_TIMER() macro
__kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
/*!
- * \brief "Starts" an explicit timer which will need a corresponding
- * KMP_STOP_EXPLICIT_TIMER() macro.
- *
- * @param name explicit timer name as specified under the
- * KMP_FOREACH_EXPLICIT_TIMER() macro
- *
- * \details Use to start a timer. This will need a corresponding
- * KMP_STOP_EXPLICIT_TIMER() macro to stop the timer unlike the
- * KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end
- * of the code block. All explicit timers are stopped at library exit time
- * before the final statistics are outputted.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_START_EXPLICIT_TIMER(name) \
- __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \
- ->start(TIMER_##name)
-
-/*!
- * \brief "Stops" an explicit timer.
- *
- * @param name explicit timer name as specified under the
- * KMP_FOREACH_EXPLICIT_TIMER() macro
- *
- * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is
- * done, the time between the last KMP_START_EXPLICIT_TIMER(name) and this
- * KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The
- * timer will then be reset. After the KMP_STOP_EXPLICIT_TIMER(name) macro is
- * called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer
- * once again.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_STOP_EXPLICIT_TIMER(name) \
- __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \
- ->stop(TIMER_##name)
-
-/*!
* \brief Outputs the current thread statistics and reset them.
*
* @param heading_string heading put above the final stats output
* @ingroup STATS_GATHERING
*/
#define KMP_INIT_PARTITIONED_TIMERS(name) \
- __kmp_stats_thread_ptr->getPartitionedTimers()->init( \
- timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+ __kmp_stats_thread_ptr->getPartitionedTimers()->init(explicitTimer( \
+ __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
#define KMP_TIME_PARTITIONED_BLOCK(name) \
blockPartitionedTimer __PBLOCKTIME__( \
__kmp_stats_thread_ptr->getPartitionedTimers(), \
- timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+ explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \
+ TIMER_##name))
#define KMP_PUSH_PARTITIONED_TIMER(name) \
- __kmp_stats_thread_ptr->getPartitionedTimers()->push( \
- timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+ __kmp_stats_thread_ptr->getPartitionedTimers()->push(explicitTimer( \
+ __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
#define KMP_POP_PARTITIONED_TIMER() \
__kmp_stats_thread_ptr->getPartitionedTimers()->pop()
+#define KMP_EXCHANGE_PARTITIONED_TIMER(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->exchange(explicitTimer( \
+ __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
+
#define KMP_SET_THREAD_STATE(state_name) \
__kmp_stats_thread_ptr->setState(state_name)
static struct {
double scale;
char prefix;
- } ranges[] = {{1.e12, 'f'}, {1.e9, 'p'}, {1.e6, 'n'}, {1.e3, 'u'},
- {1.0, 'm'}, {1.e-3, ' '}, {1.e-6, 'k'}, {1.e-9, 'M'},
- {1.e-12, 'G'}, {1.e-15, 'T'}, {1.e-18, 'P'}, {1.e-21, 'E'},
- {1.e-24, 'Z'}, {1.e-27, 'Y'}};
+ } ranges[] = {{1.e21, 'y'}, {1.e18, 'z'}, {1.e15, 'a'}, {1.e12, 'f'},
+ {1.e9, 'p'}, {1.e6, 'n'}, {1.e3, 'u'}, {1.0, 'm'},
+ {1.e-3, ' '}, {1.e-6, 'k'}, {1.e-9, 'M'}, {1.e-12, 'G'},
+ {1.e-15, 'T'}, {1.e-18, 'P'}, {1.e-21, 'E'}, {1.e-24, 'Z'},
+ {1.e-27, 'Y'}};
if (interval == 0.0) {
os << std::setw(width - 3) << std::right << "0.00" << std::setw(3)
#if KMP_STATS_ENABLED
// set thread local index to point to thread-specific stats
__kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats;
- KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
+ __kmp_stats_thread_ptr->startLife();
KMP_SET_THREAD_STATE(IDLE);
KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
#endif