perf stat: Add support for --initial-delay option

author Andi Kleen <ak@linux.intel.com>

Sat, 3 Aug 2013 00:41:11 +0000 (17:41 -0700)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Wed, 7 Aug 2013 20:35:29 +0000 (17:35 -0300)
author Andi Kleen <ak@linux.intel.com>
Sat, 3 Aug 2013 00:41:11 +0000 (17:41 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 7 Aug 2013 20:35:29 +0000 (17:35 -0300)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt

index 2fe87fb..73c9759 100644 (file)
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical cores.  To enable this mod
  use --per-core in addition to -a. (system-wide).  The output includes the
  core number and the number of online logical processors on that physical processor.
  
  use --per-core in addition to -a. (system-wide).  The output includes the
  core number and the number of online logical processors on that physical processor.
  
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs milliseconds before measuring. This is useful
+to filter out the startup phase, which often behaves very differently from the rest.
+
  EXAMPLES
  --------
  
  EXAMPLES
  --------
  
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c

index 352fbd7..2e637e4 100644 (file)
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -100,6 +100,7 @@ static const char           *pre_cmd                        = NULL;
  static const char              *post_cmd                       = NULL;
  static bool                    sync_run                        = false;
  static unsigned int            interval                        = 0;
  static const char              *post_cmd                       = NULL;
  static bool                    sync_run                        = false;
  static unsigned int            interval                        = 0;
+static unsigned int            initial_delay                   = 0;
  static bool                    forever                         = false;
  static struct timespec         ref_time;
  static struct cpu_map          *aggr_map;
  static bool                    forever                         = false;
  static struct timespec         ref_time;
  static struct cpu_map          *aggr_map;
@@ -254,7 +255,8 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
         if (!perf_target__has_task(&target) &&
             perf_evsel__is_group_leader(evsel)) {
                 attr->disabled = 1;
         if (!perf_target__has_task(&target) &&
             perf_evsel__is_group_leader(evsel)) {
                 attr->disabled = 1;
-               attr->enable_on_exec = 1;
+               if (!initial_delay)
+                       attr->enable_on_exec = 1;
         }
  
         return perf_evsel__open_per_thread(evsel, evsel_list->threads);
         }
  
         return perf_evsel__open_per_thread(evsel, evsel_list->threads);
@@ -416,6 +418,20 @@ static void print_interval(void)
         }
  }
  
         }
  }
  
+/*
+ * Honour the -D option: sleep for initial_delay milliseconds, then enable
+ * every counter on evsel_list.  Does nothing when initial_delay is 0.
+ *
+ * create_perf_stat_counter() leaves enable_on_exec unset when initial_delay
+ * is non-zero, so the explicit enable below is what starts the disabled
+ * counters once the delay has elapsed.
+ */
+static void handle_initial_delay(void)
+{
+       struct perf_evsel *counter;
+       struct timespec delay;
+       int ncpus, nthreads;
+
+       if (!initial_delay)
+               return;
+
+       ncpus = cpu_map__nr(evsel_list->cpus);
+       nthreads = thread_map__nr(evsel_list->threads);
+
+       /*
+        * Avoid usleep(initial_delay * 1000): the multiplication is done in
+        * unsigned int and wraps for delays above ~4294967 ms, and POSIX
+        * allows usleep() to reject arguments >= 1000000.  nanosleep() takes
+        * seconds and nanoseconds separately, so neither limit applies.
+        */
+       delay.tv_sec = initial_delay / 1000;
+       delay.tv_nsec = (initial_delay % 1000) * 1000000L;
+       nanosleep(&delay, NULL);
+
+       list_for_each_entry(counter, &evsel_list->entries, node)
+               perf_evsel__enable(counter, ncpus, nthreads);
+}
+
  static int __run_perf_stat(int argc, const char **argv)
  {
         char msg[512];
  static int __run_perf_stat(int argc, const char **argv)
  {
         char msg[512];
@@ -486,6 +502,7 @@ static int __run_perf_stat(int argc, const char **argv)
  
         if (forks) {
                 perf_evlist__start_workload(evsel_list);
  
         if (forks) {
                 perf_evlist__start_workload(evsel_list);
+               handle_initial_delay();
  
                 if (interval) {
                         while (!waitpid(child_pid, &status, WNOHANG)) {
  
                 if (interval) {
                         while (!waitpid(child_pid, &status, WNOHANG)) {
@@ -497,6 +514,7 @@ static int __run_perf_stat(int argc, const char **argv)
                 if (WIFSIGNALED(status))
                         psignal(WTERMSIG(status), argv[0]);
         } else {
                 if (WIFSIGNALED(status))
                         psignal(WTERMSIG(status), argv[0]);
         } else {
+               handle_initial_delay();
                 while (!done) {
                         nanosleep(&ts, NULL);
                         if (interval)
                 while (!done) {
                         nanosleep(&ts, NULL);
                         if (interval)
@@ -1419,6 +1437,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
                      "aggregate counts per processor socket", AGGR_SOCKET),
         OPT_SET_UINT(0, "per-core", &aggr_mode,
                      "aggregate counts per physical processor core", AGGR_CORE),
                      "aggregate counts per processor socket", AGGR_SOCKET),
         OPT_SET_UINT(0, "per-core", &aggr_mode,
                      "aggregate counts per physical processor core", AGGR_CORE),
+       OPT_UINTEGER('D', "delay", &initial_delay,
+                    "ms to wait before starting measurement after program start"),
         OPT_END()
         };
         const char * const stat_usage[] = {
         OPT_END()
         };
         const char * const stat_usage[] = {
author	Andi Kleen <ak@linux.intel.com>
	Sat, 3 Aug 2013 00:41:11 +0000 (17:41 -0700)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Wed, 7 Aug 2013 20:35:29 +0000 (17:35 -0300)
tools/perf/Documentation/perf-stat.txt		patch \| blob \| history
tools/perf/builtin-stat.c		patch \| blob \| history