perf stat: Introduce evlist methods to allocate/free the stats
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counter summary
5  * overview of any workload, CPU or specific PID.
6  *
7  * Sample output:
8
9    $ perf stat ./hackbench 10
10
11   Time: 0.118
12
13   Performance counter stats for './hackbench 10':
14
15        1708.761321 task-clock                #   11.037 CPUs utilized
16             41,190 context-switches          #    0.024 M/sec
17              6,735 CPU-migrations            #    0.004 M/sec
18             17,318 page-faults               #    0.010 M/sec
19      5,205,202,243 cycles                    #    3.046 GHz
20      3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
21      1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
22      2,603,501,247 instructions              #    0.50  insns per cycle
23                                              #    1.48  stalled cycles per insn
24        484,357,498 branches                  #  283.455 M/sec
25          6,388,934 branch-misses             #    1.32% of all branches
26
27         0.154822978  seconds time elapsed
28
29  *
30  * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
31  *
32  * Improvements and fixes by:
33  *
34  *   Arjan van de Ven <arjan@linux.intel.com>
35  *   Yanmin Zhang <yanmin.zhang@intel.com>
36  *   Wu Fengguang <fengguang.wu@intel.com>
37  *   Mike Galbraith <efault@gmx.de>
38  *   Paul Mackerras <paulus@samba.org>
39  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
40  *
41  * Released under the GPL v2. (and only v2, not any later version)
42  */
43
44 #include "perf.h"
45 #include "builtin.h"
46 #include "util/util.h"
47 #include "util/parse-options.h"
48 #include "util/parse-events.h"
49 #include "util/event.h"
50 #include "util/evlist.h"
51 #include "util/evsel.h"
52 #include "util/debug.h"
53 #include "util/color.h"
54 #include "util/stat.h"
55 #include "util/header.h"
56 #include "util/cpumap.h"
57 #include "util/thread.h"
58 #include "util/thread_map.h"
59
60 #include <stdlib.h>
61 #include <sys/prctl.h>
62 #include <locale.h>
63
64 #define DEFAULT_SEPARATOR       " "
65 #define CNTR_NOT_SUPPORTED      "<not supported>"
66 #define CNTR_NOT_COUNTED        "<not counted>"
67
68 static void print_stat(int argc, const char **argv);
69 static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70 static void print_counter(struct perf_evsel *counter, char *prefix);
71 static void print_aggr_socket(char *prefix);
72
73 static struct perf_evlist       *evsel_list;
74
75 static struct perf_target       target = {
76         .uid    = UINT_MAX,
77 };
78
79 static int                      run_count                       =  1;
80 static bool                     no_inherit                      = false;
81 static bool                     scale                           =  true;
82 static bool                     no_aggr                         = false;
83 static bool                     aggr_socket                     = false;
84 static pid_t                    child_pid                       = -1;
85 static bool                     null_run                        =  false;
86 static int                      detailed_run                    =  0;
87 static bool                     big_num                         =  true;
88 static int                      big_num_opt                     =  -1;
89 static const char               *csv_sep                        = NULL;
90 static bool                     csv_output                      = false;
91 static bool                     group                           = false;
92 static FILE                     *output                         = NULL;
93 static const char               *pre_cmd                        = NULL;
94 static const char               *post_cmd                       = NULL;
95 static bool                     sync_run                        = false;
96 static unsigned int             interval                        = 0;
97 static bool                     forever                         = false;
98 static struct timespec          ref_time;
99 static struct cpu_map           *sock_map;
100
101 static volatile int done = 0;
102
103 struct perf_stat {
104         struct stats      res_stats[3];
105 };
106
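/*
 * Compute r = a - b for two timespecs, borrowing one second from tv_sec
 * when needed so that tv_nsec stays in the [0, 1e9) range.
 */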
107 static inline void diff_timespec(struct timespec *r, struct timespec *a,
108                                  struct timespec *b)
109 {
110         r->tv_sec = a->tv_sec - b->tv_sec;
111         if (a->tv_nsec < b->tv_nsec) {
112                 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
113                 r->tv_sec--;
114         } else {
115                 r->tv_nsec = a->tv_nsec - b->tv_nsec;
116         }
117 }
118
119 static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
120 {
121         return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
122 }
123
124 static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
125 {
126         return perf_evsel__cpus(evsel)->nr;
127 }
128
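/*
 * Each evsel carries a struct perf_stat in ->priv; these helpers
 * allocate, reset and free that per-event state.
 */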
129 static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
130 {
131         memset(evsel->priv, 0, sizeof(struct perf_stat));
132 }
133
134 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
135 {
136         evsel->priv = zalloc(sizeof(struct perf_stat));
137         return evsel->priv == NULL ? -ENOMEM : 0;
138 }
139
140 static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
141 {
142         free(evsel->priv);
143         evsel->priv = NULL;
144 }
145
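/*
 * prev_raw_counts mirrors the layout of evsel->counts (a header plus one
 * perf_counts_values entry per cpu) and is only allocated when interval
 * printing is requested - see the alloc_raw argument below.
 */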
146 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
147 {
148         void *addr;
149         size_t sz;
150
151         sz = sizeof(*evsel->counts) +
152              (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
153
154         addr = zalloc(sz);
155         if (!addr)
156                 return -ENOMEM;
157
158         evsel->prev_raw_counts = addr;
159
160         return 0;
161 }
162
163 static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
164 {
165         free(evsel->prev_raw_counts);
166         evsel->prev_raw_counts = NULL;
167 }
168
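/*
 * Free the per-event stat state for every evsel on the list: the private
 * struct perf_stat, the counts and the prev_raw_counts buffers.
 */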
169 static void perf_evlist__free_stats(struct perf_evlist *evlist)
170 {
171         struct perf_evsel *evsel;
172
173         list_for_each_entry(evsel, &evlist->entries, node) {
174                 perf_evsel__free_stat_priv(evsel);
175                 perf_evsel__free_counts(evsel);
176                 perf_evsel__free_prev_raw_counts(evsel);
177         }
178 }
179
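/*
 * Allocate the per-event stat state for every evsel on the list;
 * prev_raw_counts is only allocated when alloc_raw is set.  On failure,
 * whatever was already allocated is freed again.
 */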
180 static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
181 {
182         struct perf_evsel *evsel;
183
184         list_for_each_entry(evsel, &evlist->entries, node) {
185                 if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
186                     perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
187                     (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
188                         goto out_free;
189         }
190
191         return 0;
192
193 out_free:
194         perf_evlist__free_stats(evlist);
195         return -1;
196 }
197
198 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
199 static struct stats runtime_cycles_stats[MAX_NR_CPUS];
200 static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
201 static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
202 static struct stats runtime_branches_stats[MAX_NR_CPUS];
203 static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
204 static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
205 static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
206 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
207 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
208 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
209 static struct stats walltime_nsecs_stats;
210
211 static void perf_stat__reset_stats(struct perf_evlist *evlist)
212 {
213         struct perf_evsel *evsel;
214
215         list_for_each_entry(evsel, &evlist->entries, node) {
216                 perf_evsel__reset_stat_priv(evsel);
217                 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
218         }
219
220         memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
221         memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
222         memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
223         memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
224         memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
225         memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
226         memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
227         memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
228         memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
229         memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
230         memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
231         memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
232 }
233
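/*
 * Set up the event attributes and open the counter: per-cpu when a cpu
 * target was given, otherwise per-thread.  When no existing task is
 * targeted, the group leader starts disabled and is enabled on exec.
 */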
234 static int create_perf_stat_counter(struct perf_evsel *evsel)
235 {
236         struct perf_event_attr *attr = &evsel->attr;
237
238         if (scale)
239                 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
240                                     PERF_FORMAT_TOTAL_TIME_RUNNING;
241
242         attr->inherit = !no_inherit;
243
244         if (perf_target__has_cpu(&target))
245                 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
246
247         if (!perf_target__has_task(&target) &&
248             perf_evsel__is_group_leader(evsel)) {
249                 attr->disabled = 1;
250                 attr->enable_on_exec = 1;
251         }
252
253         return perf_evsel__open_per_thread(evsel, evsel_list->threads);
254 }
255
256 /*
257  * Does the counter have nsecs as a unit?
258  */
259 static inline int nsec_counter(struct perf_evsel *evsel)
260 {
261         if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
262             perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
263                 return 1;
264
265         return 0;
266 }
267
268 /*
269  * Update various tracking values we maintain to print
270  * more semantic information such as miss/hit ratios,
271  * instruction rates, etc:
272  */
273 static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
274 {
275         if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
276                 update_stats(&runtime_nsecs_stats[0], count[0]);
277         else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
278                 update_stats(&runtime_cycles_stats[0], count[0]);
279         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
280                 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
281         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
282                 update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
283         else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
284                 update_stats(&runtime_branches_stats[0], count[0]);
285         else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
286                 update_stats(&runtime_cacherefs_stats[0], count[0]);
287         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
288                 update_stats(&runtime_l1_dcache_stats[0], count[0]);
289         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
290                 update_stats(&runtime_l1_icache_stats[0], count[0]);
291         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
292                 update_stats(&runtime_ll_cache_stats[0], count[0]);
293         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
294                 update_stats(&runtime_dtlb_cache_stats[0], count[0]);
295         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
296                 update_stats(&runtime_itlb_cache_stats[0], count[0]);
297 }
298
299 /*
300  * Read out the results of a single counter:
301  * aggregate counts across CPUs in system-wide mode
302  */
303 static int read_counter_aggr(struct perf_evsel *counter)
304 {
305         struct perf_stat *ps = counter->priv;
306         u64 *count = counter->counts->aggr.values;
307         int i;
308
309         if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
310                                thread_map__nr(evsel_list->threads), scale) < 0)
311                 return -1;
312
313         for (i = 0; i < 3; i++)
314                 update_stats(&ps->res_stats[i], count[i]);
315
316         if (verbose) {
317                 fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
318                         perf_evsel__name(counter), count[0], count[1], count[2]);
319         }
320
321         /*
322          * Save the full runtime - to allow normalization during printout:
323          */
324         update_shadow_stats(counter, count);
325
326         return 0;
327 }
328
329 /*
330  * Read out the results of a single counter:
331  * do not aggregate counts across CPUs in system-wide mode
332  */
333 static int read_counter(struct perf_evsel *counter)
334 {
335         u64 *count;
336         int cpu;
337
338         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
339                 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
340                         return -1;
341
342                 count = counter->counts->cpu[cpu].values;
343
344                 update_shadow_stats(counter, count);
345         }
346
347         return 0;
348 }
349
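/*
 * Interval mode: reset the per-event res_stats, read all counters and
 * print one batch of counts prefixed with the time elapsed since the
 * run started.  The column header is re-emitted every 25 intervals
 * unless CSV output is used.
 */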
350 static void print_interval(void)
351 {
352         static int num_print_interval;
353         struct perf_evsel *counter;
354         struct perf_stat *ps;
355         struct timespec ts, rs;
356         char prefix[64];
357
358         if (no_aggr) {
359                 list_for_each_entry(counter, &evsel_list->entries, node) {
360                         ps = counter->priv;
361                         memset(ps->res_stats, 0, sizeof(ps->res_stats));
362                         read_counter(counter);
363                 }
364         } else {
365                 list_for_each_entry(counter, &evsel_list->entries, node) {
366                         ps = counter->priv;
367                         memset(ps->res_stats, 0, sizeof(ps->res_stats));
368                         read_counter_aggr(counter);
369                 }
370         }
371         clock_gettime(CLOCK_MONOTONIC, &ts);
372         diff_timespec(&rs, &ts, &ref_time);
373         sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
374
375         if (num_print_interval == 0 && !csv_output) {
376                 if (aggr_socket)
377                         fprintf(output, "#           time socket cpus             counts events\n");
378                 else if (no_aggr)
379                         fprintf(output, "#           time CPU                 counts events\n");
380                 else
381                         fprintf(output, "#           time             counts events\n");
382         }
383
384         if (++num_print_interval == 25)
385                 num_print_interval = 0;
386
387         if (aggr_socket)
388                 print_aggr_socket(prefix);
389         else if (no_aggr) {
390                 list_for_each_entry(counter, &evsel_list->entries, node)
391                         print_counter(counter, prefix);
392         } else {
393                 list_for_each_entry(counter, &evsel_list->entries, node)
394                         print_counter_aggr(counter, prefix);
395         }
396 }
397
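/*
 * Open all counters, start the forked workload (if any) and wait for it
 * to finish - or, without a workload, until interrupted - printing at
 * each interval when requested, then read and close the counters.
 */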
398 static int __run_perf_stat(int argc, const char **argv)
399 {
400         char msg[512];
401         unsigned long long t0, t1;
402         struct perf_evsel *counter;
403         struct timespec ts;
404         int status = 0;
405         const bool forks = (argc > 0);
406
407         if (interval) {
408                 ts.tv_sec  = interval / 1000;
409                 ts.tv_nsec = (interval % 1000) * 1000000;
410         } else {
411                 ts.tv_sec  = 1;
412                 ts.tv_nsec = 0;
413         }
414
415         if (aggr_socket &&
416             cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
417                 perror("cannot build socket map");
418                 return -1;
419         }
420
421         if (forks) {
422                 if (perf_evlist__prepare_workload(evsel_list, &target, argv,
423                                                   false, false) < 0) {
424                         perror("failed to prepare workload");
425                         return -1;
426                 }
427         }
428
429         if (group)
430                 perf_evlist__set_leader(evsel_list);
431
432         list_for_each_entry(counter, &evsel_list->entries, node) {
433                 if (create_perf_stat_counter(counter) < 0) {
434                         /*
435                          * PPC returns ENXIO for HW counters until 2.6.37
436                          * (behavior changed with commit b0a873e).
437                          */
438                         if (errno == EINVAL || errno == ENOSYS ||
439                             errno == ENOENT || errno == EOPNOTSUPP ||
440                             errno == ENXIO) {
441                                 if (verbose)
442                                         ui__warning("%s event is not supported by the kernel.\n",
443                                                     perf_evsel__name(counter));
444                                 counter->supported = false;
445                                 continue;
446                         }
447
448                         perf_evsel__open_strerror(counter, &target,
449                                                   errno, msg, sizeof(msg));
450                         ui__error("%s\n", msg);
451
452                         if (child_pid != -1)
453                                 kill(child_pid, SIGTERM);
454
455                         return -1;
456                 }
457                 counter->supported = true;
458         }
459
460         if (perf_evlist__apply_filters(evsel_list)) {
461                 error("failed to set filter with %d (%s)\n", errno,
462                         strerror(errno));
463                 return -1;
464         }
465
466         /*
467          * Enable counters and exec the command:
468          */
469         t0 = rdclock();
470         clock_gettime(CLOCK_MONOTONIC, &ref_time);
471
472         if (forks) {
473                 perf_evlist__start_workload(evsel_list);
474
475                 if (interval) {
476                         while (!waitpid(child_pid, &status, WNOHANG)) {
477                                 nanosleep(&ts, NULL);
478                                 print_interval();
479                         }
480                 }
481                 wait(&status);
482                 if (WIFSIGNALED(status))
483                         psignal(WTERMSIG(status), argv[0]);
484         } else {
485                 while (!done) {
486                         nanosleep(&ts, NULL);
487                         if (interval)
488                                 print_interval();
489                 }
490         }
491
492         t1 = rdclock();
493
494         update_stats(&walltime_nsecs_stats, t1 - t0);
495
496         if (no_aggr) {
497                 list_for_each_entry(counter, &evsel_list->entries, node) {
498                         read_counter(counter);
499                         perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
500                 }
501         } else {
502                 list_for_each_entry(counter, &evsel_list->entries, node) {
503                         read_counter_aggr(counter);
504                         perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
505                                              thread_map__nr(evsel_list->threads));
506                 }
507         }
508
509         return WEXITSTATUS(status);
510 }
511
512 static int run_perf_stat(int argc __maybe_unused, const char **argv)
513 {
514         int ret;
515
516         if (pre_cmd) {
517                 ret = system(pre_cmd);
518                 if (ret)
519                         return ret;
520         }
521
522         if (sync_run)
523                 sync();
524
525         ret = __run_perf_stat(argc, argv);
526         if (ret)
527                 return ret;
528
529         if (post_cmd) {
530                 ret = system(post_cmd);
531                 if (ret)
532                         return ret;
533         }
534
535         return ret;
536 }
537
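/* Print the standard deviation as a percentage of the average. */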
538 static void print_noise_pct(double total, double avg)
539 {
540         double pct = rel_stddev_stats(total, avg);
541
542         if (csv_output)
543                 fprintf(output, "%s%.2f%%", csv_sep, pct);
544         else if (pct)
545                 fprintf(output, "  ( +-%6.2f%% )", pct);
546 }
547
548 static void print_noise(struct perf_evsel *evsel, double avg)
549 {
550         struct perf_stat *ps;
551
552         if (run_count == 1)
553                 return;
554
555         ps = evsel->priv;
556         print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
557 }
558
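/*
 * Print a counter whose unit is nanoseconds as milliseconds, plus the
 * "CPUs utilized" ratio for the task-clock event.
 */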
559 static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
560 {
561         double msecs = avg / 1e6;
562         char cpustr[16] = { '\0', };
563         const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
564
565         if (aggr_socket)
566                 sprintf(cpustr, "S%*d%s%*d%s",
567                         csv_output ? 0 : -5,
568                         cpu,
569                         csv_sep,
570                         csv_output ? 0 : 4,
571                         nr,
572                         csv_sep);
573         else if (no_aggr)
574                 sprintf(cpustr, "CPU%*d%s",
575                         csv_output ? 0 : -4,
576                         perf_evsel__cpus(evsel)->map[cpu], csv_sep);
577
578         fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));
579
580         if (evsel->cgrp)
581                 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
582
583         if (csv_output || interval)
584                 return;
585
586         if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
587                 fprintf(output, " # %8.3f CPUs utilized          ",
588                         avg / avg_stats(&walltime_nsecs_stats));
589         else
590                 fprintf(output, "                                   ");
591 }
592
593 /* used for get_ratio_color() */
594 enum grc_type {
595         GRC_STALLED_CYCLES_FE,
596         GRC_STALLED_CYCLES_BE,
597         GRC_CACHE_MISSES,
598         GRC_MAX_NR
599 };
600
601 static const char *get_ratio_color(enum grc_type type, double ratio)
602 {
603         static const double grc_table[GRC_MAX_NR][3] = {
604                 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
605                 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
606                 [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
607         };
608         const char *color = PERF_COLOR_NORMAL;
609
610         if (ratio > grc_table[type][0])
611                 color = PERF_COLOR_RED;
612         else if (ratio > grc_table[type][1])
613                 color = PERF_COLOR_MAGENTA;
614         else if (ratio > grc_table[type][2])
615                 color = PERF_COLOR_YELLOW;
616
617         return color;
618 }
619
620 static void print_stalled_cycles_frontend(int cpu,
621                                           struct perf_evsel *evsel
622                                           __maybe_unused, double avg)
623 {
624         double total, ratio = 0.0;
625         const char *color;
626
627         total = avg_stats(&runtime_cycles_stats[cpu]);
628
629         if (total)
630                 ratio = avg / total * 100.0;
631
632         color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
633
634         fprintf(output, " #  ");
635         color_fprintf(output, color, "%6.2f%%", ratio);
636         fprintf(output, " frontend cycles idle   ");
637 }
638
639 static void print_stalled_cycles_backend(int cpu,
640                                          struct perf_evsel *evsel
641                                          __maybe_unused, double avg)
642 {
643         double total, ratio = 0.0;
644         const char *color;
645
646         total = avg_stats(&runtime_cycles_stats[cpu]);
647
648         if (total)
649                 ratio = avg / total * 100.0;
650
651         color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
652
653         fprintf(output, " #  ");
654         color_fprintf(output, color, "%6.2f%%", ratio);
655         fprintf(output, " backend  cycles idle   ");
656 }
657
658 static void print_branch_misses(int cpu,
659                                 struct perf_evsel *evsel __maybe_unused,
660                                 double avg)
661 {
662         double total, ratio = 0.0;
663         const char *color;
664
665         total = avg_stats(&runtime_branches_stats[cpu]);
666
667         if (total)
668                 ratio = avg / total * 100.0;
669
670         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
671
672         fprintf(output, " #  ");
673         color_fprintf(output, color, "%6.2f%%", ratio);
674         fprintf(output, " of all branches        ");
675 }
676
677 static void print_l1_dcache_misses(int cpu,
678                                    struct perf_evsel *evsel __maybe_unused,
679                                    double avg)
680 {
681         double total, ratio = 0.0;
682         const char *color;
683
684         total = avg_stats(&runtime_l1_dcache_stats[cpu]);
685
686         if (total)
687                 ratio = avg / total * 100.0;
688
689         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
690
691         fprintf(output, " #  ");
692         color_fprintf(output, color, "%6.2f%%", ratio);
693         fprintf(output, " of all L1-dcache hits  ");
694 }
695
696 static void print_l1_icache_misses(int cpu,
697                                    struct perf_evsel *evsel __maybe_unused,
698                                    double avg)
699 {
700         double total, ratio = 0.0;
701         const char *color;
702
703         total = avg_stats(&runtime_l1_icache_stats[cpu]);
704
705         if (total)
706                 ratio = avg / total * 100.0;
707
708         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
709
710         fprintf(output, " #  ");
711         color_fprintf(output, color, "%6.2f%%", ratio);
712         fprintf(output, " of all L1-icache hits  ");
713 }
714
715 static void print_dtlb_cache_misses(int cpu,
716                                     struct perf_evsel *evsel __maybe_unused,
717                                     double avg)
718 {
719         double total, ratio = 0.0;
720         const char *color;
721
722         total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
723
724         if (total)
725                 ratio = avg / total * 100.0;
726
727         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
728
729         fprintf(output, " #  ");
730         color_fprintf(output, color, "%6.2f%%", ratio);
731         fprintf(output, " of all dTLB cache hits ");
732 }
733
734 static void print_itlb_cache_misses(int cpu,
735                                     struct perf_evsel *evsel __maybe_unused,
736                                     double avg)
737 {
738         double total, ratio = 0.0;
739         const char *color;
740
741         total = avg_stats(&runtime_itlb_cache_stats[cpu]);
742
743         if (total)
744                 ratio = avg / total * 100.0;
745
746         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
747
748         fprintf(output, " #  ");
749         color_fprintf(output, color, "%6.2f%%", ratio);
750         fprintf(output, " of all iTLB cache hits ");
751 }
752
753 static void print_ll_cache_misses(int cpu,
754                                   struct perf_evsel *evsel __maybe_unused,
755                                   double avg)
756 {
757         double total, ratio = 0.0;
758         const char *color;
759
760         total = avg_stats(&runtime_ll_cache_stats[cpu]);
761
762         if (total)
763                 ratio = avg / total * 100.0;
764
765         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
766
767         fprintf(output, " #  ");
768         color_fprintf(output, color, "%6.2f%%", ratio);
769         fprintf(output, " of all LL-cache hits   ");
770 }
771
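/*
 * Print a counter's raw count together with a derived ratio (insns per
 * cycle, miss rates, GHz, M/sec, ...) computed from the shadow stats
 * collected by update_shadow_stats().
 */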
772 static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
773 {
774         double total, ratio = 0.0;
775         char cpustr[16] = { '\0', };
776         const char *fmt;
777
778         if (csv_output)
779                 fmt = "%s%.0f%s%s";
780         else if (big_num)
781                 fmt = "%s%'18.0f%s%-25s";
782         else
783                 fmt = "%s%18.0f%s%-25s";
784
785         if (aggr_socket)
786                 sprintf(cpustr, "S%*d%s%*d%s",
787                         csv_output ? 0 : -5,
788                         cpu,
789                         csv_sep,
790                         csv_output ? 0 : 4,
791                         nr,
792                         csv_sep);
793         else if (no_aggr)
794                 sprintf(cpustr, "CPU%*d%s",
795                         csv_output ? 0 : -4,
796                         perf_evsel__cpus(evsel)->map[cpu], csv_sep);
797         else
798                 cpu = 0;
799
800         fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));
801
802         if (evsel->cgrp)
803                 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
804
805         if (csv_output || interval)
806                 return;
807
808         if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
809                 total = avg_stats(&runtime_cycles_stats[cpu]);
810                 if (total)
811                         ratio = avg / total;
812
813                 fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
814
815                 total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
816                 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
817
818                 if (total && avg) {
819                         ratio = total / avg;
820                         fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
821                 }
822
823         } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
824                         runtime_branches_stats[cpu].n != 0) {
825                 print_branch_misses(cpu, evsel, avg);
826         } else if (
827                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
828                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
829                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
830                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
831                         runtime_l1_dcache_stats[cpu].n != 0) {
832                 print_l1_dcache_misses(cpu, evsel, avg);
833         } else if (
834                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
835                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
836                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
837                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
838                         runtime_l1_icache_stats[cpu].n != 0) {
839                 print_l1_icache_misses(cpu, evsel, avg);
840         } else if (
841                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
842                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
843                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
844                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
845                         runtime_dtlb_cache_stats[cpu].n != 0) {
846                 print_dtlb_cache_misses(cpu, evsel, avg);
847         } else if (
848                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
849                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
850                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
851                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
852                         runtime_itlb_cache_stats[cpu].n != 0) {
853                 print_itlb_cache_misses(cpu, evsel, avg);
854         } else if (
855                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
856                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
857                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
858                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
859                         runtime_ll_cache_stats[cpu].n != 0) {
860                 print_ll_cache_misses(cpu, evsel, avg);
861         } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
862                         runtime_cacherefs_stats[cpu].n != 0) {
863                 total = avg_stats(&runtime_cacherefs_stats[cpu]);
864
865                 if (total)
866                         ratio = avg * 100 / total;
867
868                 fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
869
870         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
871                 print_stalled_cycles_frontend(cpu, evsel, avg);
872         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
873                 print_stalled_cycles_backend(cpu, evsel, avg);
874         } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
875                 total = avg_stats(&runtime_nsecs_stats[cpu]);
876
877                 if (total)
878                         ratio = 1.0 * avg / total;
879
880                 fprintf(output, " # %8.3f GHz                    ", ratio);
881         } else if (runtime_nsecs_stats[cpu].n != 0) {
882                 char unit = 'M';
883
884                 total = avg_stats(&runtime_nsecs_stats[cpu]);
885
886                 if (total)
887                         ratio = 1000.0 * avg / total;
888                 if (ratio < 0.001) {
889                         ratio *= 1000;
890                         unit = 'K';
891                 }
892
893                 fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
894         } else {
895                 fprintf(output, "                                   ");
896         }
897 }
898
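/*
 * Aggregate the per-cpu counts of each event over the cpus belonging to
 * the same socket and print one line per socket and event.
 */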
899 static void print_aggr_socket(char *prefix)
900 {
901         struct perf_evsel *counter;
902         u64 ena, run, val;
903         int cpu, s, s2, sock, nr;
904
905         if (!sock_map)
906                 return;
907
908         for (s = 0; s < sock_map->nr; s++) {
909                 sock = cpu_map__socket(sock_map, s);
910                 list_for_each_entry(counter, &evsel_list->entries, node) {
911                         val = ena = run = 0;
912                         nr = 0;
913                         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
914                                 s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
915                                 if (s2 != sock)
916                                         continue;
917                                 val += counter->counts->cpu[cpu].val;
918                                 ena += counter->counts->cpu[cpu].ena;
919                                 run += counter->counts->cpu[cpu].run;
920                                 nr++;
921                         }
922                         if (prefix)
923                                 fprintf(output, "%s", prefix);
924
925                         if (run == 0 || ena == 0) {
926                                 fprintf(output, "S%*d%s%*d%s%*s%s%*s",
927                                         csv_output ? 0 : -5,
928                                         s,
929                                         csv_sep,
930                                         csv_output ? 0 : 4,
931                                         nr,
932                                         csv_sep,
933                                         csv_output ? 0 : 18,
934                                         counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
935                                         csv_sep,
936                                         csv_output ? 0 : -24,
937                                         perf_evsel__name(counter));
938                                 if (counter->cgrp)
939                                         fprintf(output, "%s%s",
940                                                 csv_sep, counter->cgrp->name);
941
942                                 fputc('\n', output);
943                                 continue;
944                         }
945
946                         if (nsec_counter(counter))
947                                 nsec_printout(sock, nr, counter, val);
948                         else
949                                 abs_printout(sock, nr, counter, val);
950
951                         if (!csv_output) {
952                                 print_noise(counter, 1.0);
953
954                                 if (run != ena)
955                                         fprintf(output, "  (%.2f%%)",
956                                                 100.0 * run / ena);
957                         }
958                         fputc('\n', output);
959                 }
960         }
961 }
962
963 /*
964  * Print out the results of a single counter:
965  * aggregated counts in system-wide mode
966  */
967 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
968 {
969         struct perf_stat *ps = counter->priv;
970         double avg = avg_stats(&ps->res_stats[0]);
971         int scaled = counter->counts->scaled;
972
973         if (prefix)
974                 fprintf(output, "%s", prefix);
975
976         if (scaled == -1) {
977                 fprintf(output, "%*s%s%*s",
978                         csv_output ? 0 : 18,
979                         counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
980                         csv_sep,
981                         csv_output ? 0 : -24,
982                         perf_evsel__name(counter));
983
984                 if (counter->cgrp)
985                         fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
986
987                 fputc('\n', output);
988                 return;
989         }
990
991         if (nsec_counter(counter))
992                 nsec_printout(-1, 0, counter, avg);
993         else
994                 abs_printout(-1, 0, counter, avg);
995
996         print_noise(counter, avg);
997
998         if (csv_output) {
999                 fputc('\n', output);
1000                 return;
1001         }
1002
1003         if (scaled) {
1004                 double avg_enabled, avg_running;
1005
1006                 avg_enabled = avg_stats(&ps->res_stats[1]);
1007                 avg_running = avg_stats(&ps->res_stats[2]);
1008
1009                 fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
1010         }
1011         fprintf(output, "\n");
1012 }
1013
1014 /*
1015  * Print out the results of a single counter:
1016  * does not use aggregated count in system-wide
1017  */
1018 static void print_counter(struct perf_evsel *counter, char *prefix)
1019 {
1020         u64 ena, run, val;
1021         int cpu;
1022
1023         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1024                 val = counter->counts->cpu[cpu].val;
1025                 ena = counter->counts->cpu[cpu].ena;
1026                 run = counter->counts->cpu[cpu].run;
1027
1028                 if (prefix)
1029                         fprintf(output, "%s", prefix);
1030
1031                 if (run == 0 || ena == 0) {
1032                         fprintf(output, "CPU%*d%s%*s%s%*s",
1033                                 csv_output ? 0 : -4,
1034                                 perf_evsel__cpus(counter)->map[cpu], csv_sep,
1035                                 csv_output ? 0 : 18,
1036                                 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1037                                 csv_sep,
1038                                 csv_output ? 0 : -24,
1039                                 perf_evsel__name(counter));
1040
1041                         if (counter->cgrp)
1042                                 fprintf(output, "%s%s",
1043                                         csv_sep, counter->cgrp->name);
1044
1045                         fputc('\n', output);
1046                         continue;
1047                 }
1048
1049                 if (nsec_counter(counter))
1050                         nsec_printout(cpu, 0, counter, val);
1051                 else
1052                         abs_printout(cpu, 0, counter, val);
1053
1054                 if (!csv_output) {
1055                         print_noise(counter, 1.0);
1056
1057                         if (run != ena)
1058                                 fprintf(output, "  (%.2f%%)",
1059                                         100.0 * run / ena);
1060                 }
1061                 fputc('\n', output);
1062         }
1063 }
1064
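/*
 * Print the final summary: the header describing what was measured,
 * one line per counter and the elapsed wall-clock time.
 */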
1065 static void print_stat(int argc, const char **argv)
1066 {
1067         struct perf_evsel *counter;
1068         int i;
1069
1070         fflush(stdout);
1071
1072         if (!csv_output) {
1073                 fprintf(output, "\n");
1074                 fprintf(output, " Performance counter stats for ");
1075                 if (!perf_target__has_task(&target)) {
1076                         fprintf(output, "\'%s", argv[0]);
1077                         for (i = 1; i < argc; i++)
1078                                 fprintf(output, " %s", argv[i]);
1079                 } else if (target.pid)
1080                         fprintf(output, "process id \'%s", target.pid);
1081                 else
1082                         fprintf(output, "thread id \'%s", target.tid);
1083
1084                 fprintf(output, "\'");
1085                 if (run_count > 1)
1086                         fprintf(output, " (%d runs)", run_count);
1087                 fprintf(output, ":\n\n");
1088         }
1089
1090         if (aggr_socket)
1091                 print_aggr_socket(NULL);
1092         else if (no_aggr) {
1093                 list_for_each_entry(counter, &evsel_list->entries, node)
1094                         print_counter(counter, NULL);
1095         } else {
1096                 list_for_each_entry(counter, &evsel_list->entries, node)
1097                         print_counter_aggr(counter, NULL);
1098         }
1099
1100         if (!csv_output) {
1101                 if (!null_run)
1102                         fprintf(output, "\n");
1103                 fprintf(output, " %17.9f seconds time elapsed",
1104                                 avg_stats(&walltime_nsecs_stats)/1e9);
1105                 if (run_count > 1) {
1106                         fprintf(output, "                                        ");
1107                         print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1108                                         avg_stats(&walltime_nsecs_stats));
1109                 }
1110                 fprintf(output, "\n\n");
1111         }
1112 }
1113
1114 static volatile int signr = -1;
1115
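/*
 * Signal handler: remember the signal for sig_atexit() and, when there
 * is no forked child (or in interval mode), tell the main loop to stop.
 */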
1116 static void skip_signal(int signo)
1117 {
1118         if ((child_pid == -1) || interval)
1119                 done = 1;
1120
1121         signr = signo;
1122 }
1123
1124 static void sig_atexit(void)
1125 {
1126         if (child_pid != -1)
1127                 kill(child_pid, SIGTERM);
1128
1129         if (signr == -1)
1130                 return;
1131
1132         signal(signr, SIG_DFL);
1133         kill(getpid(), signr);
1134 }
1135
1136 static int stat__set_big_num(const struct option *opt __maybe_unused,
1137                              const char *s __maybe_unused, int unset)
1138 {
1139         big_num_opt = unset ? 0 : 1;
1140         return 0;
1141 }
1142
1143 /*
1144  * Add default attributes, if there were no attributes specified or
1145  * if -d/--detailed, -d -d or -d -d -d is used:
1146  */
1147 static int add_default_attributes(void)
1148 {
1149         struct perf_event_attr default_attrs[] = {
1150
1151   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1152   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1153   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1154   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1155
1156   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
1157   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1158   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
1159   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
1160   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
1161   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
1162
1163 };
1164
1165 /*
1166  * Detailed stats (-d), covering the L1 and last level data caches:
1167  */
1168         struct perf_event_attr detailed_attrs[] = {
1169
1170   { .type = PERF_TYPE_HW_CACHE,
1171     .config =
1172          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1173         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1174         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1175
1176   { .type = PERF_TYPE_HW_CACHE,
1177     .config =
1178          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1179         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1180         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1181
1182   { .type = PERF_TYPE_HW_CACHE,
1183     .config =
1184          PERF_COUNT_HW_CACHE_LL                 <<  0  |
1185         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1186         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1187
1188   { .type = PERF_TYPE_HW_CACHE,
1189     .config =
1190          PERF_COUNT_HW_CACHE_LL                 <<  0  |
1191         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1192         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1193 };
1194
1195 /*
1196  * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1197  */
1198         struct perf_event_attr very_detailed_attrs[] = {
1199
1200   { .type = PERF_TYPE_HW_CACHE,
1201     .config =
1202          PERF_COUNT_HW_CACHE_L1I                <<  0  |
1203         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1204         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1205
1206   { .type = PERF_TYPE_HW_CACHE,
1207     .config =
1208          PERF_COUNT_HW_CACHE_L1I                <<  0  |
1209         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1210         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1211
1212   { .type = PERF_TYPE_HW_CACHE,
1213     .config =
1214          PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1215         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1216         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1217
1218   { .type = PERF_TYPE_HW_CACHE,
1219     .config =
1220          PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1221         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1222         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1223
1224   { .type = PERF_TYPE_HW_CACHE,
1225     .config =
1226          PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1227         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1228         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1229
1230   { .type = PERF_TYPE_HW_CACHE,
1231     .config =
1232          PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1233         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1234         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1235
1236 };
1237
1238 /*
1239  * Very, very detailed stats (-d -d -d), adding prefetch events:
1240  */
1241         struct perf_event_attr very_very_detailed_attrs[] = {
1242
1243   { .type = PERF_TYPE_HW_CACHE,
1244     .config =
1245          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1246         (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1247         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1248
1249   { .type = PERF_TYPE_HW_CACHE,
1250     .config =
1251          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1252         (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1253         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1254 };
1255
1256         /* Set attrs if no event is selected and !null_run: */
1257         if (null_run)
1258                 return 0;
1259
1260         if (!evsel_list->nr_entries) {
1261                 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1262                         return -1;
1263         }
1264
1265         /* Detailed events get appended to the event list: */
1266
1267         if (detailed_run <  1)
1268                 return 0;
1269
1270         /* Append detailed run extra attributes: */
1271         if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1272                 return -1;
1273
1274         if (detailed_run < 2)
1275                 return 0;
1276
1277         /* Append very detailed run extra attributes: */
1278         if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1279                 return -1;
1280
1281         if (detailed_run < 3)
1282                 return 0;
1283
1284         /* Append very, very detailed run extra attributes: */
1285         return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1286 }
1287
1288 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1289 {
1290         bool append_file = false;
1291         int output_fd = 0;
1292         const char *output_name = NULL;
1293         const struct option options[] = {
1294         OPT_CALLBACK('e', "event", &evsel_list, "event",
1295                      "event selector. use 'perf list' to list available events",
1296                      parse_events_option),
1297         OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1298                      "event filter", parse_filter),
1299         OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1300                     "child tasks do not inherit counters"),
1301         OPT_STRING('p', "pid", &target.pid, "pid",
1302                    "stat events on existing process id"),
1303         OPT_STRING('t', "tid", &target.tid, "tid",
1304                    "stat events on existing thread id"),
1305         OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1306                     "system-wide collection from all CPUs"),
1307         OPT_BOOLEAN('g', "group", &group,
1308                     "put the counters into a counter group"),
1309         OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
1310         OPT_INCR('v', "verbose", &verbose,
1311                     "be more verbose (show counter open errors, etc)"),
1312         OPT_INTEGER('r', "repeat", &run_count,
1313                     "repeat command and print average + stddev (max: 100, forever: 0)"),
1314         OPT_BOOLEAN('n', "null", &null_run,
1315                     "null run - don't start any counters"),
1316         OPT_INCR('d', "detailed", &detailed_run,
1317                     "detailed run - start a lot of events"),
1318         OPT_BOOLEAN('S', "sync", &sync_run,
1319                     "call sync() before starting a run"),
1320         OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
1321                            "print large numbers with thousands\' separators",
1322                            stat__set_big_num),
1323         OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1324                     "list of cpus to monitor in system-wide mode"),
1325         OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"),
1326         OPT_STRING('x', "field-separator", &csv_sep, "separator",
1327                    "print counts with custom separator"),
1328         OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1329                      "monitor event in cgroup name only", parse_cgroups),
1330         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1331         OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1332         OPT_INTEGER(0, "log-fd", &output_fd,
1333                     "log output to fd, instead of stderr"),
1334         OPT_STRING(0, "pre", &pre_cmd, "command",
1335                         "command to run prior to the measured command"),
1336         OPT_STRING(0, "post", &post_cmd, "command",
1337                         "command to run after the measured command"),
1338         OPT_UINTEGER('I', "interval-print", &interval,
1339                     "print counts at a regular interval in ms (>= 100)"),
1340         OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
1341         OPT_END()
1342         };
1343         const char * const stat_usage[] = {
1344                 "perf stat [<options>] [<command>]",
1345                 NULL
1346         };
1347         int status = -ENOMEM, run_idx;
1348         const char *mode;
1349
1350         setlocale(LC_ALL, "");
1351
1352         evsel_list = perf_evlist__new();
1353         if (evsel_list == NULL)
1354                 return -ENOMEM;
1355
1356         argc = parse_options(argc, argv, options, stat_usage,
1357                 PARSE_OPT_STOP_AT_NON_OPTION);
1358
1359         output = stderr;
1360         if (output_name && strcmp(output_name, "-"))
1361                 output = NULL;
1362
1363         if (output_name && output_fd) {
1364                 fprintf(stderr, "cannot use both --output and --log-fd\n");
1365                 usage_with_options(stat_usage, options);
1366         }
1367
1368         if (output_fd < 0) {
1369                 fprintf(stderr, "argument to --log-fd must be > 0\n");
1370                 usage_with_options(stat_usage, options);
1371         }
1372
1373         if (!output) {
1374                 struct timespec tm;
1375                 mode = append_file ? "a" : "w";
1376
1377                 output = fopen(output_name, mode);
1378                 if (!output) {
1379                         perror("failed to create output file");
1380                         return -1;
1381                 }
1382                 clock_gettime(CLOCK_REALTIME, &tm);
1383                 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1384         } else if (output_fd > 0) {
1385                 mode = append_file ? "a" : "w";
1386                 output = fdopen(output_fd, mode);
1387                 if (!output) {
1388                         perror("Failed opening logfd");
1389                         return -errno;
1390                 }
1391         }
1392
1393         if (csv_sep) {
1394                 csv_output = true;
1395                 if (!strcmp(csv_sep, "\\t"))
1396                         csv_sep = "\t";
1397         } else
1398                 csv_sep = DEFAULT_SEPARATOR;
1399
1400         /*
1401          * let the spreadsheet do the pretty-printing
1402          */
1403         if (csv_output) {
1404                 /* User explicitly passed -B? */
1405                 if (big_num_opt == 1) {
1406                         fprintf(stderr, "-B option not supported with -x\n");
1407                         usage_with_options(stat_usage, options);
1408                 } else /* Nope, so disable big number formatting */
1409                         big_num = false;
1410         } else if (big_num_opt == 0) /* User passed --no-big-num */
1411                 big_num = false;
1412
1413         if (!argc && !perf_target__has_task(&target))
1414                 usage_with_options(stat_usage, options);
1415         if (run_count < 0) {
1416                 usage_with_options(stat_usage, options);
1417         } else if (run_count == 0) {
1418                 forever = true;
1419                 run_count = 1;
1420         }
1421
1422         /* no_aggr, cgroup are for system-wide only */
1423         if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) {
1424                 fprintf(stderr, "both cgroup and no-aggregation "
1425                         "modes are only available in system-wide mode\n");
1426
1427                 usage_with_options(stat_usage, options);
1428         }
1429
1430         if (aggr_socket) {
1431                 if (!perf_target__has_cpu(&target)) {
1432                         fprintf(stderr, "--aggr-socket is only available in system-wide mode (-a)\n");
1433                         usage_with_options(stat_usage, options);
1434                 }
1435                 no_aggr = true;
1436         }
1437
1438         if (add_default_attributes())
1439                 goto out;
1440
1441         perf_target__validate(&target);
1442
1443         if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1444                 if (perf_target__has_task(&target))
1445                         pr_err("Problems finding threads to monitor\n");
1446                 if (perf_target__has_cpu(&target))
1447                         perror("failed to parse CPUs map");
1448
1449                 usage_with_options(stat_usage, options);
1450                 return -1;
1451         }
1452         if (interval && interval < 100) {
1453                 pr_err("print interval must be >= 100ms\n");
1454                 usage_with_options(stat_usage, options);
1455                 return -1;
1456         }
1457
1458         if (perf_evlist__alloc_stats(evsel_list, interval))
1459                 goto out_free_maps;
1460
1461         /*
1462          * We don't want to block the signals - that would cause
1463          * child tasks to inherit them and Ctrl-C would not work.
1464          * What we want is for Ctrl-C to work in the exec()-ed
1465          * task, while being ignored by perf stat itself:
1466          */
1467         atexit(sig_atexit);
1468         if (!forever)
1469                 signal(SIGINT,  skip_signal);
1470         signal(SIGCHLD, skip_signal);
1471         signal(SIGALRM, skip_signal);
1472         signal(SIGABRT, skip_signal);
1473
1474         status = 0;
1475         for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1476                 if (run_count != 1 && verbose)
1477                         fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1478                                 run_idx + 1);
1479
1480                 status = run_perf_stat(argc, argv);
1481                 if (forever && status != -1) {
1482                         print_stat(argc, argv);
1483                         perf_stat__reset_stats(evsel_list);
1484                 }
1485         }
1486
1487         if (!forever && status != -1 && !interval)
1488                 print_stat(argc, argv);
1489
1490         perf_evlist__free_stats(evsel_list);
1491 out_free_maps:
1492         perf_evlist__delete_maps(evsel_list);
1493 out:
1494         perf_evlist__delete(evsel_list);
1495         return status;
1496 }