tools/perf/util/evsel.c

   1 /*
   2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
   3  *
   4  * Parts came from builtin-{top,stat,record}.c, see those files for further
   5  * copyright notes.
   6  *
   7  * Released under the GPL v2. (and only v2, not any later version)
   8  */
   9
  10 #include <byteswap.h>
  11 #include "asm/bug.h"
  12 #include "evsel.h"
  13 #include "evlist.h"
  14 #include "util.h"
  15 #include "cpumap.h"
  16 #include "thread_map.h"
  17
  18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
  19
  20 int __perf_evsel__sample_size(u64 sample_type)
  21 {
  22         u64 mask = sample_type & PERF_SAMPLE_MASK;
  23         int size = 0;
  24         int i;
  25
  26         for (i = 0; i < 64; i++) {
  27                 if (mask & (1ULL << i))
  28                         size++;
  29         }
  30
  31         size *= sizeof(u64);
  32
  33         return size;
  34 }
  35
  36 void perf_evsel__init(struct perf_evsel *evsel,
  37                       struct perf_event_attr *attr, int idx)
  38 {
  39         evsel->idx         = idx;
  40         evsel->attr        = *attr;
  41         INIT_LIST_HEAD(&evsel->node);
  42         hists__init(&evsel->hists);
  43 }
  44
  45 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
  46 {
  47         struct perf_evsel *evsel = zalloc(sizeof(*evsel));
  48
  49         if (evsel != NULL)
  50                 perf_evsel__init(evsel, attr, idx);
  51
  52         return evsel;
  53 }
  54
  55 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
  56 {
  57         int cpu, thread;
  58         evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
  59
  60         if (evsel->fd) {
  61                 for (cpu = 0; cpu < ncpus; cpu++) {
  62                         for (thread = 0; thread < nthreads; thread++) {
  63                                 FD(evsel, cpu, thread) = -1;
  64                         }
  65                 }
  66         }
  67
  68         return evsel->fd != NULL ? 0 : -ENOMEM;
  69 }
  70
  71 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
  72 {
  73         evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
  74         if (evsel->sample_id == NULL)
  75                 return -ENOMEM;
  76
  77         evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
  78         if (evsel->id == NULL) {
  79                 xyarray__delete(evsel->sample_id);
  80                 evsel->sample_id = NULL;
  81                 return -ENOMEM;
  82         }
  83
  84         return 0;
  85 }
  86
  87 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
  88 {
  89         evsel->counts = zalloc((sizeof(*evsel->counts) +
  90                                 (ncpus * sizeof(struct perf_counts_values))));
  91         return evsel->counts != NULL ? 0 : -ENOMEM;
  92 }
  93
/*
 * Hand evsel->fd to xyarray__delete() and clear the pointer so the evsel no
 * longer refers to the released table.
 */
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}
  99
 100 void perf_evsel__free_id(struct perf_evsel *evsel)
 101 {
 102         xyarray__delete(evsel->sample_id);
 103         evsel->sample_id = NULL;
 104         free(evsel->id);
 105         evsel->id = NULL;
 106 }
 107
 108 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 109 {
 110         int cpu, thread;
 111
 112         for (cpu = 0; cpu < ncpus; cpu++)
 113                 for (thread = 0; thread < nthreads; ++thread) {
 114                         close(FD(evsel, cpu, thread));
 115                         FD(evsel, cpu, thread) = -1;
 116                 }
 117 }
 118
 119 void perf_evsel__exit(struct perf_evsel *evsel)
 120 {
 121         assert(list_empty(&evsel->node));
 122         xyarray__delete(evsel->fd);
 123         xyarray__delete(evsel->sample_id);
 124         free(evsel->id);
 125 }
 126
/*
 * Tear down and free @evsel: release what perf_evsel__exit() covers, pass
 * the cgroup reference to close_cgroup(), then free the name and the evsel
 * itself. @evsel must not be used after this returns.
 */
void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	close_cgroup(evsel->cgrp);
	free(evsel->name);
	free(evsel);
}
 134
 135 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 136                               int cpu, int thread, bool scale)
 137 {
 138         struct perf_counts_values count;
 139         size_t nv = scale ? 3 : 1;
 140
 141         if (FD(evsel, cpu, thread) < 0)
 142                 return -EINVAL;
 143
 144         if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
 145                 return -ENOMEM;
 146
 147         if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
 148                 return -errno;
 149
 150         if (scale) {
 151                 if (count.run == 0)
 152                         count.val = 0;
 153                 else if (count.run < count.ena)
 154                         count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
 155         } else
 156                 count.ena = count.run = 0;
 157
 158         evsel->counts->cpu[cpu] = count;
 159         return 0;
 160 }
 161
 162 int __perf_evsel__read(struct perf_evsel *evsel,
 163                        int ncpus, int nthreads, bool scale)
 164 {
 165         size_t nv = scale ? 3 : 1;
 166         int cpu, thread;
 167         struct perf_counts_values *aggr = &evsel->counts->aggr, count;
 168
 169         aggr->val = aggr->ena = aggr->run = 0;
 170
 171         for (cpu = 0; cpu < ncpus; cpu++) {
 172                 for (thread = 0; thread < nthreads; thread++) {
 173                         if (FD(evsel, cpu, thread) < 0)
 174                                 continue;
 175
 176                         if (readn(FD(evsel, cpu, thread),
 177                                   &count, nv * sizeof(u64)) < 0)
 178                                 return -errno;
 179
 180                         aggr->val += count.val;
 181                         if (scale) {
 182                                 aggr->ena += count.ena;
 183                                 aggr->run += count.run;
 184                         }
 185                 }
 186         }
 187
 188         evsel->counts->scaled = 0;
 189         if (scale) {
 190                 if (aggr->run == 0) {
 191                         evsel->counts->scaled = -1;
 192                         aggr->val = 0;
 193                         return 0;
 194                 }
 195
 196                 if (aggr->run < aggr->ena) {
 197                         evsel->counts->scaled = 1;
 198                         aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
 199                 }
 200         } else
 201                 aggr->ena = aggr->run = 0;
 202
 203         return 0;
 204 }
 205
 206 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 207                               struct thread_map *threads, bool group)
 208 {
 209         int cpu, thread;
 210         unsigned long flags = 0;
 211         int pid = -1;
 212
 213         if (evsel->fd == NULL &&
 214             perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
 215                 return -1;
 216
 217         if (evsel->cgrp) {
 218                 flags = PERF_FLAG_PID_CGROUP;
 219                 pid = evsel->cgrp->fd;
 220         }
 221
 222         for (cpu = 0; cpu < cpus->nr; cpu++) {
 223                 int group_fd = -1;
 224
 225                 for (thread = 0; thread < threads->nr; thread++) {
 226
 227                         if (!evsel->cgrp)
 228                                 pid = threads->map[thread];
 229
 230                         FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
 231                                                                      pid,
 232                                                                      cpus->map[cpu],
 233                                                                      group_fd, flags);
 234                         if (FD(evsel, cpu, thread) < 0)
 235                                 goto out_close;
 236
 237                         if (group && group_fd == -1)
 238                                 group_fd = FD(evsel, cpu, thread);
 239                 }
 240         }
 241
 242         return 0;
 243
 244 out_close:
 245         do {
 246                 while (--thread >= 0) {
 247                         close(FD(evsel, cpu, thread));
 248                         FD(evsel, cpu, thread) = -1;
 249                 }
 250                 thread = threads->nr;
 251         } while (--cpu >= 0);
 252         return -1;
 253 }
 254
/*
 * One-entry cpu map (nr = 1, single value -1) used when no cpu map is
 * supplied by the caller.
 * NOTE(review): cpus[] presumably provides the storage behind the trailing
 * array of struct cpu_map - confirm against its definition.
 */
static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr = 1,
	.cpus   = { -1, },
};
 262
/*
 * One-entry thread map (nr = 1, single value -1) used when no thread map is
 * supplied by the caller.
 * NOTE(review): threads[] presumably provides the storage behind the
 * trailing array of struct thread_map - confirm against its definition.
 */
static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr  = 1,
	.threads = { -1, },
};
 270
 271 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 272                      struct thread_map *threads, bool group)
 273 {
 274         if (cpus == NULL) {
 275                 /* Work around old compiler warnings about strict aliasing */
 276                 cpus = &empty_cpu_map.map;
 277         }
 278
 279         if (threads == NULL)
 280                 threads = &empty_thread_map.map;
 281
 282         return __perf_evsel__open(evsel, cpus, threads, group);
 283 }
 284
/*
 * Open @evsel once per entry of @cpus, using the one-entry empty thread map
 * for the thread dimension. Returns whatever __perf_evsel__open() returns.
 */
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group);
}
 290
/*
 * Open @evsel once per entry of @threads, using the one-entry empty cpu map
 * for the cpu dimension. Returns whatever __perf_evsel__open() returns.
 */
int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group);
}
 296
 297 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
 298                                        struct perf_sample *sample)
 299 {
 300         const u64 *array = event->sample.array;
 301
 302         array += ((event->header.size -
 303                    sizeof(event->header)) / sizeof(u64)) - 1;
 304
 305         if (type & PERF_SAMPLE_CPU) {
 306                 u32 *p = (u32 *)array;
 307                 sample->cpu = *p;
 308                 array--;
 309         }
 310
 311         if (type & PERF_SAMPLE_STREAM_ID) {
 312                 sample->stream_id = *array;
 313                 array--;
 314         }
 315
 316         if (type & PERF_SAMPLE_ID) {
 317                 sample->id = *array;
 318                 array--;
 319         }
 320
 321         if (type & PERF_SAMPLE_TIME) {
 322                 sample->time = *array;
 323                 array--;
 324         }
 325
 326         if (type & PERF_SAMPLE_TID) {
 327                 u32 *p = (u32 *)array;
 328                 sample->pid = p[0];
 329                 sample->tid = p[1];
 330         }
 331
 332         return 0;
 333 }
 334
 335 static bool sample_overlap(const union perf_event *event,
 336                            const void *offset, u64 size)
 337 {
 338         const void *base = event;
 339
 340         if (offset + size > base + event->header.size)
 341                 return true;
 342
 343         return false;
 344 }
 345
 346 int perf_event__parse_sample(const union perf_event *event, u64 type,
 347                              int sample_size, bool sample_id_all,
 348                              struct perf_sample *data, bool swapped)
 349 {
 350         const u64 *array;
 351
 352         /*
 353          * used for cross-endian analysis. See git commit 65014ab3
 354          * for why this goofiness is needed.
 355          */
 356         union {
 357                 u64 val64;
 358                 u32 val32[2];
 359         } u;
 360
 361
 362         data->cpu = data->pid = data->tid = -1;
 363         data->stream_id = data->id = data->time = -1ULL;
 364
 365         if (event->header.type != PERF_RECORD_SAMPLE) {
 366                 if (!sample_id_all)
 367                         return 0;
 368                 return perf_event__parse_id_sample(event, type, data);
 369         }
 370
 371         array = event->sample.array;
 372
 373         if (sample_size + sizeof(event->header) > event->header.size)
 374                 return -EFAULT;
 375
 376         if (type & PERF_SAMPLE_IP) {
 377                 data->ip = event->ip.ip;
 378                 array++;
 379         }
 380
 381         if (type & PERF_SAMPLE_TID) {
 382                 u.val64 = *array;
 383                 if (swapped) {
 384                         /* undo swap of u64, then swap on individual u32s */
 385                         u.val64 = bswap_64(u.val64);
 386                         u.val32[0] = bswap_32(u.val32[0]);
 387                         u.val32[1] = bswap_32(u.val32[1]);
 388                 }
 389
 390                 data->pid = u.val32[0];
 391                 data->tid = u.val32[1];
 392                 array++;
 393         }
 394
 395         if (type & PERF_SAMPLE_TIME) {
 396                 data->time = *array;
 397                 array++;
 398         }
 399
 400         data->addr = 0;
 401         if (type & PERF_SAMPLE_ADDR) {
 402                 data->addr = *array;
 403                 array++;
 404         }
 405
 406         data->id = -1ULL;
 407         if (type & PERF_SAMPLE_ID) {
 408                 data->id = *array;
 409                 array++;
 410         }
 411
 412         if (type & PERF_SAMPLE_STREAM_ID) {
 413                 data->stream_id = *array;
 414                 array++;
 415         }
 416
 417         if (type & PERF_SAMPLE_CPU) {
 418
 419                 u.val64 = *array;
 420                 if (swapped) {
 421                         /* undo swap of u64, then swap on individual u32s */
 422                         u.val64 = bswap_64(u.val64);
 423                         u.val32[0] = bswap_32(u.val32[0]);
 424                 }
 425
 426                 data->cpu = u.val32[0];
 427                 array++;
 428         }
 429
 430         if (type & PERF_SAMPLE_PERIOD) {
 431                 data->period = *array;
 432                 array++;
 433         }
 434
 435         if (type & PERF_SAMPLE_READ) {
 436                 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
 437                 return -1;
 438         }
 439
 440         if (type & PERF_SAMPLE_CALLCHAIN) {
 441                 if (sample_overlap(event, array, sizeof(data->callchain->nr)))
 442                         return -EFAULT;
 443
 444                 data->callchain = (struct ip_callchain *)array;
 445
 446                 if (sample_overlap(event, array, data->callchain->nr))
 447                         return -EFAULT;
 448
 449                 array += 1 + data->callchain->nr;
 450         }
 451
 452         if (type & PERF_SAMPLE_RAW) {
 453                 const u64 *pdata;
 454
 455                 u.val64 = *array;
 456                 if (WARN_ONCE(swapped,
 457                               "Endianness of raw data not corrected!\n")) {
 458                         /* undo swap of u64, then swap on individual u32s */
 459                         u.val64 = bswap_64(u.val64);
 460                         u.val32[0] = bswap_32(u.val32[0]);
 461                         u.val32[1] = bswap_32(u.val32[1]);
 462                 }
 463
 464                 if (sample_overlap(event, array, sizeof(u32)))
 465                         return -EFAULT;
 466
 467                 data->raw_size = u.val32[0];
 468                 pdata = (void *) array + sizeof(u32);
 469
 470                 if (sample_overlap(event, pdata, data->raw_size))
 471                         return -EFAULT;
 472
 473                 data->raw_data = (void *) pdata;
 474         }
 475
 476         return 0;
 477 }