/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <byteswap.h>
#include "asm/bug.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))

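/*
 * Size of the fixed sample-format fields selected by sample_type: each
 * bit set in the PERF_SAMPLE_MASK portion adds one u64 to every sample.
 */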
int __perf_evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}

static void hists__init(struct hists *hists)
{
	memset(hists, 0, sizeof(*hists));
	hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
	hists->entries_in = &hists->entries_in_array[0];
	hists->entries_collapsed = RB_ROOT;
	hists->entries = RB_ROOT;
	pthread_mutex_init(&hists->lock, NULL);
}

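/*
 * One-time initialization of an evsel: record the event attributes and
 * its index in the evlist, and set up the embedded hists rbtrees.
 */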
void perf_evsel__init(struct perf_evsel *evsel,
		      struct perf_event_attr *attr, int idx)
{
	evsel->idx  = idx;
	evsel->attr = *attr;
	INIT_LIST_HEAD(&evsel->node);
	hists__init(&evsel->hists);
}

struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
	struct perf_evsel *evsel = zalloc(sizeof(*evsel));

	if (evsel != NULL)
		perf_evsel__init(evsel, attr, idx);

	return evsel;
}

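/*
 * Allocate the ncpus x nthreads table of event file descriptors and mark
 * every slot as unopened (-1).
 */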
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

	if (evsel->fd) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			for (thread = 0; thread < nthreads; thread++) {
				FD(evsel, cpu, thread) = -1;
			}
		}
	}

	return evsel->fd != NULL ? 0 : -ENOMEM;
}

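/*
 * Allocate the per-(cpu, thread) sample_id slots plus a flat array of
 * kernel-assigned event IDs, used to map samples back to this evsel.
 */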
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
	if (evsel->sample_id == NULL)
		return -ENOMEM;

	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
	if (evsel->id == NULL) {
		xyarray__delete(evsel->sample_id);
		evsel->sample_id = NULL;
		return -ENOMEM;
	}

	return 0;
}

int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
	evsel->counts = zalloc((sizeof(*evsel->counts) +
			       (ncpus * sizeof(struct perf_counts_values))));
	return evsel->counts != NULL ? 0 : -ENOMEM;
}

void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}

void perf_evsel__free_id(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	free(evsel->id);
	evsel->id = NULL;
}

void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	for (cpu = 0; cpu < ncpus; cpu++)
		for (thread = 0; thread < nthreads; ++thread) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
}

void perf_evsel__exit(struct perf_evsel *evsel)
{
	assert(list_empty(&evsel->node));
	xyarray__delete(evsel->fd);
	xyarray__delete(evsel->sample_id);
	free(evsel->id);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	close_cgroup(evsel->cgrp);
	free(evsel);
}

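/*
 * Read one counter value. With scale, the kernel also reports the times
 * the event was enabled and actually running; if the event was
 * multiplexed (run < ena), the raw count is scaled up by ena/run to
 * estimate what a full run would have counted.
 */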
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
		return -errno;

	if (scale) {
		if (count.run == 0)
			count.val = 0;
		else if (count.run < count.ena)
			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
	} else
		count.ena = count.run = 0;

	evsel->counts->cpu[cpu] = count;
	return 0;
}

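/*
 * Sum the counter over all cpus and threads into counts->aggr, then apply
 * the same ena/run scaling to the aggregate. counts->scaled records the
 * outcome: 0 = no scaling needed, 1 = scaled, -1 = the event never ran.
 */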
int __perf_evsel__read(struct perf_evsel *evsel,
		       int ncpus, int nthreads, bool scale)
{
	size_t nv = scale ? 3 : 1;
	int cpu, thread;
	struct perf_counts_values *aggr = &evsel->counts->aggr, count;

	aggr->val = aggr->ena = aggr->run = 0;

	for (cpu = 0; cpu < ncpus; cpu++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (FD(evsel, cpu, thread) < 0)
				continue;

			if (readn(FD(evsel, cpu, thread),
				  &count, nv * sizeof(u64)) < 0)
				return -errno;

			aggr->val += count.val;
			if (scale) {
				aggr->ena += count.ena;
				aggr->run += count.run;
			}
		}
	}

	evsel->counts->scaled = 0;
	if (scale) {
		if (aggr->run == 0) {
			evsel->counts->scaled = -1;
			aggr->val = 0;
			return 0;
		}

		if (aggr->run < aggr->ena) {
			evsel->counts->scaled = 1;
			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
		}
	} else
		aggr->ena = aggr->run = 0;

	return 0;
}

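/*
 * Open one event fd per (cpu, thread) pair via sys_perf_event_open().
 * When a cgroup is attached, its fd is passed as the pid argument along
 * with PERF_FLAG_PID_CGROUP. For group events the first fd opened on each
 * cpu becomes the group leader. On failure, every fd opened so far is
 * closed again before the error is returned.
 */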
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
			      struct thread_map *threads, bool group,
			      struct xyarray *group_fds)
{
	int cpu, thread;
	unsigned long flags = 0;
	int pid = -1, err;

	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags = PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;

		for (thread = 0; thread < threads->nr; thread++) {

			if (!evsel->cgrp)
				pid = threads->map[thread];

			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
								     pid,
								     cpus->map[cpu],
								     group_fd, flags);
			if (FD(evsel, cpu, thread) < 0) {
				err = -errno;
				goto out_close;
			}

			if (group && group_fd == -1)
				group_fd = FD(evsel, cpu, thread);
		}
	}

	return 0;

out_close:
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = threads->nr;
	} while (--cpu >= 0);
	return err;
}

void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	if (evsel->fd == NULL)
		return;

	perf_evsel__close_fd(evsel, ncpus, nthreads);
	perf_evsel__free_fd(evsel);
}

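/*
 * Dummy cpu/thread maps holding the single entry -1, which
 * sys_perf_event_open() interprets as "any cpu" / "all threads".
 */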
static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr	= 1,
	.cpus	= { -1, },
};

static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr	 = 1,
	.threads = { -1, },
};

int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
		     struct thread_map *threads, bool group,
		     struct xyarray *group_fd)
{
	if (cpus == NULL) {
		/* Work around old compiler warnings about strict aliasing */
		cpus = &empty_cpu_map.map;
	}

	if (threads == NULL)
		threads = &empty_thread_map.map;

	return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group,
			     struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
				  group_fd);
}

int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group,
				struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
				  group_fd);
}

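/*
 * Non-sample events carry the sample_id_all block at the very end of the
 * record, so parse it backwards: start at the last u64 and step towards
 * the header, consuming fields in reverse PERF_SAMPLE_* bit order.
 */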
static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	const u64 *array = event->sample.array;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		u32 *p = (u32 *)array;
		sample->cpu = *p;
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		u32 *p = (u32 *)array;
		sample->pid = p[0];
		sample->tid = p[1];
	}

	return 0;
}

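/* Does a field at @offset of @size bytes run past the end of the event? */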
static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const void *base = event;

	if (offset + size > base + event->header.size)
		return true;

	return false;
}

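/*
 * Parse the body of a PERF_RECORD_SAMPLE front to back, in the order the
 * PERF_SAMPLE_* bits are laid out by the kernel, bounds-checking the
 * variable-sized callchain and raw fields against the event size.
 */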
int perf_event__parse_sample(const union perf_event *event, u64 type,
			     int sample_size, bool sample_id_all,
			     struct perf_sample *data, bool swapped)
{
	const u64 *array;
	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!sample_id_all)
			return 0;
		return perf_event__parse_id_sample(event, type, data);
	}

	array = event->sample.array;
	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	if (type & PERF_SAMPLE_IP) {
		data->ip = event->ip.ip;
		array++;
	}
	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}
		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}
	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}
	data->addr = 0;
	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}
	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}
	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}
	if (type & PERF_SAMPLE_CPU) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}
		data->cpu = u.val32[0];
		array++;
	}
	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}
	if (type & PERF_SAMPLE_READ) {
		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
		return -1;
	}
	if (type & PERF_SAMPLE_CALLCHAIN) {
		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
			return -EFAULT;

		data->callchain = (struct ip_callchain *)array;

		if (sample_overlap(event, array, data->callchain->nr))
			return -EFAULT;

		array += 1 + data->callchain->nr;
	}
	if (type & PERF_SAMPLE_RAW) {
		const u64 *pdata;

		u.val64 = *array;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}
		if (sample_overlap(event, array, sizeof(u32)))
			return -EFAULT;

		data->raw_size = u.val32[0];
		pdata = (void *) array + sizeof(u32);

		if (sample_overlap(event, pdata, data->raw_size))
			return -EFAULT;

		data->raw_data = (void *) pdata;
	}

	return 0;
}