perf record: Introduce thread specific data array
author     Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
           Mon, 17 Jan 2022 18:34:23 +0000 (21:34 +0300)
committer  Arnaldo Carvalho de Melo <acme@redhat.com>
           Thu, 10 Feb 2022 19:25:30 +0000 (16:25 -0300)
Introduce a thread-specific data object and an array of such objects
to store and manage thread-local data. Implement functions to
allocate, initialize, finalize and release thread-specific data.
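
For illustration only, the standalone sketch below shows the kind of
allocate/initialize/free lifecycle this change introduces for an array of
per-thread objects. It is not perf code: the names (thread_data,
alloc_thread_data, free_thread_data) are hypothetical and the struct is
reduced to two fields.

/* Minimal sketch of a per-thread data array lifecycle (hypothetical names). */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

struct thread_data {
	pid_t	tid;		/* -1 until the thread is started */
	int	msg_pipe[2];	/* control pipe fds, -1 when unused */
};

static struct thread_data *alloc_thread_data(int nr_threads)
{
	struct thread_data *td = calloc(nr_threads, sizeof(*td));
	int t;

	if (!td)
		return NULL;
	for (t = 0; t < nr_threads; t++) {
		td[t].tid = -1;
		td[t].msg_pipe[0] = td[t].msg_pipe[1] = -1;
	}
	return td;
}

static void free_thread_data(struct thread_data **td)
{
	free(*td);
	*td = NULL;
}

int main(void)
{
	struct thread_data *td = alloc_thread_data(4);

	if (!td)
		return 1;
	printf("thread 0 tid initialized to %d\n", (int)td[0].tid);
	free_thread_data(&td);
	return 0;
}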

The thread-local maps and overwrite_maps arrays keep pointers to the
mmap buffer objects that the thread serves, selected according to the
thread's maps mask. The thread-local pollfd array keeps the event fds
connected to those mmap buffers, also according to the maps mask.
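
For illustration only, the standalone sketch below shows how a compact
per-thread array of mmap pointers can be built from a global mmap array
and a CPU mask, which is the idea behind record__thread_data_init_maps()
in the diff. It uses a plain unsigned long mask and __builtin_popcountl()
instead of perf's bitmap helpers, and all names are hypothetical.

/* Minimal sketch: pick the mmap slots whose CPU bit is set in the mask. */
#include <stdio.h>
#include <stdlib.h>

struct fake_mmap { int cpu; };		/* stand-in for struct mmap */

static struct fake_mmap **thread_maps(struct fake_mmap *mmaps, int nr_mmaps,
				      unsigned long cpu_mask, int *nr_out)
{
	struct fake_mmap **maps;
	int m, tm = 0;

	*nr_out = __builtin_popcountl(cpu_mask);
	maps = calloc(*nr_out, sizeof(*maps));
	if (!maps)
		return NULL;

	for (m = 0; m < nr_mmaps && tm < *nr_out; m++) {
		if (cpu_mask & (1UL << mmaps[m].cpu))
			maps[tm++] = &mmaps[m];	/* this thread serves this buffer */
	}
	return maps;
}

int main(void)
{
	struct fake_mmap mmaps[4] = { {0}, {1}, {2}, {3} };
	struct fake_mmap **maps;
	int nr, tm;

	maps = thread_maps(mmaps, 4, 0xa /* CPUs 1 and 3 */, &nr);
	if (!maps)
		return 1;
	for (tm = 0; tm < nr; tm++)
		printf("maps[%d] -> cpu%d\n", tm, maps[tm]->cpu);
	free(maps);
	return 0;
}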

Thread control commands are delivered via thread-local comm pipes and
the thread's ctlfd_pos fd. External control commands (--control
option) are delivered via the evlist ctlfd_pos fd and are handled by
the main tool thread.
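
For illustration only, the standalone sketch below shows the msg/ack pipe
pattern described above: the main thread writes a command byte into a
worker's msg pipe, the worker poll()s on it and answers on the ack pipe.
The names and the command byte are hypothetical; in the diff the pipe read
end is added to the thread's fdarray-based pollfd set instead of a bare
struct pollfd.

/* Minimal sketch of main-thread -> worker command delivery over pipes. */
#include <poll.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static int msg_pipe[2], ack_pipe[2];

static void *worker(void *arg)
{
	struct pollfd pfd = { .fd = msg_pipe[0], .events = POLLIN };
	char cmd;

	(void)arg;
	/* Wait until the main thread delivers a control command. */
	if (poll(&pfd, 1, -1) > 0 && read(msg_pipe[0], &cmd, 1) == 1) {
		printf("worker: got command '%c'\n", cmd);
		if (write(ack_pipe[1], "A", 1) != 1)	/* acknowledge back */
			perror("ack write");
	}
	return NULL;
}

int main(void)
{
	pthread_t tid;
	char ack;

	if (pipe(msg_pipe) || pipe(ack_pipe))
		return 1;
	pthread_create(&tid, NULL, worker, NULL);

	if (write(msg_pipe[1], "S", 1) != 1)	/* deliver a command */
		return 1;
	if (read(ack_pipe[0], &ack, 1) == 1)
		printf("main: worker acknowledged\n");

	pthread_join(tid, NULL);
	return 0;
}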

Reviewed-by: Riccardo Mancini <rickyman7@gmail.com>
Signed-off-by: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Riccardo Mancini <rickyman7@gmail.com>
Acked-by: Namhyung Kim <namhyung@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Antonov <alexander.antonov@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Budankov <abudankov@huawei.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/fc9f74af6f822d9c0fa0e145c3564a760dbe3d4b.1642440724.git.alexey.v.bayduraev@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-record.c

index 41998f2..0d4a34c 100644
@@ -58,6 +58,9 @@
 #include <poll.h>
 #include <pthread.h>
 #include <unistd.h>
+#ifndef HAVE_GETTID
+#include <syscall.h>
+#endif
 #include <sched.h>
 #include <signal.h>
 #ifdef HAVE_EVENTFD_SUPPORT
@@ -92,6 +95,21 @@ struct thread_mask {
        struct mmap_cpu_mask    affinity;
 };
 
+struct record_thread {
+       pid_t                   tid;
+       struct thread_mask      *mask;
+       struct {
+               int             msg[2];
+               int             ack[2];
+       } pipes;
+       struct fdarray          pollfd;
+       int                     ctlfd_pos;
+       int                     nr_mmaps;
+       struct mmap             **maps;
+       struct mmap             **overwrite_maps;
+       struct record           *rec;
+};
+
 struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
@@ -119,6 +137,7 @@ struct record {
        struct perf_debuginfod  debuginfod;
        int                     nr_threads;
        struct thread_mask      *thread_masks;
+       struct record_thread    *thread_data;
 };
 
 static volatile int done;
@@ -131,6 +150,13 @@ static const char *affinity_tags[PERF_AFFINITY_MAX] = {
        "SYS", "NODE", "CPU"
 };
 
+#ifndef HAVE_GETTID
+static inline pid_t gettid(void)
+{
+       return (pid_t)syscall(__NR_gettid);
+}
+#endif
+
 static bool switch_output_signal(struct record *rec)
 {
        return rec->switch_output.signal &&
@@ -848,9 +874,218 @@ static int record__kcore_copy(struct machine *machine, struct perf_data *data)
        return kcore_copy(from_dir, kcore_dir);
 }
 
+static void record__thread_data_init_pipes(struct record_thread *thread_data)
+{
+       thread_data->pipes.msg[0] = -1;
+       thread_data->pipes.msg[1] = -1;
+       thread_data->pipes.ack[0] = -1;
+       thread_data->pipes.ack[1] = -1;
+}
+
+static int record__thread_data_open_pipes(struct record_thread *thread_data)
+{
+       if (pipe(thread_data->pipes.msg))
+               return -EINVAL;
+
+       if (pipe(thread_data->pipes.ack)) {
+               close(thread_data->pipes.msg[0]);
+               thread_data->pipes.msg[0] = -1;
+               close(thread_data->pipes.msg[1]);
+               thread_data->pipes.msg[1] = -1;
+               return -EINVAL;
+       }
+
+       pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
+                thread_data->pipes.msg[0], thread_data->pipes.msg[1],
+                thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
+
+       return 0;
+}
+
+static void record__thread_data_close_pipes(struct record_thread *thread_data)
+{
+       if (thread_data->pipes.msg[0] != -1) {
+               close(thread_data->pipes.msg[0]);
+               thread_data->pipes.msg[0] = -1;
+       }
+       if (thread_data->pipes.msg[1] != -1) {
+               close(thread_data->pipes.msg[1]);
+               thread_data->pipes.msg[1] = -1;
+       }
+       if (thread_data->pipes.ack[0] != -1) {
+               close(thread_data->pipes.ack[0]);
+               thread_data->pipes.ack[0] = -1;
+       }
+       if (thread_data->pipes.ack[1] != -1) {
+               close(thread_data->pipes.ack[1]);
+               thread_data->pipes.ack[1] = -1;
+       }
+}
+
+static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
+{
+       int m, tm, nr_mmaps = evlist->core.nr_mmaps;
+       struct mmap *mmap = evlist->mmap;
+       struct mmap *overwrite_mmap = evlist->overwrite_mmap;
+       struct perf_cpu_map *cpus = evlist->core.cpus;
+
+       thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
+                                             thread_data->mask->maps.nbits);
+       if (mmap) {
+               thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
+               if (!thread_data->maps)
+                       return -ENOMEM;
+       }
+       if (overwrite_mmap) {
+               thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
+               if (!thread_data->overwrite_maps) {
+                       zfree(&thread_data->maps);
+                       return -ENOMEM;
+               }
+       }
+       pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
+                thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
+
+       for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
+               if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
+                       if (thread_data->maps) {
+                               thread_data->maps[tm] = &mmap[m];
+                               pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
+                                         thread_data, cpus->map[m].cpu, tm, m);
+                       }
+                       if (thread_data->overwrite_maps) {
+                               thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
+                               pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
+                                         thread_data, cpus->map[m].cpu, tm, m);
+                       }
+                       tm++;
+               }
+       }
+
+       return 0;
+}
+
+static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
+{
+       int f, tm, pos;
+       struct mmap *map, *overwrite_map;
+
+       fdarray__init(&thread_data->pollfd, 64);
+
+       for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
+               map = thread_data->maps ? thread_data->maps[tm] : NULL;
+               overwrite_map = thread_data->overwrite_maps ?
+                               thread_data->overwrite_maps[tm] : NULL;
+
+               for (f = 0; f < evlist->core.pollfd.nr; f++) {
+                       void *ptr = evlist->core.pollfd.priv[f].ptr;
+
+                       if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
+                               pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
+                                                             &evlist->core.pollfd);
+                               if (pos < 0)
+                                       return pos;
+                               pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
+                                        thread_data, pos, evlist->core.pollfd.entries[f].fd);
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static void record__free_thread_data(struct record *rec)
+{
+       int t;
+       struct record_thread *thread_data = rec->thread_data;
+
+       if (thread_data == NULL)
+               return;
+
+       for (t = 0; t < rec->nr_threads; t++) {
+               record__thread_data_close_pipes(&thread_data[t]);
+               zfree(&thread_data[t].maps);
+               zfree(&thread_data[t].overwrite_maps);
+               fdarray__exit(&thread_data[t].pollfd);
+       }
+
+       zfree(&rec->thread_data);
+}
+
+static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
+{
+       int t, ret;
+       struct record_thread *thread_data;
+
+       rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
+       if (!rec->thread_data) {
+               pr_err("Failed to allocate thread data\n");
+               return -ENOMEM;
+       }
+       thread_data = rec->thread_data;
+
+       for (t = 0; t < rec->nr_threads; t++)
+               record__thread_data_init_pipes(&thread_data[t]);
+
+       for (t = 0; t < rec->nr_threads; t++) {
+               thread_data[t].rec = rec;
+               thread_data[t].mask = &rec->thread_masks[t];
+               ret = record__thread_data_init_maps(&thread_data[t], evlist);
+               if (ret) {
+                       pr_err("Failed to initialize thread[%d] maps\n", t);
+                       goto out_free;
+               }
+               ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
+               if (ret) {
+                       pr_err("Failed to initialize thread[%d] pollfd\n", t);
+                       goto out_free;
+               }
+               if (t) {
+                       thread_data[t].tid = -1;
+                       ret = record__thread_data_open_pipes(&thread_data[t]);
+                       if (ret) {
+                               pr_err("Failed to open thread[%d] communication pipes\n", t);
+                               goto out_free;
+                       }
+                       ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
+                                          POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
+                       if (ret < 0) {
+                               pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
+                               goto out_free;
+                       }
+                       thread_data[t].ctlfd_pos = ret;
+                       pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
+                                thread_data, thread_data[t].ctlfd_pos,
+                                thread_data[t].pipes.msg[0]);
+               } else {
+                       thread_data[t].tid = gettid();
+                       if (evlist->ctl_fd.pos == -1)
+                               continue;
+                       ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
+                                                     &evlist->core.pollfd);
+                       if (ret < 0) {
+                               pr_err("Failed to duplicate descriptor in main thread pollfd\n");
+                               goto out_free;
+                       }
+                       thread_data[t].ctlfd_pos = ret;
+                       pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
+                                thread_data, thread_data[t].ctlfd_pos,
+                                evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
+               }
+       }
+
+       return 0;
+
+out_free:
+       record__free_thread_data(rec);
+
+       return ret;
+}
+
 static int record__mmap_evlist(struct record *rec,
                               struct evlist *evlist)
 {
+       int ret;
        struct record_opts *opts = &rec->opts;
        bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
                                  opts->auxtrace_sample_mode;
@@ -881,6 +1116,14 @@ static int record__mmap_evlist(struct record *rec,
                                return -EINVAL;
                }
        }
+
+       if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
+               return -1;
+
+       ret = record__alloc_thread_data(rec, evlist);
+       if (ret)
+               return ret;
+
        return 0;
 }
 
@@ -1862,9 +2105,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                evlist__start_workload(rec->evlist);
        }
 
-       if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
-               goto out_child;
-
        if (opts->initial_delay) {
                pr_info(EVLIST_DISABLED_MSG);
                if (opts->initial_delay > 0) {
@@ -2022,6 +2262,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 out_child:
        evlist__finalize_ctlfd(rec->evlist);
        record__mmap_read_all(rec, true);
+       record__free_thread_data(rec);
        record__aio_mmap_read_sync(rec);
 
        if (rec->session->bytes_transferred && rec->session->bytes_compressed) {