perf record: Extend trace writing to multi AIO

author Alexey Budankov <alexey.budankov@linux.intel.com>

Tue, 6 Nov 2018 09:07:19 +0000 (12:07 +0300)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Mon, 17 Dec 2018 17:55:11 +0000 (14:55 -0300)
author Alexey Budankov <alexey.budankov@linux.intel.com>
Tue, 6 Nov 2018 09:07:19 +0000 (12:07 +0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 17 Dec 2018 17:55:11 +0000 (14:55 -0300)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt

index 7efb4af88a689e2b6599da0c75c250bc9f7b29ec..d232b13ea7137c493304853426cba67a514383f3 100644 (file)
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -435,8 +435,8 @@ Specify vmlinux path which has debuginfo.
  --buildid-all::
  Record build-id of all DSOs regardless whether it's actually hit or not.
  
---aio::
-Enable asynchronous (Posix AIO) trace writing mode.
+--aio[=n]::
+Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4).
  Asynchronous mode is supported only when linking Perf tool with libc library
  providing implementation for Posix AIO API.
  
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c

index 408d6477c960b21694993b5eef26e283d0765398..4736dc96c4cae40c8b9cd43829cc5a12a8b5d593 100644 (file)
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
         return rc;
  }
  
-static void record__aio_sync(struct perf_mmap *md)
+static int record__aio_sync(struct perf_mmap *md, bool sync_all)
  {
-       struct aiocb *cblock = &md->aio.cblock;
+       struct aiocb **aiocb = md->aio.aiocb;
+       struct aiocb *cblocks = md->aio.cblocks;
         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
+       int i, do_suspend;
  
         do {
-               if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
-                       return;
+               do_suspend = 0;
+               for (i = 0; i < md->aio.nr_cblocks; ++i) {
+                       if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
+                               if (sync_all)
+                                       aiocb[i] = NULL;
+                               else
+                                       return i;
+                       } else {
+                               /*
+                                * The started aio write is not complete
+                                * yet, so it has to be waited for before
+                                * the next allocation.
+                                */
+                               aiocb[i] = &cblocks[i];
+                               do_suspend = 1;
+                       }
+               }
+               if (!do_suspend)
+                       return -1;
  
-               while (aio_suspend((const struct aiocb**)&cblock, 1, &timeout)) {
+               while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
                         if (!(errno == EAGAIN || errno == EINTR))
                                 pr_err("failed to sync perf data, error: %m\n");
                 }
@@ -252,28 +271,36 @@ static void record__aio_mmap_read_sync(struct record *rec)
                 struct perf_mmap *map = &maps[i];
  
                 if (map->base)
-                       record__aio_sync(map);
+                       record__aio_sync(map, true);
         }
  }
  
  static int nr_cblocks_default = 1;
+static int nr_cblocks_max = 4;
  
  static int record__aio_parse(const struct option *opt,
-                            const char *str __maybe_unused,
+                            const char *str,
                              int unset)
  {
         struct record_opts *opts = (struct record_opts *)opt->value;
  
-       if (unset)
+       if (unset) {
                 opts->nr_cblocks = 0;
-       else
-               opts->nr_cblocks = nr_cblocks_default;
+       } else {
+               if (str)
+                       opts->nr_cblocks = strtol(str, NULL, 0);
+               if (!opts->nr_cblocks)
+                       opts->nr_cblocks = nr_cblocks_default;
+       }
  
         return 0;
  }
  #else /* HAVE_AIO_SUPPORT */
-static void record__aio_sync(struct perf_mmap *md __maybe_unused)
+static int nr_cblocks_max = 0;
+
+static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
  {
+       return -1;
  }
  
  static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
@@ -728,12 +755,13 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
                                         goto out;
                                 }
                         } else {
+                               int idx;
                                 /*
                                  * Call record__aio_sync() to wait till map->data buffer
                                  * becomes available after previous aio write request.
                                  */
-                               record__aio_sync(map);
-                               if (perf_mmap__aio_push(map, rec, record__aio_pushfn, &off) != 0) {
+                               idx = record__aio_sync(map, false);
+                               if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
                                         record__aio_set_pos(trace_fd, off);
                                         rc = -1;
                                         goto out;
@@ -1503,6 +1531,13 @@ static int perf_record_config(const char *var, const char *value, void *cb)
                 var = "call-graph.record-mode";
                 return perf_default_config(var, value, cb);
         }
+#ifdef HAVE_AIO_SUPPORT
+       if (!strcmp(var, "record.aio")) {
+               rec->opts.nr_cblocks = strtol(value, NULL, 0);
+               if (!rec->opts.nr_cblocks)
+                       rec->opts.nr_cblocks = nr_cblocks_default;
+       }
+#endif
  
         return 0;
  }
@@ -1909,8 +1944,8 @@ static struct option __record_options[] = {
         OPT_BOOLEAN(0, "dry-run", &dry_run,
                     "Parse options then exit"),
  #ifdef HAVE_AIO_SUPPORT
-       OPT_CALLBACK_NOOPT(0, "aio", &record.opts,
-                    NULL, "Enable asynchronous trace writing mode",
+       OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
+                    &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
                      record__aio_parse),
  #endif
         OPT_END()
@@ -2105,6 +2140,8 @@ int cmd_record(int argc, const char **argv)
                 goto out;
         }
  
+       if (rec->opts.nr_cblocks > nr_cblocks_max)
+               rec->opts.nr_cblocks = nr_cblocks_max;
         if (verbose > 0)
                 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
  
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c

index 61aa381d05d0a8a26204628268637716a03eb1e4..ab30555d2afcc5b54d1a59c9fc2b904f5635fbbf 100644 (file)
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -156,28 +156,50 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
  #ifdef HAVE_AIO_SUPPORT
  static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
  {
-       int delta_max;
+       int delta_max, i, prio;
  
         map->aio.nr_cblocks = mp->nr_cblocks;
         if (map->aio.nr_cblocks) {
-               map->aio.data = malloc(perf_mmap__mmap_len(map));
+               map->aio.aiocb = calloc(map->aio.nr_cblocks, sizeof(struct aiocb *));
+               if (!map->aio.aiocb) {
+                       pr_debug2("failed to allocate aiocb for data buffer, error %m\n");
+                       return -1;
+               }
+               map->aio.cblocks = calloc(map->aio.nr_cblocks, sizeof(struct aiocb));
+               if (!map->aio.cblocks) {
+                       pr_debug2("failed to allocate cblocks for data buffer, error %m\n");
+                       return -1;
+               }
+               map->aio.data = calloc(map->aio.nr_cblocks, sizeof(void *));
                 if (!map->aio.data) {
                         pr_debug2("failed to allocate data buffer, error %m\n");
                         return -1;
                 }
-               /*
-                * Use cblock.aio_fildes value different from -1
-                * to denote started aio write operation on the
-                * cblock so it requires explicit record__aio_sync()
-                * call prior the cblock may be reused again.
-                */
-               map->aio.cblock.aio_fildes = -1;
-               /*
-                * Allocate cblock with max priority delta to
-                * have faster aio write system calls.
-                */
                 delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
-               map->aio.cblock.aio_reqprio = delta_max;
+               for (i = 0; i < map->aio.nr_cblocks; ++i) {
+                       map->aio.data[i] = malloc(perf_mmap__mmap_len(map));
+                       if (!map->aio.data[i]) {
+                               pr_debug2("failed to allocate data buffer area, error %m");
+                               return -1;
+                       }
+                       /*
+                        * A cblock.aio_fildes value different from -1
+                        * denotes a started aio write operation on the
+                        * cblock, so an explicit record__aio_sync() call
+                        * is required before the cblock may be reused.
+                        */
+                       map->aio.cblocks[i].aio_fildes = -1;
+                       /*
+                        * Allocate cblocks with a priority delta to have
+                        * faster aio write system calls: queued requests
+                        * are kept in separate per-prio queues, so adding
+                        * a new request iterates through a shorter per-prio
+                        * list. Blocks with numbers higher than
+                        * _SC_AIO_PRIO_DELTA_MAX go with priority 0.
+                        */
+                       prio = delta_max - i;
+                       map->aio.cblocks[i].aio_reqprio = prio >= 0 ? prio : 0;
+               }
         }
  
         return 0;
@@ -189,7 +211,7 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
                 zfree(&map->aio.data);
  }
  
-int perf_mmap__aio_push(struct perf_mmap *md, void *to,
+int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
                         int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
                         off_t *off)
  {
@@ -204,7 +226,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to,
                 return (rc == -EAGAIN) ? 0 : -1;
  
         /*
-        * md->base data is copied into md->data buffer to
+        * md->base data is copied into md->data[idx] buffer to
          * release space in the kernel buffer as fast as possible,
          * thru perf_mmap__consume() below.
          *
@@ -226,20 +248,20 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to,
                 buf = &data[md->start & md->mask];
                 size = md->mask + 1 - (md->start & md->mask);
                 md->start += size;
-               memcpy(md->aio.data, buf, size);
+               memcpy(md->aio.data[idx], buf, size);
                 size0 = size;
         }
  
         buf = &data[md->start & md->mask];
         size = md->end - md->start;
         md->start += size;
-       memcpy(md->aio.data + size0, buf, size);
+       memcpy(md->aio.data[idx] + size0, buf, size);
  
         /*
-        * Increment md->refcount to guard md->data buffer
+        * Increment md->refcount to guard md->data[idx] buffer
          * from premature deallocation because md object can be
          * released earlier than aio write request started
-        * on mmap->data is complete.
+        * on mmap->data[idx] is complete.
          *
          * perf_mmap__put() is done at record__aio_complete()
          * after started request completion.
@@ -249,7 +271,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to,
         md->prev = head;
         perf_mmap__consume(md);
  
-       rc = push(to, &md->aio.cblock, md->aio.data, size0 + size, *off);
+       rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
         if (!rc) {
                 *off += size0 + size;
         } else {
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h

index b99213ba11b5f072c54ae3177114fc65960fab09..aeb6942fdb00ca8daccb0f5a6acc34fea72fb2b1 100644 (file)
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -32,8 +32,9 @@ struct perf_mmap {
         char             event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
  #ifdef HAVE_AIO_SUPPORT
         struct {
-               void             *data;
-               struct aiocb     cblock;
+               void             **data;
+               struct aiocb     *cblocks;
+               struct aiocb     **aiocb;
                 int              nr_cblocks;
         } aio;
  #endif
@@ -97,11 +98,11 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map);
  int perf_mmap__push(struct perf_mmap *md, void *to,
                     int push(struct perf_mmap *map, void *to, void *buf, size_t size));
  #ifdef HAVE_AIO_SUPPORT
-int perf_mmap__aio_push(struct perf_mmap *md, void *to,
+int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
                         int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
                         off_t *off);
  #else
-static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused,
+static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused,
         int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused,
         off_t *off __maybe_unused)
  {
author	Alexey Budankov <alexey.budankov@linux.intel.com>
	Tue, 6 Nov 2018 09:07:19 +0000 (12:07 +0300)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Mon, 17 Dec 2018 17:55:11 +0000 (14:55 -0300)
tools/perf/Documentation/perf-record.txt		patch \| blob \| history
tools/perf/builtin-record.c		patch \| blob \| history
tools/perf/util/mmap.c		patch \| blob \| history
tools/perf/util/mmap.h		patch \| blob \| history