--buildid-all::
Record build-id of all DSOs regardless whether it's actually hit or not.
---aio::
-Enable asynchronous (Posix AIO) trace writing mode.
+--aio[=n]::
+Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4). Values above the maximum are clamped to it.
Asynchronous mode is supported only when linking Perf tool with libc library
providing implementation for Posix AIO API.
return rc;
}
-static void record__aio_sync(struct perf_mmap *md)
+static int record__aio_sync(struct perf_mmap *md, bool sync_all)
{
- struct aiocb *cblock = &md->aio.cblock;
+ struct aiocb **aiocb = md->aio.aiocb;
+ struct aiocb *cblocks = md->aio.cblocks;
struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
+ int i, do_suspend;
do {
- if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
- return;
+ do_suspend = 0;
+ for (i = 0; i < md->aio.nr_cblocks; ++i) {
+ if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
+ if (sync_all)
+ aiocb[i] = NULL;
+ else
+ return i;
+ } else {
+ /*
+ * A started aio write has not completed yet,
+ * so it has to be waited for before the
+ * next allocation.
+ */
+ aiocb[i] = &cblocks[i];
+ do_suspend = 1;
+ }
+ }
+ if (!do_suspend)
+ return -1;
- while (aio_suspend((const struct aiocb**)&cblock, 1, &timeout)) {
+ while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
if (!(errno == EAGAIN || errno == EINTR))
pr_err("failed to sync perf data, error: %m\n");
}
struct perf_mmap *map = &maps[i];
if (map->base)
- record__aio_sync(map);
+ record__aio_sync(map, true);
}
}
static int nr_cblocks_default = 1;
+static int nr_cblocks_max = 4;
+/*
+ * Option callback for --aio[=n].
+ *
+ * When the option is negated (unset) the control block count is set to 0.
+ * Otherwise an explicit <n> is parsed with strtol() (base auto-detected);
+ * negative values are treated as 0 and values above nr_cblocks_max are
+ * clamped to it while the value is still a long. If no usable count
+ * results (no argument and nothing set earlier, "0", a negative or an
+ * unparsable string), nr_cblocks_default is used. Always returns 0.
+ */
static int record__aio_parse(const struct option *opt,
- const char *str __maybe_unused,
+ const char *str,
int unset)
{
struct record_opts *opts = (struct record_opts *)opt->value;
- if (unset)
+ if (unset) {
opts->nr_cblocks = 0;
- else
- opts->nr_cblocks = nr_cblocks_default;
+ } else {
+ if (str) {
+ long nr = strtol(str, NULL, 0);
+
+ /*
+ * Bound the value before narrowing it to int so that a
+ * negative or huge <n> can neither wrap nor reach the
+ * allocation code as an element count.
+ */
+ if (nr < 0)
+ nr = 0;
+ else if (nr > nr_cblocks_max)
+ nr = nr_cblocks_max;
+ opts->nr_cblocks = nr;
+ }
+ /* <= 0 also covers a negative count left in opts by an earlier setter. */
+ if (opts->nr_cblocks <= 0)
+ opts->nr_cblocks = nr_cblocks_default;
+ }
return 0;
}
#else /* HAVE_AIO_SUPPORT */
-static void record__aio_sync(struct perf_mmap *md __maybe_unused)
+static int nr_cblocks_max = 0; /* built without AIO: the nr_cblocks upper clamp reduces any request to 0 */
+
+static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
{
+ return -1; /* nothing to wait for; same value the AIO build returns when no write is pending */
}
static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
goto out;
}
} else {
+ int idx;
/*
* Call record__aio_sync() to wait till map->data buffer
* becomes available after previous aio write request.
*/
- record__aio_sync(map);
- if (perf_mmap__aio_push(map, rec, record__aio_pushfn, &off) != 0) {
+ idx = record__aio_sync(map, false);
+ if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
record__aio_set_pos(trace_fd, off);
rc = -1;
goto out;
var = "call-graph.record-mode";
return perf_default_config(var, value, cb);
}
+#ifdef HAVE_AIO_SUPPORT
+ if (!strcmp(var, "record.aio")) {
+ rec->opts.nr_cblocks = strtol(value, NULL, 0);
+ if (!rec->opts.nr_cblocks)
+ rec->opts.nr_cblocks = nr_cblocks_default;
+ }
+#endif
return 0;
}
OPT_BOOLEAN(0, "dry-run", &dry_run,
"Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
- OPT_CALLBACK_NOOPT(0, "aio", &record.opts,
- NULL, "Enable asynchronous trace writing mode",
+ OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
+ &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
record__aio_parse),
#endif
OPT_END()
goto out;
}
+ if (rec->opts.nr_cblocks > nr_cblocks_max)
+ rec->opts.nr_cblocks = nr_cblocks_max;
if (verbose > 0)
pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
#ifdef HAVE_AIO_SUPPORT
+/*
+ * Set up the per-map AIO state for mp->nr_cblocks control blocks.
+ *
+ * Allocates three arrays of nr_cblocks entries (aiocb pointers, aiocb
+ * control blocks, data buffer pointers) and, for every control block, a
+ * data buffer of perf_mmap__mmap_len(map) bytes. Each control block is
+ * marked idle (aio_fildes == -1) and gets request priority
+ * delta_max - index, floored at 0.
+ *
+ * Returns 0 on success or when nr_cblocks is 0, -1 if any allocation fails.
+ * NOTE(review): on failure the earlier allocations are not released here;
+ * presumably the caller's unmap path frees them - confirm.
+ */
static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
{
- int delta_max;
+ int delta_max, i, prio;
map->aio.nr_cblocks = mp->nr_cblocks;
if (map->aio.nr_cblocks) {
- map->aio.data = malloc(perf_mmap__mmap_len(map));
+ map->aio.aiocb = calloc(map->aio.nr_cblocks, sizeof(struct aiocb *));
+ if (!map->aio.aiocb) {
+ pr_debug2("failed to allocate aiocb for data buffer, error %m\n");
+ return -1;
+ }
+ map->aio.cblocks = calloc(map->aio.nr_cblocks, sizeof(struct aiocb));
+ if (!map->aio.cblocks) {
+ pr_debug2("failed to allocate cblocks for data buffer, error %m\n");
+ return -1;
+ }
+ map->aio.data = calloc(map->aio.nr_cblocks, sizeof(void *));
if (!map->aio.data) {
pr_debug2("failed to allocate data buffer, error %m\n");
return -1;
}
- /*
- * Use cblock.aio_fildes value different from -1
- * to denote started aio write operation on the
- * cblock so it requires explicit record__aio_sync()
- * call prior the cblock may be reused again.
- */
- map->aio.cblock.aio_fildes = -1;
- /*
- * Allocate cblock with max priority delta to
- * have faster aio write system calls.
- */
delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
- map->aio.cblock.aio_reqprio = delta_max;
+ for (i = 0; i < map->aio.nr_cblocks; ++i) {
+ map->aio.data[i] = malloc(perf_mmap__mmap_len(map));
+ if (!map->aio.data[i]) {
+ pr_debug2("failed to allocate data buffer area, error %m\n");
+ return -1;
+ }
+ /*
+ * Use cblock.aio_fildes value different from -1
+ * to denote started aio write operation on the
+ * cblock so it requires explicit record__aio_sync()
+ * call prior the cblock may be reused again.
+ */
+ map->aio.cblocks[i].aio_fildes = -1;
+ /*
+ * Allocate cblocks with decreasing priority delta to
+ * have faster aio write system calls: queued requests
+ * are kept in separate per-prio queues, so adding
+ * a new request iterates through a shorter per-prio
+ * list. Blocks with numbers higher than
+ * _SC_AIO_PRIO_DELTA_MAX go with priority 0.
+ */
+ prio = delta_max - i;
+ map->aio.cblocks[i].aio_reqprio = prio >= 0 ? prio : 0;
+ }
}
return 0;
zfree(&map->aio.data);
}
-int perf_mmap__aio_push(struct perf_mmap *md, void *to,
+int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
off_t *off)
{
return (rc == -EAGAIN) ? 0 : -1;
/*
- * md->base data is copied into md->data buffer to
+ * md->base data is copied into md->aio.data[idx] buffer to
* release space in the kernel buffer as fast as possible,
* thru perf_mmap__consume() below.
*
buf = &data[md->start & md->mask];
size = md->mask + 1 - (md->start & md->mask);
md->start += size;
- memcpy(md->aio.data, buf, size);
+ memcpy(md->aio.data[idx], buf, size);
size0 = size;
}
buf = &data[md->start & md->mask];
size = md->end - md->start;
md->start += size;
- memcpy(md->aio.data + size0, buf, size);
+ memcpy(md->aio.data[idx] + size0, buf, size);
/*
- * Increment md->refcount to guard md->data buffer
+ * Increment md->refcount to guard md->aio.data[idx] buffer
* from premature deallocation because md object can be
* released earlier than aio write request started
- * on mmap->data is complete.
+ * on md->aio.data[idx] is complete.
*
* perf_mmap__put() is done at record__aio_complete()
* after started request completion.
md->prev = head;
perf_mmap__consume(md);
- rc = push(to, &md->aio.cblock, md->aio.data, size0 + size, *off);
+ rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
if (!rc) {
*off += size0 + size;
} else {
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
#ifdef HAVE_AIO_SUPPORT
struct {
- void *data;
- struct aiocb cblock;
+ void **data;
+ struct aiocb *cblocks;
+ struct aiocb **aiocb;
int nr_cblocks;
} aio;
#endif
int perf_mmap__push(struct perf_mmap *md, void *to,
int push(struct perf_mmap *map, void *to, void *buf, size_t size));
#ifdef HAVE_AIO_SUPPORT
-int perf_mmap__aio_push(struct perf_mmap *md, void *to,
+int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
off_t *off);
#else
-static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused,
+static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused,
int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused,
off_t *off __maybe_unused)
{