perf arm-spe: Support synthetic events
author Tan Xiaojun <tanxiaojun@huawei.com>
Sat, 30 May 2020 12:24:42 +0000 (20:24 +0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 1 Jun 2020 15:24:23 +0000 (12:24 -0300)
After commit ffd3d18c20b8 ("perf tools: Add ARM Statistical
Profiling Extensions (SPE) support") was merged, perf can output raw
SPE data with the option "--dump-raw-trace".  However, it lacks support
for synthetic events, so it cannot output any statistical information.

This patch improves "perf report" support for ARM SPE with four types
of synthetic events:

  First level cache synthetic events, including L1 data cache access
  and miss events;
  Last level cache synthetic events, including last level cache access
  and miss events;
  TLB synthetic events, including TLB access and miss events;
  Remote access events, which are used to account for load/store
  operations targeting another socket.

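By default all supported synthetic events are generated, as the report
output below shows; they can also be selected individually at report
time with the "--itrace" option.  A possible invocation is sketched
here (it assumes the itrace option letters 'f' for first level cache,
'm' for last level cache, 't' for TLB and 'a' for remote access events,
which come from a separate auxtrace change rather than from this
patch):

  # Synthesize only TLB and last level cache events
  $ perf report --stdio --itrace=tm
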
Example usage:

  $ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000
  $ perf report --stdio

  # Samples: 59  of event 'l1d-miss'
  # Event count (approx.): 59
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ..................................
  #
      23.73%    23.73%  dd       [kernel.kallsyms]  [k] perf_iterate_ctx.constprop.135
      20.34%    20.34%  dd       [kernel.kallsyms]  [k] filemap_map_pages
       5.08%     5.08%  dd       [kernel.kallsyms]  [k] perf_event_mmap
       5.08%     5.08%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
       5.08%     5.08%  dd       [kernel.kallsyms]  [k] unmap_page_range
       3.39%     3.39%  dd       [kernel.kallsyms]  [k] PageHuge
       3.39%     3.39%  dd       [kernel.kallsyms]  [k] release_pages
       3.39%     3.39%  dd       ld-2.28.so         [.] 0x0000000000008b5c
       1.69%     1.69%  dd       [kernel.kallsyms]  [k] __alloc_fd
       [...]

  # Samples: 3K of event 'l1d-access'
  # Event count (approx.): 3980
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ......................................
  #
      26.98%    26.98%  dd       [kernel.kallsyms]  [k] ret_to_user
      10.53%    10.53%  dd       [kernel.kallsyms]  [k] fsnotify
       7.51%     7.51%  dd       [kernel.kallsyms]  [k] new_sync_read
       4.57%     4.57%  dd       [kernel.kallsyms]  [k] vfs_read
       4.35%     4.35%  dd       [kernel.kallsyms]  [k] vfs_write
       3.69%     3.69%  dd       [kernel.kallsyms]  [k] __fget_light
       3.69%     3.69%  dd       [kernel.kallsyms]  [k] rw_verify_area
       3.44%     3.44%  dd       [kernel.kallsyms]  [k] security_file_permission
       2.76%     2.76%  dd       [kernel.kallsyms]  [k] __fsnotify_parent
       2.44%     2.44%  dd       [kernel.kallsyms]  [k] ksys_write
       2.24%     2.24%  dd       [kernel.kallsyms]  [k] iov_iter_zero
       2.19%     2.19%  dd       [kernel.kallsyms]  [k] read_iter_zero
       1.81%     1.81%  dd       dd                 [.] 0x0000000000002960
       1.78%     1.78%  dd       dd                 [.] 0x0000000000002980
       [...]

  # Samples: 35  of event 'llc-miss'
  # Event count (approx.): 35
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ...........................
  #
      34.29%    34.29%  dd       [kernel.kallsyms]  [k] filemap_map_pages
       8.57%     8.57%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
       8.57%     8.57%  dd       [kernel.kallsyms]  [k] unmap_page_range
       5.71%     5.71%  dd       [kernel.kallsyms]  [k] PageHuge
       5.71%     5.71%  dd       [kernel.kallsyms]  [k] release_pages
       5.71%     5.71%  dd       ld-2.28.so         [.] 0x0000000000008b5c
       2.86%     2.86%  dd       [kernel.kallsyms]  [k] __queue_work
       2.86%     2.86%  dd       [kernel.kallsyms]  [k] __radix_tree_lookup
       2.86%     2.86%  dd       [kernel.kallsyms]  [k] copy_page
       [...]

  # Samples: 2  of event 'llc-access'
  # Event count (approx.): 2
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  .............
  #
      50.00%    50.00%  dd       [kernel.kallsyms]  [k] copy_page
      50.00%    50.00%  dd       libc-2.28.so       [.] _dl_addr

  # Samples: 48  of event 'tlb-miss'
  # Event count (approx.): 48
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ..................................
  #
      20.83%    20.83%  dd       [kernel.kallsyms]  [k] perf_iterate_ctx.constprop.135
      12.50%    12.50%  dd       [kernel.kallsyms]  [k] __arch_clear_user
      10.42%    10.42%  dd       [kernel.kallsyms]  [k] clear_page
       4.17%     4.17%  dd       [kernel.kallsyms]  [k] copy_page
       4.17%     4.17%  dd       [kernel.kallsyms]  [k] filemap_map_pages
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] __alloc_fd
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] __mod_memcg_state.part.70
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] __queue_work
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] __rcu_read_unlock
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] d_path
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] destroy_inode
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] do_dentry_open
       [...]

  # Samples: 9K of event 'tlb-access'
  # Event count (approx.): 9573
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ......................................
  #
      25.79%    25.79%  dd       [kernel.kallsyms]  [k] __arch_clear_user
      11.22%    11.22%  dd       [kernel.kallsyms]  [k] ret_to_user
       8.56%     8.56%  dd       [kernel.kallsyms]  [k] fsnotify
       4.06%     4.06%  dd       [kernel.kallsyms]  [k] new_sync_read
       3.67%     3.67%  dd       [kernel.kallsyms]  [k] el0_svc_common.constprop.2
       3.04%     3.04%  dd       [kernel.kallsyms]  [k] __fsnotify_parent
       2.90%     2.90%  dd       [kernel.kallsyms]  [k] vfs_write
       2.82%     2.82%  dd       [kernel.kallsyms]  [k] vfs_read
       2.52%     2.52%  dd       libc-2.28.so       [.] write
       2.26%     2.26%  dd       [kernel.kallsyms]  [k] security_file_permission
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] ksys_write
       1.96%     1.96%  dd       [kernel.kallsyms]  [k] rw_verify_area
       1.95%     1.95%  dd       [kernel.kallsyms]  [k] read_iter_zero
       [...]

  # Samples: 9  of event 'branch-miss'
  # Event count (approx.): 9
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  .........................
  #
      22.22%    22.22%  dd       libc-2.28.so       [.] _dl_addr
      11.11%    11.11%  dd       [kernel.kallsyms]  [k] __arch_clear_user
      11.11%    11.11%  dd       [kernel.kallsyms]  [k] __arch_copy_from_user
      11.11%    11.11%  dd       [kernel.kallsyms]  [k] __dentry_kill
      11.11%    11.11%  dd       [kernel.kallsyms]  [k] __efistub_memcpy
      11.11%    11.11%  dd       ld-2.28.so         [.] 0x0000000000012b7c
      11.11%    11.11%  dd       libc-2.28.so       [.] 0x000000000002a980
      11.11%    11.11%  dd       libc-2.28.so       [.] 0x0000000000083340

  # Samples: 29  of event 'remote-access'
  # Event count (approx.): 29
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ...........................
  #
      41.38%    41.38%  dd       [kernel.kallsyms]  [k] filemap_map_pages
      10.34%    10.34%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
      10.34%    10.34%  dd       [kernel.kallsyms]  [k] unmap_page_range
       6.90%     6.90%  dd       [kernel.kallsyms]  [k] release_pages
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] PageHuge
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] __queue_work
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_add_file_rmap
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_counter_try_charge
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_remove_rmap
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] xas_start
       3.45%     3.45%  dd       ld-2.28.so         [.] 0x0000000000002a1c
       3.45%     3.45%  dd       ld-2.28.so         [.] 0x0000000000008b5c
       3.45%     3.45%  dd       ld-2.28.so         [.] 0x00000000000093cc

Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/arm-spe-decoder/Build
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c [new file with mode: 0644]
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h [new file with mode: 0644]
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
tools/perf/util/arm-spe.c

index 16efbc2..f8dae13 100644
@@ -1 +1 @@
-perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
+perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
new file mode 100644
index 0000000..302a14d
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arm_spe_decoder.c: ARM SPE support
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+
+#include "../auxtrace.h"
+#include "../debug.h"
+#include "../util.h"
+
+#include "arm-spe-decoder.h"
+
+#ifndef BIT
+#define BIT(n)         (1UL << (n))
+#endif
+
+static u64 arm_spe_calc_ip(int index, u64 payload)
+{
+       u8 *addr = (u8 *)&payload;
+       int ns, el;
+
+       /* Instruction virtual address or Branch target address */
+       if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
+           index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+               ns = addr[7] & SPE_ADDR_PKT_NS;
+               el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;
+
+               /* Fill highest byte for EL1 or EL2 (VHE) mode */
+               if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
+                       addr[7] = 0xff;
+               /* Clean highest byte for other cases */
+               else
+                       addr[7] = 0x0;
+
+       /* Data access virtual address */
+       } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
+
+               /* Fill highest byte if bits [48..55] is 0xff */
+               if (addr[6] == 0xff)
+                       addr[7] = 0xff;
+               /* Otherwise, cleanup tags */
+               else
+                       addr[7] = 0x0;
+
+       /* Data access physical address */
+       } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
+               /* Cleanup byte 7 */
+               addr[7] = 0x0;
+       } else {
+               pr_err("unsupported address packet index: 0x%x\n", index);
+       }
+
+       return payload;
+}
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
+{
+       struct arm_spe_decoder *decoder;
+
+       if (!params->get_trace)
+               return NULL;
+
+       decoder = zalloc(sizeof(struct arm_spe_decoder));
+       if (!decoder)
+               return NULL;
+
+       decoder->get_trace = params->get_trace;
+       decoder->data = params->data;
+
+       return decoder;
+}
+
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
+{
+       free(decoder);
+}
+
+static int arm_spe_get_data(struct arm_spe_decoder *decoder)
+{
+       struct arm_spe_buffer buffer = { .buf = 0, };
+       int ret;
+
+       pr_debug("Getting more data\n");
+       ret = decoder->get_trace(&buffer, decoder->data);
+       if (ret < 0)
+               return ret;
+
+       decoder->buf = buffer.buf;
+       decoder->len = buffer.len;
+
+       if (!decoder->len)
+               pr_debug("No more data\n");
+
+       return decoder->len;
+}
+
+static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
+{
+       int ret;
+
+       do {
+               if (!decoder->len) {
+                       ret = arm_spe_get_data(decoder);
+
+                       /* Failed to read out trace data */
+                       if (ret <= 0)
+                               return ret;
+               }
+
+               ret = arm_spe_get_packet(decoder->buf, decoder->len,
+                                        &decoder->packet);
+               if (ret <= 0) {
+                       /* Move forward for 1 byte */
+                       decoder->buf += 1;
+                       decoder->len -= 1;
+                       return -EBADMSG;
+               }
+
+               decoder->buf += ret;
+               decoder->len -= ret;
+       } while (decoder->packet.type == ARM_SPE_PAD);
+
+       return 1;
+}
+
+static int arm_spe_read_record(struct arm_spe_decoder *decoder)
+{
+       int err;
+       int idx;
+       u64 payload, ip;
+
+       memset(&decoder->record, 0x0, sizeof(decoder->record));
+
+       while (1) {
+               err = arm_spe_get_next_packet(decoder);
+               if (err <= 0)
+                       return err;
+
+               idx = decoder->packet.index;
+               payload = decoder->packet.payload;
+
+               switch (decoder->packet.type) {
+               case ARM_SPE_TIMESTAMP:
+                       decoder->record.timestamp = payload;
+                       return 1;
+               case ARM_SPE_END:
+                       return 1;
+               case ARM_SPE_ADDRESS:
+                       ip = arm_spe_calc_ip(idx, payload);
+                       if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
+                               decoder->record.from_ip = ip;
+                       else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
+                               decoder->record.to_ip = ip;
+                       break;
+               case ARM_SPE_COUNTER:
+                       break;
+               case ARM_SPE_CONTEXT:
+                       break;
+               case ARM_SPE_OP_TYPE:
+                       break;
+               case ARM_SPE_EVENTS:
+                       if (payload & BIT(EV_L1D_REFILL))
+                               decoder->record.type |= ARM_SPE_L1D_MISS;
+
+                       if (payload & BIT(EV_L1D_ACCESS))
+                               decoder->record.type |= ARM_SPE_L1D_ACCESS;
+
+                       if (payload & BIT(EV_TLB_WALK))
+                               decoder->record.type |= ARM_SPE_TLB_MISS;
+
+                       if (payload & BIT(EV_TLB_ACCESS))
+                               decoder->record.type |= ARM_SPE_TLB_ACCESS;
+
+                       if ((idx == 1 || idx == 2 || idx == 3) &&
+                           (payload & BIT(EV_LLC_MISS)))
+                               decoder->record.type |= ARM_SPE_LLC_MISS;
+
+                       if ((idx == 1 || idx == 2 || idx == 3) &&
+                           (payload & BIT(EV_LLC_ACCESS)))
+                               decoder->record.type |= ARM_SPE_LLC_ACCESS;
+
+                       if ((idx == 1 || idx == 2 || idx == 3) &&
+                           (payload & BIT(EV_REMOTE_ACCESS)))
+                               decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
+
+                       if (payload & BIT(EV_MISPRED))
+                               decoder->record.type |= ARM_SPE_BRANCH_MISS;
+
+                       break;
+               case ARM_SPE_DATA_SOURCE:
+                       break;
+               case ARM_SPE_BAD:
+                       break;
+               case ARM_SPE_PAD:
+                       break;
+               default:
+                       pr_err("Get packet error!\n");
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+int arm_spe_decode(struct arm_spe_decoder *decoder)
+{
+       return arm_spe_read_record(decoder);
+}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
new file mode 100644
index 0000000..a5111a8
--- /dev/null
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm_spe_decoder.h: Arm Statistical Profiling Extensions support
+ * Copyright (c) 2019-2020, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_DECODER_H__
+#define INCLUDE__ARM_SPE_DECODER_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+enum arm_spe_events {
+       EV_EXCEPTION_GEN        = 0,
+       EV_RETIRED              = 1,
+       EV_L1D_ACCESS           = 2,
+       EV_L1D_REFILL           = 3,
+       EV_TLB_ACCESS           = 4,
+       EV_TLB_WALK             = 5,
+       EV_NOT_TAKEN            = 6,
+       EV_MISPRED              = 7,
+       EV_LLC_ACCESS           = 8,
+       EV_LLC_MISS             = 9,
+       EV_REMOTE_ACCESS        = 10,
+       EV_ALIGNMENT            = 11,
+       EV_PARTIAL_PREDICATE    = 17,
+       EV_EMPTY_PREDICATE      = 18,
+};
+
+enum arm_spe_sample_type {
+       ARM_SPE_L1D_ACCESS      = 1 << 0,
+       ARM_SPE_L1D_MISS        = 1 << 1,
+       ARM_SPE_LLC_ACCESS      = 1 << 2,
+       ARM_SPE_LLC_MISS        = 1 << 3,
+       ARM_SPE_TLB_ACCESS      = 1 << 4,
+       ARM_SPE_TLB_MISS        = 1 << 5,
+       ARM_SPE_BRANCH_MISS     = 1 << 6,
+       ARM_SPE_REMOTE_ACCESS   = 1 << 7,
+};
+
+struct arm_spe_record {
+       enum arm_spe_sample_type type;
+       int err;
+       u64 from_ip;
+       u64 to_ip;
+       u64 timestamp;
+};
+
+struct arm_spe_insn;
+
+struct arm_spe_buffer {
+       const unsigned char *buf;
+       size_t len;
+       u64 offset;
+       u64 trace_nr;
+};
+
+struct arm_spe_params {
+       int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+       void *data;
+};
+
+struct arm_spe_decoder {
+       int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+       void *data;
+       struct arm_spe_record record;
+
+       const unsigned char *buf;
+       size_t len;
+
+       struct arm_spe_pkt packet;
+};
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder);
+
+int arm_spe_decode(struct arm_spe_decoder *decoder);
+
+#endif
index d786ef6..4c87052 100644
@@ -15,6 +15,8 @@
 #define ARM_SPE_NEED_MORE_BYTES                -1
 #define ARM_SPE_BAD_PACKET             -2
 
+#define ARM_SPE_PKT_MAX_SZ             16
+
 enum arm_spe_pkt_type {
        ARM_SPE_BAD,
        ARM_SPE_PAD,
@@ -34,6 +36,20 @@ struct arm_spe_pkt {
        uint64_t                payload;
 };
 
+#define SPE_ADDR_PKT_HDR_INDEX_INS             (0x0)
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH          (0x1)
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT       (0x2)
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS       (0x3)
+
+#define SPE_ADDR_PKT_NS                                BIT(7)
+#define SPE_ADDR_PKT_CH                                BIT(6)
+#define SPE_ADDR_PKT_EL_OFFSET                 (5)
+#define SPE_ADDR_PKT_EL_MASK                   (0x3 << SPE_ADDR_PKT_EL_OFFSET)
+#define SPE_ADDR_PKT_EL0                       (0)
+#define SPE_ADDR_PKT_EL1                       (1)
+#define SPE_ADDR_PKT_EL2                       (2)
+#define SPE_ADDR_PKT_EL3                       (3)
+
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
 
 int arm_spe_get_packet(const unsigned char *buf, size_t len,
index 235de3d..3882a53 100644
@@ -4,46 +4,85 @@
  * Copyright (c) 2017-2018, Arm Ltd.
  */
 
+#include <byteswap.h>
 #include <endian.h>
 #include <errno.h>
-#include <byteswap.h>
 #include <inttypes.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
 #include <linux/bitops.h>
+#include <linux/kernel.h>
 #include <linux/log2.h>
+#include <linux/types.h>
 #include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
 
+#include "auxtrace.h"
 #include "color.h"
+#include "debug.h"
+#include "evlist.h"
 #include "evsel.h"
 #include "machine.h"
 #include "session.h"
-#include "debug.h"
-#include "auxtrace.h"
+#include "symbol.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "tool.h"
+#include "util/synthetic-events.h"
+
 #include "arm-spe.h"
+#include "arm-spe-decoder/arm-spe-decoder.h"
 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
 
+#define MAX_TIMESTAMP (~0ULL)
+
 struct arm_spe {
        struct auxtrace                 auxtrace;
        struct auxtrace_queues          queues;
        struct auxtrace_heap            heap;
+       struct itrace_synth_opts        synth_opts;
        u32                             auxtrace_type;
        struct perf_session             *session;
        struct machine                  *machine;
        u32                             pmu_type;
+
+       u8                              timeless_decoding;
+       u8                              data_queued;
+
+       u8                              sample_flc;
+       u8                              sample_llc;
+       u8                              sample_tlb;
+       u8                              sample_branch;
+       u8                              sample_remote_access;
+
+       u64                             l1d_miss_id;
+       u64                             l1d_access_id;
+       u64                             llc_miss_id;
+       u64                             llc_access_id;
+       u64                             tlb_miss_id;
+       u64                             tlb_access_id;
+       u64                             branch_miss_id;
+       u64                             remote_access_id;
+
+       u64                             kernel_start;
+
+       unsigned long                   num_events;
 };
 
 struct arm_spe_queue {
-       struct arm_spe          *spe;
-       unsigned int            queue_nr;
-       struct auxtrace_buffer  *buffer;
-       bool                    on_heap;
-       bool                    done;
-       pid_t                   pid;
-       pid_t                   tid;
-       int                     cpu;
+       struct arm_spe                  *spe;
+       unsigned int                    queue_nr;
+       struct auxtrace_buffer          *buffer;
+       struct auxtrace_buffer          *old_buffer;
+       union perf_event                *event_buf;
+       bool                            on_heap;
+       bool                            done;
+       pid_t                           pid;
+       pid_t                           tid;
+       int                             cpu;
+       struct arm_spe_decoder          *decoder;
+       u64                             time;
+       u64                             timestamp;
+       struct thread                   *thread;
 };
 
 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -92,44 +131,520 @@ static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
        arm_spe_dump(spe, buf, len);
 }
 
-static int arm_spe_process_event(struct perf_session *session __maybe_unused,
-                                union perf_event *event __maybe_unused,
-                                struct perf_sample *sample __maybe_unused,
-                                struct perf_tool *tool __maybe_unused)
+static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
+{
+       struct arm_spe_queue *speq = data;
+       struct auxtrace_buffer *buffer = speq->buffer;
+       struct auxtrace_buffer *old_buffer = speq->old_buffer;
+       struct auxtrace_queue *queue;
+
+       queue = &speq->spe->queues.queue_array[speq->queue_nr];
+
+       buffer = auxtrace_buffer__next(queue, buffer);
+       /* If no more data, drop the previous auxtrace_buffer and return */
+       if (!buffer) {
+               if (old_buffer)
+                       auxtrace_buffer__drop_data(old_buffer);
+               b->len = 0;
+               return 0;
+       }
+
+       speq->buffer = buffer;
+
+       /* If the aux_buffer doesn't have data associated, try to load it */
+       if (!buffer->data) {
+               /* get the file desc associated with the perf data file */
+               int fd = perf_data__fd(speq->spe->session->data);
+
+               buffer->data = auxtrace_buffer__get_data(buffer, fd);
+               if (!buffer->data)
+                       return -ENOMEM;
+       }
+
+       b->len = buffer->size;
+       b->buf = buffer->data;
+
+       if (b->len) {
+               if (old_buffer)
+                       auxtrace_buffer__drop_data(old_buffer);
+               speq->old_buffer = buffer;
+       } else {
+               auxtrace_buffer__drop_data(buffer);
+               return arm_spe_get_trace(b, data);
+       }
+
+       return 0;
+}
+
+static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
+               unsigned int queue_nr)
+{
+       struct arm_spe_params params = { .get_trace = 0, };
+       struct arm_spe_queue *speq;
+
+       speq = zalloc(sizeof(*speq));
+       if (!speq)
+               return NULL;
+
+       speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+       if (!speq->event_buf)
+               goto out_free;
+
+       speq->spe = spe;
+       speq->queue_nr = queue_nr;
+       speq->pid = -1;
+       speq->tid = -1;
+       speq->cpu = -1;
+
+       /* params set */
+       params.get_trace = arm_spe_get_trace;
+       params.data = speq;
+
+       /* create new decoder */
+       speq->decoder = arm_spe_decoder_new(&params);
+       if (!speq->decoder)
+               goto out_free;
+
+       return speq;
+
+out_free:
+       zfree(&speq->event_buf);
+       free(speq);
+
+       return NULL;
+}
+
+static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
+{
+       return ip >= spe->kernel_start ?
+               PERF_RECORD_MISC_KERNEL :
+               PERF_RECORD_MISC_USER;
+}
+
+static void arm_spe_prep_sample(struct arm_spe *spe,
+                               struct arm_spe_queue *speq,
+                               union perf_event *event,
+                               struct perf_sample *sample)
+{
+       struct arm_spe_record *record = &speq->decoder->record;
+
+       if (!spe->timeless_decoding)
+               sample->time = speq->timestamp;
+
+       sample->ip = record->from_ip;
+       sample->cpumode = arm_spe_cpumode(spe, sample->ip);
+       sample->pid = speq->pid;
+       sample->tid = speq->tid;
+       sample->addr = record->to_ip;
+       sample->period = 1;
+       sample->cpu = speq->cpu;
+
+       event->sample.header.type = PERF_RECORD_SAMPLE;
+       event->sample.header.misc = sample->cpumode;
+       event->sample.header.size = sizeof(struct perf_event_header);
+}
+
+static inline int
+arm_spe_deliver_synth_event(struct arm_spe *spe,
+                           struct arm_spe_queue *speq __maybe_unused,
+                           union perf_event *event,
+                           struct perf_sample *sample)
+{
+       int ret;
+
+       ret = perf_session__deliver_synth_event(spe->session, event, sample);
+       if (ret)
+               pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
+
+       return ret;
+}
+
+static int
+arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
+                               u64 spe_events_id)
+{
+       struct arm_spe *spe = speq->spe;
+       union perf_event *event = speq->event_buf;
+       struct perf_sample sample = { .ip = 0, };
+
+       arm_spe_prep_sample(spe, speq, event, &sample);
+
+       sample.id = spe_events_id;
+       sample.stream_id = spe_events_id;
+
+       return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe_sample(struct arm_spe_queue *speq)
+{
+       const struct arm_spe_record *record = &speq->decoder->record;
+       struct arm_spe *spe = speq->spe;
+       int err;
+
+       if (spe->sample_flc) {
+               if (record->type & ARM_SPE_L1D_MISS) {
+                       err = arm_spe_synth_spe_events_sample(
+                                       speq, spe->l1d_miss_id);
+                       if (err)
+                               return err;
+               }
+
+               if (record->type & ARM_SPE_L1D_ACCESS) {
+                       err = arm_spe_synth_spe_events_sample(
+                                       speq, spe->l1d_access_id);
+                       if (err)
+                               return err;
+               }
+       }
+
+       if (spe->sample_llc) {
+               if (record->type & ARM_SPE_LLC_MISS) {
+                       err = arm_spe_synth_spe_events_sample(
+                                       speq, spe->llc_miss_id);
+                       if (err)
+                               return err;
+               }
+
+               if (record->type & ARM_SPE_LLC_ACCESS) {
+                       err = arm_spe_synth_spe_events_sample(
+                                       speq, spe->llc_access_id);
+                       if (err)
+                               return err;
+               }
+       }
+
+       if (spe->sample_tlb) {
+               if (record->type & ARM_SPE_TLB_MISS) {
+                       err = arm_spe_synth_spe_events_sample(
+                                       speq, spe->tlb_miss_id);
+                       if (err)
+                               return err;
+               }
+
+               if (record->type & ARM_SPE_TLB_ACCESS) {
+                       err = arm_spe_synth_spe_events_sample(
+                                       speq, spe->tlb_access_id);
+                       if (err)
+                               return err;
+               }
+       }
+
+       if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
+               err = arm_spe_synth_spe_events_sample(speq,
+                                                     spe->branch_miss_id);
+               if (err)
+                       return err;
+       }
+
+       if (spe->sample_remote_access &&
+           (record->type & ARM_SPE_REMOTE_ACCESS)) {
+               err = arm_spe_synth_spe_events_sample(speq,
+                                                     spe->remote_access_id);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
+{
+       struct arm_spe *spe = speq->spe;
+       int ret;
+
+       if (!spe->kernel_start)
+               spe->kernel_start = machine__kernel_start(spe->machine);
+
+       while (1) {
+               ret = arm_spe_decode(speq->decoder);
+               if (!ret) {
+                       pr_debug("No data or all data has been processed.\n");
+                       return 1;
+               }
+
+               /*
+                * Error is detected when decode SPE trace data, continue to
+                * the next trace data and find out more records.
+                */
+               if (ret < 0)
+                       continue;
+
+               ret = arm_spe_sample(speq);
+               if (ret)
+                       return ret;
+
+               if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
+                       *timestamp = speq->timestamp;
+                       return 0;
+               }
+       }
+
+       return 0;
+}
+
+static int arm_spe__setup_queue(struct arm_spe *spe,
+                              struct auxtrace_queue *queue,
+                              unsigned int queue_nr)
+{
+       struct arm_spe_queue *speq = queue->priv;
+       struct arm_spe_record *record;
+
+       if (list_empty(&queue->head) || speq)
+               return 0;
+
+       speq = arm_spe__alloc_queue(spe, queue_nr);
+
+       if (!speq)
+               return -ENOMEM;
+
+       queue->priv = speq;
+
+       if (queue->cpu != -1)
+               speq->cpu = queue->cpu;
+
+       if (!speq->on_heap) {
+               int ret;
+
+               if (spe->timeless_decoding)
+                       return 0;
+
+retry:
+               ret = arm_spe_decode(speq->decoder);
+
+               if (!ret)
+                       return 0;
+
+               if (ret < 0)
+                       goto retry;
+
+               record = &speq->decoder->record;
+
+               speq->timestamp = record->timestamp;
+               ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
+               if (ret)
+                       return ret;
+               speq->on_heap = true;
+       }
+
+       return 0;
+}
+
+static int arm_spe__setup_queues(struct arm_spe *spe)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < spe->queues.nr_queues; i++) {
+               ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int arm_spe__update_queues(struct arm_spe *spe)
 {
+       if (spe->queues.new_data) {
+               spe->queues.new_data = false;
+               return arm_spe__setup_queues(spe);
+       }
+
        return 0;
 }
 
+static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
+{
+       struct evsel *evsel;
+       struct evlist *evlist = spe->session->evlist;
+       bool timeless_decoding = true;
+
+       /*
+        * Circle through the list of event and complain if we find one
+        * with the time bit set.
+        */
+       evlist__for_each_entry(evlist, evsel) {
+               if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
+                       timeless_decoding = false;
+       }
+
+       return timeless_decoding;
+}
+
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
+                                   struct auxtrace_queue *queue)
+{
+       struct arm_spe_queue *speq = queue->priv;
+       pid_t tid;
+
+       tid = machine__get_current_tid(spe->machine, speq->cpu);
+       if (tid != -1) {
+               speq->tid = tid;
+               thread__zput(speq->thread);
+       } else
+               speq->tid = queue->tid;
+
+       if ((!speq->thread) && (speq->tid != -1)) {
+               speq->thread = machine__find_thread(spe->machine, -1,
+                                                   speq->tid);
+       }
+
+       if (speq->thread) {
+               speq->pid = speq->thread->pid_;
+               if (queue->cpu == -1)
+                       speq->cpu = speq->thread->cpu;
+       }
+}
+
+static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
+{
+       unsigned int queue_nr;
+       u64 ts;
+       int ret;
+
+       while (1) {
+               struct auxtrace_queue *queue;
+               struct arm_spe_queue *speq;
+
+               if (!spe->heap.heap_cnt)
+                       return 0;
+
+               if (spe->heap.heap_array[0].ordinal >= timestamp)
+                       return 0;
+
+               queue_nr = spe->heap.heap_array[0].queue_nr;
+               queue = &spe->queues.queue_array[queue_nr];
+               speq = queue->priv;
+
+               auxtrace_heap__pop(&spe->heap);
+
+               if (spe->heap.heap_cnt) {
+                       ts = spe->heap.heap_array[0].ordinal + 1;
+                       if (ts > timestamp)
+                               ts = timestamp;
+               } else {
+                       ts = timestamp;
+               }
+
+               arm_spe_set_pid_tid_cpu(spe, queue);
+
+               ret = arm_spe_run_decoder(speq, &ts);
+               if (ret < 0) {
+                       auxtrace_heap__add(&spe->heap, queue_nr, ts);
+                       return ret;
+               }
+
+               if (!ret) {
+                       ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
+                       if (ret < 0)
+                               return ret;
+               } else {
+                       speq->on_heap = false;
+               }
+       }
+
+       return 0;
+}
+
+static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
+                                           u64 time_)
+{
+       struct auxtrace_queues *queues = &spe->queues;
+       unsigned int i;
+       u64 ts = 0;
+
+       for (i = 0; i < queues->nr_queues; i++) {
+               struct auxtrace_queue *queue = &spe->queues.queue_array[i];
+               struct arm_spe_queue *speq = queue->priv;
+
+               if (speq && (tid == -1 || speq->tid == tid)) {
+                       speq->time = time_;
+                       arm_spe_set_pid_tid_cpu(spe, queue);
+                       arm_spe_run_decoder(speq, &ts);
+               }
+       }
+       return 0;
+}
+
+static int arm_spe_process_event(struct perf_session *session,
+                                union perf_event *event,
+                                struct perf_sample *sample,
+                                struct perf_tool *tool)
+{
+       int err = 0;
+       u64 timestamp;
+       struct arm_spe *spe = container_of(session->auxtrace,
+                       struct arm_spe, auxtrace);
+
+       if (dump_trace)
+               return 0;
+
+       if (!tool->ordered_events) {
+               pr_err("SPE trace requires ordered events\n");
+               return -EINVAL;
+       }
+
+       if (sample->time && (sample->time != (u64) -1))
+               timestamp = sample->time;
+       else
+               timestamp = 0;
+
+       if (timestamp || spe->timeless_decoding) {
+               err = arm_spe__update_queues(spe);
+               if (err)
+                       return err;
+       }
+
+       if (spe->timeless_decoding) {
+               if (event->header.type == PERF_RECORD_EXIT) {
+                       err = arm_spe_process_timeless_queues(spe,
+                                       event->fork.tid,
+                                       sample->time);
+               }
+       } else if (timestamp) {
+               if (event->header.type == PERF_RECORD_EXIT) {
+                       err = arm_spe_process_queues(spe, timestamp);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return err;
+}
+
 static int arm_spe_process_auxtrace_event(struct perf_session *session,
                                          union perf_event *event,
                                          struct perf_tool *tool __maybe_unused)
 {
        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
                                             auxtrace);
-       struct auxtrace_buffer *buffer;
-       off_t data_offset;
-       int fd = perf_data__fd(session->data);
-       int err;
 
-       if (perf_data__is_pipe(session->data)) {
-               data_offset = 0;
-       } else {
-               data_offset = lseek(fd, 0, SEEK_CUR);
-               if (data_offset == -1)
-                       return -errno;
-       }
+       if (!spe->data_queued) {
+               struct auxtrace_buffer *buffer;
+               off_t data_offset;
+               int fd = perf_data__fd(session->data);
+               int err;
 
-       err = auxtrace_queues__add_event(&spe->queues, session, event,
-                                        data_offset, &buffer);
-       if (err)
-               return err;
+               if (perf_data__is_pipe(session->data)) {
+                       data_offset = 0;
+               } else {
+                       data_offset = lseek(fd, 0, SEEK_CUR);
+                       if (data_offset == -1)
+                               return -errno;
+               }
 
-       /* Dump here now we have copied a piped trace out of the pipe */
-       if (dump_trace) {
-               if (auxtrace_buffer__get_data(buffer, fd)) {
-                       arm_spe_dump_event(spe, buffer->data,
-                                            buffer->size);
-                       auxtrace_buffer__put_data(buffer);
+               err = auxtrace_queues__add_event(&spe->queues, session, event,
+                               data_offset, &buffer);
+               if (err)
+                       return err;
+
+               /* Dump here now we have copied a piped trace out of the pipe */
+               if (dump_trace) {
+                       if (auxtrace_buffer__get_data(buffer, fd)) {
+                               arm_spe_dump_event(spe, buffer->data,
+                                               buffer->size);
+                               auxtrace_buffer__put_data(buffer);
+                       }
                }
        }
 
@@ -139,7 +654,25 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session,
 static int arm_spe_flush(struct perf_session *session __maybe_unused,
                         struct perf_tool *tool __maybe_unused)
 {
-       return 0;
+       struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+                       auxtrace);
+       int ret;
+
+       if (dump_trace)
+               return 0;
+
+       if (!tool->ordered_events)
+               return -EINVAL;
+
+       ret = arm_spe__update_queues(spe);
+       if (ret < 0)
+               return ret;
+
+       if (spe->timeless_decoding)
+               return arm_spe_process_timeless_queues(spe, -1,
+                               MAX_TIMESTAMP - 1);
+
+       return arm_spe_process_queues(spe, MAX_TIMESTAMP);
 }
 
 static void arm_spe_free_queue(void *priv)
@@ -148,6 +681,9 @@ static void arm_spe_free_queue(void *priv)
 
        if (!speq)
                return;
+       thread__zput(speq->thread);
+       arm_spe_decoder_free(speq->decoder);
+       zfree(&speq->event_buf);
        free(speq);
 }
 
@@ -196,11 +732,189 @@ static void arm_spe_print_info(__u64 *arr)
        fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
 }
 
+struct arm_spe_synth {
+       struct perf_tool dummy_tool;
+       struct perf_session *session;
+};
+
+static int arm_spe_event_synth(struct perf_tool *tool,
+                              union perf_event *event,
+                              struct perf_sample *sample __maybe_unused,
+                              struct machine *machine __maybe_unused)
+{
+       struct arm_spe_synth *arm_spe_synth =
+                     container_of(tool, struct arm_spe_synth, dummy_tool);
+
+       return perf_session__deliver_synth_event(arm_spe_synth->session,
+                                                event, NULL);
+}
+
+static int arm_spe_synth_event(struct perf_session *session,
+                              struct perf_event_attr *attr, u64 id)
+{
+       struct arm_spe_synth arm_spe_synth;
+
+       memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
+       arm_spe_synth.session = session;
+
+       return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
+                                          &id, arm_spe_event_synth);
+}
+
+static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
+                                   const char *name)
+{
+       struct evsel *evsel;
+
+       evlist__for_each_entry(evlist, evsel) {
+               if (evsel->core.id && evsel->core.id[0] == id) {
+                       if (evsel->name)
+                               zfree(&evsel->name);
+                       evsel->name = strdup(name);
+                       break;
+               }
+       }
+}
+
+static int
+arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
+{
+       struct evlist *evlist = session->evlist;
+       struct evsel *evsel;
+       struct perf_event_attr attr;
+       bool found = false;
+       u64 id;
+       int err;
+
+       evlist__for_each_entry(evlist, evsel) {
+               if (evsel->core.attr.type == spe->pmu_type) {
+                       found = true;
+                       break;
+               }
+       }
+
+       if (!found) {
+               pr_debug("No selected events with SPE trace data\n");
+               return 0;
+       }
+
+       memset(&attr, 0, sizeof(struct perf_event_attr));
+       attr.size = sizeof(struct perf_event_attr);
+       attr.type = PERF_TYPE_HARDWARE;
+       attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+       attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+               PERF_SAMPLE_PERIOD;
+       if (spe->timeless_decoding)
+               attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+       else
+               attr.sample_type |= PERF_SAMPLE_TIME;
+
+       attr.exclude_user = evsel->core.attr.exclude_user;
+       attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+       attr.exclude_hv = evsel->core.attr.exclude_hv;
+       attr.exclude_host = evsel->core.attr.exclude_host;
+       attr.exclude_guest = evsel->core.attr.exclude_guest;
+       attr.sample_id_all = evsel->core.attr.sample_id_all;
+       attr.read_format = evsel->core.attr.read_format;
+
+       /* create new id val to be a fixed offset from evsel id */
+       id = evsel->core.id[0] + 1000000000;
+
+       if (!id)
+               id = 1;
+
+       if (spe->synth_opts.flc) {
+               spe->sample_flc = true;
+
+               /* Level 1 data cache miss */
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->l1d_miss_id = id;
+               arm_spe_set_event_name(evlist, id, "l1d-miss");
+               id += 1;
+
+               /* Level 1 data cache access */
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->l1d_access_id = id;
+               arm_spe_set_event_name(evlist, id, "l1d-access");
+               id += 1;
+       }
+
+       if (spe->synth_opts.llc) {
+               spe->sample_llc = true;
+
+               /* Last level cache miss */
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->llc_miss_id = id;
+               arm_spe_set_event_name(evlist, id, "llc-miss");
+               id += 1;
+
+               /* Last level cache access */
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->llc_access_id = id;
+               arm_spe_set_event_name(evlist, id, "llc-access");
+               id += 1;
+       }
+
+       if (spe->synth_opts.tlb) {
+               spe->sample_tlb = true;
+
+               /* TLB miss */
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->tlb_miss_id = id;
+               arm_spe_set_event_name(evlist, id, "tlb-miss");
+               id += 1;
+
+               /* TLB access */
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->tlb_access_id = id;
+               arm_spe_set_event_name(evlist, id, "tlb-access");
+               id += 1;
+       }
+
+       if (spe->synth_opts.branches) {
+               spe->sample_branch = true;
+
+               /* Branch miss */
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->branch_miss_id = id;
+               arm_spe_set_event_name(evlist, id, "branch-miss");
+               id += 1;
+       }
+
+       if (spe->synth_opts.remote_access) {
+               spe->sample_remote_access = true;
+
+               /* Remote access */
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->remote_access_id = id;
+               arm_spe_set_event_name(evlist, id, "remote-access");
+               id += 1;
+       }
+
+       return 0;
+}
+
 int arm_spe_process_auxtrace_info(union perf_event *event,
                                  struct perf_session *session)
 {
        struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
-       size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
+       size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
        struct arm_spe *spe;
        int err;
 
@@ -221,6 +935,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
        spe->auxtrace_type = auxtrace_info->type;
        spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
 
+       spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
        spe->auxtrace.process_event = arm_spe_process_event;
        spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
        spe->auxtrace.flush_events = arm_spe_flush;
@@ -231,8 +946,30 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 
        arm_spe_print_info(&auxtrace_info->priv[0]);
 
+       if (dump_trace)
+               return 0;
+
+       if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+               spe->synth_opts = *session->itrace_synth_opts;
+       else
+               itrace_synth_opts__set_default(&spe->synth_opts, false);
+
+       err = arm_spe_synth_events(spe, session);
+       if (err)
+               goto err_free_queues;
+
+       err = auxtrace_queues__process_index(&spe->queues, session);
+       if (err)
+               goto err_free_queues;
+
+       if (spe->queues.populated)
+               spe->data_queued = true;
+
        return 0;
 
+err_free_queues:
+       auxtrace_queues__free(&spe->queues);
+       session->auxtrace = NULL;
 err_free:
        free(spe);
        return err;