perf/core: Change the layout of perf_sample_data
author Namhyung Kim <namhyung@kernel.org>
Thu, 29 Dec 2022 20:40:59 +0000 (12:40 -0800)
committer Ingo Molnar <mingo@kernel.org>
Mon, 9 Jan 2023 11:22:09 +0000 (12:22 +0100)
The layout of perf_sample_data is designed to minimize cache-line
access.  perf_sample_data_init() used to initialize a couple of
fields unconditionally, so they were placed together at the head.

But it has since been changed to set the fields according to the actual
sample_type flags.  The main user (the perf tools) always sets IP, TID,
TIME and PERIOD, so place the commonly set fields close to the head.
Also group related fields like addr, phys_addr and data_page_size.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20221229204101.1099430-1-namhyung@kernel.org
include/linux/perf_event.h

index ad92ad3..03949d0 100644 (file)
@@ -1098,47 +1098,51 @@ extern u64 perf_event_read_value(struct perf_event *event,
 
 struct perf_sample_data {
        /*
-        * Fields set by perf_sample_data_init(), group so as to
-        * minimize the cachelines touched.
+        * Fields set by perf_sample_data_init() unconditionally,
+        * group so as to minimize the cachelines touched.
         */
        u64                             sample_flags;
        u64                             period;
 
        /*
-        * The other fields, optionally {set,used} by
-        * perf_{prepare,output}_sample().
+        * Fields commonly set by __perf_event_header__init_id(),
+        * group so as to minimize the cachelines touched.
         */
-       struct perf_branch_stack        *br_stack;
-       union perf_sample_weight        weight;
-       union  perf_mem_data_src        data_src;
-       u64                             txn;
-       u64                             addr;
-       struct perf_raw_record          *raw;
-
        u64                             type;
-       u64                             ip;
        struct {
                u32     pid;
                u32     tid;
        }                               tid_entry;
        u64                             time;
        u64                             id;
-       u64                             stream_id;
        struct {
                u32     cpu;
                u32     reserved;
        }                               cpu_entry;
+
+       /*
+        * The other fields, optionally {set,used} by
+        * perf_{prepare,output}_sample().
+        */
+       u64                             ip;
        struct perf_callchain_entry     *callchain;
-       u64                             aux_size;
+       struct perf_raw_record          *raw;
+       struct perf_branch_stack        *br_stack;
+       union perf_sample_weight        weight;
+       union  perf_mem_data_src        data_src;
+       u64                             txn;
 
        struct perf_regs                regs_user;
        struct perf_regs                regs_intr;
        u64                             stack_user_size;
 
-       u64                             phys_addr;
+       u64                             stream_id;
        u64                             cgroup;
+       u64                             addr;
+       u64                             phys_addr;
        u64                             data_page_size;
        u64                             code_page_size;
+       u64                             aux_size;
 } ____cacheline_aligned;
 
 /* default value for data source */