perf: Improve the perf_sample_data struct layout 90/160790/5
author Peter Zijlstra <peterz@infradead.org>
Wed, 24 Sep 2014 11:48:42 +0000 (13:48 +0200)
committer Seung-Woo Kim <sw0312.kim@samsung.com>
Mon, 20 Nov 2017 10:36:20 +0000 (19:36 +0900)
This patch reorders fields in the perf_sample_data struct in order to
minimize the number of cachelines touched in perf_sample_data_init().
It also removes some initializations which are redundant with the code
in kernel/events/core.c

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1411559322-16548-7-git-send-email-eranian@google.com
Cc: cebbert.lkml@gmail.com
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: jolsa@redhat.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[inki.dae: backport mainline commit 2565711fb7d7 to show proper perf result graph]
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Change-Id: I92db818e36740e3601da3a5a00fc5973840ef8c2

include/linux/perf_event.h
kernel/events/core.c

index 765668f..0f92e85 100644 (file)
@@ -551,35 +551,40 @@ extern u64 perf_event_read_value(struct perf_event *event,
 
 
 struct perf_sample_data {
-       u64                             type;
+       /*
+        * Fields set by perf_sample_data_init(), group so as to
+        * minimize the cachelines touched.
+        */
+       u64                             addr;
+       struct perf_raw_record          *raw;
+       struct perf_branch_stack        *br_stack;
+       u64                             period;
+       u64                             weight;
+       u64                             txn;
+       union  perf_mem_data_src        data_src;
 
+       /*
+        * The other fields, optionally {set,used} by
+        * perf_{prepare,output}_sample().
+        */
+       u64                             type;
        u64                             ip;
        struct {
                u32     pid;
                u32     tid;
        }                               tid_entry;
        u64                             time;
-       u64                             addr;
        u64                             id;
        u64                             stream_id;
        struct {
                u32     cpu;
                u32     reserved;
        }                               cpu_entry;
-       u64                             period;
-       union  perf_mem_data_src        data_src;
        struct perf_callchain_entry     *callchain;
-       struct perf_raw_record          *raw;
-       struct perf_branch_stack        *br_stack;
        struct perf_regs                regs_user;
        struct perf_regs                regs_intr;
        u64                             stack_user_size;
-       u64                             weight;
-       /*
-        * Transaction flags for abort events:
-        */
-       u64                             txn;
-};
+} ____cacheline_aligned;
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
                                         u64 addr, u64 period)
@@ -589,14 +594,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
        data->raw  = NULL;
        data->br_stack = NULL;
        data->period = period;
-       data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
-       data->regs_user.regs = NULL;
-       data->stack_user_size = 0;
        data->weight = 0;
        data->data_src.val = 0;
        data->txn = 0;
-       data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
-       data->regs_intr.regs = NULL;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
index da3f627..e3bf2c4 100644 (file)
@@ -4217,8 +4217,11 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
        }
 
        if (regs) {
-               regs_user->regs = regs;
                regs_user->abi  = perf_reg_abi(current);
+               regs_user->regs = regs;
+       } else {
+               regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+               regs_user->regs = NULL;
        }
 }
 
@@ -4691,12 +4694,13 @@ void perf_prepare_sample(struct perf_event_header *header,
                header->size += size;
        }
 
+       if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
+               perf_sample_regs_user(&data->regs_user, regs);
+
        if (sample_type & PERF_SAMPLE_REGS_USER) {
                /* regs dump ABI info */
                int size = sizeof(u64);
 
-               perf_sample_regs_user(&data->regs_user, regs);
-
                if (data->regs_user.regs) {
                        u64 mask = event->attr.sample_regs_user;
                        size += hweight64(mask) * sizeof(u64);
@@ -4712,15 +4716,11 @@ void perf_prepare_sample(struct perf_event_header *header,
                 * in case new sample type is added, because we could eat
                 * up the rest of the sample size.
                 */
-               struct perf_regs *uregs = &data->regs_user;
                u16 stack_size = event->attr.sample_stack_user;
                u16 size = sizeof(u64);
 
-               if (!uregs->abi)
-                       perf_sample_regs_user(uregs, regs);
-
                stack_size = perf_sample_ustack_size(stack_size, header->size,
-                                                    uregs->regs);
+                                                    data->regs_user.regs);
 
                /*
                 * If there is something to dump, add space for the dump