perf/core: Introduce perf_prepare_header()
author Namhyung Kim <namhyung@kernel.org>
Wed, 18 Jan 2023 06:05:58 +0000 (22:05 -0800)
committer Ingo Molnar <mingo@kernel.org>
Wed, 18 Jan 2023 10:57:20 +0000 (11:57 +0100)
Factor out perf_prepare_header() so that perf_prepare_sample() can be
called without a header when one is not needed.

Also, perf_prepare_sample() now checks filtered_sample_type and returns
early when nothing is left to do, to avoid duplicate work when it is
called twice (or more).

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Song Liu <song@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230118060559.615653-8-namhyung@kernel.org
arch/s390/kernel/perf_cpum_sf.c
arch/x86/events/intel/ds.c
include/linux/perf_event.h
kernel/events/core.c

index ce886a0..ead6eea 100644 (file)
@@ -672,7 +672,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
        /* Protect callchain buffers, tasks */
        rcu_read_lock();
 
-       perf_prepare_sample(&header, data, event, regs);
+       perf_prepare_sample(data, event, regs);
+       perf_prepare_header(&header, data, event, regs);
        if (perf_output_begin(&handle, data, event, header.size))
                goto out;
 
index 07c8a2c..183efa9 100644 (file)
@@ -807,7 +807,8 @@ int intel_pmu_drain_bts_buffer(void)
         * the sample.
         */
        rcu_read_lock();
-       perf_prepare_sample(&header, &data, event, &regs);
+       perf_prepare_sample(&data, event, &regs);
+       perf_prepare_header(&header, &data, event, &regs);
 
        if (perf_output_begin(&handle, &data, event,
                              header.size * (top - base - skip)))
index 7db0e9c..d5628a7 100644 (file)
@@ -1250,6 +1250,17 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
        data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
 }
 
+static inline u32 perf_sample_data_size(struct perf_sample_data *data,
+                                       struct perf_event *event)
+{
+       u32 size = sizeof(struct perf_event_header);
+
+       size += event->header_size + event->id_header_size;
+       size += data->dyn_size;
+
+       return size;
+}
+
 /*
  * Clear all bitfields in the perf_branch_entry.
  * The to and from fields are not cleared because they are
@@ -1271,7 +1282,10 @@ extern void perf_output_sample(struct perf_output_handle *handle,
                               struct perf_event_header *header,
                               struct perf_sample_data *data,
                               struct perf_event *event);
-extern void perf_prepare_sample(struct perf_event_header *header,
+extern void perf_prepare_sample(struct perf_sample_data *data,
+                               struct perf_event *event,
+                               struct pt_regs *regs);
+extern void perf_prepare_header(struct perf_event_header *header,
                                struct perf_sample_data *data,
                                struct perf_event *event,
                                struct pt_regs *regs);
index 47bfd99..4aa73ed 100644 (file)
@@ -7568,20 +7568,13 @@ static __always_inline u64 __cond_set(u64 flags, u64 s, u64 d)
        return d * !!(flags & s);
 }
 
-void perf_prepare_sample(struct perf_event_header *header,
-                        struct perf_sample_data *data,
+void perf_prepare_sample(struct perf_sample_data *data,
                         struct perf_event *event,
                         struct pt_regs *regs)
 {
        u64 sample_type = event->attr.sample_type;
        u64 filtered_sample_type;
 
-       header->type = PERF_RECORD_SAMPLE;
-       header->size = sizeof(*header) + event->header_size + event->id_header_size;
-
-       header->misc = 0;
-       header->misc |= perf_misc_flags(regs);
-
        /*
         * Add the sample flags that are dependent to others.  And clear the
         * sample flags that have already been done by the PMU driver.
@@ -7595,6 +7588,12 @@ void perf_prepare_sample(struct perf_event_header *header,
                                           PERF_SAMPLE_REGS_USER);
        filtered_sample_type &= ~data->sample_flags;
 
+       if (filtered_sample_type == 0) {
+               /* Make sure it has the correct data->type for output */
+               data->type = event->attr.sample_type;
+               return;
+       }
+
        __perf_event_header__init_id(data, event, filtered_sample_type);
 
        if (filtered_sample_type & PERF_SAMPLE_IP) {
@@ -7646,9 +7645,10 @@ void perf_prepare_sample(struct perf_event_header *header,
                 * up the rest of the sample size.
                 */
                u16 stack_size = event->attr.sample_stack_user;
+               u16 header_size = perf_sample_data_size(data, event);
                u16 size = sizeof(u64);
 
-               stack_size = perf_sample_ustack_size(stack_size, header->size,
+               stack_size = perf_sample_ustack_size(stack_size, header_size,
                                                     data->regs_user.regs);
 
                /*
@@ -7733,8 +7733,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 
        if (filtered_sample_type & PERF_SAMPLE_AUX) {
                u64 size;
+               u16 header_size = perf_sample_data_size(data, event);
 
-               header->size += sizeof(u64); /* size */
+               header_size += sizeof(u64); /* size */
 
                /*
                 * Given the 16bit nature of header::size, an AUX sample can
@@ -7742,17 +7743,25 @@ void perf_prepare_sample(struct perf_event_header *header,
                 * Make sure this doesn't happen by using up to U16_MAX bytes
                 * per sample in total (rounded down to 8 byte boundary).
                 */
-               size = min_t(size_t, U16_MAX - header->size,
+               size = min_t(size_t, U16_MAX - header_size,
                             event->attr.aux_sample_size);
                size = rounddown(size, 8);
                size = perf_prepare_sample_aux(event, data, size);
 
-               WARN_ON_ONCE(size + header->size > U16_MAX);
+               WARN_ON_ONCE(size + header_size > U16_MAX);
                data->dyn_size += size + sizeof(u64); /* size above */
                data->sample_flags |= PERF_SAMPLE_AUX;
        }
+}
 
-       header->size += data->dyn_size;
+void perf_prepare_header(struct perf_event_header *header,
+                        struct perf_sample_data *data,
+                        struct perf_event *event,
+                        struct pt_regs *regs)
+{
+       header->type = PERF_RECORD_SAMPLE;
+       header->size = perf_sample_data_size(data, event);
+       header->misc = perf_misc_flags(regs);
 
        /*
         * If you're adding more sample types here, you likely need to do
@@ -7781,7 +7790,8 @@ __perf_event_output(struct perf_event *event,
        /* protect the callchain buffers */
        rcu_read_lock();
 
-       perf_prepare_sample(&header, data, event, regs);
+       perf_prepare_sample(data, event, regs);
+       perf_prepare_header(&header, data, event, regs);
 
        err = output_begin(&handle, data, event, header.size);
        if (err)