s390/cpum_sf: correctly set the PID and TID in perf samples
author Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Tue, 8 Mar 2016 13:00:23 +0000 (14:00 +0100)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Thu, 16 Nov 2017 14:06:17 +0000 (15:06 +0100)
The hardware sampler creates samples that are processed at a later
point in time.  The PID and TID values of the perf samples that are
created for hardware samples are initialized with values from the
current task.  Hence, the PID and TID values are not correct and
perf samples are associated with wrong processes.

The PID and TID values are obtained from the Host Program Parameter
(HPP) field in the basic-sampling data entries.  These PIDs are
valid in the init PID namespace.  Ensure that the PIDs in the perf
samples are resolved considering the PID namespace in which the
perf event was created.

To correct the PID and TID values in the created perf samples,
a special overflow handler is installed.  It replaces the default
overflow handler and does not become effective if any other
overflow handler is used.  With the special overflow handler most
of the perf samples are associated with the right processes.
For processes that no longer exist, the association might
still be wrong.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/setup.h
arch/s390/kernel/perf_cpum_sf.c
arch/s390/mm/fault.c

index 8bc87dc..2eb0c8a 100644 (file)
@@ -36,7 +36,7 @@
 #define MACHINE_FLAG_SCC       _BITUL(17)
 
 #define LPP_MAGIC              _BITUL(31)
-#define LPP_PFAULT_PID_MASK    _AC(0xffffffff, UL)
+#define LPP_PID_MASK           _AC(0xffffffff, UL)
 
 #ifndef __ASSEMBLY__
 
index dbb62c0..227b38b 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/percpu.h>
+#include <linux/pid.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
 #include <linux/slab.h>
@@ -615,6 +616,67 @@ static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
                       si->min_sampl_rate, si->max_sampl_rate);
 }
 
+static u32 cpumsf_pid_type(struct perf_event *event,
+                          u32 pid, enum pid_type type)
+{
+       struct task_struct *tsk;
+
+       /* Idle process: PID 0 is returned unchanged, no task lookup */
+       if (!pid)
+               goto out;
+
+       tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+       pid = -1;
+       if (tsk) {
+               /*
+                * Only top level events contain the pid namespace in which
+                * they are created; an event with a parent uses the parent's.
+                */
+               if (event->parent)
+                       event = event->parent;
+               pid = __task_pid_nr_ns(tsk, type, event->ns);
+               /*
+                * A dead task must not be reported as PID 0, see 1d953111b648
+                * "perf/core: Don't report zero PIDs for exiting tasks".
+                */
+               if (!pid && !pid_alive(tsk))
+                       pid = -1;
+       }
+out:
+       return pid;
+}
+
+static void cpumsf_output_event_pid(struct perf_event *event,
+                                   struct perf_sample_data *data,
+                                   struct pt_regs *regs)
+{
+       u32 pid;
+       struct perf_event_header header;
+       struct perf_output_handle handle;
+
+       /*
+        * Fetch the raw PID that perf_push_sample() stored in
+        * data->tid_entry.pid (taken from the basic-sampling data entry).
+        */
+       pid = data->tid_entry.pid;
+
+       /* Protect callchain buffers, tasks */
+       rcu_read_lock();
+
+       perf_prepare_sample(&header, data, event, regs);
+       if (perf_output_begin(&handle, event, header.size))
+               goto out;
+
+       /* Set PID (tgid) and TID as resolved by cpumsf_pid_type() */
+       data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
+       data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
+
+       perf_output_sample(&handle, &header, data, event);
+       perf_output_end(&handle);
+out:
+       rcu_read_unlock();
+}
+
 static int __hw_perf_event_init(struct perf_event *event)
 {
        struct cpu_hw_sf *cpuhw;
@@ -748,6 +810,14 @@ static int __hw_perf_event_init(struct perf_event *event)
                                break;
                }
        }
+
+       /* If PID/TID sampling is active, replace the default overflow
+        * handler to extract and resolve the PIDs from the basic-sampling
+        * data entries.
+        */
+       if (event->attr.sample_type & PERF_SAMPLE_TID)
+               if (is_default_overflow_handler(event))
+                       event->overflow_handler = cpumsf_output_event_pid;
 out:
        return err;
 }
@@ -985,6 +1055,12 @@ static int perf_push_sample(struct perf_event *event,
                break;
        }
 
+       /*
+        * Store the PID value from the sample-data-entry to be
+        * processed and resolved by cpumsf_output_event_pid().
+        */
+       data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
+
        overflow = 0;
        if (perf_exclude_event(event, &regs, sde_regs))
                goto out;
index 1465400..93faeca 100644 (file)
@@ -728,7 +728,7 @@ static void pfault_interrupt(struct ext_code ext_code,
                return;
        inc_irq_stat(IRQEXT_PFL);
        /* Get the token (= pid of the affected task). */
-       pid = param64 & LPP_PFAULT_PID_MASK;
+       pid = param64 & LPP_PID_MASK;
        rcu_read_lock();
        tsk = find_task_by_pid_ns(pid, &init_pid_ns);
        if (tsk)