perf/x86: Fix n_metric for cancelled txn
author Peter Zijlstra <peterz@infradead.org>
Mon, 5 Oct 2020 08:10:24 +0000 (10:10 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 6 Oct 2020 13:18:17 +0000 (15:18 +0200)
When a group that has TopDown members fails to be scheduled, any
later TopDown group will not return valid values.

Here is an example.

A background perf instance occupies all the GP counters and fixed
counter 1:
 $perf stat -e "{cycles,cycles,cycles,cycles,cycles,cycles,cycles,
                 cycles,cycles}:D" -a

A user monitors a TopDown group. It works well, because fixed
counter 3 and PERF_METRICS are available:
 $perf stat -x, --topdown -- ./workload
   retiring,bad speculation,frontend bound,backend bound,
   18.0,16.1,40.4,25.5,

Then the user tries to monitor a group that has TopDown members.
Because of the cycles event, the group fails to be scheduled:
 $perf stat -x, -e '{slots,topdown-retiring,topdown-be-bound,
                     topdown-fe-bound,topdown-bad-spec,cycles}'
                     -- ./workload
    <not counted>,,slots,0,0.00,,
    <not counted>,,topdown-retiring,0,0.00,,
    <not counted>,,topdown-be-bound,0,0.00,,
    <not counted>,,topdown-fe-bound,0,0.00,,
    <not counted>,,topdown-bad-spec,0,0.00,,
    <not counted>,,cycles,0,0.00,,

The user tries to monitor a TopDown group again. It doesn't work anymore.
 $perf stat -x, --topdown -- ./workload

    ,,,,,

In a transaction, cancel_txn() truncates the event_list for the
canceled group and subtracts the number of events added in that
transaction from the various counters. However, the number of TopDown
events added in the transaction is never subtracted from n_metric, so
later attempts to add new TopDown events will likely fail.
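
The bookkeeping pattern is shown below as a minimal self-contained
sketch (hw_events, add_event, METRIC_MAX and friends are simplified,
hypothetical stand-ins, not the kernel code itself): every counter that
add() bumps inside a transaction needs a matching n_txn_* shadow that
cancel_txn() subtracts on failure.

  #include <assert.h>
  #include <stdio.h>

  #define METRIC_MAX 4    /* stand-in for INTEL_TD_METRIC_NUM */

  struct hw_events {
          int n_events;      /* total events scheduled */
          int n_metric;      /* TopDown metric events scheduled */
          int n_txn;         /* events added in the current transaction */
          int n_txn_metric;  /* metric events added in the current transaction */
  };

  static void start_txn(struct hw_events *c)
  {
          c->n_txn = 0;
          c->n_txn_metric = 0;
  }

  static int add_event(struct hw_events *c, int is_metric)
  {
          if (is_metric) {
                  if (c->n_metric == METRIC_MAX)
                          return -1;
                  c->n_metric++;
                  c->n_txn_metric++;  /* the shadow update this patch adds */
          }
          c->n_events++;
          c->n_txn++;
          return 0;
  }

  static void cancel_txn(struct hw_events *c)
  {
          /* roll back everything the failed transaction added */
          c->n_events -= c->n_txn;
          c->n_metric -= c->n_txn_metric;
  }

  int main(void)
  {
          struct hw_events c = { 0 };

          /* a group with METRIC_MAX TopDown members fails to schedule ... */
          start_txn(&c);
          for (int i = 0; i < METRIC_MAX; i++)
                  add_event(&c, 1);
          cancel_txn(&c);

          /*
           * ... yet a later TopDown event must still fit; without the
           * n_txn_metric roll-back, n_metric would be stuck at METRIC_MAX
           * and this add would fail.
           */
          start_txn(&c);
          assert(add_event(&c, 1) == 0);
          printf("n_metric after cancel + add: %d\n", c.n_metric);
          return 0;
  }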

Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics")
Reported-by: Andi Kleen <ak@linux.intel.com>
Reported-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20201005082611.GH2628@hirez.programming.kicks-ass.net
arch/x86/events/core.c
arch/x86/events/perf_event.h

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index a7248a3c4b2f43f70850ee360fc013f3e4dc4f4d..7b802a7780148b2a5642eba799a3a67b84111215 100644
@@ -1041,6 +1041,7 @@ static int add_nr_metric_event(struct cpu_hw_events *cpuc,
                if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
                        return -EINVAL;
                cpuc->n_metric++;
+               cpuc->n_txn_metric++;
        }
 
        return 0;
@@ -2009,6 +2010,7 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
        perf_pmu_disable(pmu);
        __this_cpu_write(cpu_hw_events.n_txn, 0);
        __this_cpu_write(cpu_hw_events.n_txn_pair, 0);
+       __this_cpu_write(cpu_hw_events.n_txn_metric, 0);
 }
 
 /*
@@ -2035,6 +2037,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
        __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
        __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
        __this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
+       __this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
        perf_pmu_enable(pmu);
 }
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 93e56d76980f377f6c87f90017214618821a6e61..ee2b9b9fc2a50e3e63c93bf8f4bd7d13acbf9c60 100644
@@ -236,6 +236,7 @@ struct cpu_hw_events {
        int                     n_txn;    /* the # last events in the below arrays;
                                             added in the current transaction */
        int                     n_txn_pair;
+       int                     n_txn_metric;
        int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
        u64                     tags[X86_PMC_IDX_MAX];