Merge tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git...
[platform/kernel/linux-rpi.git] arch/x86/events/core.c
index 1cbf57d..a88c94d 100644
@@ -28,6 +28,7 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/nospec.h>
+#include <linux/static_call.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -52,6 +53,34 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 
+/*
+ * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined
+ * from just a typename, as opposed to an actual function.
+ */
+DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq,  *x86_pmu.handle_irq);
+DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all,  *x86_pmu.enable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_enable,             *x86_pmu.enable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_disable,     *x86_pmu.disable);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_add,  *x86_pmu.add);
+DEFINE_STATIC_CALL_NULL(x86_pmu_del,  *x86_pmu.del);
+DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events,       *x86_pmu.schedule_events);
+DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
+DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling,  *x86_pmu.start_scheduling);
+DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
+DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling,   *x86_pmu.stop_scheduling);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task,    *x86_pmu.sched_task);
+DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs,   *x86_pmu.drain_pebs);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
+
 u64 __read_mostly hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
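
The block above declares one static call per x86_pmu method, using only the member's pointed-to function type and no initial target. Below is a minimal userspace sketch of that typeof trick, with a made-up struct and names; the real static_call machinery patches direct call sites rather than loading a pointer, which this model does not attempt to show.

```c
/*
 * Minimal userspace sketch (not kernel code) of the "from just a typename"
 * trick described in the comment above: typeof(*ops.handle_irq) names the
 * pointed-to function type of a struct member, so a call slot can be
 * declared before any concrete handler exists.
 */
#include <stdio.h>

struct pmu_ops {
	int (*handle_irq)(void *regs);
};

extern struct pmu_ops fake_pmu;	/* never defined: typeof() does not evaluate */
typedef __typeof__(*fake_pmu.handle_irq) handle_irq_fn;

static int real_handle_irq(void *regs)
{
	(void)regs;
	return 1;
}

int main(void)
{
	/*
	 * Model only: a plain pointer of the extracted function type.  The
	 * real static_call() instead patches a direct call instruction, so
	 * there is no pointer load or indirect branch at the call site.
	 */
	handle_irq_fn *irq = real_handle_irq;

	printf("handled: %d\n", irq(NULL));
	return 0;
}
```

All of these slots are pointed at the real x86_pmu callbacks later in this file by x86_pmu_static_call_update(); the optional, void-returning ones are invoked through static_call_cond(), so an un-filled slot simply does nothing.
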
@@ -76,6 +105,9 @@ u64 x86_perf_event_update(struct perf_event *event)
        if (unlikely(!hwc->event_base))
                return 0;
 
+       if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
+               return x86_pmu.update_topdown_event(event);
+
        /*
         * Careful: an NMI might modify the previous event value.
         *
@@ -660,7 +692,7 @@ static void x86_pmu_disable(struct pmu *pmu)
        cpuc->enabled = 0;
        barrier();
 
-       x86_pmu.disable_all();
+       static_call(x86_pmu_disable_all)();
 }
 
 void x86_pmu_enable_all(int added)
@@ -907,8 +939,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
        if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
                n0 -= cpuc->n_txn;
 
-       if (x86_pmu.start_scheduling)
-               x86_pmu.start_scheduling(cpuc);
+       static_call_cond(x86_pmu_start_scheduling)(cpuc);
 
        for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = cpuc->event_constraint[i];
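
static_call_cond() replaces the open-coded "test the optional hook, then call it" pattern removed here. The plain-C model below, with invented names, captures only the behaviour being preserved, not the call-site patching that makes the conversion worthwhile.

```c
/*
 * Userspace model (made-up names) of the pattern being replaced: an
 * optional callback guarded by a NULL test at every call site.  With
 * static_call_cond() the test disappears; the site is patched to a direct
 * call when a handler is installed and to a NOP when it is NULL.
 */
#include <stdio.h>

struct sched_ops {
	void (*start_scheduling)(int cpu);	/* optional hook */
};

static struct sched_ops ops;			/* .start_scheduling == NULL */

static void start_scheduling_impl(int cpu)
{
	printf("start scheduling on cpu %d\n", cpu);
}

static void schedule_events(int cpu)
{
	/* Old style: pointer test on every call. */
	if (ops.start_scheduling)
		ops.start_scheduling(cpu);
}

int main(void)
{
	schedule_events(0);			/* hook absent: nothing happens */
	ops.start_scheduling = start_scheduling_impl;
	schedule_events(1);			/* hook present: runs */
	return 0;
}
```
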
@@ -925,7 +956,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                 * change due to external factors (sibling state, allow_tfa).
                 */
                if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
-                       c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+                       c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]);
                        cpuc->event_constraint[i] = c;
                }
 
@@ -1008,8 +1039,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
        if (!unsched && assign) {
                for (i = 0; i < n; i++) {
                        e = cpuc->event_list[i];
-                       if (x86_pmu.commit_scheduling)
-                               x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+                       static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
                }
        } else {
                for (i = n0; i < n; i++) {
@@ -1018,19 +1048,56 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                        /*
                         * release events that failed scheduling
                         */
-                       if (x86_pmu.put_event_constraints)
-                               x86_pmu.put_event_constraints(cpuc, e);
+                       static_call_cond(x86_pmu_put_event_constraints)(cpuc, e);
 
                        cpuc->event_constraint[i] = NULL;
                }
        }
 
-       if (x86_pmu.stop_scheduling)
-               x86_pmu.stop_scheduling(cpuc);
+       static_call_cond(x86_pmu_stop_scheduling)(cpuc);
 
        return unsched ? -EINVAL : 0;
 }
 
+static int add_nr_metric_event(struct cpu_hw_events *cpuc,
+                              struct perf_event *event)
+{
+       if (is_metric_event(event)) {
+               if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
+                       return -EINVAL;
+               cpuc->n_metric++;
+               cpuc->n_txn_metric++;
+       }
+
+       return 0;
+}
+
+static void del_nr_metric_event(struct cpu_hw_events *cpuc,
+                               struct perf_event *event)
+{
+       if (is_metric_event(event))
+               cpuc->n_metric--;
+}
+
+static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
+                        int max_count, int n)
+{
+
+       if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
+               return -EINVAL;
+
+       if (n >= max_count + cpuc->n_metric)
+               return -EINVAL;
+
+       cpuc->event_list[n] = event;
+       if (is_counter_pair(&event->hw)) {
+               cpuc->n_pair++;
+               cpuc->n_txn_pair++;
+       }
+
+       return 0;
+}
+
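
The new collect_event() helper gathers the bookkeeping previously open-coded in collect_events(): topdown metric events are capped at INTEL_TD_METRIC_NUM and, since they all share fixed counter 3, each accepted one raises the effective event cap instead of consuming a generic counter, while merged counter pairs bump n_pair. The sketch below is a simplified, hedged model of that accounting with stand-in names and limits.

```c
/*
 * Hedged sketch of the bookkeeping in collect_event(): metric events all
 * ride on fixed counter 3, so each accepted one raises the effective cap
 * by one instead of occupying a generic-counter slot.  Names and limits
 * are simplified stand-ins.
 */
#include <stdio.h>

#define MAX_METRIC	4	/* stands in for INTEL_TD_METRIC_NUM */

struct fake_cpuc {
	int n_events;
	int n_metric;
	int n_pair;
};

static int collect_one(struct fake_cpuc *c, int is_metric, int is_pair,
		       int max_count)
{
	if (is_metric) {
		if (c->n_metric == MAX_METRIC)
			return -1;		/* too many metric events */
		c->n_metric++;
	}

	/* Metric events do not take a generic slot, so they extend the cap. */
	if (c->n_events >= max_count + c->n_metric)
		return -1;

	if (is_pair)
		c->n_pair++;			/* a pair consumes two counters */
	c->n_events++;
	return 0;
}

int main(void)
{
	struct fake_cpuc c = { 0 };
	int max_count = 4;

	for (int i = 0; i < 6; i++)
		printf("metric event %d: %s\n", i,
		       collect_one(&c, 1, 0, max_count) ? "rejected" : "ok");
	return 0;
}
```
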
 /*
  * dogrp: true if must collect siblings events (group)
  * returns total number of events and error code
@@ -1067,28 +1134,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
        }
 
        if (is_x86_event(leader)) {
-               if (n >= max_count)
+               if (collect_event(cpuc, leader, max_count, n))
                        return -EINVAL;
-               cpuc->event_list[n] = leader;
                n++;
-               if (is_counter_pair(&leader->hw))
-                       cpuc->n_pair++;
        }
+
        if (!dogrp)
                return n;
 
        for_each_sibling_event(event, leader) {
-               if (!is_x86_event(event) ||
-                   event->state <= PERF_EVENT_STATE_OFF)
+               if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
                        continue;
 
-               if (n >= max_count)
+               if (collect_event(cpuc, event, max_count, n))
                        return -EINVAL;
 
-               cpuc->event_list[n] = event;
                n++;
-               if (is_counter_pair(&event->hw))
-                       cpuc->n_pair++;
        }
        return n;
 }
@@ -1110,11 +1171,16 @@ static inline void x86_assign_hw_event(struct perf_event *event,
                hwc->event_base = 0;
                break;
 
+       case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
+               /* All the metric events are mapped onto the fixed counter 3. */
+               idx = INTEL_PMC_IDX_FIXED_SLOTS;
+               /* fall through */
        case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
                hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
                hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
                                (idx - INTEL_PMC_IDX_FIXED);
-               hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+               hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
+                                       INTEL_PMC_FIXED_RDPMC_BASE;
                break;
 
        default:
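
A hedged sketch of the index mapping this switch now performs: any metric index collapses onto fixed counter 3 (SLOTS), and fixed counters get an RDPMC encoding with bit 30 set, mirroring INTEL_PMC_FIXED_RDPMC_BASE. The index values and bit positions below are illustrative, as I recall them, not authoritative.

```c
/*
 * Userspace sketch (illustrative constants) of the fixed/metric index
 * handling above: every metric index is folded onto fixed counter 3, and
 * the RDPMC base for fixed counters sets bit 30.
 */
#include <stdio.h>

#define IDX_FIXED		32		/* first fixed-counter index */
#define IDX_FIXED_SLOTS		(IDX_FIXED + 3)	/* fixed counter 3: SLOTS */
#define IDX_METRIC_BASE		(IDX_FIXED + 16)
#define FIXED_RDPMC_BASE	(1u << 30)

struct assignment {
	unsigned int fixed_ctr;		/* which fixed counter MSR */
	unsigned int rdpmc_base;	/* ECX value for RDPMC */
};

static struct assignment assign_fixed(int idx)
{
	struct assignment a;

	if (idx >= IDX_METRIC_BASE)
		idx = IDX_FIXED_SLOTS;	/* all metrics ride on fixed counter 3 */

	a.fixed_ctr = idx - IDX_FIXED;
	a.rdpmc_base = (idx - IDX_FIXED) | FIXED_RDPMC_BASE;
	return a;
}

int main(void)
{
	struct assignment m = assign_fixed(IDX_METRIC_BASE + 1);

	printf("metric event -> fixed ctr %u, rdpmc 0x%08x\n",
	       m.fixed_ctr, m.rdpmc_base);
	return 0;
}
```
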
@@ -1226,7 +1292,7 @@ static void x86_pmu_enable(struct pmu *pmu)
        cpuc->enabled = 1;
        barrier();
 
-       x86_pmu.enable_all(added);
+       static_call(x86_pmu_enable_all)(added);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1245,6 +1311,10 @@ int x86_perf_event_set_period(struct perf_event *event)
        if (unlikely(!hwc->event_base))
                return 0;
 
+       if (unlikely(is_topdown_count(event)) &&
+           x86_pmu.set_topdown_event_period)
+               return x86_pmu.set_topdown_event_period(event);
+
        /*
         * If we are way outside a reasonable range then just skip forward:
         */
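
Together with the hunk in x86_perf_event_update() above, this adds the same early-out in two places: topdown-counted events are diverted to optional model-specific hooks before the generic counter-MSR path runs. A rough plain-C model of that shape, with invented names, is shown below.

```c
/*
 * Rough model (made-up names) of the topdown early-outs: flagged events
 * are handed to optional model-specific hooks; without such hooks the
 * generic path is used, as in the kernel code above.
 */
#include <stdio.h>

struct event {
	int is_topdown;
	long count;
};

struct model_hooks {
	long (*update_topdown)(struct event *e);	/* optional */
	int  (*set_topdown_period)(struct event *e);	/* optional */
};

static struct model_hooks hooks;	/* NULL on models without topdown */

static long event_update(struct event *e)
{
	if (e->is_topdown && hooks.update_topdown)
		return hooks.update_topdown(e);
	return ++e->count;		/* stands in for the generic path */
}

int main(void)
{
	struct event e = { .is_topdown = 1 };

	/* No topdown hooks on this "model": falls through to generic path. */
	printf("count = %ld\n", event_update(&e));
	return 0;
}
```
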
@@ -1284,11 +1354,11 @@ int x86_perf_event_set_period(struct perf_event *event)
        wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
 
        /*
-        * Clear the Merge event counter's upper 16 bits since
+        * Sign extend the Merge event counter's upper 16 bits since
         * we currently declare a 48-bit counter width
         */
        if (is_counter_pair(hwc))
-               wrmsrl(x86_pmu_event_addr(idx + 1), 0);
+               wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
 
        /*
         * Due to erratum on certain cpu we need
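
Why 0xffff rather than 0: the even counter is programmed with the negated period masked to the declared 48-bit width (the wrmsrl() above), so if the odd counter of a merged pair supplies the bits above that width, those bits must be all ones for the pair to overflow after exactly "left" increments. The arithmetic sketch below works under that assumption about how the pair composes.

```c
/*
 * Arithmetic sketch (userspace) of why the high half of a merged counter
 * pair is written with 0xffff rather than cleared.  Assumption for
 * illustration: the odd counter supplies the bits above the declared
 * 48-bit width, so the pair acts as one wide counter that should overflow
 * after exactly "left" increments.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const int width = 48;
	const uint64_t mask = (1ULL << width) - 1;
	int64_t left = 100000;				/* sampling period */

	uint64_t even = (uint64_t)(-left) & mask;	/* mirrors the wrmsrl() */
	uint64_t pair_signext = (0xffffULL << width) | even;
	uint64_t pair_zeroed  = even;			/* old behaviour */

	printf("sign-extended pair overflows after %llu increments\n",
	       (unsigned long long)(0 - pair_signext));
	printf("zeroed pair overflows after %llu increments\n",
	       (unsigned long long)(0 - pair_zeroed));
	return 0;
}
```
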
@@ -1347,7 +1417,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
        if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
                goto done_collect;
 
-       ret = x86_pmu.schedule_events(cpuc, n, assign);
+       ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
        if (ret)
                goto out;
        /*
@@ -1365,13 +1435,11 @@ done_collect:
        cpuc->n_added += n - n0;
        cpuc->n_txn += n - n0;
 
-       if (x86_pmu.add) {
-               /*
-                * This is before x86_pmu_enable() will call x86_pmu_start(),
-                * so we enable LBRs before an event needs them etc..
-                */
-               x86_pmu.add(event);
-       }
+       /*
+        * This is before x86_pmu_enable() will call x86_pmu_start(),
+        * so we enable LBRs before an event needs them etc..
+        */
+       static_call_cond(x86_pmu_add)(event);
 
        ret = 0;
 out:
@@ -1399,7 +1467,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
        cpuc->events[idx] = event;
        __set_bit(idx, cpuc->active_mask);
        __set_bit(idx, cpuc->running);
-       x86_pmu.enable(event);
+       static_call(x86_pmu_enable)(event);
        perf_event_update_userpage(event);
 }
 
@@ -1469,7 +1537,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
        struct hw_perf_event *hwc = &event->hw;
 
        if (test_bit(hwc->idx, cpuc->active_mask)) {
-               x86_pmu.disable(event);
+               static_call(x86_pmu_disable)(event);
                __clear_bit(hwc->idx, cpuc->active_mask);
                cpuc->events[hwc->idx] = NULL;
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
@@ -1519,8 +1587,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
        if (i >= cpuc->n_events - cpuc->n_added)
                --cpuc->n_added;
 
-       if (x86_pmu.put_event_constraints)
-               x86_pmu.put_event_constraints(cpuc, event);
+       static_call_cond(x86_pmu_put_event_constraints)(cpuc, event);
 
        /* Delete the array entry. */
        while (++i < cpuc->n_events) {
@@ -1529,17 +1596,18 @@ static void x86_pmu_del(struct perf_event *event, int flags)
        }
        cpuc->event_constraint[i-1] = NULL;
        --cpuc->n_events;
+       if (x86_pmu.intel_cap.perf_metrics)
+               del_nr_metric_event(cpuc, event);
 
        perf_event_update_userpage(event);
 
 do_del:
-       if (x86_pmu.del) {
-               /*
-                * This is after x86_pmu_stop(); so we disable LBRs after any
-                * event can need them etc..
-                */
-               x86_pmu.del(event);
-       }
+
+       /*
+        * This is after x86_pmu_stop(); so we disable LBRs after any
+        * event can need them etc..
+        */
+       static_call_cond(x86_pmu_del)(event);
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -1617,7 +1685,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
                return NMI_DONE;
 
        start_clock = sched_clock();
-       ret = x86_pmu.handle_irq(regs);
+       ret = static_call(x86_pmu_handle_irq)(regs);
        finish_clock = sched_clock();
 
        perf_sample_event_took(finish_clock - start_clock);
@@ -1830,6 +1898,38 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
 static struct attribute_group x86_pmu_attr_group;
 static struct attribute_group x86_pmu_caps_group;
 
+static void x86_pmu_static_call_update(void)
+{
+       static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq);
+       static_call_update(x86_pmu_disable_all, x86_pmu.disable_all);
+       static_call_update(x86_pmu_enable_all, x86_pmu.enable_all);
+       static_call_update(x86_pmu_enable, x86_pmu.enable);
+       static_call_update(x86_pmu_disable, x86_pmu.disable);
+
+       static_call_update(x86_pmu_add, x86_pmu.add);
+       static_call_update(x86_pmu_del, x86_pmu.del);
+       static_call_update(x86_pmu_read, x86_pmu.read);
+
+       static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
+       static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
+       static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
+
+       static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling);
+       static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling);
+       static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
+
+       static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
+       static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
+
+       static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
+       static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
+}
+
+static void _x86_pmu_read(struct perf_event *event)
+{
+       x86_perf_event_update(event);
+}
+
 static int __init init_hw_perf_events(void)
 {
        struct x86_pmu_quirk *quirk;
@@ -1898,6 +1998,11 @@ static int __init init_hw_perf_events(void)
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
+       if (!x86_pmu.read)
+               x86_pmu.read = _x86_pmu_read;
+
+       x86_pmu_static_call_update();
+
        /*
         * Install callbacks. Core will call them for each online
         * cpu.
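
Because a generic default is installed for x86_pmu.read before x86_pmu_static_call_update() runs, the x86_pmu_read static call always has a target, and its call site further down can use static_call() rather than static_call_cond(). Below is a userspace model of that "install a default, then drop the NULL test" idiom, with invented names.

```c
/*
 * Userspace model (made-up names) of the idiom used for x86_pmu.read:
 * install a generic default before wiring up the call, so callers can
 * assume the hook is always present and skip the NULL test entirely.
 */
#include <stdio.h>

struct pmu_ops {
	void (*read)(int counter);
};

static void generic_read(int counter)
{
	printf("generic read of counter %d\n", counter);
}

static void wire_up(struct pmu_ops *ops)
{
	if (!ops->read)
		ops->read = generic_read;	/* default, like _x86_pmu_read() */
	/* ...this is where static_call_update() would patch the call site... */
}

int main(void)
{
	struct pmu_ops ops = { .read = NULL };

	wire_up(&ops);
	ops.read(0);	/* no NULL check needed at the call site any more */
	return 0;
}
```
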
@@ -1934,11 +2039,9 @@ out:
 }
 early_initcall(init_hw_perf_events);
 
-static inline void x86_pmu_read(struct perf_event *event)
+static void x86_pmu_read(struct perf_event *event)
 {
-       if (x86_pmu.read)
-               return x86_pmu.read(event);
-       x86_perf_event_update(event);
+       static_call(x86_pmu_read)(event);
 }
 
 /*
@@ -1962,6 +2065,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 
        perf_pmu_disable(pmu);
        __this_cpu_write(cpu_hw_events.n_txn, 0);
+       __this_cpu_write(cpu_hw_events.n_txn_pair, 0);
+       __this_cpu_write(cpu_hw_events.n_txn_metric, 0);
 }
 
 /*
@@ -1987,6 +2092,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
         */
        __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
        __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
+       __this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
+       __this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
        perf_pmu_enable(pmu);
 }
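
n_txn_pair and n_txn_metric follow the existing n_txn scheme: zeroed when a transaction starts, incremented alongside n_pair/n_metric while events are collected, and subtracted back out here if the transaction is cancelled. A compact sketch of that delta-based rollback, with made-up names, follows.

```c
/*
 * Sketch (made-up names) of the rollback scheme extended here: instead of
 * snapshotting state, each transaction keeps its own deltas (n_txn_*),
 * which are simply subtracted again if the transaction is cancelled.
 */
#include <stdio.h>

struct hw_state {
	int n_events, n_pair, n_metric;
	int n_txn, n_txn_pair, n_txn_metric;
};

static void txn_start(struct hw_state *s)
{
	s->n_txn = s->n_txn_pair = s->n_txn_metric = 0;
}

static void txn_add(struct hw_state *s, int is_pair, int is_metric)
{
	s->n_events++;
	s->n_txn++;
	s->n_pair += is_pair;
	s->n_txn_pair += is_pair;
	s->n_metric += is_metric;
	s->n_txn_metric += is_metric;
}

static void txn_cancel(struct hw_state *s)
{
	s->n_events -= s->n_txn;
	s->n_pair   -= s->n_txn_pair;
	s->n_metric -= s->n_txn_metric;
}

int main(void)
{
	struct hw_state s = { .n_events = 2, .n_pair = 1 };

	txn_start(&s);
	txn_add(&s, 0, 1);
	txn_add(&s, 1, 0);
	txn_cancel(&s);		/* back to 2 events, 1 pair, 0 metric */
	printf("%d events, %d pairs, %d metric\n",
	       s.n_events, s.n_pair, s.n_metric);
	return 0;
}
```
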
 
@@ -2015,7 +2122,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
        if (!x86_pmu_initialized())
                return -EAGAIN;
 
-       ret = x86_pmu.schedule_events(cpuc, n, assign);
+       ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
        if (ret)
                return ret;
 
@@ -2208,17 +2315,15 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m
 
 static int x86_pmu_event_idx(struct perf_event *event)
 {
-       int idx = event->hw.idx;
+       struct hw_perf_event *hwc = &event->hw;
 
-       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+       if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
                return 0;
 
-       if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
-               idx -= INTEL_PMC_IDX_FIXED;
-               idx |= 1 << 30;
-       }
-
-       return idx + 1;
+       if (is_metric_idx(hwc->idx))
+               return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
+       else
+               return hwc->event_base_rdpmc + 1;
 }
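
The index computed here ends up in the mmap'ed perf_event_mmap_page as "index"; self-monitoring userspace subtracts one and feeds it to RDPMC, so returning event_base_rdpmc + 1 (or the metrics encoding) hands userspace the same encoding the kernel itself programs. Below is a hedged sketch of that consumer side, omitting the seqlock loop, counter-width handling, and error paths.

```c
/*
 * Hedged userspace sketch of how the value returned by the event_idx hook
 * is consumed.  Treat this as a shape, not a complete reader.
 */
#include <stdint.h>

static inline uint64_t read_pmc(uint32_t ecx)
{
	uint32_t lo, hi;

	__asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (ecx));
	return lo | ((uint64_t)hi << 32);
}

/*
 * With pc pointing at the mmap'ed perf_event_mmap_page of the event:
 *
 *	if (pc->cap_user_rdpmc && pc->index)
 *		count = pc->offset + read_pmc(pc->index - 1);
 */
```
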
 
 static ssize_t get_attr_rdpmc(struct device *cdev,
@@ -2308,15 +2413,13 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 
 static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-       if (x86_pmu.sched_task)
-               x86_pmu.sched_task(ctx, sched_in);
+       static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
 }
 
 static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
                                  struct perf_event_context *next)
 {
-       if (x86_pmu.swap_task_ctx)
-               x86_pmu.swap_task_ctx(prev, next);
+       static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
 }
 
 void perf_check_microcode(void)