riscv: Prepare for user-space perf event mmap support
author Alexandre Ghiti <alexghiti@rivosinc.com>
Wed, 2 Aug 2023 08:03:23 +0000 (10:03 +0200)
committer Palmer Dabbelt <palmer@rivosinc.com>
Wed, 16 Aug 2023 14:28:19 +0000 (07:28 -0700)
Provide all the necessary bits in the generic riscv pmu driver to be
able to mmap perf events in userspace: the heavy lifting lies in the
driver backend, namely the legacy and sbi implementations.

Note that arch_perf_update_userpage is almost a copy of arm64 code.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Atish Patra <atishp@rivosinc.com>
drivers/perf/riscv_pmu.c
include/linux/perf/riscv_pmu.h

index ebca5ea..432ad2e 100644 (file)
 #include <linux/perf/riscv_pmu.h>
 #include <linux/printk.h>
 #include <linux/smp.h>
+#include <linux/sched_clock.h>
 
 #include <asm/sbi.h>
 
+/*
+ * Return true when @event is a hardware, hw-cache or raw event and has
+ * PERF_EVENT_FLAG_USER_READ_CNT set in its hw.flags; false otherwise.
+ */
+static bool riscv_perf_user_access(struct perf_event *event)
+{
+       /* Any other event type is rejected before the flag is looked at. */
+       switch (event->attr.type) {
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+       case PERF_TYPE_RAW:
+               break;
+       default:
+               return false;
+       }
+
+       return (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) != 0;
+}
+
+/*
+ * Refresh the architecture-specific fields of the perf mmap page @userpg
+ * for @event: the counter read capability, the counter width and the
+ * sched_clock based time conversion parameters. @now is subtracted from
+ * the computed time_zero to produce time_offset.
+ */
+void arch_perf_update_userpage(struct perf_event *event,
+                              struct perf_event_mmap_page *userpg, u64 now)
+{
+       struct clock_read_data *clk;
+       unsigned int seqno;
+       u64 base_ns;
+
+       /* The time capabilities stay cleared until every field is written. */
+       userpg->cap_user_time = 0;
+       userpg->cap_user_time_zero = 0;
+       userpg->cap_user_time_short = 0;
+       userpg->cap_user_rdpmc = riscv_perf_user_access(event);
+
+       userpg->pmc_width = 64;
+
+       /*
+        * Copy the sched_clock parameters, starting over for as long as
+        * sched_clock_read_retry() asks for another pass.
+        */
+       for (;;) {
+               clk = sched_clock_read_begin(&seqno);
+
+               userpg->time_mult = clk->mult;
+               userpg->time_shift = clk->shift;
+               userpg->time_zero = clk->epoch_ns;
+               userpg->time_cycles = clk->epoch_cyc;
+               userpg->time_mask = clk->sched_clock_mask;
+
+               /*
+                * Remove the cycle base from time_zero so that software
+                * unaware of cap_user_time_short still 'works', provided
+                * the counter does not wrap.
+                */
+               base_ns = mul_u64_u32_shr(clk->epoch_cyc, clk->mult, clk->shift);
+               userpg->time_zero -= base_ns;
+
+               if (!sched_clock_read_retry(seqno))
+                       break;
+       }
+
+       userpg->time_offset = userpg->time_zero - now;
+
+       /*
+        * The conversion algorithm originally published for this page
+        * shifted a 32-bit value (it now specifies a 64-bit value), so
+        * time_shift is not expected to exceed 31. A shift of 32 is turned
+        * into 31 with the multiplier halved to match - see the
+        * perf_event_mmap_page documentation in perf_event.h.
+        */
+       if (userpg->time_shift == 32) {
+               userpg->time_shift = 31;
+               userpg->time_mult >>= 1;
+       }
+
+       /*
+        * Enabled/running/stopped times are always accounted internally
+        * with the sched_clock, so all time capabilities can be advertised.
+        */
+       userpg->cap_user_time = 1;
+       userpg->cap_user_time_zero = 1;
+       userpg->cap_user_time_short = 1;
+}
+
 static unsigned long csr_read_num(int csr_num)
 {
 #define switchcase_csr_read(__csr_num, __val)          {\
@@ -171,6 +235,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
 
        local64_set(&hwc->prev_count, (u64)-left);
 
+       perf_event_update_userpage(event);
+
        return overflow;
 }
 
@@ -267,6 +333,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
        hwc->idx = -1;
        hwc->event_base = mapped_event;
 
+       if (rvpmu->event_init)
+               rvpmu->event_init(event);
+
        if (!is_sampling_event(event)) {
                /*
                 * For non-sampling runs, limit the sample_period to half
@@ -283,6 +352,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
        return 0;
 }
 
+/*
+ * pmu::event_idx callback. Returns the backend's csr_index() value plus
+ * one, or 0 when @event lacks PERF_EVENT_FLAG_USER_READ_CNT or the backend
+ * provides no csr_index() hook.
+ */
+static int riscv_pmu_event_idx(struct perf_event *event)
+{
+       struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+
+       if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) ||
+           !pmu->csr_index)
+               return 0;
+
+       return pmu->csr_index(event) + 1;
+}
+
+/*
+ * pmu::event_mapped callback: forward to the backend's event_mapped() hook
+ * when one is set, then refresh the event's user page. Does nothing if the
+ * backend has no hook.
+ */
+static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+       struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+
+       if (!pmu->event_mapped)
+               return;
+
+       pmu->event_mapped(event, mm);
+       perf_event_update_userpage(event);
+}
+
+/*
+ * pmu::event_unmapped callback: forward to the backend's event_unmapped()
+ * hook when one is set, then refresh the event's user page. Does nothing
+ * if the backend has no hook.
+ */
+static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+       struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+
+       if (!pmu->event_unmapped)
+               return;
+
+       pmu->event_unmapped(event, mm);
+       perf_event_update_userpage(event);
+}
+
 struct riscv_pmu *riscv_pmu_alloc(void)
 {
        struct riscv_pmu *pmu;
@@ -307,6 +409,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
        }
        pmu->pmu = (struct pmu) {
                .event_init     = riscv_pmu_event_init,
+               .event_mapped   = riscv_pmu_event_mapped,
+               .event_unmapped = riscv_pmu_event_unmapped,
+               .event_idx      = riscv_pmu_event_idx,
                .add            = riscv_pmu_add,
                .del            = riscv_pmu_del,
                .start          = riscv_pmu_start,
index 5deeea0..43282e2 100644 (file)
@@ -55,6 +55,10 @@ struct riscv_pmu {
        void            (*ctr_start)(struct perf_event *event, u64 init_val);
        void            (*ctr_stop)(struct perf_event *event, unsigned long flag);
        int             (*event_map)(struct perf_event *event, u64 *config);
+       void            (*event_init)(struct perf_event *event);
+       void            (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
+       void            (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
+       uint8_t         (*csr_index)(struct perf_event *event);
 
        struct cpu_hw_events    __percpu *hw_events;
        struct hlist_node       node;