arch/arm64/kvm/pmu-emul.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Linaro Ltd.
4  * Author: Shannon Zhao <shannon.zhao@linaro.org>
5  */
6
7 #include <linux/cpu.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/list.h>
11 #include <linux/perf_event.h>
12 #include <linux/perf/arm_pmu.h>
13 #include <linux/uaccess.h>
14 #include <asm/kvm_emulate.h>
15 #include <kvm/arm_pmu.h>
16 #include <kvm/arm_vgic.h>
17
18 #define PERF_ATTR_CFG1_COUNTER_64BIT    BIT(0)
19
20 DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
21
22 static LIST_HEAD(arm_pmus);
23 static DEFINE_MUTEX(arm_pmus_lock);
24
25 static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
26 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
27
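/* Map a counter back to the vcpu that owns it, via its index in struct kvm_pmu */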
28 static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
29 {
30         return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
31 }
32
33 static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
34 {
35         return &vcpu->arch.pmu.pmc[cnt_idx];
36 }
37
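/*
 * Width of the event number that the guest's PMU can encode: 10 bits for a
 * baseline PMUv3, 16 bits from PMUv3p1 onwards.
 */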
38 static u32 kvm_pmu_event_mask(struct kvm *kvm)
39 {
40         unsigned int pmuver;
41
42         pmuver = kvm->arch.arm_pmu->pmuver;
43
44         switch (pmuver) {
45         case ID_AA64DFR0_EL1_PMUVer_IMP:
46                 return GENMASK(9, 0);
47         case ID_AA64DFR0_EL1_PMUVer_V3P1:
48         case ID_AA64DFR0_EL1_PMUVer_V3P4:
49         case ID_AA64DFR0_EL1_PMUVer_V3P5:
50         case ID_AA64DFR0_EL1_PMUVer_V3P7:
51                 return GENMASK(15, 0);
52         default:                /* Shouldn't be here, just for sanity */
53                 WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
54                 return 0;
55         }
56 }
57
58 /**
59  * kvm_pmc_is_64bit - determine if counter is 64bit
60  * @pmc: counter context
61  */
62 static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
63 {
64         return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
65                 kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
66 }
67
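/*
 * Does the counter overflow at 64 bits rather than 32? This is the case when
 * PMCR_EL0.LP (event counters) or PMCR_EL0.LC (cycle counter) is set.
 */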
68 static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
69 {
70         u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);
71
72         return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
73                (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
74 }
75
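/*
 * Only an even-numbered event counter that wraps at 32 bits can feed a CHAIN
 * event into its odd-numbered neighbour.
 */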
76 static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
77 {
78         return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
79                 !kvm_pmc_has_64bit_overflow(pmc));
80 }
81
82 static u32 counter_index_to_reg(u64 idx)
83 {
84         return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
85 }
86
87 static u32 counter_index_to_evtreg(u64 idx)
88 {
89         return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
90 }
91
92 static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
93 {
94         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
95         u64 counter, reg, enabled, running;
96
97         reg = counter_index_to_reg(pmc->idx);
98         counter = __vcpu_sys_reg(vcpu, reg);
99
100         /*
101          * The real counter value is equal to the value of the counter register
102          * plus the value the perf event counts.
103          */
104         if (pmc->perf_event)
105                 counter += perf_event_read_value(pmc->perf_event, &enabled,
106                                                  &running);
107
108         if (!kvm_pmc_is_64bit(pmc))
109                 counter = lower_32_bits(counter);
110
111         return counter;
112 }
113
114 /**
115  * kvm_pmu_get_counter_value - get PMU counter value
116  * @vcpu: The vcpu pointer
117  * @select_idx: The counter index
118  */
119 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
120 {
121         if (!kvm_vcpu_has_pmu(vcpu))
122                 return 0;
123
124         return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
125 }
126
127 static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
128 {
129         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
130         u64 reg;
131
132         kvm_pmu_release_perf_event(pmc);
133
134         reg = counter_index_to_reg(pmc->idx);
135
136         if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
137             !force) {
138                 /*
139                  * Even with PMUv3p5, AArch32 cannot write to the top
140                  * 32bit of the counters. The only possible course of
141                  * action is to use PMCR.P, which will reset them to
142                  * 0 (the only use of the 'force' parameter).
143                  */
144                 val  = lower_32_bits(val);
145                 val |= __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32);
146         }
147
148         __vcpu_sys_reg(vcpu, reg) = val;
149
150         /* Recreate the perf event to reflect the updated sample_period */
151         kvm_pmu_create_perf_event(pmc);
152 }
153
154 /**
155  * kvm_pmu_set_counter_value - set PMU counter value
156  * @vcpu: The vcpu pointer
157  * @select_idx: The counter index
158  * @val: The counter value
159  */
160 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
161 {
162         if (!kvm_vcpu_has_pmu(vcpu))
163                 return;
164
165         kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
166 }
167
168 /**
169  * kvm_pmu_release_perf_event - remove the perf event
170  * @pmc: The PMU counter pointer
171  */
172 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
173 {
174         if (pmc->perf_event) {
175                 perf_event_disable(pmc->perf_event);
176                 perf_event_release_kernel(pmc->perf_event);
177                 pmc->perf_event = NULL;
178         }
179 }
180
181 /**
182  * kvm_pmu_stop_counter - stop PMU counter
183  * @pmc: The PMU counter pointer
184  *
185  * If this counter has been configured to monitor some event, release it here.
186  */
187 static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
188 {
189         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
190         u64 reg, val;
191
192         if (!pmc->perf_event)
193                 return;
194
195         val = kvm_pmu_get_pmc_value(pmc);
196
197         reg = counter_index_to_reg(pmc->idx);
198
199         __vcpu_sys_reg(vcpu, reg) = val;
200
201         kvm_pmu_release_perf_event(pmc);
202 }
203
204 /**
205  * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
206  * @vcpu: The vcpu pointer
207  *
208  */
209 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
210 {
211         int i;
212         struct kvm_pmu *pmu = &vcpu->arch.pmu;
213
214         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
215                 pmu->pmc[i].idx = i;
216 }
217
218 /**
219  * kvm_pmu_vcpu_reset - reset pmu state for cpu
220  * @vcpu: The vcpu pointer
221  *
222  */
223 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
224 {
225         unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
226         int i;
227
228         for_each_set_bit(i, &mask, 32)
229                 kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
230 }
231
232 /**
233  * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
234  * @vcpu: The vcpu pointer
235  *
236  */
237 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
238 {
239         int i;
240
241         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
242                 kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
243         irq_work_sync(&vcpu->arch.pmu.overflow_work);
244 }
245
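/**
 * kvm_pmu_valid_counter_mask - mask of counters implemented for this vCPU
 * @vcpu: The vcpu pointer
 *
 * The guest's PMCR_EL0.N event counters plus the cycle counter.
 */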
246 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
247 {
248         u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
249
250         val &= ARMV8_PMU_PMCR_N_MASK;
251         if (val == 0)
252                 return BIT(ARMV8_PMU_CYCLE_IDX);
253         else
254                 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
255 }
256
257 /**
258  * kvm_pmu_enable_counter_mask - enable selected PMU counters
259  * @vcpu: The vcpu pointer
260  * @val: the value the guest writes to the PMCNTENSET register
261  *
262  * Call perf_event_enable to start counting the perf event
263  */
264 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
265 {
266         int i;
267         if (!kvm_vcpu_has_pmu(vcpu))
268                 return;
269
270         if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
271                 return;
272
273         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
274                 struct kvm_pmc *pmc;
275
276                 if (!(val & BIT(i)))
277                         continue;
278
279                 pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
280
281                 if (!pmc->perf_event) {
282                         kvm_pmu_create_perf_event(pmc);
283                 } else {
284                         perf_event_enable(pmc->perf_event);
285                         if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
286                                 kvm_debug("fail to enable perf event\n");
287                 }
288         }
289 }
290
291 /**
292  * kvm_pmu_disable_counter_mask - disable selected PMU counters
293  * @vcpu: The vcpu pointer
294  * @val: the value the guest writes to the PMCNTENCLR register
295  *
296  * Call perf_event_disable to stop counting the perf event
297  */
298 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
299 {
300         int i;
301
302         if (!kvm_vcpu_has_pmu(vcpu) || !val)
303                 return;
304
305         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
306                 struct kvm_pmc *pmc;
307
308                 if (!(val & BIT(i)))
309                         continue;
310
311                 pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
312
313                 if (pmc->perf_event)
314                         perf_event_disable(pmc->perf_event);
315         }
316 }
317
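/*
 * Mask of counters that have overflowed, are enabled, and have their overflow
 * interrupt enabled, provided the PMU as a whole is enabled (PMCR_EL0.E).
 */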
318 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
319 {
320         u64 reg = 0;
321
322         if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
323                 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
324                 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
325                 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
326         }
327
328         return reg;
329 }
330
331 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
332 {
333         struct kvm_pmu *pmu = &vcpu->arch.pmu;
334         bool overflow;
335
336         if (!kvm_vcpu_has_pmu(vcpu))
337                 return;
338
339         overflow = !!kvm_pmu_overflow_status(vcpu);
340         if (pmu->irq_level == overflow)
341                 return;
342
343         pmu->irq_level = overflow;
344
345         if (likely(irqchip_in_kernel(vcpu->kvm))) {
346                 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
347                                               pmu->irq_num, overflow, pmu);
348                 WARN_ON(ret);
349         }
350 }
351
352 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
353 {
354         struct kvm_pmu *pmu = &vcpu->arch.pmu;
355         struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
356         bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
357
358         if (likely(irqchip_in_kernel(vcpu->kvm)))
359                 return false;
360
361         return pmu->irq_level != run_level;
362 }
363
364 /*
365  * Reflect the PMU overflow interrupt output level into the kvm_run structure
366  */
367 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
368 {
369         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
370
371         /* Populate the PMU overflow interrupt level for user space */
372         regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
373         if (vcpu->arch.pmu.irq_level)
374                 regs->device_irq_level |= KVM_ARM_DEV_PMU;
375 }
376
377 /**
378  * kvm_pmu_flush_hwstate - flush pmu state to cpu
379  * @vcpu: The vcpu pointer
380  *
381  * Check if the PMU has overflowed while we were running in the host, and inject
382  * an interrupt if that was the case.
383  */
384 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
385 {
386         kvm_pmu_update_state(vcpu);
387 }
388
389 /**
390  * kvm_pmu_sync_hwstate - sync pmu state from cpu
391  * @vcpu: The vcpu pointer
392  *
393  * Check if the PMU has overflowed while we were running in the guest, and
394  * inject an interrupt if that was the case.
395  */
396 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
397 {
398         kvm_pmu_update_state(vcpu);
399 }
400
401 /*
402  * When the perf interrupt is an NMI, we cannot safely notify the vcpu
403  * corresponding to the event.
404  * This is why we need a callback to do it once outside of the NMI context.
405  */
406 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
407 {
408         struct kvm_vcpu *vcpu;
409
410         vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
411         kvm_vcpu_kick(vcpu);
412 }
413
414 /*
415  * Perform an increment on any of the counters described in @mask,
416  * generating the overflow if required, and propagate it as a chained
417  * event if possible.
418  */
419 static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
420                                       unsigned long mask, u32 event)
421 {
422         int i;
423
424         if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
425                 return;
426
427         /* Weed out disabled counters */
428         mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
429
430         for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
431                 struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
432                 u64 type, reg;
433
434                 /* Filter on event type */
435                 type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
436                 type &= kvm_pmu_event_mask(vcpu->kvm);
437                 if (type != event)
438                         continue;
439
440                 /* Increment this counter */
441                 reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
442                 if (!kvm_pmc_is_64bit(pmc))
443                         reg = lower_32_bits(reg);
444                 __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
445
446                 /* No overflow? move on */
447                 if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
448                         continue;
449
450                 /* Mark overflow */
451                 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
452
453                 if (kvm_pmu_counter_can_chain(pmc))
454                         kvm_pmu_counter_increment(vcpu, BIT(i + 1),
455                                                   ARMV8_PMUV3_PERFCTR_CHAIN);
456         }
457 }
458
459 /* Compute the sample period for a given counter value */
460 static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
461 {
462         u64 val;
463
464         if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
465                 val = (-counter) & GENMASK(63, 0);
466         else
467                 val = (-counter) & GENMASK(31, 0);
468
469         return val;
470 }
471
472 /*
473  * When the perf event overflows, set the overflow status and inform the vcpu.
474  */
475 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
476                                   struct perf_sample_data *data,
477                                   struct pt_regs *regs)
478 {
479         struct kvm_pmc *pmc = perf_event->overflow_handler_context;
480         struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
481         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
482         int idx = pmc->idx;
483         u64 period;
484
485         cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
486
487         /*
488          * Reset the sample period to the architectural limit,
489          * i.e. the point where the counter overflows.
490          */
491         period = compute_period(pmc, local64_read(&perf_event->count));
492
493         local64_set(&perf_event->hw.period_left, 0);
494         perf_event->attr.sample_period = period;
495         perf_event->hw.sample_period = period;
496
497         __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
498
499         if (kvm_pmu_counter_can_chain(pmc))
500                 kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
501                                           ARMV8_PMUV3_PERFCTR_CHAIN);
502
503         if (kvm_pmu_overflow_status(vcpu)) {
504                 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
505
506                 if (!in_nmi())
507                         kvm_vcpu_kick(vcpu);
508                 else
509                         irq_work_queue(&vcpu->arch.pmu.overflow_work);
510         }
511
512         cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
513 }
514
515 /**
516  * kvm_pmu_software_increment - do software increment
517  * @vcpu: The vcpu pointer
518  * @val: the value the guest writes to the PMSWINC register
519  */
520 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
521 {
522         kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
523 }
524
525 /**
526  * kvm_pmu_handle_pmcr - handle PMCR register
527  * @vcpu: The vcpu pointer
528  * @val: the value the guest writes to the PMCR register
529  */
530 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
531 {
532         int i;
533
534         if (!kvm_vcpu_has_pmu(vcpu))
535                 return;
536
537         /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
538         if (!kvm_pmu_is_3p5(vcpu))
539                 val &= ~ARMV8_PMU_PMCR_LP;
540
541         /* The reset bits don't indicate any state, and shouldn't be saved. */
542         __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
543
544         if (val & ARMV8_PMU_PMCR_E) {
545                 kvm_pmu_enable_counter_mask(vcpu,
546                        __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
547         } else {
548                 kvm_pmu_disable_counter_mask(vcpu,
549                        __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
550         }
551
552         if (val & ARMV8_PMU_PMCR_C)
553                 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
554
555         if (val & ARMV8_PMU_PMCR_P) {
556                 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
557                 mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
558                 for_each_set_bit(i, &mask, 32)
559                         kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
560         }
561         kvm_vcpu_pmu_restore_guest(vcpu);
562 }
563
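/* A counter only counts if PMCR_EL0.E and its PMCNTENSET_EL0 bit are both set */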
564 static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
565 {
566         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
567         return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
568                (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
569 }
570
571 /**
572  * kvm_pmu_create_perf_event - create a perf event for a counter
573  * @pmc: Counter context
574  */
575 static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
576 {
577         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
578         struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
579         struct perf_event *event;
580         struct perf_event_attr attr;
581         u64 eventsel, reg, data;
582
583         reg = counter_index_to_evtreg(pmc->idx);
584         data = __vcpu_sys_reg(vcpu, reg);
585
586         kvm_pmu_stop_counter(pmc);
587         if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
588                 eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
589         else
590                 eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
591
592         /*
593          * Neither SW increment nor chained events need to be backed
594          * by a perf event.
595          */
596         if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
597             eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
598                 return;
599
600         /*
601          * If we have a filter in place and the event isn't allowed, do
602          * not install a perf event either.
603          */
604         if (vcpu->kvm->arch.pmu_filter &&
605             !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
606                 return;
607
608         memset(&attr, 0, sizeof(struct perf_event_attr));
609         attr.type = arm_pmu->pmu.type;
610         attr.size = sizeof(attr);
611         attr.pinned = 1;
612         attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
613         attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
614         attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
615         attr.exclude_hv = 1; /* Don't count EL2 events */
616         attr.exclude_host = 1; /* Don't count host events */
617         attr.config = eventsel;
618
619         /*
620          * If counting with a 64bit counter, advertise it to the perf
621          * code, carefully dealing with the initial sample period
622          * which also depends on the overflow.
623          */
624         if (kvm_pmc_is_64bit(pmc))
625                 attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
626
627         attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));
628
629         event = perf_event_create_kernel_counter(&attr, -1, current,
630                                                  kvm_pmu_perf_overflow, pmc);
631
632         if (IS_ERR(event)) {
633                 pr_err_once("kvm: pmu event creation failed %ld\n",
634                             PTR_ERR(event));
635                 return;
636         }
637
638         pmc->perf_event = event;
639 }
640
641 /**
642  * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
643  * @vcpu: The vcpu pointer
644  * @data: The data guest writes to PMXEVTYPER_EL0
645  * @select_idx: The number of selected counter
646  *
647  * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
648  * event with the given hardware event number. Here we call the perf_event API
649  * to emulate this action and create a kernel perf event for it.
650  */
651 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
652                                     u64 select_idx)
653 {
654         struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
655         u64 reg, mask;
656
657         if (!kvm_vcpu_has_pmu(vcpu))
658                 return;
659
660         mask  =  ARMV8_PMU_EVTYPE_MASK;
661         mask &= ~ARMV8_PMU_EVTYPE_EVENT;
662         mask |= kvm_pmu_event_mask(vcpu->kvm);
663
664         reg = counter_index_to_evtreg(pmc->idx);
665
666         __vcpu_sys_reg(vcpu, reg) = data & mask;
667
668         kvm_pmu_create_perf_event(pmc);
669 }
670
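/*
 * Record a host PMU so that it can later be offered to guests. The static key
 * advertising PMU support is flipped when the first PMU shows up.
 */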
671 void kvm_host_pmu_init(struct arm_pmu *pmu)
672 {
673         struct arm_pmu_entry *entry;
674
675         if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
676             pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
677                 return;
678
679         mutex_lock(&arm_pmus_lock);
680
681         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
682         if (!entry)
683                 goto out_unlock;
684
685         entry->arm_pmu = pmu;
686         list_add_tail(&entry->entry, &arm_pmus);
687
688         if (list_is_singular(&arm_pmus))
689                 static_branch_enable(&kvm_arm_pmu_available);
690
691 out_unlock:
692         mutex_unlock(&arm_pmus_lock);
693 }
694
695 static struct arm_pmu *kvm_pmu_probe_armpmu(void)
696 {
697         struct perf_event_attr attr = { };
698         struct perf_event *event;
699         struct arm_pmu *pmu = NULL;
700
701         /*
702          * Create a dummy event that only counts user cycles. As we'll never
703          * leave this function with the event being live, it will never
704          * count anything. But it allows us to probe some of the PMU
705          * details. Yes, this is terrible.
706          */
707         attr.type = PERF_TYPE_RAW;
708         attr.size = sizeof(attr);
709         attr.pinned = 1;
710         attr.disabled = 0;
711         attr.exclude_user = 0;
712         attr.exclude_kernel = 1;
713         attr.exclude_hv = 1;
714         attr.exclude_host = 1;
715         attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
716         attr.sample_period = GENMASK(63, 0);
717
718         event = perf_event_create_kernel_counter(&attr, -1, current,
719                                                  kvm_pmu_perf_overflow, &attr);
720
721         if (IS_ERR(event)) {
722                 pr_err_once("kvm: pmu event creation failed %ld\n",
723                             PTR_ERR(event));
724                 return NULL;
725         }
726
727         if (event->pmu) {
728                 pmu = to_arm_pmu(event->pmu);
729                 if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
730                     pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
731                         pmu = NULL;
732         }
733
734         perf_event_disable(event);
735         perf_event_release_kernel(event);
736
737         return pmu;
738 }
739
740 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
741 {
742         unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
743         u64 val, mask = 0;
744         int base, i, nr_events;
745
746         if (!kvm_vcpu_has_pmu(vcpu))
747                 return 0;
748
749         if (!pmceid1) {
750                 val = read_sysreg(pmceid0_el0);
751                 /* always support CHAIN */
752                 val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
753                 base = 0;
754         } else {
755                 val = read_sysreg(pmceid1_el0);
756                 /*
757                  * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
758                  * as RAZ
759                  */
760                 if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
761                         val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
762                 base = 32;
763         }
764
765         if (!bmap)
766                 return val;
767
768         nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
769
770         for (i = 0; i < 32; i += 8) {
771                 u64 byte;
772
773                 byte = bitmap_get_value8(bmap, base + i);
774                 mask |= byte << i;
775                 if (nr_events >= (0x4000 + base + 32)) {
776                         byte = bitmap_get_value8(bmap, 0x4000 + base + i);
777                         mask |= byte << (32 + i);
778                 }
779         }
780
781         return val & mask;
782 }
783
784 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
785 {
786         if (!kvm_vcpu_has_pmu(vcpu))
787                 return 0;
788
789         if (!vcpu->arch.pmu.created)
790                 return -EINVAL;
791
792         /*
793          * A valid interrupt configuration for the PMU is either to have a
794          * properly configured interrupt number and use an in-kernel
795          * irqchip, or to not have an in-kernel GIC and not set an IRQ.
796          */
797         if (irqchip_in_kernel(vcpu->kvm)) {
798                 int irq = vcpu->arch.pmu.irq_num;
799                 /*
800                  * If we are using an in-kernel vgic, at this point we know
801                  * the vgic will be initialized, so we can check the PMU irq
802                  * number against the dimensions of the vgic and make sure
803                  * it's valid.
804                  */
805                 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
806                         return -EINVAL;
807         } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
808                    return -EINVAL;
809         }
810
811         /* One-off reload of the PMU on first run */
812         kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
813
814         return 0;
815 }
816
817 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
818 {
819         if (irqchip_in_kernel(vcpu->kvm)) {
820                 int ret;
821
822                 /*
823                  * If using the PMU with an in-kernel virtual GIC
824                  * implementation, we require the GIC to be already
825                  * initialized when initializing the PMU.
826                  */
827                 if (!vgic_initialized(vcpu->kvm))
828                         return -ENODEV;
829
830                 if (!kvm_arm_pmu_irq_initialized(vcpu))
831                         return -ENXIO;
832
833                 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
834                                          &vcpu->arch.pmu);
835                 if (ret)
836                         return ret;
837         }
838
839         init_irq_work(&vcpu->arch.pmu.overflow_work,
840                       kvm_pmu_perf_overflow_notify_vcpu);
841
842         vcpu->arch.pmu.created = true;
843         return 0;
844 }
845
846 /*
847  * For one VM the interrupt type must be the same for each vcpu.
848  * As a PPI, the interrupt number is the same for all vcpus,
849  * while as an SPI it must be a separate number per vcpu.
850  */
851 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
852 {
853         unsigned long i;
854         struct kvm_vcpu *vcpu;
855
856         kvm_for_each_vcpu(i, vcpu, kvm) {
857                 if (!kvm_arm_pmu_irq_initialized(vcpu))
858                         continue;
859
860                 if (irq_is_ppi(irq)) {
861                         if (vcpu->arch.pmu.irq_num != irq)
862                                 return false;
863                 } else {
864                         if (vcpu->arch.pmu.irq_num == irq)
865                                 return false;
866                 }
867         }
868
869         return true;
870 }
871
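/*
 * Bind the VM to the host PMU whose perf type matches @pmu_id. This fails
 * with -EBUSY once the VM has run, or if an event filter was already set up
 * against a different PMU.
 */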
872 static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
873 {
874         struct kvm *kvm = vcpu->kvm;
875         struct arm_pmu_entry *entry;
876         struct arm_pmu *arm_pmu;
877         int ret = -ENXIO;
878
879         lockdep_assert_held(&kvm->arch.config_lock);
880         mutex_lock(&arm_pmus_lock);
881
882         list_for_each_entry(entry, &arm_pmus, entry) {
883                 arm_pmu = entry->arm_pmu;
884                 if (arm_pmu->pmu.type == pmu_id) {
885                         if (kvm_vm_has_ran_once(kvm) ||
886                             (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
887                                 ret = -EBUSY;
888                                 break;
889                         }
890
891                         kvm->arch.arm_pmu = arm_pmu;
892                         cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
893                         ret = 0;
894                         break;
895                 }
896         }
897
898         mutex_unlock(&arm_pmus_lock);
899         return ret;
900 }
901
902 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
903 {
904         struct kvm *kvm = vcpu->kvm;
905
906         lockdep_assert_held(&kvm->arch.config_lock);
907
908         if (!kvm_vcpu_has_pmu(vcpu))
909                 return -ENODEV;
910
911         if (vcpu->arch.pmu.created)
912                 return -EBUSY;
913
914         if (!kvm->arch.arm_pmu) {
915                 /* No PMU set, get the default one */
916                 kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
917                 if (!kvm->arch.arm_pmu)
918                         return -ENODEV;
919         }
920
921         switch (attr->attr) {
922         case KVM_ARM_VCPU_PMU_V3_IRQ: {
923                 int __user *uaddr = (int __user *)(long)attr->addr;
924                 int irq;
925
926                 if (!irqchip_in_kernel(kvm))
927                         return -EINVAL;
928
929                 if (get_user(irq, uaddr))
930                         return -EFAULT;
931
932                 /* The PMU overflow interrupt can be a PPI or a valid SPI. */
933                 if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
934                         return -EINVAL;
935
936                 if (!pmu_irq_is_valid(kvm, irq))
937                         return -EINVAL;
938
939                 if (kvm_arm_pmu_irq_initialized(vcpu))
940                         return -EBUSY;
941
942                 kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
943                 vcpu->arch.pmu.irq_num = irq;
944                 return 0;
945         }
946         case KVM_ARM_VCPU_PMU_V3_FILTER: {
947                 struct kvm_pmu_event_filter __user *uaddr;
948                 struct kvm_pmu_event_filter filter;
949                 int nr_events;
950
951                 nr_events = kvm_pmu_event_mask(kvm) + 1;
952
953                 uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
954
955                 if (copy_from_user(&filter, uaddr, sizeof(filter)))
956                         return -EFAULT;
957
958                 if (((u32)filter.base_event + filter.nevents) > nr_events ||
959                     (filter.action != KVM_PMU_EVENT_ALLOW &&
960                      filter.action != KVM_PMU_EVENT_DENY))
961                         return -EINVAL;
962
963                 if (kvm_vm_has_ran_once(kvm))
964                         return -EBUSY;
965
966                 if (!kvm->arch.pmu_filter) {
967                         kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
968                         if (!kvm->arch.pmu_filter)
969                                 return -ENOMEM;
970
971                         /*
972                          * The default depends on the first applied filter.
973                          * If it allows events, the default is to deny.
974                          * Conversely, if the first filter denies a set of
975                          * events, the default is to allow.
976                          */
977                         if (filter.action == KVM_PMU_EVENT_ALLOW)
978                                 bitmap_zero(kvm->arch.pmu_filter, nr_events);
979                         else
980                                 bitmap_fill(kvm->arch.pmu_filter, nr_events);
981                 }
982
983                 if (filter.action == KVM_PMU_EVENT_ALLOW)
984                         bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
985                 else
986                         bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
987
988                 return 0;
989         }
990         case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
991                 int __user *uaddr = (int __user *)(long)attr->addr;
992                 int pmu_id;
993
994                 if (get_user(pmu_id, uaddr))
995                         return -EFAULT;
996
997                 return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
998         }
999         case KVM_ARM_VCPU_PMU_V3_INIT:
1000                 return kvm_arm_pmu_v3_init(vcpu);
1001         }
1002
1003         return -ENXIO;
1004 }
1005
1006 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1007 {
1008         switch (attr->attr) {
1009         case KVM_ARM_VCPU_PMU_V3_IRQ: {
1010                 int __user *uaddr = (int __user *)(long)attr->addr;
1011                 int irq;
1012
1013                 if (!irqchip_in_kernel(vcpu->kvm))
1014                         return -EINVAL;
1015
1016                 if (!kvm_vcpu_has_pmu(vcpu))
1017                         return -ENODEV;
1018
1019                 if (!kvm_arm_pmu_irq_initialized(vcpu))
1020                         return -ENXIO;
1021
1022                 irq = vcpu->arch.pmu.irq_num;
1023                 return put_user(irq, uaddr);
1024         }
1025         }
1026
1027         return -ENXIO;
1028 }
1029
1030 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1031 {
1032         switch (attr->attr) {
1033         case KVM_ARM_VCPU_PMU_V3_IRQ:
1034         case KVM_ARM_VCPU_PMU_V3_INIT:
1035         case KVM_ARM_VCPU_PMU_V3_FILTER:
1036         case KVM_ARM_VCPU_PMU_V3_SET_PMU:
1037                 if (kvm_vcpu_has_pmu(vcpu))
1038                         return 0;
1039         }
1040
1041         return -ENXIO;
1042 }
1043
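/*
 * Highest PMUv3 version KVM exposes to a guest: the host's sanitised
 * ID_AA64DFR0_EL1.PMUVer field, capped at PMUv3p5.
 */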
1044 u8 kvm_arm_pmu_get_pmuver_limit(void)
1045 {
1046         u64 tmp;
1047
1048         tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
1049         tmp = cpuid_feature_cap_perfmon_field(tmp,
1050                                               ID_AA64DFR0_EL1_PMUVer_SHIFT,
1051                                               ID_AA64DFR0_EL1_PMUVer_V3P5);
1052         return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
1053 }