arch/arm64/kvm/pmu-emul.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Linaro Ltd.
4  * Author: Shannon Zhao <shannon.zhao@linaro.org>
5  */
6
7 #include <linux/cpu.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/list.h>
11 #include <linux/perf_event.h>
12 #include <linux/perf/arm_pmu.h>
13 #include <linux/uaccess.h>
14 #include <asm/kvm_emulate.h>
15 #include <kvm/arm_pmu.h>
16 #include <kvm/arm_vgic.h>
17
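/*
 * Setting bit 0 of perf_event_attr::config1 is how KVM asks the arm_pmu
 * driver to back the event with a 64bit hardware counter.
 */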
18 #define PERF_ATTR_CFG1_COUNTER_64BIT    BIT(0)
19
20 DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
21
22 static LIST_HEAD(arm_pmus);
23 static DEFINE_MUTEX(arm_pmus_lock);
24
25 static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
26 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
27
28 static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
29 {
30         return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
31 }
32
33 static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
34 {
35         return &vcpu->arch.pmu.pmc[cnt_idx];
36 }
37
38 static u32 kvm_pmu_event_mask(struct kvm *kvm)
39 {
40         unsigned int pmuver;
41
42         pmuver = kvm->arch.arm_pmu->pmuver;
43
44         switch (pmuver) {
45         case ID_AA64DFR0_EL1_PMUVer_IMP:
46                 return GENMASK(9, 0);
47         case ID_AA64DFR0_EL1_PMUVer_V3P1:
48         case ID_AA64DFR0_EL1_PMUVer_V3P4:
49         case ID_AA64DFR0_EL1_PMUVer_V3P5:
50         case ID_AA64DFR0_EL1_PMUVer_V3P7:
51                 return GENMASK(15, 0);
52         default:                /* Shouldn't be here, just for sanity */
53                 WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
54                 return 0;
55         }
56 }
57
58 /**
59  * kvm_pmc_is_64bit - determine if counter is 64bit
60  * @pmc: counter context
61  */
62 static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
63 {
64         return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
65                 kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
66 }
67
68 static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
69 {
70         u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);
71
72         return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
73                (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
74 }
75
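/*
 * Chaining is only possible from an even-numbered event counter whose
 * odd-numbered neighbour is also an event counter, and only while the
 * even counter overflows at 32 bits (i.e. PMCR_EL0.LP is clear).
 */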
76 static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
77 {
78         return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
79                 !kvm_pmc_has_64bit_overflow(pmc));
80 }
81
82 static u32 counter_index_to_reg(u64 idx)
83 {
84         return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
85 }
86
87 static u32 counter_index_to_evtreg(u64 idx)
88 {
89         return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
90 }
91
92 static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
93 {
94         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
95         u64 counter, reg, enabled, running;
96
97         reg = counter_index_to_reg(pmc->idx);
98         counter = __vcpu_sys_reg(vcpu, reg);
99
100         /*
101          * The real counter value is the value held in the counter register
102          * plus whatever the backing perf event has counted so far.
103          */
104         if (pmc->perf_event)
105                 counter += perf_event_read_value(pmc->perf_event, &enabled,
106                                                  &running);
107
108         if (!kvm_pmc_is_64bit(pmc))
109                 counter = lower_32_bits(counter);
110
111         return counter;
112 }
113
114 /**
115  * kvm_pmu_get_counter_value - get PMU counter value
116  * @vcpu: The vcpu pointer
117  * @select_idx: The counter index
118  */
119 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
120 {
121         if (!kvm_vcpu_has_pmu(vcpu))
122                 return 0;
123
124         return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
125 }
126
127 static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
128 {
129         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
130         u64 reg;
131
132         kvm_pmu_release_perf_event(pmc);
133
134         reg = counter_index_to_reg(pmc->idx);
135
136         if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
137             !force) {
138                 /*
139                  * Even with PMUv3p5, AArch32 cannot write to the top
140                  * 32bit of the counters. The only possible course of
141                  * action is to use PMCR.P, which will reset them to
142                  * 0 (the only use of the 'force' parameter).
143                  */
144                 val = (__vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32)) |
145                       lower_32_bits(val);
146         }
147
148         __vcpu_sys_reg(vcpu, reg) = val;
149
150         /* Recreate the perf event to reflect the updated sample_period */
151         kvm_pmu_create_perf_event(pmc);
152 }
153
154 /**
155  * kvm_pmu_set_counter_value - set PMU counter value
156  * @vcpu: The vcpu pointer
157  * @select_idx: The counter index
158  * @val: The counter value
159  */
160 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
161 {
162         if (!kvm_vcpu_has_pmu(vcpu))
163                 return;
164
165         kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
166 }
167
168 /**
169  * kvm_pmu_release_perf_event - remove the perf event
170  * @pmc: The PMU counter pointer
171  */
172 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
173 {
174         if (pmc->perf_event) {
175                 perf_event_disable(pmc->perf_event);
176                 perf_event_release_kernel(pmc->perf_event);
177                 pmc->perf_event = NULL;
178         }
179 }
180
181 /**
182  * kvm_pmu_stop_counter - stop PMU counter
183  * @pmc: The PMU counter pointer
184  *
185  * If this counter has been configured to monitor some event, release it here.
186  */
187 static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
188 {
189         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
190         u64 reg, val;
191
192         if (!pmc->perf_event)
193                 return;
194
195         val = kvm_pmu_get_pmc_value(pmc);
196
197         reg = counter_index_to_reg(pmc->idx);
198
199         __vcpu_sys_reg(vcpu, reg) = val;
200
201         kvm_pmu_release_perf_event(pmc);
202 }
203
204 /**
205  * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
206  * @vcpu: The vcpu pointer
207  *
208  */
209 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
210 {
211         int i;
212         struct kvm_pmu *pmu = &vcpu->arch.pmu;
213
214         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
215                 pmu->pmc[i].idx = i;
216 }
217
218 /**
219  * kvm_pmu_vcpu_reset - reset pmu state for cpu
220  * @vcpu: The vcpu pointer
221  *
222  */
223 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
224 {
225         unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
226         int i;
227
228         for_each_set_bit(i, &mask, 32)
229                 kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
230 }
231
232 /**
233  * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
234  * @vcpu: The vcpu pointer
235  *
236  */
237 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
238 {
239         int i;
240
241         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
242                 kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
243         irq_work_sync(&vcpu->arch.pmu.overflow_work);
244 }
245
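/*
 * PMCR_EL0.N is the number of implemented event counters; the cycle
 * counter (index 31) is always valid. For example, N == 6 yields
 * GENMASK(5, 0) | BIT(31).
 */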
246 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
247 {
248         u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
249
250         val &= ARMV8_PMU_PMCR_N_MASK;
251         if (val == 0)
252                 return BIT(ARMV8_PMU_CYCLE_IDX);
253         else
254                 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
255 }
256
257 /**
258  * kvm_pmu_enable_counter_mask - enable selected PMU counters
259  * @vcpu: The vcpu pointer
260  * @val: the value guest writes to PMCNTENSET register
261  *
262  * Call perf_event_enable to start counting the perf event
263  */
264 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
265 {
266         int i;
267         if (!kvm_vcpu_has_pmu(vcpu))
268                 return;
269
270         if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
271                 return;
272
273         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
274                 struct kvm_pmc *pmc;
275
276                 if (!(val & BIT(i)))
277                         continue;
278
279                 pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
280
281                 if (!pmc->perf_event) {
282                         kvm_pmu_create_perf_event(pmc);
283                 } else {
284                         perf_event_enable(pmc->perf_event);
285                         if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
286                                 kvm_debug("failed to enable perf event\n");
287                 }
288         }
289 }
290
291 /**
292  * kvm_pmu_disable_counter_mask - disable selected PMU counters
293  * @vcpu: The vcpu pointer
294  * @val: the value guest writes to PMCNTENCLR register
295  *
296  * Call perf_event_disable to stop counting the perf event
297  */
298 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
299 {
300         int i;
301
302         if (!kvm_vcpu_has_pmu(vcpu) || !val)
303                 return;
304
305         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
306                 struct kvm_pmc *pmc;
307
308                 if (!(val & BIT(i)))
309                         continue;
310
311                 pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
312
313                 if (pmc->perf_event)
314                         perf_event_disable(pmc->perf_event);
315         }
316 }
317
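/*
 * An overflow only raises the PMU interrupt line if the counter is
 * enabled (PMCNTENSET_EL0), its overflow interrupt is enabled
 * (PMINTENSET_EL1) and the PMU itself is enabled (PMCR_EL0.E).
 */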
318 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
319 {
320         u64 reg = 0;
321
322         if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
323                 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
324                 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
325                 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
326         }
327
328         return reg;
329 }
330
331 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
332 {
333         struct kvm_pmu *pmu = &vcpu->arch.pmu;
334         bool overflow;
335
336         if (!kvm_vcpu_has_pmu(vcpu))
337                 return;
338
339         overflow = !!kvm_pmu_overflow_status(vcpu);
340         if (pmu->irq_level == overflow)
341                 return;
342
343         pmu->irq_level = overflow;
344
345         if (likely(irqchip_in_kernel(vcpu->kvm))) {
346                 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
347                                               pmu->irq_num, overflow, pmu);
348                 WARN_ON(ret);
349         }
350 }
351
352 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
353 {
354         struct kvm_pmu *pmu = &vcpu->arch.pmu;
355         struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
356         bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
357
358         if (likely(irqchip_in_kernel(vcpu->kvm)))
359                 return false;
360
361         return pmu->irq_level != run_level;
362 }
363
364 /*
365  * Reflect the PMU overflow interrupt output level into the kvm_run structure
366  */
367 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
368 {
369         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
370
371         /* Populate the PMU bit of the device IRQ bitmap for user space */
372         regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
373         if (vcpu->arch.pmu.irq_level)
374                 regs->device_irq_level |= KVM_ARM_DEV_PMU;
375 }
376
377 /**
378  * kvm_pmu_flush_hwstate - flush pmu state to cpu
379  * @vcpu: The vcpu pointer
380  *
381  * Check if the PMU has overflowed while we were running in the host, and inject
382  * an interrupt if that was the case.
383  */
384 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
385 {
386         kvm_pmu_update_state(vcpu);
387 }
388
389 /**
390  * kvm_pmu_sync_hwstate - sync pmu state from cpu
391  * @vcpu: The vcpu pointer
392  *
393  * Check if the PMU has overflowed while we were running in the guest, and
394  * inject an interrupt if that was the case.
395  */
396 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
397 {
398         kvm_pmu_update_state(vcpu);
399 }
400
401 /*
402  * When the perf interrupt is an NMI, we cannot safely notify the vcpu
403  * corresponding to the event.
404  * This is why we need a callback to do it once outside of the NMI context.
405  */
406 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
407 {
408         struct kvm_vcpu *vcpu;
409
410         vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
411         kvm_vcpu_kick(vcpu);
412 }
413
414 /*
415  * Perform an increment on any of the counters described in @mask,
416  * generating the overflow if required, and propagate it as a chained
417  * event if possible.
418  */
419 static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
420                                       unsigned long mask, u32 event)
421 {
422         int i;
423
424         if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
425                 return;
426
427         /* Weed out disabled counters */
428         mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
429
430         for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
431                 struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
432                 u64 type, reg;
433
434                 /* Filter on event type */
435                 type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
436                 type &= kvm_pmu_event_mask(vcpu->kvm);
437                 if (type != event)
438                         continue;
439
440                 /* Increment this counter */
441                 reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
442                 if (!kvm_pmc_is_64bit(pmc))
443                         reg = lower_32_bits(reg);
444                 __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
445
446                 /* No overflow? move on */
447                 if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
448                         continue;
449
450                 /* Mark overflow */
451                 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
452
453                 if (kvm_pmu_counter_can_chain(pmc))
454                         kvm_pmu_counter_increment(vcpu, BIT(i + 1),
455                                                   ARMV8_PMUV3_PERFCTR_CHAIN);
456         }
457 }
458
459 /* Compute the sample period for a given counter value */
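/* e.g. a 32bit counter sitting at 0xfffffff0 is 0x10 events away from overflowing */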
460 static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
461 {
462         u64 val;
463
464         if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
465                 val = (-counter) & GENMASK(63, 0);
466         else
467                 val = (-counter) & GENMASK(31, 0);
468
469         return val;
470 }
471
472 /*
473  * When the perf event overflows, set the overflow status and inform the vcpu.
474  */
475 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
476                                   struct perf_sample_data *data,
477                                   struct pt_regs *regs)
478 {
479         struct kvm_pmc *pmc = perf_event->overflow_handler_context;
480         struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
481         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
482         int idx = pmc->idx;
483         u64 period;
484
485         cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
486
487         /*
488          * Reset the sample period to the architectural limit,
489          * i.e. the point where the counter overflows.
490          */
491         period = compute_period(pmc, local64_read(&perf_event->count));
492
493         local64_set(&perf_event->hw.period_left, 0);
494         perf_event->attr.sample_period = period;
495         perf_event->hw.sample_period = period;
496
497         __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
498
499         if (kvm_pmu_counter_can_chain(pmc))
500                 kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
501                                           ARMV8_PMUV3_PERFCTR_CHAIN);
502
503         if (kvm_pmu_overflow_status(vcpu)) {
504                 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
505
506                 if (!in_nmi())
507                         kvm_vcpu_kick(vcpu);
508                 else
509                         irq_work_queue(&vcpu->arch.pmu.overflow_work);
510         }
511
512         cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
513 }
514
515 /**
516  * kvm_pmu_software_increment - do software increment
517  * @vcpu: The vcpu pointer
518  * @val: the value guest writes to PMSWINC register
519  */
520 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
521 {
522         kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
523 }
524
525 /**
526  * kvm_pmu_handle_pmcr - handle PMCR register
527  * @vcpu: The vcpu pointer
528  * @val: the value guest writes to PMCR register
529  */
530 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
531 {
532         int i;
533
534         if (!kvm_vcpu_has_pmu(vcpu))
535                 return;
536
537         /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
538         if (!kvm_pmu_is_3p5(vcpu))
539                 val &= ~ARMV8_PMU_PMCR_LP;
540
541         /* The reset bits don't indicate any state, and shouldn't be saved. */
542         __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
543
544         if (val & ARMV8_PMU_PMCR_E) {
545                 kvm_pmu_enable_counter_mask(vcpu,
546                        __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
547         } else {
548                 kvm_pmu_disable_counter_mask(vcpu,
549                        __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
550         }
551
552         if (val & ARMV8_PMU_PMCR_C)
553                 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
554
555         if (val & ARMV8_PMU_PMCR_P) {
556                 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
557                 mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
558                 for_each_set_bit(i, &mask, 32)
559                         kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
560         }
561         kvm_vcpu_pmu_restore_guest(vcpu);
562 }
563
564 static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
565 {
566         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
567         return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
568                (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
569 }
570
571 /**
572  * kvm_pmu_create_perf_event - create a perf event for a counter
573  * @pmc: Counter context
574  */
575 static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
576 {
577         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
578         struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
579         struct perf_event *event;
580         struct perf_event_attr attr;
581         u64 eventsel, reg, data;
582
583         reg = counter_index_to_evtreg(pmc->idx);
584         data = __vcpu_sys_reg(vcpu, reg);
585
586         kvm_pmu_stop_counter(pmc);
587         if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
588                 eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
589         else
590                 eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
591
592         /*
593          * Neither SW increment nor chained events need to be backed
594          * by a perf event.
595          */
596         if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
597             eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
598                 return;
599
600         /*
601          * If we have a filter in place and the event isn't allowed, do
602          * not install a perf event either.
603          */
604         if (vcpu->kvm->arch.pmu_filter &&
605             !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
606                 return;
607
608         memset(&attr, 0, sizeof(struct perf_event_attr));
609         attr.type = arm_pmu->pmu.type;
610         attr.size = sizeof(attr);
611         attr.pinned = 1;
612         attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
613         attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
614         attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
615         attr.exclude_hv = 1; /* Don't count EL2 events */
616         attr.exclude_host = 1; /* Don't count host events */
617         attr.config = eventsel;
618
619         /*
620          * If counting with a 64bit counter, advertise it to the perf
621          * code, carefully dealing with the initial sample period
622          * which also depends on the overflow.
623          */
624         if (kvm_pmc_is_64bit(pmc))
625                 attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
626
627         attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));
628
629         event = perf_event_create_kernel_counter(&attr, -1, current,
630                                                  kvm_pmu_perf_overflow, pmc);
631
632         if (IS_ERR(event)) {
633                 pr_err_once("kvm: pmu event creation failed %ld\n",
634                             PTR_ERR(event));
635                 return;
636         }
637
638         pmc->perf_event = event;
639 }
640
641 /**
642  * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
643  * @vcpu: The vcpu pointer
644  * @data: The data guest writes to PMXEVTYPER_EL0
645  * @select_idx: The number of selected counter
646  *
647  * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
648  * event with the given hardware event number. Here we call the perf_event API
649  * to emulate this action and create a kernel perf event for it.
650  */
651 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
652                                     u64 select_idx)
653 {
654         struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
655         u64 reg, mask;
656
657         if (!kvm_vcpu_has_pmu(vcpu))
658                 return;
659
660         mask  =  ARMV8_PMU_EVTYPE_MASK;
661         mask &= ~ARMV8_PMU_EVTYPE_EVENT;
662         mask |= kvm_pmu_event_mask(vcpu->kvm);
663
664         reg = counter_index_to_evtreg(pmc->idx);
665
666         __vcpu_sys_reg(vcpu, reg) = data & mask;
667
668         kvm_pmu_create_perf_event(pmc);
669 }
670
671 void kvm_host_pmu_init(struct arm_pmu *pmu)
672 {
673         struct arm_pmu_entry *entry;
674
675         if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
676             pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
677                 return;
678
679         mutex_lock(&arm_pmus_lock);
680
681         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
682         if (!entry)
683                 goto out_unlock;
684
685         entry->arm_pmu = pmu;
686         list_add_tail(&entry->entry, &arm_pmus);
687
688         if (list_is_singular(&arm_pmus))
689                 static_branch_enable(&kvm_arm_pmu_available);
690
691 out_unlock:
692         mutex_unlock(&arm_pmus_lock);
693 }
694
695 static struct arm_pmu *kvm_pmu_probe_armpmu(void)
696 {
697         struct arm_pmu *tmp, *pmu = NULL;
698         struct arm_pmu_entry *entry;
699         int cpu;
700
701         mutex_lock(&arm_pmus_lock);
702
703         cpu = smp_processor_id();
704         list_for_each_entry(entry, &arm_pmus, entry) {
705                 tmp = entry->arm_pmu;
706
707                 if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
708                         pmu = tmp;
709                         break;
710                 }
711         }
712
713         mutex_unlock(&arm_pmus_lock);
714
715         return pmu;
716 }
717
718 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
719 {
720         unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
721         u64 val, mask = 0;
722         int base, i, nr_events;
723
724         if (!kvm_vcpu_has_pmu(vcpu))
725                 return 0;
726
727         if (!pmceid1) {
728                 val = read_sysreg(pmceid0_el0);
729                 /* always support CHAIN */
730                 val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
731                 base = 0;
732         } else {
733                 val = read_sysreg(pmceid1_el0);
734                 /*
735                  * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
736                  * as RAZ
737                  */
738                 if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
739                         val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
740                 base = 32;
741         }
742
743         if (!bmap)
744                 return val;
745
746         nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
747
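        /*
         * Narrow the advertised events to those the filter allows: the low
         * 32 bits of each PMCEID register describe common events starting
         * at 'base', while the high 32 bits describe the matching
         * 0x4000-based extended event range.
         */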
748         for (i = 0; i < 32; i += 8) {
749                 u64 byte;
750
751                 byte = bitmap_get_value8(bmap, base + i);
752                 mask |= byte << i;
753                 if (nr_events >= (0x4000 + base + 32)) {
754                         byte = bitmap_get_value8(bmap, 0x4000 + base + i);
755                         mask |= byte << (32 + i);
756                 }
757         }
758
759         return val & mask;
760 }
761
762 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
763 {
764         if (!kvm_vcpu_has_pmu(vcpu))
765                 return 0;
766
767         if (!vcpu->arch.pmu.created)
768                 return -EINVAL;
769
770         /*
771          * A valid interrupt configuration for the PMU is either to have a
772          * properly configured interrupt number and use an in-kernel
773          * irqchip, or to have no in-kernel GIC and no IRQ set.
774          */
775         if (irqchip_in_kernel(vcpu->kvm)) {
776                 int irq = vcpu->arch.pmu.irq_num;
777                 /*
778                  * If we are using an in-kernel vgic, at this point we know
779                  * the vgic will be initialized, so we can check the PMU irq
780                  * number against the dimensions of the vgic and make sure
781                  * it's valid.
782                  */
783                 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
784                         return -EINVAL;
785         } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
786                    return -EINVAL;
787         }
788
789         /* One-off reload of the PMU on first run */
790         kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
791
792         return 0;
793 }
794
795 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
796 {
797         if (irqchip_in_kernel(vcpu->kvm)) {
798                 int ret;
799
800                 /*
801                  * If using the PMU with an in-kernel virtual GIC
802                  * implementation, we require the GIC to be already
803                  * initialized when initializing the PMU.
804                  */
805                 if (!vgic_initialized(vcpu->kvm))
806                         return -ENODEV;
807
808                 if (!kvm_arm_pmu_irq_initialized(vcpu))
809                         return -ENXIO;
810
811                 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
812                                          &vcpu->arch.pmu);
813                 if (ret)
814                         return ret;
815         }
816
817         init_irq_work(&vcpu->arch.pmu.overflow_work,
818                       kvm_pmu_perf_overflow_notify_vcpu);
819
820         vcpu->arch.pmu.created = true;
821         return 0;
822 }
823
824 /*
825  * For a given VM, the interrupt type must be the same for each vcpu.
826  * As a PPI, the interrupt number is the same for all vcpus,
827  * while as an SPI it must be a separate number per vcpu.
828  */
829 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
830 {
831         unsigned long i;
832         struct kvm_vcpu *vcpu;
833
834         kvm_for_each_vcpu(i, vcpu, kvm) {
835                 if (!kvm_arm_pmu_irq_initialized(vcpu))
836                         continue;
837
838                 if (irq_is_ppi(irq)) {
839                         if (vcpu->arch.pmu.irq_num != irq)
840                                 return false;
841                 } else {
842                         if (vcpu->arch.pmu.irq_num == irq)
843                                 return false;
844                 }
845         }
846
847         return true;
848 }
849
850 static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
851 {
852         struct kvm *kvm = vcpu->kvm;
853         struct arm_pmu_entry *entry;
854         struct arm_pmu *arm_pmu;
855         int ret = -ENXIO;
856
857         lockdep_assert_held(&kvm->arch.config_lock);
858         mutex_lock(&arm_pmus_lock);
859
860         list_for_each_entry(entry, &arm_pmus, entry) {
861                 arm_pmu = entry->arm_pmu;
862                 if (arm_pmu->pmu.type == pmu_id) {
863                         if (kvm_vm_has_ran_once(kvm) ||
864                             (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
865                                 ret = -EBUSY;
866                                 break;
867                         }
868
869                         kvm->arch.arm_pmu = arm_pmu;
870                         cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
871                         ret = 0;
872                         break;
873                 }
874         }
875
876         mutex_unlock(&arm_pmus_lock);
877         return ret;
878 }
879
880 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
881 {
882         struct kvm *kvm = vcpu->kvm;
883
884         lockdep_assert_held(&kvm->arch.config_lock);
885
886         if (!kvm_vcpu_has_pmu(vcpu))
887                 return -ENODEV;
888
889         if (vcpu->arch.pmu.created)
890                 return -EBUSY;
891
892         if (!kvm->arch.arm_pmu) {
893                 /*
894                  * No PMU set, get the default one.
895                  *
896                  * The observant among you will notice that the supported_cpus
897                  * mask does not get updated for the default PMU even though it
898                  * is quite possible the selected instance supports only a
899                  * subset of cores in the system. This is intentional, and
900                  * upholds the preexisting behavior on heterogeneous systems
901                  * where vCPUs can be scheduled on any core but the guest
902                  * counters could stop working.
903                  */
904                 kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
905                 if (!kvm->arch.arm_pmu)
906                         return -ENODEV;
907         }
908
909         switch (attr->attr) {
910         case KVM_ARM_VCPU_PMU_V3_IRQ: {
911                 int __user *uaddr = (int __user *)(long)attr->addr;
912                 int irq;
913
914                 if (!irqchip_in_kernel(kvm))
915                         return -EINVAL;
916
917                 if (get_user(irq, uaddr))
918                         return -EFAULT;
919
920                 /* The PMU overflow interrupt can be a PPI or a valid SPI. */
921                 if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
922                         return -EINVAL;
923
924                 if (!pmu_irq_is_valid(kvm, irq))
925                         return -EINVAL;
926
927                 if (kvm_arm_pmu_irq_initialized(vcpu))
928                         return -EBUSY;
929
930                 kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
931                 vcpu->arch.pmu.irq_num = irq;
932                 return 0;
933         }
934         case KVM_ARM_VCPU_PMU_V3_FILTER: {
935                 struct kvm_pmu_event_filter __user *uaddr;
936                 struct kvm_pmu_event_filter filter;
937                 int nr_events;
938
939                 nr_events = kvm_pmu_event_mask(kvm) + 1;
940
941                 uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
942
943                 if (copy_from_user(&filter, uaddr, sizeof(filter)))
944                         return -EFAULT;
945
946                 if (((u32)filter.base_event + filter.nevents) > nr_events ||
947                     (filter.action != KVM_PMU_EVENT_ALLOW &&
948                      filter.action != KVM_PMU_EVENT_DENY))
949                         return -EINVAL;
950
951                 if (kvm_vm_has_ran_once(kvm))
952                         return -EBUSY;
953
954                 if (!kvm->arch.pmu_filter) {
955                         kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
956                         if (!kvm->arch.pmu_filter)
957                                 return -ENOMEM;
958
959                         /*
960                          * The default depends on the first applied filter.
961                          * If it allows events, the default is to deny.
962                          * Conversely, if the first filter denies a set of
963                          * events, the default is to allow.
964                          */
965                         if (filter.action == KVM_PMU_EVENT_ALLOW)
966                                 bitmap_zero(kvm->arch.pmu_filter, nr_events);
967                         else
968                                 bitmap_fill(kvm->arch.pmu_filter, nr_events);
969                 }
970
971                 if (filter.action == KVM_PMU_EVENT_ALLOW)
972                         bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
973                 else
974                         bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
975
976                 return 0;
977         }
978         case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
979                 int __user *uaddr = (int __user *)(long)attr->addr;
980                 int pmu_id;
981
982                 if (get_user(pmu_id, uaddr))
983                         return -EFAULT;
984
985                 return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
986         }
987         case KVM_ARM_VCPU_PMU_V3_INIT:
988                 return kvm_arm_pmu_v3_init(vcpu);
989         }
990
991         return -ENXIO;
992 }
993
994 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
995 {
996         switch (attr->attr) {
997         case KVM_ARM_VCPU_PMU_V3_IRQ: {
998                 int __user *uaddr = (int __user *)(long)attr->addr;
999                 int irq;
1000
1001                 if (!irqchip_in_kernel(vcpu->kvm))
1002                         return -EINVAL;
1003
1004                 if (!kvm_vcpu_has_pmu(vcpu))
1005                         return -ENODEV;
1006
1007                 if (!kvm_arm_pmu_irq_initialized(vcpu))
1008                         return -ENXIO;
1009
1010                 irq = vcpu->arch.pmu.irq_num;
1011                 return put_user(irq, uaddr);
1012         }
1013         }
1014
1015         return -ENXIO;
1016 }
1017
1018 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1019 {
1020         switch (attr->attr) {
1021         case KVM_ARM_VCPU_PMU_V3_IRQ:
1022         case KVM_ARM_VCPU_PMU_V3_INIT:
1023         case KVM_ARM_VCPU_PMU_V3_FILTER:
1024         case KVM_ARM_VCPU_PMU_V3_SET_PMU:
1025                 if (kvm_vcpu_has_pmu(vcpu))
1026                         return 0;
1027         }
1028
1029         return -ENXIO;
1030 }
1031
1032 u8 kvm_arm_pmu_get_pmuver_limit(void)
1033 {
1034         u64 tmp;
1035
1036         tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
1037         tmp = cpuid_feature_cap_perfmon_field(tmp,
1038                                               ID_AA64DFR0_EL1_PMUVer_SHIFT,
1039                                               ID_AA64DFR0_EL1_PMUVer_V3P5);
1040         return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
1041 }