arch/arm64/kvm/pmu-emul.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

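/*
 * Static key advertising host PMU support to the rest of KVM. It is
 * flipped on by kvm_host_pmu_init() below once a host PMU driver with an
 * architected (non IMP DEF) PMUv3 implementation has registered.
 */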
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

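/*
 * Bit 0 of perf_event_attr.config1. It is set on events created for a
 * chained counter pair to request a 64-bit hardware counter from the
 * host arm_pmu driver.
 */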
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
        switch (kvm->arch.pmuver) {
        case ID_AA64DFR0_PMUVER_8_0:
                return GENMASK(9, 0);
        case ID_AA64DFR0_PMUVER_8_1:
        case ID_AA64DFR0_PMUVER_8_4:
        case ID_AA64DFR0_PMUVER_8_5:
        case ID_AA64DFR0_PMUVER_8_7:
                return GENMASK(15, 0);
        default:                /* Shouldn't be here, just for sanity */
                WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
                return 0;
        }
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
        return (select_idx == ARMV8_PMU_CYCLE_IDX &&
                __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu;
        struct kvm_vcpu_arch *vcpu_arch;

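        /*
         * pmc->idx is also the index into the pmc[] array of the owning
         * kvm_pmu, so stepping back by idx lands on pmc[0]; from there,
         * walk up through the enclosing kvm_pmu and kvm_vcpu_arch to
         * recover the vcpu.
         */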
        pmc -= pmc->idx;
        pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
        vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
        return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
        struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

        return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
        return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
        if (kvm_pmu_pmc_is_chained(pmc) &&
            kvm_pmu_idx_is_high_counter(pmc->idx))
                return pmc - 1;

        return pmc;
}
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
        if (kvm_pmu_idx_is_high_counter(pmc->idx))
                return pmc - 1;
        else
                return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
        u64 eventsel, reg;

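        /*
         * The CHAIN event is only ever programmed on the odd counter of a
         * pair, so look at the odd index whichever half was passed in.
         */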
        select_idx |= 0x1;

        if (select_idx == ARMV8_PMU_CYCLE_IDX)
                return false;

        reg = PMEVTYPER0_EL0 + select_idx;
        eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

        return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
                                          struct kvm_pmc *pmc)
{
        u64 counter, counter_high, reg, enabled, running;

        if (kvm_pmu_pmc_is_chained(pmc)) {
                pmc = kvm_pmu_get_canonical_pmc(pmc);
                reg = PMEVCNTR0_EL0 + pmc->idx;

                counter = __vcpu_sys_reg(vcpu, reg);
                counter_high = __vcpu_sys_reg(vcpu, reg + 1);

                counter = lower_32_bits(counter) | (counter_high << 32);
        } else {
                reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
                      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
                counter = __vcpu_sys_reg(vcpu, reg);
        }

        /*
         * The real counter value is equal to the value of the counter
         * register plus the value the perf event has counted.
         */
        if (pmc->perf_event)
                counter += perf_event_read_value(pmc->perf_event, &enabled,
                                                 &running);

        return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
        u64 counter;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc = &pmu->pmc[select_idx];

        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

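        /*
         * The odd half of a chained pair reports the upper 32 bits of the
         * combined value, the cycle counter returns the full 64-bit value,
         * and any other counter is truncated to its low 32 bits.
         */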
        if (kvm_pmu_pmc_is_chained(pmc) &&
            kvm_pmu_idx_is_high_counter(select_idx))
                counter = upper_32_bits(counter);
        else if (select_idx != ARMV8_PMU_CYCLE_IDX)
                counter = lower_32_bits(counter);

        return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
        u64 reg;

        reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
              ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
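        /*
         * The value the guest reads back is the shadow register plus
         * whatever the perf event has accumulated, so adjust the shadow
         * register by the difference to make that sum equal to @val.
         */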
        __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

        /* Recreate the perf event to reflect the updated sample_period */
        kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
        pmc = kvm_pmu_get_canonical_pmc(pmc);
        if (pmc->perf_event) {
                perf_event_disable(pmc->perf_event);
                perf_event_release_kernel(pmc->perf_event);
                pmc->perf_event = NULL;
        }
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
        u64 counter, reg, val;

        pmc = kvm_pmu_get_canonical_pmc(pmc);
        if (!pmc->perf_event)
                return;

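        /*
         * Sample the up-to-date value (shadow register plus perf event
         * delta) and write it back into the shadow register(s) so no
         * counts are lost when the perf event is released.
         */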
        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

        if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
                reg = PMCCNTR_EL0;
                val = counter;
        } else {
                reg = PMEVCNTR0_EL0 + pmc->idx;
                val = lower_32_bits(counter);
        }

        __vcpu_sys_reg(vcpu, reg) = val;

        if (kvm_pmu_pmc_is_chained(pmc))
                __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

        kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;

        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
                pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
        unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;

        for_each_set_bit(i, &mask, 32)
                kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

        bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;

        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
                kvm_pmu_release_perf_event(&pmu->pmc[i]);
        irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

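/*
 * The valid counters are the PMCR_EL0.N event counters exposed to the
 * guest, plus the cycle counter, which is always implemented.
 */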
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
        u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

        val &= ARMV8_PMU_PMCR_N_MASK;
        if (val == 0)
                return BIT(ARMV8_PMU_CYCLE_IDX);
        else
                return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
                return;

        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
                if (!(val & BIT(i)))
                        continue;

                pmc = &pmu->pmc[i];

                /* A change in the enable state may affect the chain state */
                kvm_pmu_update_pmc_chained(vcpu, i);
                kvm_pmu_create_perf_event(vcpu, i);

                /* At this point, pmc must be the canonical */
                if (pmc->perf_event) {
                        perf_event_enable(pmc->perf_event);
                        if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
                                kvm_debug("fail to enable perf event\n");
                }
        }
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        if (!val)
                return;

        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
                if (!(val & BIT(i)))
                        continue;

                pmc = &pmu->pmc[i];

                /* A change in the enable state may affect the chain state */
                kvm_pmu_update_pmc_chained(vcpu, i);
                kvm_pmu_create_perf_event(vcpu, i);

                /* At this point, pmc must be the canonical */
                if (pmc->perf_event)
                        perf_event_disable(pmc->perf_event);
        }
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
        u64 reg = 0;

        if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
                reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
                reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
                reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
        }

        return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        bool overflow;

        if (!kvm_vcpu_has_pmu(vcpu))
                return;

        overflow = !!kvm_pmu_overflow_status(vcpu);
        if (pmu->irq_level == overflow)
                return;

        pmu->irq_level = overflow;

        if (likely(irqchip_in_kernel(vcpu->kvm))) {
                int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
                                              pmu->irq_num, overflow, pmu);
                WARN_ON(ret);
        }
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
        bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

        if (likely(irqchip_in_kernel(vcpu->kvm)))
                return false;

        return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
        struct kvm_sync_regs *regs = &vcpu->run->s.regs;

        /* Populate the PMU overflow bit for user space */
        regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
        if (vcpu->arch.pmu.irq_level)
                regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
        kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
        kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
        struct kvm_vcpu *vcpu;
        struct kvm_pmu *pmu;

        pmu = container_of(work, struct kvm_pmu, overflow_work);
        vcpu = kvm_pmc_to_vcpu(pmu->pmc);

        kvm_vcpu_kick(vcpu);
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
                                  struct perf_sample_data *data,
                                  struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
        struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
        int idx = pmc->idx;
        u64 period;

        cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

        /*
         * Reset the sample period to the architectural limit,
         * i.e. the point where the counter overflows.
         */
        period = -(local64_read(&perf_event->count));

        if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
                period &= GENMASK(31, 0);

        local64_set(&perf_event->hw.period_left, 0);
        perf_event->attr.sample_period = period;
        perf_event->hw.sample_period = period;

        __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

        if (kvm_pmu_overflow_status(vcpu)) {
                kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

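                /*
                 * kvm_vcpu_kick() cannot be called safely from NMI
                 * context, so defer the kick to the irq_work set up at
                 * PMU init time.
                 */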
                if (!in_nmi())
                        kvm_vcpu_kick(vcpu);
                else
                        irq_work_queue(&vcpu->arch.pmu.overflow_work);
        }

        cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;

        if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
                return;

        /* Weed out disabled counters */
        val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

        for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
                u64 type, reg;

                if (!(val & BIT(i)))
                        continue;

                /* PMSWINC only applies to ... SW_INC! */
                type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
                type &= kvm_pmu_event_mask(vcpu->kvm);
                if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
                        continue;

                /* increment this even SW_INC counter */
                reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
                reg = lower_32_bits(reg);
                __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

                if (reg) /* no overflow on the low part */
                        continue;

                if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
                        /* increment the high counter */
                        reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
                        reg = lower_32_bits(reg);
                        __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
                        if (!reg) /* mark overflow on the high counter */
                                __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
                } else {
                        /* mark overflow on low counter */
                        __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
                }
        }
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
        int i;

        if (val & ARMV8_PMU_PMCR_E) {
                kvm_pmu_enable_counter_mask(vcpu,
                       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
        } else {
                kvm_pmu_disable_counter_mask(vcpu,
                       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
        }

        if (val & ARMV8_PMU_PMCR_C)
                kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

        if (val & ARMV8_PMU_PMCR_P) {
                unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
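                /*
                 * PMCR_EL0.P resets only the event counters; the cycle
                 * counter is reset by PMCR_EL0.C, handled just above.
                 */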
                mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
                for_each_set_bit(i, &mask, 32)
                        kvm_pmu_set_counter_value(vcpu, i, 0);
        }
}

static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
        return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
               (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;
        struct perf_event *event;
        struct perf_event_attr attr;
        u64 eventsel, counter, reg, data;

        /*
         * For chained counters the event type and filtering attributes are
         * obtained from the low/even counter. We also use this counter to
         * determine if the event is enabled/disabled.
         */
        pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

        reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
              ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
        data = __vcpu_sys_reg(vcpu, reg);

        kvm_pmu_stop_counter(vcpu, pmc);
        if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
                eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
        else
                eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

        /* Software increment event doesn't need to be backed by a perf event */
        if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
                return;

        /*
         * If we have a filter in place and the event isn't allowed, do
         * not install a perf event either.
         */
        if (vcpu->kvm->arch.pmu_filter &&
            !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
                return;

        memset(&attr, 0, sizeof(struct perf_event_attr));
        attr.type = PERF_TYPE_RAW;
        attr.size = sizeof(attr);
        attr.pinned = 1;
        attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
        attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
        attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
        attr.exclude_hv = 1; /* Don't count EL2 events */
        attr.exclude_host = 1; /* Don't count host events */
        attr.config = eventsel;

        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

        if (kvm_pmu_pmc_is_chained(pmc)) {
                /*
                 * The initial sample period (overflow count) of an event. For
                 * chained counters we only support overflow interrupts on the
                 * high counter.
                 */
                attr.sample_period = (-counter) & GENMASK(63, 0);
                attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

                event = perf_event_create_kernel_counter(&attr, -1, current,
                                                         kvm_pmu_perf_overflow,
                                                         pmc + 1);
        } else {
                /* The initial sample period (overflow count) of an event. */
                if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
                        attr.sample_period = (-counter) & GENMASK(63, 0);
                else
                        attr.sample_period = (-counter) & GENMASK(31, 0);

                event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 kvm_pmu_perf_overflow, pmc);
        }

        if (IS_ERR(event)) {
                pr_err_once("kvm: pmu event creation failed %ld\n",
                            PTR_ERR(event));
                return;
        }

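        /* The perf event always hangs off the canonical (low/even) pmc */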
        pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
        bool new_state, old_state;

        old_state = kvm_pmu_pmc_is_chained(pmc);
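        /*
         * A pair counts as chained when its odd counter is programmed with
         * the CHAIN event and that odd counter is enabled.
         */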
        new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
                    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

        if (old_state == new_state)
                return;

        canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
        kvm_pmu_stop_counter(vcpu, canonical_pmc);
        if (new_state) {
                /*
                 * During promotion from !chained to chained we must ensure
                 * the adjacent counter is stopped and its event destroyed
                 */
                kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
                set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
                return;
        }
        clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When the OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event
 * API to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
                                    u64 select_idx)
{
        u64 reg, mask;

        mask  =  ARMV8_PMU_EVTYPE_MASK;
        mask &= ~ARMV8_PMU_EVTYPE_EVENT;
        mask |= kvm_pmu_event_mask(vcpu->kvm);

        reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
              ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

        __vcpu_sys_reg(vcpu, reg) = data & mask;

        kvm_pmu_update_pmc_chained(vcpu, select_idx);
        kvm_pmu_create_perf_event(vcpu, select_idx);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
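        /*
         * Advertise PMU support once, for the first host PMU that exposes
         * an architected (non IMP DEF) PMU version, unless we are running
         * in protected mode.
         */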
        if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
            !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
                static_branch_enable(&kvm_arm_pmu_available);
}

static int kvm_pmu_probe_pmuver(void)
{
        struct perf_event_attr attr = { };
        struct perf_event *event;
        struct arm_pmu *pmu;
        int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;

        /*
         * Create a dummy event that only counts user cycles. As we'll never
         * leave this function with the event being live, it will never
         * count anything. But it allows us to probe some of the PMU
         * details. Yes, this is terrible.
         */
        attr.type = PERF_TYPE_RAW;
        attr.size = sizeof(attr);
        attr.pinned = 1;
        attr.disabled = 0;
        attr.exclude_user = 0;
        attr.exclude_kernel = 1;
        attr.exclude_hv = 1;
        attr.exclude_host = 1;
        attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
        attr.sample_period = GENMASK(63, 0);

        event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 kvm_pmu_perf_overflow, &attr);

        if (IS_ERR(event)) {
                pr_err_once("kvm: pmu event creation failed %ld\n",
                            PTR_ERR(event));
                return ID_AA64DFR0_PMUVER_IMP_DEF;
        }

        if (event->pmu) {
                pmu = to_arm_pmu(event->pmu);
                if (pmu->pmuver)
                        pmuver = pmu->pmuver;
        }

        perf_event_disable(event);
        perf_event_release_kernel(event);

        return pmuver;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
        unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
        u64 val, mask = 0;
        int base, i, nr_events;

        if (!pmceid1) {
                val = read_sysreg(pmceid0_el0);
                base = 0;
        } else {
                val = read_sysreg(pmceid1_el0);
                /*
                 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
                 * as RAZ
                 */
                if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
                        val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
                base = 32;
        }

        if (!bmap)
                return val;

        nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

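        /*
         * Mask out events denied by the filter. Bits [31:0] of PMCEIDx
         * describe the base event range, bits [63:32] the extended range
         * starting at event 0x4000.
         */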
        for (i = 0; i < 32; i += 8) {
                u64 byte;

                byte = bitmap_get_value8(bmap, base + i);
                mask |= byte << i;
                if (nr_events >= (0x4000 + base + 32)) {
                        byte = bitmap_get_value8(bmap, 0x4000 + base + i);
                        mask |= byte << (32 + i);
                }
        }

        return val & mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
        if (!kvm_vcpu_has_pmu(vcpu))
                return 0;

        if (!vcpu->arch.pmu.created)
                return -EINVAL;

        /*
         * A valid interrupt configuration for the PMU is either to have a
         * properly configured interrupt number with an in-kernel irqchip,
         * or to have neither an in-kernel GIC nor a configured IRQ.
         */
        if (irqchip_in_kernel(vcpu->kvm)) {
                int irq = vcpu->arch.pmu.irq_num;
                /*
                 * If we are using an in-kernel vgic, at this point we know
                 * the vgic will be initialized, so we can check the PMU irq
                 * number against the dimensions of the vgic and make sure
                 * it's valid.
                 */
                if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
                        return -EINVAL;
        } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
                return -EINVAL;
        }

        /* One-off reload of the PMU on first run */
        kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

        return 0;
}

871 {
872         if (irqchip_in_kernel(vcpu->kvm)) {
873                 int ret;
874
875                 /*
876                  * If using the PMU with an in-kernel virtual GIC
877                  * implementation, we require the GIC to be already
878                  * initialized when initializing the PMU.
879                  */
880                 if (!vgic_initialized(vcpu->kvm))
881                         return -ENODEV;
882
883                 if (!kvm_arm_pmu_irq_initialized(vcpu))
884                         return -ENXIO;
885
886                 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
887                                          &vcpu->arch.pmu);
888                 if (ret)
889                         return ret;
890         }
891
892         init_irq_work(&vcpu->arch.pmu.overflow_work,
893                       kvm_pmu_perf_overflow_notify_vcpu);
894
895         vcpu->arch.pmu.created = true;
896         return 0;
897 }
898
899 /*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
        unsigned long i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (!kvm_arm_pmu_irq_initialized(vcpu))
                        continue;

                if (irq_is_ppi(irq)) {
                        if (vcpu->arch.pmu.irq_num != irq)
                                return false;
                } else {
                        if (vcpu->arch.pmu.irq_num == irq)
                                return false;
                }
        }

        return true;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
        if (!kvm_vcpu_has_pmu(vcpu))
                return -ENODEV;

        if (vcpu->arch.pmu.created)
                return -EBUSY;

        if (!vcpu->kvm->arch.pmuver)
                vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

        if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
                return -ENODEV;

        switch (attr->attr) {
        case KVM_ARM_VCPU_PMU_V3_IRQ: {
                int __user *uaddr = (int __user *)(long)attr->addr;
                int irq;

                if (!irqchip_in_kernel(vcpu->kvm))
                        return -EINVAL;

                if (get_user(irq, uaddr))
                        return -EFAULT;

                /* The PMU overflow interrupt can be a PPI or a valid SPI. */
                if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
                        return -EINVAL;

                if (!pmu_irq_is_valid(vcpu->kvm, irq))
                        return -EINVAL;

                if (kvm_arm_pmu_irq_initialized(vcpu))
                        return -EBUSY;

                kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
                vcpu->arch.pmu.irq_num = irq;
                return 0;
        }
        case KVM_ARM_VCPU_PMU_V3_FILTER: {
                struct kvm_pmu_event_filter __user *uaddr;
                struct kvm_pmu_event_filter filter;
                int nr_events;

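                /*
                 * The filter spans every event number the guest can encode:
                 * 0x400 events for PMUv3 for Armv8.0, 0x10000 from Armv8.1
                 * onwards (see kvm_pmu_event_mask()).
                 */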
                nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

                uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

                if (copy_from_user(&filter, uaddr, sizeof(filter)))
                        return -EFAULT;

                if (((u32)filter.base_event + filter.nevents) > nr_events ||
                    (filter.action != KVM_PMU_EVENT_ALLOW &&
                     filter.action != KVM_PMU_EVENT_DENY))
                        return -EINVAL;

                mutex_lock(&vcpu->kvm->lock);

                if (!vcpu->kvm->arch.pmu_filter) {
                        vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
                        if (!vcpu->kvm->arch.pmu_filter) {
                                mutex_unlock(&vcpu->kvm->lock);
                                return -ENOMEM;
                        }

                        /*
                         * The default depends on the first applied filter.
                         * If it allows events, the default is to deny.
                         * Conversely, if the first filter denies a set of
                         * events, the default is to allow.
                         */
                        if (filter.action == KVM_PMU_EVENT_ALLOW)
                                bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
                        else
                                bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
                }

                if (filter.action == KVM_PMU_EVENT_ALLOW)
                        bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
                else
                        bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

                mutex_unlock(&vcpu->kvm->lock);

                return 0;
        }
        case KVM_ARM_VCPU_PMU_V3_INIT:
                return kvm_arm_pmu_v3_init(vcpu);
        }

        return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
        switch (attr->attr) {
        case KVM_ARM_VCPU_PMU_V3_IRQ: {
                int __user *uaddr = (int __user *)(long)attr->addr;
                int irq;

                if (!irqchip_in_kernel(vcpu->kvm))
                        return -EINVAL;

                if (!kvm_vcpu_has_pmu(vcpu))
                        return -ENODEV;

                if (!kvm_arm_pmu_irq_initialized(vcpu))
                        return -ENXIO;

                irq = vcpu->arch.pmu.irq_num;
                return put_user(irq, uaddr);
        }
        }

        return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
        switch (attr->attr) {
        case KVM_ARM_VCPU_PMU_V3_IRQ:
        case KVM_ARM_VCPU_PMU_V3_INIT:
        case KVM_ARM_VCPU_PMU_V3_FILTER:
                if (kvm_vcpu_has_pmu(vcpu))
                        return 0;
        }

        return -ENXIO;
}