arch/arm64/kvm/arch_timer.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);

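/*
 * Default PPI INTIDs for each timer, matching the conventional GIC
 * assignments for the arch timer (INTID = 16 + the DT PPI number).
 */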
static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

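/*
 * CNTPOFF_EL2 (the hardware physical counter offset) is only provided
 * by FEAT_ECV, and KVM only makes use of it on VHE hosts.
 */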
static bool has_cntpoff(void)
{
	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}

static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

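/*
 * Total offset applied to this timer's view of the counter: the sum of
 * the per-VM and per-vCPU offsets, either of which may be absent.
 */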
static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 offset = 0;

	if (!ctxt)
		return 0;

	if (ctxt->offset.vm_offset)
		offset += *ctxt->offset.vm_offset;
	if (ctxt->offset.vcpu_offset)
		offset += *ctxt->offset.vcpu_offset;

	return offset;
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
	if (!ctxt->offset.vm_offset) {
		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
		return;
	}

	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

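/*
 * Work out which timers this vcpu drives directly in hardware and which
 * must be emulated. With NV, the current context (vEL2 vs vEL1/0)
 * selects the pair that gets the hardware; plain VHE backs both EL1
 * timers in hardware; nVHE backs only the vtimer and emulates the
 * ptimer.
 */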
static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return static_branch_unlikely(&userspace_irqchip_in_use) &&
		unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

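/*
 * Convert the distance between @val and the guest's current view of the
 * counter into nanoseconds, returning 0 if @val is already in the past.
 */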
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
		((timer_get_ctl(timer_ctx) &
		  (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

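/*
 * WFIT/WFET encode an absolute deadline (a virtual counter value) in
 * their Rt register; compute how far away that deadline is, using the
 * hyp or EL1 virtual timer context depending on where the vcpu runs.
 */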
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
						       : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = ctx->vcpu;

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

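/*
 * Compute the timer's output line level. If the timer is loaded on the
 * CPU, the hardware CTL register is authoritative; otherwise compare
 * CVAL against the guest's view of the counter.
 */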
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
			(cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	int ret;

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (!userspace_irqchip(vcpu->kvm)) {
		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					  timer_irq(timer_ctx),
					  timer_ctx->irq.level,
					  timer_ctx);
		WARN_ON(ret);
	}
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level) {
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
		return;
	}

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future.  If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
		u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so there is no need to zero the
		 * CNTVOFF_EL2 register, but doing so is actually
		 * useful when switching between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		/*
		 * Without CNTPOFF, the hardware CVAL was programmed
		 * with the offset folded in (see timer_restore_state()),
		 * so strip it again before saving the guest's view.
		 */
		if (!has_cntpoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer
	 * set to the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
		u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		set_cntvoff(timer_get_offset(ctx));
		write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		/*
		 * Without CNTPOFF, fold the offset into the hardware
		 * CVAL instead; timer_save_state() strips it back out.
		 */
		if (!has_cntpoff())
			cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;
	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = ctx->vcpu;
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress.  Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set _bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have both a valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);

		/*
		 * The virtual offset behaviour is "interesting", as it
		 * always applies when HCR_EL2.E2H==0, but only when
		 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
		 * track E2H when putting the HV timer in "direct" mode.
		 */
		if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
			struct arch_timer_offset *offs = &map->direct_vtimer->offset;

			if (vcpu_el2_e2h_is_set(vcpu))
				offs->vcpu_offset = NULL;
			else
				offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		}
	}
}

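/*
 * With VHE (E2H==1), the EL1 physical timer trap controls in CNTHCTL_EL2
 * (EL1PCTEN/EL1PTEN) sit at bits 10 and 11, i.e. the nVHE EL1PCTEN and
 * EL1PCEN positions shifted left by 10; hence the "<< 10" conversions
 * below.
 */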
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tpt, tpc;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tpt = tpc = false;

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions applies:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
	 */
	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7.  We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	return 0;
}

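/*
 * Per-vCPU timer context setup: wire up the per-VM counter offset (only
 * the EL1 vtimer uses the virtual offset; all other timers use the
 * physical one), the emulation hrtimer, and the host PPI backing the
 * context.
 */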
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->vcpu = vcpu;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ctxt->hrtimer.function = kvm_hrtimer_expire;

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	timer->bg_timer.function = kvm_bg_timer_expire;
}

void kvm_timer_init_vm(struct kvm *kvm)
{
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = default_ppi[i];
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

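/*
 * Userspace timer register accessors. Note that writing a CNT value is
 * implemented by adjusting the per-VM counter offset, and is ignored
 * once userspace has pinned the offset via the counter offset VM ioctl.
 */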
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *timer;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_TIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_vtimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_TIMER_CVAL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_PTIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_ptimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;

	default:
		return -1;
	}

	return 0;
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_TIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_TIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
	case KVM_REG_ARM_PTIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_PTIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_PTIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
	}
	return (u64)-1;
}

static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

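/*
 * Read a timer register on the guest's behalf. Emulated timers are read
 * straight from the in-memory state; a hardware-backed timer is saved
 * first so the read observes up-to-date values, then reloaded.
 */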
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

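/*
 * Discover the host timer PPIs. When the vgic cannot rely on hardware
 * deactivation, interpose a stacked irqchip on top of the timer IRQs so
 * that their active state can be managed in software.
 */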
static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both the vtimer and ptimer live in the same parent domain */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

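/*
 * Claim vgic ownership of each timer PPI and check that the configured
 * INTIDs are all distinct. Once they validate, the PPI assignment is
 * frozen for the lifetime of the VM.
 */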
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
		 */
		ppis |= BIT(irq);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ uniqueness before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

out:
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	return ret;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (lock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}