/*
 * VGIC MMIO handling functions
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/bitops.h>
#include <linux/bsearch.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <kvm/iodev.h>
#include <kvm/arm_arch_timer.h>
#include <kvm/arm_vgic.h>

#include "vgic.h"
#include "vgic-mmio.h"

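/*
 * Trivial handlers for registers with fixed behaviour: read-as-zero,
 * read-as-one and write-ignore.
 */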
unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
				 gpa_t addr, unsigned int len)
{
	return 0;
}

unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
				 gpa_t addr, unsigned int len)
{
	return -1UL;
}

void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
			unsigned int len, unsigned long val)
{
	/* Ignore */
}

/*
 * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
 * of the enabled bit, so there is only one function for both here.
 */
unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
				    gpa_t addr, unsigned int len)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	u32 value = 0;
	int i;

	/* Loop over all IRQs affected by this read */
	for (i = 0; i < len * 8; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		if (irq->enabled)
			value |= (1U << i);

		vgic_put_irq(vcpu->kvm, irq);
	}

	return value;
}

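/*
 * A write to GICD_ISENABLER sets the enable bit for the interrupts whose
 * bits are 1 in the written value and queues any interrupt that has become
 * deliverable as a result.
 */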
void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
			     gpa_t addr, unsigned int len,
			     unsigned long val)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	int i;
	unsigned long flags;

	for_each_set_bit(i, &val, len * 8) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		spin_lock_irqsave(&irq->irq_lock, flags);
		irq->enabled = true;
		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);

		vgic_put_irq(vcpu->kvm, irq);
	}
}

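/*
 * A write to GICD_ICENABLER clears the enable bit for the interrupts whose
 * bits are 1 in the written value; the interrupts keep any pending state
 * they may already have.
 */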
void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
			     gpa_t addr, unsigned int len,
			     unsigned long val)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	int i;
	unsigned long flags;

	for_each_set_bit(i, &val, len * 8) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		spin_lock_irqsave(&irq->irq_lock, flags);

		irq->enabled = false;

		spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

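/*
 * GICD_ISPENDR and GICD_ICPENDR reads both return the pending state, which
 * is sampled under the per-IRQ lock via irq_is_pending().
 */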
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
				     gpa_t addr, unsigned int len)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	u32 value = 0;
	unsigned long flags;
	int i;

	/* Loop over all IRQs affected by this read */
	for (i = 0; i < len * 8; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		spin_lock_irqsave(&irq->irq_lock, flags);
		if (irq_is_pending(irq))
			value |= (1U << i);
		spin_unlock_irqrestore(&irq->irq_lock, flags);

		vgic_put_irq(vcpu->kvm, irq);
	}

	return value;
}

/*
 * This function will return the VCPU that performed the MMIO access and
 * trapped from within the VM, and will return NULL if this is a userspace
 * access.
 *
 * We can disable preemption locally around accessing the per-CPU variable,
 * and use the resolved vcpu pointer after enabling preemption again, because
 * even if the current thread is migrated to another CPU, reading the per-CPU
 * value later will give us the same value as we update the per-CPU variable
 * in the preempt notifier handlers.
 */
static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void)
{
	struct kvm_vcpu *vcpu;

	preempt_disable();
	vcpu = kvm_arm_get_running_vcpu();
	preempt_enable();

	return vcpu;
}

/* Must be called with irq->irq_lock held */
static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
				 bool is_uaccess)
{
	if (is_uaccess)
		return;

	irq->pending_latch = true;
	vgic_irq_set_phys_active(irq, true);
}

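/*
 * A write to GICD_ISPENDR sets the pending latch; for a HW mapped
 * interrupt the pending state is also forwarded to the physical
 * distributor, unless the write comes from userspace.
 */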
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
			      gpa_t addr, unsigned int len,
			      unsigned long val)
{
	bool is_uaccess = !vgic_get_mmio_requester_vcpu();
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	int i;
	unsigned long flags;

	for_each_set_bit(i, &val, len * 8) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		spin_lock_irqsave(&irq->irq_lock, flags);
		if (irq->hw)
			vgic_hw_irq_spending(vcpu, irq, is_uaccess);
		else
			irq->pending_latch = true;
		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

/* Must be called with irq->irq_lock held */
static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
				 bool is_uaccess)
{
	if (is_uaccess)
		return;

	irq->pending_latch = false;

	/*
	 * We don't want the guest to effectively mask the physical
	 * interrupt by doing a write to SPENDR followed by a write to
	 * CPENDR for HW interrupts, so we clear the active state on
	 * the physical side if the virtual interrupt is not active.
	 * This may lead to taking an additional interrupt on the
	 * host, but that should not be a problem as the worst that
	 * can happen is an additional vgic injection.  We also clear
	 * the pending state to maintain proper semantics for edge HW
	 * interrupts.
	 */
	vgic_irq_set_phys_pending(irq, false);
	if (!irq->active)
		vgic_irq_set_phys_active(irq, false);
}

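/*
 * A write to GICD_ICPENDR clears the pending latch; for a HW mapped
 * interrupt vgic_hw_irq_cpending() also clears the physical pending and,
 * if needed, active state.
 */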
void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
			      gpa_t addr, unsigned int len,
			      unsigned long val)
{
	bool is_uaccess = !vgic_get_mmio_requester_vcpu();
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	int i;
	unsigned long flags;

	for_each_set_bit(i, &val, len * 8) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		spin_lock_irqsave(&irq->irq_lock, flags);

		if (irq->hw)
			vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
		else
			irq->pending_latch = false;

		spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

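/*
 * GICD_ISACTIVER and GICD_ICACTIVER reads both return the active state of
 * the interrupts covered by the access.
 */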
unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
				    gpa_t addr, unsigned int len)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	u32 value = 0;
	int i;

	/* Loop over all IRQs affected by this read */
	for (i = 0; i < len * 8; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		if (irq->active)
			value |= (1U << i);

		vgic_put_irq(vcpu->kvm, irq);
	}

	return value;
}

/* Must be called with irq->irq_lock held */
static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
				      bool active, bool is_uaccess)
{
	if (is_uaccess)
		return;

	irq->active = active;
	vgic_irq_set_phys_active(irq, active);
}

static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
				    bool active)
{
	unsigned long flags;
	struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();

	spin_lock_irqsave(&irq->irq_lock, flags);

	/*
	 * If this virtual IRQ was written into a list register, we
	 * have to make sure the CPU that runs the VCPU thread has
	 * synced back the LR state to the struct vgic_irq.
	 *
	 * As long as the conditions below are true, we know the VCPU thread
	 * may be on its way back from the guest (we kicked the VCPU thread in
	 * vgic_change_active_prepare) and still has to sync back this IRQ,
	 * so we release and re-acquire the spin_lock to let the other thread
	 * sync back the IRQ.
	 *
	 * When accessing VGIC state from user space, requester_vcpu is
	 * NULL, which is fine, because we guarantee that no VCPUs are running
	 * when accessing VGIC state from user space so irq->vcpu->cpu is
	 * always -1.
	 */
	while (irq->vcpu && /* IRQ may have state in an LR somewhere */
	       irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
	       irq->vcpu->cpu != -1) /* VCPU thread is running */
		cond_resched_lock(&irq->irq_lock);

	if (irq->hw)
		vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
	else
		irq->active = active;

	if (irq->active)
		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
	else
		spin_unlock_irqrestore(&irq->irq_lock, flags);
}

/*
 * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
 * is not queued on some running VCPU's LRs, because then the change to the
 * active state can be overwritten when the VCPU's state is synced coming back
 * from the guest.
 *
 * For shared interrupts, we have to stop all the VCPUs because interrupts can
 * be migrated while we don't hold the IRQ locks and we don't want to be
 * chasing moving targets.
 *
 * For private interrupts we don't have to do anything because userspace
 * accesses to the VGIC state already require all VCPUs to be stopped, and
 * only the VCPU itself can modify its private interrupts active state, which
 * guarantees that the VCPU is not running.
 */
static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
{
	if (intid > VGIC_NR_PRIVATE_IRQS)
		kvm_arm_halt_guest(vcpu->kvm);
}

/* See vgic_change_active_prepare */
static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
{
	if (intid > VGIC_NR_PRIVATE_IRQS)
		kvm_arm_resume_guest(vcpu->kvm);
}

static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
				      gpa_t addr, unsigned int len,
				      unsigned long val)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	int i;

	for_each_set_bit(i, &val, len * 8) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		vgic_mmio_change_active(vcpu, irq, false);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
			     gpa_t addr, unsigned int len,
			     unsigned long val)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);

	mutex_lock(&vcpu->kvm->lock);
	vgic_change_active_prepare(vcpu, intid);

	__vgic_mmio_write_cactive(vcpu, addr, len, val);

	vgic_change_active_finish(vcpu, intid);
	mutex_unlock(&vcpu->kvm->lock);
}

void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
				     gpa_t addr, unsigned int len,
				     unsigned long val)
{
	__vgic_mmio_write_cactive(vcpu, addr, len, val);
}

static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
				      gpa_t addr, unsigned int len,
				      unsigned long val)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
	int i;

	for_each_set_bit(i, &val, len * 8) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		vgic_mmio_change_active(vcpu, irq, true);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
			     gpa_t addr, unsigned int len,
			     unsigned long val)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);

	mutex_lock(&vcpu->kvm->lock);
	vgic_change_active_prepare(vcpu, intid);

	__vgic_mmio_write_sactive(vcpu, addr, len, val);

	vgic_change_active_finish(vcpu, intid);
	mutex_unlock(&vcpu->kvm->lock);
}

void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
				     gpa_t addr, unsigned int len,
				     unsigned long val)
{
	__vgic_mmio_write_sactive(vcpu, addr, len, val);
}

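/*
 * The priority registers pack one byte per interrupt, so a read of 'len'
 * bytes covers 'len' consecutive interrupts starting at the decoded INTID.
 */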
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
				      gpa_t addr, unsigned int len)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
	int i;
	u64 val = 0;

	for (i = 0; i < len; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		val |= (u64)irq->priority << (i * 8);

		vgic_put_irq(vcpu->kvm, irq);
	}

	return val;
}

/*
 * We currently don't handle changing the priority of an interrupt that
 * is already pending on a VCPU. If there is a need for this, we would
 * need to make this VCPU exit and re-evaluate the priorities, potentially
 * leading to this interrupt getting presented now to the guest (if it has
 * been masked by the priority mask before).
 */
void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
			      gpa_t addr, unsigned int len,
			      unsigned long val)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
	int i;
	unsigned long flags;

	for (i = 0; i < len; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		spin_lock_irqsave(&irq->irq_lock, flags);
		/* Narrow the priority range to what we actually support */
		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
		spin_unlock_irqrestore(&irq->irq_lock, flags);

		vgic_put_irq(vcpu->kvm, irq);
	}
}

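/*
 * GICD_ICFGR uses two bits per interrupt; bit 1 of each field reads as 1
 * for edge-triggered interrupts and as 0 for level-triggered ones.
 */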
unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
				    gpa_t addr, unsigned int len)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
	u32 value = 0;
	int i;

	for (i = 0; i < len * 4; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		if (irq->config == VGIC_CONFIG_EDGE)
			value |= (2U << (i * 2));

		vgic_put_irq(vcpu->kvm, irq);
	}

	return value;
}

void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
			    gpa_t addr, unsigned int len,
			    unsigned long val)
{
	u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
	int i;
	unsigned long flags;

	for (i = 0; i < len * 4; i++) {
		struct vgic_irq *irq;

		/*
		 * The configuration cannot be changed for SGIs in general,
		 * for PPIs this is IMPLEMENTATION DEFINED. The arch timer
		 * code relies on PPIs being level triggered, so we also
		 * make them read-only here.
		 */
		if (intid + i < VGIC_NR_PRIVATE_IRQS)
			continue;

		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
		spin_lock_irqsave(&irq->irq_lock, flags);

		if (test_bit(i * 2 + 1, &val))
			irq->config = VGIC_CONFIG_EDGE;
		else
			irq->config = VGIC_CONFIG_LEVEL;

		spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

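/*
 * Report the line level of the 32 interrupts starting at intid as a
 * bitmap; only level-triggered interrupts contribute a set bit.
 */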
u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
{
	int i;
	u64 val = 0;
	int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;

	for (i = 0; i < 32; i++) {
		struct vgic_irq *irq;

		if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs)
			continue;

		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
		if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level)
			val |= (1U << i);

		vgic_put_irq(vcpu->kvm, irq);
	}

	return val;
}

void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
				    const u64 val)
{
	int i;
	int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
	unsigned long flags;

	for (i = 0; i < 32; i++) {
		struct vgic_irq *irq;
		bool new_level;

		if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs)
			continue;

		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

		/*
		 * Line level is set irrespective of irq type
		 * (level or edge) to avoid dependency that VM should
		 * restore irq config before line level.
		 */
		new_level = !!(val & (1U << i));
		spin_lock_irqsave(&irq->irq_lock, flags);
		irq->line_level = new_level;
		if (new_level)
			vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
		else
			spin_unlock_irqrestore(&irq->irq_lock, flags);

		vgic_put_irq(vcpu->kvm, irq);
	}
}

static int match_region(const void *key, const void *elt)
{
	const unsigned int offset = (unsigned long)key;
	const struct vgic_register_region *region = elt;

	if (offset < region->reg_offset)
		return -1;

	if (offset >= region->reg_offset + region->len)
		return 1;

	return 0;
}

/* Find the proper register handler entry given a certain address offset */
const struct vgic_register_region *
vgic_find_mmio_region(const struct vgic_register_region *regions,
		      int nr_regions, unsigned int offset)
{
	return bsearch((void *)(uintptr_t)offset, regions, nr_regions,
		       sizeof(regions[0]), match_region);
}

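/* Dispatch VMCR accesses to the GICv2 or GICv3 specific backend. */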
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_set_vmcr(vcpu, vmcr);
	else
		vgic_v3_set_vmcr(vcpu, vmcr);
}

void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_get_vmcr(vcpu, vmcr);
	else
		vgic_v3_get_vmcr(vcpu, vmcr);
}

/*
 * kvm_mmio_read_buf() returns a value in a format where it can be converted
 * to a byte array and be directly observed as the guest wanted it to appear
 * in memory if it had done the store itself, which is LE for the GIC, as the
 * guest knows the GIC is always LE.
 *
 * We convert this value to the CPUs native format to deal with it as a data
 * value.
 */
unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
{
	unsigned long data = kvm_mmio_read_buf(val, len);

	switch (len) {
	case 1:
		return data;
	case 2:
		return le16_to_cpu(data);
	case 4:
		return le32_to_cpu(data);
	default:
		return le64_to_cpu(data);
	}
}

/*
 * kvm_mmio_write_buf() expects a value in a format such that if converted to
 * a byte array it is observed as the guest would see it if it could perform
 * the load directly.  Since the GIC is LE, and the guest knows this, the
 * guest expects a value in little endian format.
 *
 * We convert the data value from the CPUs native format to LE so that the
 * value is returned in the proper format.
 */
void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
				unsigned long data)
{
	switch (len) {
	case 1:
		break;
	case 2:
		data = cpu_to_le16(data);
		break;
	case 4:
		data = cpu_to_le32(data);
		break;
	default:
		data = cpu_to_le64(data);
	}

	kvm_mmio_write_buf(buf, len, data);
}

static
struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
{
	return container_of(dev, struct vgic_io_device, dev);
}

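/*
 * Check that an access is valid for a given region: the access width must
 * be supported by the region, the address must be naturally aligned, and
 * for per-IRQ registers the decoded INTID must refer to an allocated IRQ.
 */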
static bool check_region(const struct kvm *kvm,
			 const struct vgic_register_region *region,
			 gpa_t addr, int len)
{
	int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;

	switch (len) {
	case sizeof(u8):
		flags = VGIC_ACCESS_8bit;
		break;
	case sizeof(u32):
		flags = VGIC_ACCESS_32bit;
		break;
	case sizeof(u64):
		flags = VGIC_ACCESS_64bit;
		break;
	default:
		return false;
	}

	if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) {
		if (!region->bits_per_irq)
			return true;

		/* Do we access a non-allocated IRQ? */
		return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs;
	}

	return false;
}

const struct vgic_register_region *
vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
		     gpa_t addr, int len)
{
	const struct vgic_register_region *region;

	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
				       addr - iodev->base_addr);
	if (!region || !check_region(vcpu->kvm, region, addr, len))
		return NULL;

	return region;
}

static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
			     gpa_t addr, u32 *val)
{
	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
	const struct vgic_register_region *region;
	struct kvm_vcpu *r_vcpu;

	region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
	if (!region) {
		*val = 0;
		return 0;
	}

	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
	if (region->uaccess_read)
		*val = region->uaccess_read(r_vcpu, addr, sizeof(u32));
	else
		*val = region->read(r_vcpu, addr, sizeof(u32));

	return 0;
}

static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
			      gpa_t addr, const u32 *val)
{
	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
	const struct vgic_register_region *region;
	struct kvm_vcpu *r_vcpu;

	region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
	if (!region)
		return -EINVAL;

	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
	if (region->uaccess_write)
		region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
	else
		region->write(r_vcpu, addr, sizeof(u32), *val);

	return 0;
}

/*
 * Userland access to VGIC registers.
 */
int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
		 bool is_write, int offset, u32 *val)
{
	if (is_write)
		return vgic_uaccess_write(vcpu, &dev->dev, offset, val);
	else
		return vgic_uaccess_read(vcpu, &dev->dev, offset, val);
}

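/*
 * Common MMIO dispatcher: look up the register region for this access and
 * route it to the right handler depending on the device type (CPU
 * interface, distributor, redistributor or ITS).
 */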
static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
			      gpa_t addr, int len, void *val)
{
	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
	const struct vgic_register_region *region;
	unsigned long data = 0;

	region = vgic_get_mmio_region(vcpu, iodev, addr, len);
	if (!region) {
		memset(val, 0, len);
		return 0;
	}

	switch (iodev->iodev_type) {
	case IODEV_CPUIF:
		data = region->read(vcpu, addr, len);
		break;
	case IODEV_DIST:
		data = region->read(vcpu, addr, len);
		break;
	case IODEV_REDIST:
		data = region->read(iodev->redist_vcpu, addr, len);
		break;
	case IODEV_ITS:
		data = region->its_read(vcpu->kvm, iodev->its, addr, len);
		break;
	}

	vgic_data_host_to_mmio_bus(val, len, data);
	return 0;
}

static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
			       gpa_t addr, int len, const void *val)
{
	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
	const struct vgic_register_region *region;
	unsigned long data = vgic_data_mmio_bus_to_host(val, len);

	region = vgic_get_mmio_region(vcpu, iodev, addr, len);
	if (!region)
		return 0;

	switch (iodev->iodev_type) {
	case IODEV_CPUIF:
		region->write(vcpu, addr, len, data);
		break;
	case IODEV_DIST:
		region->write(vcpu, addr, len, data);
		break;
	case IODEV_REDIST:
		region->write(iodev->redist_vcpu, addr, len, data);
		break;
	case IODEV_ITS:
		region->its_write(vcpu->kvm, iodev->its, addr, len, data);
		break;
	}

	return 0;
}

struct kvm_io_device_ops kvm_io_gic_ops = {
	.read = dispatch_mmio_read,
	.write = dispatch_mmio_write,
};

int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
			     enum vgic_type type)
{
	struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
	int ret = 0;
	unsigned int len;

	switch (type) {
	case VGIC_V2:
		len = vgic_v2_init_dist_iodev(io_device);
		break;
	case VGIC_V3:
		len = vgic_v3_init_dist_iodev(io_device);
		break;
	default:
		BUG_ON(1);
	}

	io_device->base_addr = dist_base_address;
	io_device->iodev_type = IODEV_DIST;
	io_device->redist_vcpu = NULL;

	mutex_lock(&kvm->slots_lock);
	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
				      len, &io_device->dev);
	mutex_unlock(&kvm->slots_lock);

	return ret;
}