KVM: arm/arm64: vgic: Fix kvm_device leak in vgic_its_destroy
virt/kvm/arm/vgic/vgic-its.c (platform/kernel/linux-rpi.git)
1 /*
2  * GICv3 ITS emulation
3  *
4  * Copyright (C) 2015,2016 ARM Ltd.
5  * Author: Andre Przywara <andre.przywara@arm.com>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19
20 #include <linux/cpu.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/interrupt.h>
24 #include <linux/list.h>
25 #include <linux/uaccess.h>
26 #include <linux/list_sort.h>
27
28 #include <linux/irqchip/arm-gic-v3.h>
29
30 #include <asm/kvm_emulate.h>
31 #include <asm/kvm_arm.h>
32 #include <asm/kvm_mmu.h>
33
34 #include "vgic.h"
35 #include "vgic-mmio.h"
36
37 static int vgic_its_save_tables_v0(struct vgic_its *its);
38 static int vgic_its_restore_tables_v0(struct vgic_its *its);
39 static int vgic_its_commit_v0(struct vgic_its *its);
40 static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
41                              struct kvm_vcpu *filter_vcpu, bool needs_inv);
42
43 /*
44  * Creates a new (reference to a) struct vgic_irq for a given LPI.
45  * If this LPI is already mapped on another ITS, we increase its refcount
46  * and return a pointer to the existing structure.
47  * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq.
48  * This function returns a pointer to the _unlocked_ structure.
49  */
50 static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
51                                      struct kvm_vcpu *vcpu)
52 {
53         struct vgic_dist *dist = &kvm->arch.vgic;
54         struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
55         unsigned long flags;
56         int ret;
57
58         /* In this case there is no put, since we keep the reference. */
59         if (irq)
60                 return irq;
61
62         irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
63         if (!irq)
64                 return ERR_PTR(-ENOMEM);
65
66         INIT_LIST_HEAD(&irq->lpi_list);
67         INIT_LIST_HEAD(&irq->ap_list);
68         spin_lock_init(&irq->irq_lock);
69
70         irq->config = VGIC_CONFIG_EDGE;
71         kref_init(&irq->refcount);
72         irq->intid = intid;
73         irq->target_vcpu = vcpu;
74         irq->group = 1;
75
76         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
77
78         /*
79          * There could be a race with another vgic_add_lpi(), so we need to
80          * check that we don't add a second list entry with the same LPI.
81          */
82         list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) {
83                 if (oldirq->intid != intid)
84                         continue;
85
86                 /* Someone was faster with adding this LPI, let's use that. */
87                 kfree(irq);
88                 irq = oldirq;
89
90                 /*
91                  * This increases the refcount, the caller is expected to
92                  * call vgic_put_irq() on the returned pointer once it's
93                  * finished with the IRQ.
94                  */
95                 vgic_get_irq_kref(irq);
96
97                 goto out_unlock;
98         }
99
100         list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
101         dist->lpi_list_count++;
102
103 out_unlock:
104         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
105
106         /*
107          * We "cache" the configuration table entries in our struct vgic_irq's.
108          * However we only have those structs for mapped IRQs, so we read in
109          * the respective config data from memory here upon mapping the LPI.
110          */
111         ret = update_lpi_config(kvm, irq, NULL, false);
112         if (ret)
113                 return ERR_PTR(ret);
114
115         ret = vgic_v3_lpi_sync_pending_status(kvm, irq);
116         if (ret)
117                 return ERR_PTR(ret);
118
119         return irq;
120 }
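The reference taken here lives as long as the ITT entry that records it. A hedged sketch of the pairing a caller is expected to follow (it mirrors vgic_its_cmd_handle_mapi() and its_free_ite() further down; illustration only, not code from this file):

        struct vgic_irq *irq;

        irq = vgic_add_lpi(kvm, lpi_nr, vcpu);  /* takes and keeps a reference */
        if (IS_ERR(irq))
                return PTR_ERR(irq);
        ite->irq = irq;                         /* stored in the ITT entry */
        /* ...much later, when the ITTE is torn down... */
        vgic_put_irq(kvm, ite->irq);            /* its_free_ite() drops it */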
121
122 struct its_device {
123         struct list_head dev_list;
124
125         /* the head for the list of ITTEs */
126         struct list_head itt_head;
127         u32 num_eventid_bits;
128         gpa_t itt_addr;
129         u32 device_id;
130 };
131
132 #define COLLECTION_NOT_MAPPED ((u32)~0)
133
134 struct its_collection {
135         struct list_head coll_list;
136
137         u32 collection_id;
138         u32 target_addr;
139 };
140
141 #define its_is_collection_mapped(coll) ((coll) && \
142                                 ((coll)->target_addr != COLLECTION_NOT_MAPPED))
143
144 struct its_ite {
145         struct list_head ite_list;
146
147         struct vgic_irq *irq;
148         struct its_collection *collection;
149         u32 event_id;
150 };
151
152 /**
153  * struct vgic_its_abi - ITS ABI ops and settings
154  * @cte_esz: collection table entry size
155  * @dte_esz: device table entry size
156  * @ite_esz: interrupt translation table entry size
157  * @save_tables: save the ITS tables into guest RAM
158  * @restore_tables: restore the ITS internal structs from tables
159  *  stored in guest RAM
160  * @commit: initialize the registers which expose the ABI settings,
161  *  especially the entry sizes
162  */
163 struct vgic_its_abi {
164         int cte_esz;
165         int dte_esz;
166         int ite_esz;
167         int (*save_tables)(struct vgic_its *its);
168         int (*restore_tables)(struct vgic_its *its);
169         int (*commit)(struct vgic_its *its);
170 };
171
172 #define ABI_0_ESZ       8
173 #define ESZ_MAX         ABI_0_ESZ
174
175 static const struct vgic_its_abi its_table_abi_versions[] = {
176         [0] = {
177          .cte_esz = ABI_0_ESZ,
178          .dte_esz = ABI_0_ESZ,
179          .ite_esz = ABI_0_ESZ,
180          .save_tables = vgic_its_save_tables_v0,
181          .restore_tables = vgic_its_restore_tables_v0,
182          .commit = vgic_its_commit_v0,
183         },
184 };
185
186 #define NR_ITS_ABIS     ARRAY_SIZE(its_table_abi_versions)
187
188 inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
189 {
190         return &its_table_abi_versions[its->abi_rev];
191 }
192
193 static int vgic_its_set_abi(struct vgic_its *its, u32 rev)
194 {
195         const struct vgic_its_abi *abi;
196
197         its->abi_rev = rev;
198         abi = vgic_its_get_abi(its);
199         return abi->commit(its);
200 }
201
202 /*
203  * Finds and returns a device in the device table for an ITS.
204  * Must be called with the its_lock mutex held.
205  */
206 static struct its_device *find_its_device(struct vgic_its *its, u32 device_id)
207 {
208         struct its_device *device;
209
210         list_for_each_entry(device, &its->device_list, dev_list)
211                 if (device_id == device->device_id)
212                         return device;
213
214         return NULL;
215 }
216
217 /*
218  * Finds and returns an interrupt translation table entry (ITTE) for a given
219  * Device ID/Event ID pair on an ITS.
220  * Must be called with the its_lock mutex held.
221  */
222 static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
223                                   u32 event_id)
224 {
225         struct its_device *device;
226         struct its_ite *ite;
227
228         device = find_its_device(its, device_id);
229         if (device == NULL)
230                 return NULL;
231
232         list_for_each_entry(ite, &device->itt_head, ite_list)
233                 if (ite->event_id == event_id)
234                         return ite;
235
236         return NULL;
237 }
238
239 /* To be used as an iterator, this macro omits the enclosing parentheses */
240 #define for_each_lpi_its(dev, ite, its) \
241         list_for_each_entry(dev, &(its)->device_list, dev_list) \
242                 list_for_each_entry(ite, &(dev)->itt_head, ite_list)
243
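Because the macro expands to two nested list_for_each_entry() loops without enclosing parentheses or braces, it is only safe as the sole statement heading a block. A usage sketch, mirroring update_affinity_collection() below (illustration only):

        struct its_device *dev;
        struct its_ite *ite;

        for_each_lpi_its(dev, ite, its) {
                /* runs once per ITTE of every device mapped on this ITS */
                if (!ite->collection)
                        continue;
        }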
244 /*
245  * We only implement 48 bits of PA at the moment, although the ITS
246  * supports more. Let's be restrictive here.
247  */
248 #define BASER_ADDRESS(x)        ((x) & GENMASK_ULL(47, 16))
249 #define CBASER_ADDRESS(x)       ((x) & GENMASK_ULL(47, 12))
250
251 #define GIC_LPI_OFFSET 8192
252
253 #define VITS_TYPER_IDBITS 16
254 #define VITS_TYPER_DEVBITS 16
255 #define VITS_DTE_MAX_DEVID_OFFSET       (BIT(14) - 1)
256 #define VITS_ITE_MAX_EVENTID_OFFSET     (BIT(16) - 1)
257
258 /*
259  * Finds and returns a collection in the ITS collection table.
260  * Must be called with the its_lock mutex held.
261  */
262 static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
263 {
264         struct its_collection *collection;
265
266         list_for_each_entry(collection, &its->collection_list, coll_list) {
267                 if (coll_id == collection->collection_id)
268                         return collection;
269         }
270
271         return NULL;
272 }
273
274 #define LPI_PROP_ENABLE_BIT(p)  ((p) & LPI_PROP_ENABLED)
275 #define LPI_PROP_PRIORITY(p)    ((p) & 0xfc)
276
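A worked example of the decoding (the value is made up): a property byte of 0xa1 read from the guest's configuration table yields priority 0xa0 and an enabled LPI, which is exactly what update_lpi_config() below stores:

        u8 prop = 0xa1;                         /* example byte */
        u8 prio = LPI_PROP_PRIORITY(prop);      /* 0xa1 & 0xfc == 0xa0 */
        bool on = LPI_PROP_ENABLE_BIT(prop);    /* bit 0 is set */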
277 /*
278  * Reads the configuration data for a given LPI from guest memory and
279  * updates the fields in struct vgic_irq.
280  * If filter_vcpu is not NULL, applies only if the IRQ is targeting this
281  * VCPU. Unconditionally applies if filter_vcpu is NULL.
282  */
283 static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
284                              struct kvm_vcpu *filter_vcpu, bool needs_inv)
285 {
286         u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
287         u8 prop;
288         int ret;
289         unsigned long flags;
290
291         ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
292                                   &prop, 1);
293
294         if (ret)
295                 return ret;
296
297         spin_lock_irqsave(&irq->irq_lock, flags);
298
299         if (!filter_vcpu || filter_vcpu == irq->target_vcpu) {
300                 irq->priority = LPI_PROP_PRIORITY(prop);
301                 irq->enabled = LPI_PROP_ENABLE_BIT(prop);
302
303                 if (!irq->hw) {
304                         vgic_queue_irq_unlock(kvm, irq, flags);
305                         return 0;
306                 }
307         }
308
309         spin_unlock_irqrestore(&irq->irq_lock, flags);
310
311         if (irq->hw)
312                 return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
313
314         return 0;
315 }
316
317 /*
318  * Create a snapshot of the current LPIs targeting @vcpu, so that we can
319  * enumerate those LPIs without holding any lock.
320  * Returns their number and puts the kmalloc'ed array into intid_ptr.
321  */
322 int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
323 {
324         struct vgic_dist *dist = &kvm->arch.vgic;
325         struct vgic_irq *irq;
326         unsigned long flags;
327         u32 *intids;
328         int irq_count, i = 0;
329
330         /*
331          * There is an obvious race between allocating the array and LPIs
332          * being mapped/unmapped. If we ended up here as a result of a
333          * command, we're safe (locks are held, preventing another
334          * command). If coming from another path (such as enabling LPIs),
335          * we must be careful not to overrun the array.
336          */
337         irq_count = READ_ONCE(dist->lpi_list_count);
338         intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL);
339         if (!intids)
340                 return -ENOMEM;
341
342         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
343         list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
344                 if (i == irq_count)
345                         break;
346                 /* We don't need to "get" the IRQ, as we hold the list lock. */
347                 if (vcpu && irq->target_vcpu != vcpu)
348                         continue;
349                 intids[i++] = irq->intid;
350         }
351         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
352
353         *intid_ptr = intids;
354         return i;
355 }
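A hedged sketch of the calling contract (do_something() is a made-up placeholder): the snapshot belongs to the caller, which must kfree() it, and the returned count may be smaller than lpi_list_count if LPIs were unmapped while the array was being allocated:

        u32 *intids;
        int i, nr;

        nr = vgic_copy_lpi_list(kvm, vcpu, &intids);
        if (nr < 0)
                return nr;                      /* currently only -ENOMEM */
        for (i = 0; i < nr; i++)
                do_something(intids[i]);        /* hypothetical per-LPI work */
        kfree(intids);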
356
357 static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
358 {
359         int ret = 0;
360         unsigned long flags;
361
362         spin_lock_irqsave(&irq->irq_lock, flags);
363         irq->target_vcpu = vcpu;
364         spin_unlock_irqrestore(&irq->irq_lock, flags);
365
366         if (irq->hw) {
367                 struct its_vlpi_map map;
368
369                 ret = its_get_vlpi(irq->host_irq, &map);
370                 if (ret)
371                         return ret;
372
373                 map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
374
375                 ret = its_map_vlpi(irq->host_irq, &map);
376         }
377
378         return ret;
379 }
380
381 /*
382  * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI
383  * is targeting) to the VGIC's view, which deals with target VCPUs.
384  * Needs to be called whenever either the collection of an LPI has
385  * changed or the collection itself got retargeted.
386  */
387 static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
388 {
389         struct kvm_vcpu *vcpu;
390
391         if (!its_is_collection_mapped(ite->collection))
392                 return;
393
394         vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
395         update_affinity(ite->irq, vcpu);
396 }
397
398 /*
399  * Updates the target VCPU for every LPI targeting this collection.
400  * Must be called with the its_lock mutex held.
401  */
402 static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
403                                        struct its_collection *coll)
404 {
405         struct its_device *device;
406         struct its_ite *ite;
407
408         for_each_lpi_its(device, ite, its) {
409                 if (!ite->collection || coll != ite->collection)
410                         continue;
411
412                 update_affinity_ite(kvm, ite);
413         }
414 }
415
416 static u32 max_lpis_propbaser(u64 propbaser)
417 {
418         int nr_idbits = (propbaser & 0x1f) + 1;
419
420         return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS);
421 }
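A worked example of the clamping (INTERRUPT_ID_BITS_ITS is 16 in this implementation, see vgic.h): if the guest programs the PROPBASER IDbits field to 0x1f, nr_idbits evaluates to 32, min() brings it back down to 16, and the function returns 65536 interrupt IDs; with LPIs starting at GIC_LPI_OFFSET, that leaves INTIDs 8192..65535 for LPIs.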
422
423 /*
424  * Sync the pending table pending bit of LPIs targeting @vcpu
425  * with our own data structures. This relies on the LPI being
426  * mapped before.
427  */
428 static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
429 {
430         gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
431         struct vgic_irq *irq;
432         int last_byte_offset = -1;
433         int ret = 0;
434         u32 *intids;
435         int nr_irqs, i;
436         unsigned long flags;
437         u8 pendmask;
438
439         nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
440         if (nr_irqs < 0)
441                 return nr_irqs;
442
443         for (i = 0; i < nr_irqs; i++) {
444                 int byte_offset, bit_nr;
445
446                 byte_offset = intids[i] / BITS_PER_BYTE;
447                 bit_nr = intids[i] % BITS_PER_BYTE;
448
449                 /*
450                  * For contiguously allocated LPIs, chances are we just read
451                  * this very same byte in the last iteration. Reuse that.
452                  */
453                 if (byte_offset != last_byte_offset) {
454                         ret = kvm_read_guest_lock(vcpu->kvm,
455                                                   pendbase + byte_offset,
456                                                   &pendmask, 1);
457                         if (ret) {
458                                 kfree(intids);
459                                 return ret;
460                         }
461                         last_byte_offset = byte_offset;
462                 }
463
464                 irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
465                 spin_lock_irqsave(&irq->irq_lock, flags);
466                 irq->pending_latch = pendmask & (1U << bit_nr);
467                 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
468                 vgic_put_irq(vcpu->kvm, irq);
469         }
470
471         kfree(intids);
472
473         return ret;
474 }
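A worked example of the addressing above (numbers only, not from the file): the first LPI, INTID 8192, lives at byte 8192 / 8 = 1024 of the pending table, bit 0; INTID 8195 shares that byte at bit 3, which is why the byte read is cached across loop iterations.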
475
476 static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
477                                               struct vgic_its *its,
478                                               gpa_t addr, unsigned int len)
479 {
480         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
481         u64 reg = GITS_TYPER_PLPIS;
482
483         /*
484          * We use linear CPU numbers for redistributor addressing,
485          * so GITS_TYPER.PTA is 0.
486          * Also we force all PROPBASER registers to be the same, so
487          * CommonLPIAff is 0 as well.
488          * To avoid memory waste in the guest, we keep the number of IDBits and
489  * DevBits low - at least for the time being.
490          */
491         reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT;
492         reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT;
493         reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;
494
495         return extract_bytes(reg, addr & 7, len);
496 }
497
498 static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm,
499                                              struct vgic_its *its,
500                                              gpa_t addr, unsigned int len)
501 {
502         u32 val;
503
504         val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK;
505         val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM;
506         return val;
507 }
508
509 static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm,
510                                             struct vgic_its *its,
511                                             gpa_t addr, unsigned int len,
512                                             unsigned long val)
513 {
514         u32 rev = GITS_IIDR_REV(val);
515
516         if (rev >= NR_ITS_ABIS)
517                 return -EINVAL;
518         return vgic_its_set_abi(its, rev);
519 }
520
521 static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
522                                                struct vgic_its *its,
523                                                gpa_t addr, unsigned int len)
524 {
525         switch (addr & 0xffff) {
526         case GITS_PIDR0:
527                 return 0x92;    /* part number, bits[7:0] */
528         case GITS_PIDR1:
529                 return 0xb4;    /* part number, bits[11:8] */
530         case GITS_PIDR2:
531                 return GIC_PIDR2_ARCH_GICv3 | 0x0b;
532         case GITS_PIDR4:
533                 return 0x40;    /* This is a 64K software visible page */
534         /* The following are the ID registers for (any) GIC. */
535         case GITS_CIDR0:
536                 return 0x0d;
537         case GITS_CIDR1:
538                 return 0xf0;
539         case GITS_CIDR2:
540                 return 0x05;
541         case GITS_CIDR3:
542                 return 0xb1;
543         }
544
545         return 0;
546 }
547
548 int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
549                          u32 devid, u32 eventid, struct vgic_irq **irq)
550 {
551         struct kvm_vcpu *vcpu;
552         struct its_ite *ite;
553
554         if (!its->enabled)
555                 return -EBUSY;
556
557         ite = find_ite(its, devid, eventid);
558         if (!ite || !its_is_collection_mapped(ite->collection))
559                 return E_ITS_INT_UNMAPPED_INTERRUPT;
560
561         vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
562         if (!vcpu)
563                 return E_ITS_INT_UNMAPPED_INTERRUPT;
564
565         if (!vcpu->arch.vgic_cpu.lpis_enabled)
566                 return -EBUSY;
567
568         *irq = ite->irq;
569         return 0;
570 }
571
572 struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
573 {
574         u64 address;
575         struct kvm_io_device *kvm_io_dev;
576         struct vgic_io_device *iodev;
577
578         if (!vgic_has_its(kvm))
579                 return ERR_PTR(-ENODEV);
580
581         if (!(msi->flags & KVM_MSI_VALID_DEVID))
582                 return ERR_PTR(-EINVAL);
583
584         address = (u64)msi->address_hi << 32 | msi->address_lo;
585
586         kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
587         if (!kvm_io_dev)
588                 return ERR_PTR(-EINVAL);
589
590         if (kvm_io_dev->ops != &kvm_io_gic_ops)
591                 return ERR_PTR(-EINVAL);
592
593         iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
594         if (iodev->iodev_type != IODEV_ITS)
595                 return ERR_PTR(-EINVAL);
596
597         return iodev->its;
598 }
599
600 /*
601  * Find the target VCPU and the LPI number for a given devid/eventid pair
602  * and make this IRQ pending, possibly injecting it.
603  * Must be called with the its_lock mutex held.
604  * Returns 0 on success, a positive error value for any ITS mapping
605  * related errors and negative error values for generic errors.
606  */
607 static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
608                                 u32 devid, u32 eventid)
609 {
610         struct vgic_irq *irq = NULL;
611         unsigned long flags;
612         int err;
613
614         err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq);
615         if (err)
616                 return err;
617
618         if (irq->hw)
619                 return irq_set_irqchip_state(irq->host_irq,
620                                              IRQCHIP_STATE_PENDING, true);
621
622         spin_lock_irqsave(&irq->irq_lock, flags);
623         irq->pending_latch = true;
624         vgic_queue_irq_unlock(kvm, irq, flags);
625
626         return 0;
627 }
628
629 /*
630  * Queries the KVM IO bus framework to get the ITS pointer from the given
631  * doorbell address.
632  * We then call vgic_its_trigger_msi() with the decoded data.
633  * According to the KVM_SIGNAL_MSI API description, we return 1 on success.
634  */
635 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
636 {
637         struct vgic_its *its;
638         int ret;
639
640         its = vgic_msi_to_its(kvm, msi);
641         if (IS_ERR(its))
642                 return PTR_ERR(its);
643
644         mutex_lock(&its->its_lock);
645         ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data);
646         mutex_unlock(&its->its_lock);
647
648         if (ret < 0)
649                 return ret;
650
651         /*
652          * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
653          * if the guest has blocked the MSI. So we map any LPI mapping
654          * related error to that.
655          */
656         if (ret)
657                 return 0;
658         else
659                 return 1;
660 }
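A hedged sketch of how an ioctl-side caller would read that result (the surrounding code is invented for illustration):

        ret = vgic_its_inject_msi(kvm, &msi);
        if (ret < 0)
                return ret;     /* generic error: no ITS, bad doorbell, ... */
        return ret;             /* 1: LPI made pending, 0: guest blocked it */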
661
662 /* Requires the its_lock to be held. */
663 static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
664 {
665         list_del(&ite->ite_list);
666
667         /* This put matches the get in vgic_add_lpi. */
668         if (ite->irq) {
669                 if (ite->irq->hw)
670                         WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
671
672                 vgic_put_irq(kvm, ite->irq);
673         }
674
675         kfree(ite);
676 }
677
678 static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
679 {
680         return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1);
681 }
682
683 #define its_cmd_get_command(cmd)        its_cmd_mask_field(cmd, 0,  0,  8)
684 #define its_cmd_get_deviceid(cmd)       its_cmd_mask_field(cmd, 0, 32, 32)
685 #define its_cmd_get_size(cmd)           (its_cmd_mask_field(cmd, 1,  0,  5) + 1)
686 #define its_cmd_get_id(cmd)             its_cmd_mask_field(cmd, 1,  0, 32)
687 #define its_cmd_get_physical_id(cmd)    its_cmd_mask_field(cmd, 1, 32, 32)
688 #define its_cmd_get_collection(cmd)     its_cmd_mask_field(cmd, 2,  0, 16)
689 #define its_cmd_get_ittaddr(cmd)        (its_cmd_mask_field(cmd, 2,  8, 44) << 8)
690 #define its_cmd_get_target_addr(cmd)    its_cmd_mask_field(cmd, 2, 16, 32)
691 #define its_cmd_get_validbit(cmd)       its_cmd_mask_field(cmd, 2, 63,  1)
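As a hedged illustration of how these accessors carve up the 32-byte command (example_decode_mapti() is invented; the field positions follow the macros above): a MAPTI command carries the device ID in DW0[63:32], the event ID in DW1[31:0], the physical LPI number in DW1[63:32] and the collection ID in DW2[15:0]:

        static void example_decode_mapti(u64 *its_cmd)
        {
                u32 device_id = its_cmd_get_deviceid(its_cmd);
                u32 event_id  = its_cmd_get_id(its_cmd);
                u32 lpi_nr    = its_cmd_get_physical_id(its_cmd);
                u32 coll_id   = its_cmd_get_collection(its_cmd);

                /* vgic_its_cmd_handle_mapi() below starts much like this */
        }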
692
693 /*
694  * The DISCARD command frees an Interrupt Translation Table Entry (ITTE).
695  * Must be called with the its_lock mutex held.
696  */
697 static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
698                                        u64 *its_cmd)
699 {
700         u32 device_id = its_cmd_get_deviceid(its_cmd);
701         u32 event_id = its_cmd_get_id(its_cmd);
702         struct its_ite *ite;
703
704
705         ite = find_ite(its, device_id, event_id);
706         if (ite && ite->collection) {
707                 /*
708                  * Though the spec talks about removing the pending state, we
709                  * don't bother here since we clear the ITTE anyway and the
710                  * pending state is a property of the ITTE struct.
711                  */
712                 its_free_ite(kvm, ite);
713                 return 0;
714         }
715
716         return E_ITS_DISCARD_UNMAPPED_INTERRUPT;
717 }
718
719 /*
720  * The MOVI command moves an ITTE to a different collection.
721  * Must be called with the its_lock mutex held.
722  */
723 static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
724                                     u64 *its_cmd)
725 {
726         u32 device_id = its_cmd_get_deviceid(its_cmd);
727         u32 event_id = its_cmd_get_id(its_cmd);
728         u32 coll_id = its_cmd_get_collection(its_cmd);
729         struct kvm_vcpu *vcpu;
730         struct its_ite *ite;
731         struct its_collection *collection;
732
733         ite = find_ite(its, device_id, event_id);
734         if (!ite)
735                 return E_ITS_MOVI_UNMAPPED_INTERRUPT;
736
737         if (!its_is_collection_mapped(ite->collection))
738                 return E_ITS_MOVI_UNMAPPED_COLLECTION;
739
740         collection = find_collection(its, coll_id);
741         if (!its_is_collection_mapped(collection))
742                 return E_ITS_MOVI_UNMAPPED_COLLECTION;
743
744         ite->collection = collection;
745         vcpu = kvm_get_vcpu(kvm, collection->target_addr);
746
747         return update_affinity(ite->irq, vcpu);
748 }
749
750 /*
751  * Check whether an ID can be stored into the corresponding guest table.
752  * For a direct table this is pretty easy, but gets a bit nasty for
753  * indirect tables. We check whether the resulting guest physical address
754  * is actually valid (covered by a memslot and guest accessible).
755  * For this we have to read the respective first level entry.
756  */
757 static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
758                               gpa_t *eaddr)
759 {
760         int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
761         u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
762         int esz = GITS_BASER_ENTRY_SIZE(baser);
763         int index, idx;
764         gfn_t gfn;
765         bool ret;
766
767         switch (type) {
768         case GITS_BASER_TYPE_DEVICE:
769                 if (id >= BIT_ULL(VITS_TYPER_DEVBITS))
770                         return false;
771                 break;
772         case GITS_BASER_TYPE_COLLECTION:
773                 /* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */
774                 if (id >= BIT_ULL(16))
775                         return false;
776                 break;
777         default:
778                 return false;
779         }
780
781         if (!(baser & GITS_BASER_INDIRECT)) {
782                 phys_addr_t addr;
783
784                 if (id >= (l1_tbl_size / esz))
785                         return false;
786
787                 addr = BASER_ADDRESS(baser) + id * esz;
788                 gfn = addr >> PAGE_SHIFT;
789
790                 if (eaddr)
791                         *eaddr = addr;
792
793                 goto out;
794         }
795
796         /* calculate and check the index into the 1st level */
797         index = id / (SZ_64K / esz);
798         if (index >= (l1_tbl_size / sizeof(u64)))
799                 return false;
800
801         /* Each 1st level entry is represented by a 64-bit value. */
802         if (kvm_read_guest_lock(its->dev->kvm,
803                            BASER_ADDRESS(baser) + index * sizeof(indirect_ptr),
804                            &indirect_ptr, sizeof(indirect_ptr)))
805                 return false;
806
807         indirect_ptr = le64_to_cpu(indirect_ptr);
808
809         /* check the valid bit of the first level entry */
810         if (!(indirect_ptr & BIT_ULL(63)))
811                 return false;
812
813         /*
814          * Mask the guest physical address and calculate the frame number.
815          * Any address beyond our supported 48 bits of PA will be caught
816          * by the actual check in the final step.
817          */
818         indirect_ptr &= GENMASK_ULL(51, 16);
819
820         /* Find the address of the actual entry */
821         index = id % (SZ_64K / esz);
822         indirect_ptr += index * esz;
823         gfn = indirect_ptr >> PAGE_SHIFT;
824
825         if (eaddr)
826                 *eaddr = indirect_ptr;
827
828 out:
829         idx = srcu_read_lock(&its->dev->kvm->srcu);
830         ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
831         srcu_read_unlock(&its->dev->kvm->srcu, idx);
832         return ret;
833 }
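A worked example of the indirect case (ABI v0, esz = 8 bytes): one 64K level-2 page holds SZ_64K / 8 = 8192 entries, so ID 20000 selects level-1 index 20000 / 8192 = 2, and the entry sits (20000 % 8192) * 8 = 28928 bytes into the level-2 page that L1 entry points to; both the level-1 read and the final kvm_is_visible_gfn() check must succeed for the ID to be accepted.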
834
835 static int vgic_its_alloc_collection(struct vgic_its *its,
836                                      struct its_collection **colp,
837                                      u32 coll_id)
838 {
839         struct its_collection *collection;
840
841         if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
842                 return E_ITS_MAPC_COLLECTION_OOR;
843
844         collection = kzalloc(sizeof(*collection), GFP_KERNEL);
845         if (!collection)
846                 return -ENOMEM;
847
848         collection->collection_id = coll_id;
849         collection->target_addr = COLLECTION_NOT_MAPPED;
850
851         list_add_tail(&collection->coll_list, &its->collection_list);
852         *colp = collection;
853
854         return 0;
855 }
856
857 static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
858 {
859         struct its_collection *collection;
860         struct its_device *device;
861         struct its_ite *ite;
862
863         /*
864          * Clearing the mapping for that collection ID removes the
865          * entry from the list. If there wasn't any before, we can
866          * go home early.
867          */
868         collection = find_collection(its, coll_id);
869         if (!collection)
870                 return;
871
872         for_each_lpi_its(device, ite, its)
873                 if (ite->collection &&
874                     ite->collection->collection_id == coll_id)
875                         ite->collection = NULL;
876
877         list_del(&collection->coll_list);
878         kfree(collection);
879 }
880
881 /* Must be called with its_lock mutex held */
882 static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
883                                           struct its_collection *collection,
884                                           u32 event_id)
885 {
886         struct its_ite *ite;
887
888         ite = kzalloc(sizeof(*ite), GFP_KERNEL);
889         if (!ite)
890                 return ERR_PTR(-ENOMEM);
891
892         ite->event_id   = event_id;
893         ite->collection = collection;
894
895         list_add_tail(&ite->ite_list, &device->itt_head);
896         return ite;
897 }
898
899 /*
900  * The MAPTI and MAPI commands map LPIs to ITTEs.
901  * Must be called with its_lock mutex held.
902  */
903 static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
904                                     u64 *its_cmd)
905 {
906         u32 device_id = its_cmd_get_deviceid(its_cmd);
907         u32 event_id = its_cmd_get_id(its_cmd);
908         u32 coll_id = its_cmd_get_collection(its_cmd);
909         struct its_ite *ite;
910         struct kvm_vcpu *vcpu = NULL;
911         struct its_device *device;
912         struct its_collection *collection, *new_coll = NULL;
913         struct vgic_irq *irq;
914         int lpi_nr;
915
916         device = find_its_device(its, device_id);
917         if (!device)
918                 return E_ITS_MAPTI_UNMAPPED_DEVICE;
919
920         if (event_id >= BIT_ULL(device->num_eventid_bits))
921                 return E_ITS_MAPTI_ID_OOR;
922
923         if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
924                 lpi_nr = its_cmd_get_physical_id(its_cmd);
925         else
926                 lpi_nr = event_id;
927         if (lpi_nr < GIC_LPI_OFFSET ||
928             lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
929                 return E_ITS_MAPTI_PHYSICALID_OOR;
930
931         /* If there is an existing mapping, behavior is UNPREDICTABLE. */
932         if (find_ite(its, device_id, event_id))
933                 return 0;
934
935         collection = find_collection(its, coll_id);
936         if (!collection) {
937                 int ret = vgic_its_alloc_collection(its, &collection, coll_id);
938                 if (ret)
939                         return ret;
940                 new_coll = collection;
941         }
942
943         ite = vgic_its_alloc_ite(device, collection, event_id);
944         if (IS_ERR(ite)) {
945                 if (new_coll)
946                         vgic_its_free_collection(its, coll_id);
947                 return PTR_ERR(ite);
948         }
949
950         if (its_is_collection_mapped(collection))
951                 vcpu = kvm_get_vcpu(kvm, collection->target_addr);
952
953         irq = vgic_add_lpi(kvm, lpi_nr, vcpu);
954         if (IS_ERR(irq)) {
955                 if (new_coll)
956                         vgic_its_free_collection(its, coll_id);
957                 its_free_ite(kvm, ite);
958                 return PTR_ERR(irq);
959         }
960         ite->irq = irq;
961
962         return 0;
963 }
964
965 /* Requires the its_lock to be held. */
966 static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
967 {
968         struct its_ite *ite, *temp;
969
970         /*
971          * The spec says that unmapping a device that still has valid
972          * ITTEs associated with it is UNPREDICTABLE. We remove all ITTEs,
973          * since we cannot leave the memory unreferenced.
974          */
975         list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
976                 its_free_ite(kvm, ite);
977
978         list_del(&device->dev_list);
979         kfree(device);
980 }
981
982 /* its lock must be held */
983 static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its)
984 {
985         struct its_device *cur, *temp;
986
987         list_for_each_entry_safe(cur, temp, &its->device_list, dev_list)
988                 vgic_its_free_device(kvm, cur);
989 }
990
991 /* its lock must be held */
992 static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its)
993 {
994         struct its_collection *cur, *temp;
995
996         list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list)
997                 vgic_its_free_collection(its, cur->collection_id);
998 }
999
1000 /* Must be called with its_lock mutex held */
1001 static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
1002                                                 u32 device_id, gpa_t itt_addr,
1003                                                 u8 num_eventid_bits)
1004 {
1005         struct its_device *device;
1006
1007         device = kzalloc(sizeof(*device), GFP_KERNEL);
1008         if (!device)
1009                 return ERR_PTR(-ENOMEM);
1010
1011         device->device_id = device_id;
1012         device->itt_addr = itt_addr;
1013         device->num_eventid_bits = num_eventid_bits;
1014         INIT_LIST_HEAD(&device->itt_head);
1015
1016         list_add_tail(&device->dev_list, &its->device_list);
1017         return device;
1018 }
1019
1020 /*
1021  * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs).
1022  * Must be called with the its_lock mutex held.
1023  */
1024 static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
1025                                     u64 *its_cmd)
1026 {
1027         u32 device_id = its_cmd_get_deviceid(its_cmd);
1028         bool valid = its_cmd_get_validbit(its_cmd);
1029         u8 num_eventid_bits = its_cmd_get_size(its_cmd);
1030         gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd);
1031         struct its_device *device;
1032
1033         if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL))
1034                 return E_ITS_MAPD_DEVICE_OOR;
1035
1036         if (valid && num_eventid_bits > VITS_TYPER_IDBITS)
1037                 return E_ITS_MAPD_ITTSIZE_OOR;
1038
1039         device = find_its_device(its, device_id);
1040
1041         /*
1042          * The spec says that calling MAPD on an already mapped device
1043          * invalidates all cached data for this device. We implement this
1044          * by removing the mapping and re-establishing it.
1045          */
1046         if (device)
1047                 vgic_its_free_device(kvm, device);
1048
1049         /*
1050          * The spec does not say whether unmapping a device that is not
1051          * mapped is an error, so we are done in any case.
1052          */
1053         if (!valid)
1054                 return 0;
1055
1056         device = vgic_its_alloc_device(its, device_id, itt_addr,
1057                                        num_eventid_bits);
1058
1059         return PTR_ERR_OR_ZERO(device);
1060 }
1061
1062 /*
1063  * The MAPC command maps collection IDs to redistributors.
1064  * Must be called with the its_lock mutex held.
1065  */
1066 static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
1067                                     u64 *its_cmd)
1068 {
1069         u16 coll_id;
1070         u32 target_addr;
1071         struct its_collection *collection;
1072         bool valid;
1073
1074         valid = its_cmd_get_validbit(its_cmd);
1075         coll_id = its_cmd_get_collection(its_cmd);
1076         target_addr = its_cmd_get_target_addr(its_cmd);
1077
1078         if (target_addr >= atomic_read(&kvm->online_vcpus))
1079                 return E_ITS_MAPC_PROCNUM_OOR;
1080
1081         if (!valid) {
1082                 vgic_its_free_collection(its, coll_id);
1083         } else {
1084                 collection = find_collection(its, coll_id);
1085
1086                 if (!collection) {
1087                         int ret;
1088
1089                         ret = vgic_its_alloc_collection(its, &collection,
1090                                                         coll_id);
1091                         if (ret)
1092                                 return ret;
1093                         collection->target_addr = target_addr;
1094                 } else {
1095                         collection->target_addr = target_addr;
1096                         update_affinity_collection(kvm, its, collection);
1097                 }
1098         }
1099
1100         return 0;
1101 }
1102
1103 /*
1104  * The CLEAR command removes the pending state for a particular LPI.
1105  * Must be called with the its_lock mutex held.
1106  */
1107 static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
1108                                      u64 *its_cmd)
1109 {
1110         u32 device_id = its_cmd_get_deviceid(its_cmd);
1111         u32 event_id = its_cmd_get_id(its_cmd);
1112         struct its_ite *ite;
1113
1114
1115         ite = find_ite(its, device_id, event_id);
1116         if (!ite)
1117                 return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
1118
1119         ite->irq->pending_latch = false;
1120
1121         if (ite->irq->hw)
1122                 return irq_set_irqchip_state(ite->irq->host_irq,
1123                                              IRQCHIP_STATE_PENDING, false);
1124
1125         return 0;
1126 }
1127
1128 /*
1129  * The INV command syncs the configuration bits from the memory table.
1130  * Must be called with the its_lock mutex held.
1131  */
1132 static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
1133                                    u64 *its_cmd)
1134 {
1135         u32 device_id = its_cmd_get_deviceid(its_cmd);
1136         u32 event_id = its_cmd_get_id(its_cmd);
1137         struct its_ite *ite;
1138
1139
1140         ite = find_ite(its, device_id, event_id);
1141         if (!ite)
1142                 return E_ITS_INV_UNMAPPED_INTERRUPT;
1143
1144         return update_lpi_config(kvm, ite->irq, NULL, true);
1145 }
1146
1147 /*
1148  * The INVALL command requests flushing of all IRQ data in this collection.
1149  * Find the VCPU mapped to that collection, then iterate over the VM's list
1150  * of mapped LPIs and update the configuration for each IRQ which targets
1151  * the specified vcpu. The configuration will be read from the in-memory
1152  * configuration table.
1153  * Must be called with the its_lock mutex held.
1154  */
1155 static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
1156                                       u64 *its_cmd)
1157 {
1158         u32 coll_id = its_cmd_get_collection(its_cmd);
1159         struct its_collection *collection;
1160         struct kvm_vcpu *vcpu;
1161         struct vgic_irq *irq;
1162         u32 *intids;
1163         int irq_count, i;
1164
1165         collection = find_collection(its, coll_id);
1166         if (!its_is_collection_mapped(collection))
1167                 return E_ITS_INVALL_UNMAPPED_COLLECTION;
1168
1169         vcpu = kvm_get_vcpu(kvm, collection->target_addr);
1170
1171         irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
1172         if (irq_count < 0)
1173                 return irq_count;
1174
1175         for (i = 0; i < irq_count; i++) {
1176                 irq = vgic_get_irq(kvm, NULL, intids[i]);
1177                 if (!irq)
1178                         continue;
1179                 update_lpi_config(kvm, irq, vcpu, false);
1180                 vgic_put_irq(kvm, irq);
1181         }
1182
1183         kfree(intids);
1184
1185         if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
1186                 its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
1187
1188         return 0;
1189 }
1190
1191 /*
1192  * The MOVALL command moves the pending state of all IRQs targeting one
1193  * redistributor to another. We don't hold the pending state in the VCPUs,
1194  * but in the IRQs instead, so there is really not much to do for us here.
1195  * However the spec says that no IRQ must target the old redistributor
1196  * afterwards, so we make sure that no LPI is using the associated target_vcpu.
1197  * This command affects all LPIs in the system that target that redistributor.
1198  */
1199 static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
1200                                       u64 *its_cmd)
1201 {
1202         u32 target1_addr = its_cmd_get_target_addr(its_cmd);
1203         u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
1204         struct kvm_vcpu *vcpu1, *vcpu2;
1205         struct vgic_irq *irq;
1206         u32 *intids;
1207         int irq_count, i;
1208
1209         if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
1210             target2_addr >= atomic_read(&kvm->online_vcpus))
1211                 return E_ITS_MOVALL_PROCNUM_OOR;
1212
1213         if (target1_addr == target2_addr)
1214                 return 0;
1215
1216         vcpu1 = kvm_get_vcpu(kvm, target1_addr);
1217         vcpu2 = kvm_get_vcpu(kvm, target2_addr);
1218
1219         irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
1220         if (irq_count < 0)
1221                 return irq_count;
1222
1223         for (i = 0; i < irq_count; i++) {
1224                 irq = vgic_get_irq(kvm, NULL, intids[i]);
1225
1226                 update_affinity(irq, vcpu2);
1227
1228                 vgic_put_irq(kvm, irq);
1229         }
1230
1231         kfree(intids);
1232         return 0;
1233 }
1234
1235 /*
1236  * The INT command injects the LPI associated with that DevID/EvID pair.
1237  * Must be called with the its_lock mutex held.
1238  */
1239 static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its,
1240                                    u64 *its_cmd)
1241 {
1242         u32 msi_data = its_cmd_get_id(its_cmd);
1243         u64 msi_devid = its_cmd_get_deviceid(its_cmd);
1244
1245         return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
1246 }
1247
1248 /*
1249  * This function is called with the its_cmd lock held, but the ITS data
1250  * structure lock dropped.
1251  */
1252 static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its,
1253                                    u64 *its_cmd)
1254 {
1255         int ret = -ENODEV;
1256
1257         mutex_lock(&its->its_lock);
1258         switch (its_cmd_get_command(its_cmd)) {
1259         case GITS_CMD_MAPD:
1260                 ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd);
1261                 break;
1262         case GITS_CMD_MAPC:
1263                 ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd);
1264                 break;
1265         case GITS_CMD_MAPI:
1266                 ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd);
1267                 break;
1268         case GITS_CMD_MAPTI:
1269                 ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd);
1270                 break;
1271         case GITS_CMD_MOVI:
1272                 ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd);
1273                 break;
1274         case GITS_CMD_DISCARD:
1275                 ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd);
1276                 break;
1277         case GITS_CMD_CLEAR:
1278                 ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd);
1279                 break;
1280         case GITS_CMD_MOVALL:
1281                 ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd);
1282                 break;
1283         case GITS_CMD_INT:
1284                 ret = vgic_its_cmd_handle_int(kvm, its, its_cmd);
1285                 break;
1286         case GITS_CMD_INV:
1287                 ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd);
1288                 break;
1289         case GITS_CMD_INVALL:
1290                 ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd);
1291                 break;
1292         case GITS_CMD_SYNC:
1293                 /* we ignore this command: we are in sync all of the time */
1294                 ret = 0;
1295                 break;
1296         }
1297         mutex_unlock(&its->its_lock);
1298
1299         return ret;
1300 }
1301
1302 static u64 vgic_sanitise_its_baser(u64 reg)
1303 {
1304         reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK,
1305                                   GITS_BASER_SHAREABILITY_SHIFT,
1306                                   vgic_sanitise_shareability);
1307         reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK,
1308                                   GITS_BASER_INNER_CACHEABILITY_SHIFT,
1309                                   vgic_sanitise_inner_cacheability);
1310         reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK,
1311                                   GITS_BASER_OUTER_CACHEABILITY_SHIFT,
1312                                   vgic_sanitise_outer_cacheability);
1313
1314         /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */
1315         reg &= ~GENMASK_ULL(15, 12);
1316
1317         /* We support only one (ITS) page size: 64K */
1318         reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K;
1319
1320         return reg;
1321 }
1322
1323 static u64 vgic_sanitise_its_cbaser(u64 reg)
1324 {
1325         reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK,
1326                                   GITS_CBASER_SHAREABILITY_SHIFT,
1327                                   vgic_sanitise_shareability);
1328         reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK,
1329                                   GITS_CBASER_INNER_CACHEABILITY_SHIFT,
1330                                   vgic_sanitise_inner_cacheability);
1331         reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK,
1332                                   GITS_CBASER_OUTER_CACHEABILITY_SHIFT,
1333                                   vgic_sanitise_outer_cacheability);
1334
1335         /*
1336          * Sanitise the physical address to be 64k aligned.
1337          * Also limit the physical addresses to 48 bits.
1338          */
1339         reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12));
1340
1341         return reg;
1342 }
1343
1344 static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm,
1345                                                struct vgic_its *its,
1346                                                gpa_t addr, unsigned int len)
1347 {
1348         return extract_bytes(its->cbaser, addr & 7, len);
1349 }
1350
1351 static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
1352                                        gpa_t addr, unsigned int len,
1353                                        unsigned long val)
1354 {
1355         /* When GITS_CTLR.Enable is 1, this register is RO. */
1356         if (its->enabled)
1357                 return;
1358
1359         mutex_lock(&its->cmd_lock);
1360         its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val);
1361         its->cbaser = vgic_sanitise_its_cbaser(its->cbaser);
1362         its->creadr = 0;
1363         /*
1364          * CWRITER is architecturally UNKNOWN on reset, but we need to reset
1365          * it to CREADR to make sure we start with an empty command buffer.
1366          */
1367         its->cwriter = its->creadr;
1368         mutex_unlock(&its->cmd_lock);
1369 }
1370
1371 #define ITS_CMD_BUFFER_SIZE(baser)      ((((baser) & 0xff) + 1) << 12)
1372 #define ITS_CMD_SIZE                    32
1373 #define ITS_CMD_OFFSET(reg)             ((reg) & GENMASK(19, 5))
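A worked sizing example (illustrative numbers): a CBASER Size field of 1 means two 4K pages, so ITS_CMD_BUFFER_SIZE() yields 8192 bytes, i.e. room for 8192 / 32 = 256 commands; CREADR and CWRITER hold byte offsets into that window (bits 19:5, so always 32-byte aligned), and vgic_its_process_commands() below wraps creadr back to 0 once it reaches that size.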
1374
1375 /* Must be called with the cmd_lock held. */
1376 static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
1377 {
1378         gpa_t cbaser;
1379         u64 cmd_buf[4];
1380
1381         /* Commands are only processed when the ITS is enabled. */
1382         if (!its->enabled)
1383                 return;
1384
1385         cbaser = CBASER_ADDRESS(its->cbaser);
1386
1387         while (its->cwriter != its->creadr) {
1388                 int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
1389                                               cmd_buf, ITS_CMD_SIZE);
1390                 /*
1391                  * If kvm_read_guest_lock() fails, this could be due to the guest
1392                  * programming a bogus value in CBASER or something else going
1393                  * wrong from which we cannot easily recover.
1394                  * According to section 6.3.2 in the GICv3 spec we can just
1395                  * ignore that command then.
1396                  */
1397                 if (!ret)
1398                         vgic_its_handle_command(kvm, its, cmd_buf);
1399
1400                 its->creadr += ITS_CMD_SIZE;
1401                 if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
1402                         its->creadr = 0;
1403         }
1404 }
1405
1406 /*
1407  * By writing to CWRITER the guest announces new commands to be processed.
1408  * To avoid any races in the first place, we take the its_cmd lock, which
1409  * protects our ring buffer variables, so that there is only one user
1410  * per ITS handling commands at a given time.
1411  */
1412 static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
1413                                         gpa_t addr, unsigned int len,
1414                                         unsigned long val)
1415 {
1416         u64 reg;
1417
1418         if (!its)
1419                 return;
1420
1421         mutex_lock(&its->cmd_lock);
1422
1423         reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
1424         reg = ITS_CMD_OFFSET(reg);
1425         if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
1426                 mutex_unlock(&its->cmd_lock);
1427                 return;
1428         }
1429         its->cwriter = reg;
1430
1431         vgic_its_process_commands(kvm, its);
1432
1433         mutex_unlock(&its->cmd_lock);
1434 }
1435
1436 static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm,
1437                                                 struct vgic_its *its,
1438                                                 gpa_t addr, unsigned int len)
1439 {
1440         return extract_bytes(its->cwriter, addr & 0x7, len);
1441 }
1442
1443 static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm,
1444                                                struct vgic_its *its,
1445                                                gpa_t addr, unsigned int len)
1446 {
1447         return extract_bytes(its->creadr, addr & 0x7, len);
1448 }
1449
1450 static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm,
1451                                               struct vgic_its *its,
1452                                               gpa_t addr, unsigned int len,
1453                                               unsigned long val)
1454 {
1455         u32 cmd_offset;
1456         int ret = 0;
1457
1458         mutex_lock(&its->cmd_lock);
1459
1460         if (its->enabled) {
1461                 ret = -EBUSY;
1462                 goto out;
1463         }
1464
1465         cmd_offset = ITS_CMD_OFFSET(val);
1466         if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
1467                 ret = -EINVAL;
1468                 goto out;
1469         }
1470
1471         its->creadr = cmd_offset;
1472 out:
1473         mutex_unlock(&its->cmd_lock);
1474         return ret;
1475 }
1476
1477 #define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7)
1478 static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm,
1479                                               struct vgic_its *its,
1480                                               gpa_t addr, unsigned int len)
1481 {
1482         u64 reg;
1483
1484         switch (BASER_INDEX(addr)) {
1485         case 0:
1486                 reg = its->baser_device_table;
1487                 break;
1488         case 1:
1489                 reg = its->baser_coll_table;
1490                 break;
1491         default:
1492                 reg = 0;
1493                 break;
1494         }
1495
1496         return extract_bytes(reg, addr & 7, len);
1497 }
1498
1499 #define GITS_BASER_RO_MASK      (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56))
1500 static void vgic_mmio_write_its_baser(struct kvm *kvm,
1501                                       struct vgic_its *its,
1502                                       gpa_t addr, unsigned int len,
1503                                       unsigned long val)
1504 {
1505         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
1506         u64 entry_size, table_type;
1507         u64 reg, *regptr, clearbits = 0;
1508
1509         /* When GITS_CTLR.Enable is 1, we ignore write accesses. */
1510         if (its->enabled)
1511                 return;
1512
1513         switch (BASER_INDEX(addr)) {
1514         case 0:
1515                 regptr = &its->baser_device_table;
1516                 entry_size = abi->dte_esz;
1517                 table_type = GITS_BASER_TYPE_DEVICE;
1518                 break;
1519         case 1:
1520                 regptr = &its->baser_coll_table;
1521                 entry_size = abi->cte_esz;
1522                 table_type = GITS_BASER_TYPE_COLLECTION;
1523                 clearbits = GITS_BASER_INDIRECT;
1524                 break;
1525         default:
1526                 return;
1527         }
1528
1529         reg = update_64bit_reg(*regptr, addr & 7, len, val);
1530         reg &= ~GITS_BASER_RO_MASK;
1531         reg &= ~clearbits;
1532
1533         reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT;
1534         reg |= table_type << GITS_BASER_TYPE_SHIFT;
1535         reg = vgic_sanitise_its_baser(reg);
1536
1537         *regptr = reg;
1538
1539         if (!(reg & GITS_BASER_VALID)) {
1540                 /* Take the its_lock to prevent a race with a save/restore */
1541                 mutex_lock(&its->its_lock);
1542                 switch (table_type) {
1543                 case GITS_BASER_TYPE_DEVICE:
1544                         vgic_its_free_device_list(kvm, its);
1545                         break;
1546                 case GITS_BASER_TYPE_COLLECTION:
1547                         vgic_its_free_collection_list(kvm, its);
1548                         break;
1549                 }
1550                 mutex_unlock(&its->its_lock);
1551         }
1552 }
1553
1554 static unsigned long vgic_mmio_read_its_ctlr(struct kvm *kvm,
1555                                              struct vgic_its *its,
1556                                              gpa_t addr, unsigned int len)
1557 {
1558         u32 reg = 0;
1559
1560         mutex_lock(&its->cmd_lock);
1561         if (its->creadr == its->cwriter)
1562                 reg |= GITS_CTLR_QUIESCENT;
1563         if (its->enabled)
1564                 reg |= GITS_CTLR_ENABLE;
1565         mutex_unlock(&its->cmd_lock);
1566
1567         return reg;
1568 }
1569
1570 static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
1571                                      gpa_t addr, unsigned int len,
1572                                      unsigned long val)
1573 {
1574         mutex_lock(&its->cmd_lock);
1575
1576         /*
1577          * It is UNPREDICTABLE to enable the ITS if any of CBASER or the
1578          * device/collection BASERs is invalid.
1579          */
1580         if (!its->enabled && (val & GITS_CTLR_ENABLE) &&
1581                 (!(its->baser_device_table & GITS_BASER_VALID) ||
1582                  !(its->baser_coll_table & GITS_BASER_VALID) ||
1583                  !(its->cbaser & GITS_CBASER_VALID)))
1584                 goto out;
1585
1586         its->enabled = !!(val & GITS_CTLR_ENABLE);
1587
1588         /*
1589          * Try to process any pending commands. This function bails out early
1590          * if the ITS is disabled or no commands have been queued.
1591          */
1592         vgic_its_process_commands(kvm, its);
1593
1594 out:
1595         mutex_unlock(&its->cmd_lock);
1596 }
1597
1598 #define REGISTER_ITS_DESC(off, rd, wr, length, acc)             \
1599 {                                                               \
1600         .reg_offset = off,                                      \
1601         .len = length,                                          \
1602         .access_flags = acc,                                    \
1603         .its_read = rd,                                         \
1604         .its_write = wr,                                        \
1605 }
1606
1607 #define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\
1608 {                                                               \
1609         .reg_offset = off,                                      \
1610         .len = length,                                          \
1611         .access_flags = acc,                                    \
1612         .its_read = rd,                                         \
1613         .its_write = wr,                                        \
1614         .uaccess_its_write = uwr,                               \
1615 }
1616
1617 static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its,
1618                               gpa_t addr, unsigned int len, unsigned long val)
1619 {
1620         /* Ignore */
1621 }
1622
1623 static struct vgic_register_region its_registers[] = {
1624         REGISTER_ITS_DESC(GITS_CTLR,
1625                 vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4,
1626                 VGIC_ACCESS_32bit),
1627         REGISTER_ITS_DESC_UACCESS(GITS_IIDR,
1628                 vgic_mmio_read_its_iidr, its_mmio_write_wi,
1629                 vgic_mmio_uaccess_write_its_iidr, 4,
1630                 VGIC_ACCESS_32bit),
1631         REGISTER_ITS_DESC(GITS_TYPER,
1632                 vgic_mmio_read_its_typer, its_mmio_write_wi, 8,
1633                 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
1634         REGISTER_ITS_DESC(GITS_CBASER,
1635                 vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8,
1636                 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
1637         REGISTER_ITS_DESC(GITS_CWRITER,
1638                 vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8,
1639                 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
1640         REGISTER_ITS_DESC_UACCESS(GITS_CREADR,
1641                 vgic_mmio_read_its_creadr, its_mmio_write_wi,
1642                 vgic_mmio_uaccess_write_its_creadr, 8,
1643                 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
1644         REGISTER_ITS_DESC(GITS_BASER,
1645                 vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40,
1646                 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
1647         REGISTER_ITS_DESC(GITS_IDREGS_BASE,
1648                 vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30,
1649                 VGIC_ACCESS_32bit),
1650 };
1651
1652 /* This is called on setting the LPI enable bit in the redistributor. */
1653 void vgic_enable_lpis(struct kvm_vcpu *vcpu)
1654 {
1655         if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ))
1656                 its_sync_lpi_pending_table(vcpu);
1657 }
1658
1659 static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its,
1660                                    u64 addr)
1661 {
1662         struct vgic_io_device *iodev = &its->iodev;
1663         int ret;
1664
1665         mutex_lock(&kvm->slots_lock);
1666         if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
1667                 ret = -EBUSY;
1668                 goto out;
1669         }
1670
1671         its->vgic_its_base = addr;
1672         iodev->regions = its_registers;
1673         iodev->nr_regions = ARRAY_SIZE(its_registers);
1674         kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops);
1675
1676         iodev->base_addr = its->vgic_its_base;
1677         iodev->iodev_type = IODEV_ITS;
1678         iodev->its = its;
1679         ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr,
1680                                       KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
1681 out:
1682         mutex_unlock(&kvm->slots_lock);
1683
1684         return ret;
1685 }
1686
1687 #define INITIAL_BASER_VALUE                                               \
1688         (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)                | \
1689          GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner)         | \
1690          GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)             | \
1691          GITS_BASER_PAGE_SIZE_64K)
1692
1693 #define INITIAL_PROPBASER_VALUE                                           \
1694         (GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)            | \
1695          GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner)     | \
1696          GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable))
1697
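/*
 * A rough sketch of the userspace flow that ends up in vgic_its_create() and
 * vgic_its_set_attr()/vgic_register_its_iodev(); the fd names are
 * hypothetical, the ioctls and structures are the standard KVM device API:
 *
 *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_ARM_VGIC_ITS };
 *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);		// -> vgic_its_create()
 *
 *	__u64 its_base_gpa = ...;			// 64K-aligned guest PA
 *	struct kvm_device_attr attr = {
 *		.group	= KVM_DEV_ARM_VGIC_GRP_ADDR,
 *		.attr	= KVM_VGIC_ITS_ADDR_TYPE,
 *		.addr	= (__u64)&its_base_gpa,
 *	};
 *	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);	// -> vgic_register_its_iodev()
 */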
1698 static int vgic_its_create(struct kvm_device *dev, u32 type)
1699 {
1700         struct vgic_its *its;
1701
1702         if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
1703                 return -ENODEV;
1704
1705         its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
1706         if (!its)
1707                 return -ENOMEM;
1708
1709         if (vgic_initialized(dev->kvm)) {
1710                 int ret = vgic_v4_init(dev->kvm);
1711                 if (ret < 0) {
1712                         kfree(its);
1713                         return ret;
1714                 }
1715         }
1716
1717         mutex_init(&its->its_lock);
1718         mutex_init(&its->cmd_lock);
1719
1720         its->vgic_its_base = VGIC_ADDR_UNDEF;
1721
1722         INIT_LIST_HEAD(&its->device_list);
1723         INIT_LIST_HEAD(&its->collection_list);
1724
1725         dev->kvm->arch.vgic.msis_require_devid = true;
1726         dev->kvm->arch.vgic.has_its = true;
1727         its->enabled = false;
1728         its->dev = dev;
1729
1730         its->baser_device_table = INITIAL_BASER_VALUE                   |
1731                 ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT);
1732         its->baser_coll_table = INITIAL_BASER_VALUE |
1733                 ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT);
1734         dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE;
1735
1736         dev->private = its;
1737
1738         return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
1739 }
1740
1741 static void vgic_its_destroy(struct kvm_device *kvm_dev)
1742 {
1743         struct kvm *kvm = kvm_dev->kvm;
1744         struct vgic_its *its = kvm_dev->private;
1745
1746         mutex_lock(&its->its_lock);
1747
1748         vgic_its_free_device_list(kvm, its);
1749         vgic_its_free_collection_list(kvm, its);
1750
1751         mutex_unlock(&its->its_lock);
1752         kfree(its);
1753         kfree(kvm_dev); /* allocated by kvm_ioctl_create_device(), freed by .destroy */
1754 }
1755
1756 int vgic_its_has_attr_regs(struct kvm_device *dev,
1757                            struct kvm_device_attr *attr)
1758 {
1759         const struct vgic_register_region *region;
1760         gpa_t offset = attr->attr;
1761         int align;
1762
1763         align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7;
1764
1765         if (offset & align)
1766                 return -EINVAL;
1767
1768         region = vgic_find_mmio_region(its_registers,
1769                                        ARRAY_SIZE(its_registers),
1770                                        offset);
1771         if (!region)
1772                 return -ENXIO;
1773
1774         return 0;
1775 }
1776
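/*
 * Individual ITS register accesses from userspace (used for save/restore)
 * arrive here via KVM_GET_DEVICE_ATTR / KVM_SET_DEVICE_ATTR with group
 * KVM_DEV_ARM_VGIC_GRP_ITS_REGS: attr->attr carries the register offset
 * relative to the ITS base, and the value is 32 or 64 bits wide depending
 * on the register.
 */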
1777 int vgic_its_attr_regs_access(struct kvm_device *dev,
1778                               struct kvm_device_attr *attr,
1779                               u64 *reg, bool is_write)
1780 {
1781         const struct vgic_register_region *region;
1782         struct vgic_its *its;
1783         gpa_t addr, offset;
1784         unsigned int len;
1785         int align, ret = 0;
1786
1787         its = dev->private;
1788         offset = attr->attr;
1789
1790         /*
1791          * Although the spec supports upper/lower 32-bit accesses to
1792          * 64-bit ITS registers, the userspace ABI requires 64-bit
1793          * accesses to all 64-bit wide registers. We therefore only
1794          * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID
1795          * registers
1796          */
1797         if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4))
1798                 align = 0x3;
1799         else
1800                 align = 0x7;
1801
1802         if (offset & align)
1803                 return -EINVAL;
1804
1805         mutex_lock(&dev->kvm->lock);
1806
1807         if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
1808                 ret = -ENXIO;
1809                 goto out;
1810         }
1811
1812         region = vgic_find_mmio_region(its_registers,
1813                                        ARRAY_SIZE(its_registers),
1814                                        offset);
1815         if (!region) {
1816                 ret = -ENXIO;
1817                 goto out;
1818         }
1819
1820         if (!lock_all_vcpus(dev->kvm)) {
1821                 ret = -EBUSY;
1822                 goto out;
1823         }
1824
1825         addr = its->vgic_its_base + offset;
1826
1827         len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4;
1828
1829         if (is_write) {
1830                 if (region->uaccess_its_write)
1831                         ret = region->uaccess_its_write(dev->kvm, its, addr,
1832                                                         len, *reg);
1833                 else
1834                         region->its_write(dev->kvm, its, addr, len, *reg);
1835         } else {
1836                 *reg = region->its_read(dev->kvm, its, addr, len);
1837         }
1838         unlock_all_vcpus(dev->kvm);
1839 out:
1840         mutex_unlock(&dev->kvm->lock);
1841         return ret;
1842 }
1843
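/*
 * The "next" offset stored in a saved DTE/ITE is the distance (in IDs) to
 * the following entry of the sorted list, capped at the maximum value the
 * table entry format can encode; an offset of 0 marks the last entry.
 */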
1844 static u32 compute_next_devid_offset(struct list_head *h,
1845                                      struct its_device *dev)
1846 {
1847         struct its_device *next;
1848         u32 next_offset;
1849
1850         if (list_is_last(&dev->dev_list, h))
1851                 return 0;
1852         next = list_next_entry(dev, dev_list);
1853         next_offset = next->device_id - dev->device_id;
1854
1855         return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET);
1856 }
1857
1858 static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
1859 {
1860         struct its_ite *next;
1861         u32 next_offset;
1862
1863         if (list_is_last(&ite->ite_list, h))
1864                 return 0;
1865         next = list_next_entry(ite, ite_list);
1866         next_offset = next->event_id - ite->event_id;
1867
1868         return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET);
1869 }
1870
1871 /**
1872  * entry_fn_t - Callback called on a table entry restore path
1873  * @its: its handle
1874  * @id: id of the entry
1875  * @entry: pointer to the entry
1876  * @opaque: pointer to opaque data
1877  *
1878  * Return: < 0 on error, 0 if last element was identified, id offset to next
1879  * element otherwise
1880  */
1881 typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
1882                           void *opaque);
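/*
 * handle_l1_dte(), vgic_its_restore_dte() and vgic_its_restore_ite() below
 * all implement this callback.
 */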
1883
1884 /**
1885  * scan_its_table - Scan a contiguous table in guest RAM and apply a function
1886  * to each entry
1887  *
1888  * @its: its handle
1889  * @base: base gpa of the table
1890  * @size: size of the table in bytes
1891  * @esz: entry size in bytes
1892  * @start_id: the ID of the first entry in the table
1893  * (non-zero for second level tables)
1894  * @fn: function to apply on each entry
1895  *
1896  * Return: < 0 on error, 0 if last element was identified, 1 otherwise
1897  * (the last element may not be found on second level tables)
1898  */
1899 static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
1900                           int start_id, entry_fn_t fn, void *opaque)
1901 {
1902         struct kvm *kvm = its->dev->kvm;
1903         unsigned long len = size;
1904         int id = start_id;
1905         gpa_t gpa = base;
1906         char entry[ESZ_MAX];
1907         int ret;
1908
1909         memset(entry, 0, esz);
1910
1911         while (len > 0) {
1912                 int next_offset;
1913                 size_t byte_offset;
1914
1915                 ret = kvm_read_guest_lock(kvm, gpa, entry, esz);
1916                 if (ret)
1917                         return ret;
1918
1919                 next_offset = fn(its, id, entry, opaque);
1920                 if (next_offset <= 0)
1921                         return next_offset;
1922
1923                 byte_offset = next_offset * esz;
1924                 id += next_offset;
1925                 gpa += byte_offset;
1926                 len -= byte_offset;
1927         }
1928         return 1;
1929 }
1930
1931 /**
1932  * vgic_its_save_ite - Save an interrupt translation entry at @gpa
1933  */
1934 static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
1935                               struct its_ite *ite, gpa_t gpa, int ite_esz)
1936 {
1937         struct kvm *kvm = its->dev->kvm;
1938         u32 next_offset;
1939         u64 val;
1940
1941         next_offset = compute_next_eventid_offset(&dev->itt_head, ite);
1942         val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) |
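        /*
         * Pack the offset to the next event ID, the physical INTID and the
         * collection ID into one 64-bit word using the KVM_ITS_ITE_* field
         * definitions (presumably next[63:48], pINTID[47:16] and ICID[15:0]
         * in the v0 ABI), stored little-endian in guest RAM.
         */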
1943                ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) |
1944                 ite->collection->collection_id;
1945         val = cpu_to_le64(val);
1946         return kvm_write_guest_lock(kvm, gpa, &val, ite_esz);
1947 }
1948
1949 /**
1950  * vgic_its_restore_ite - restore an interrupt translation entry
1951  * @event_id: id used for indexing
1952  * @ptr: pointer to the ITE
1953  * @opaque: pointer to the its_device
1954  */
1955 static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
1956                                 void *ptr, void *opaque)
1957 {
1958         struct its_device *dev = (struct its_device *)opaque;
1959         struct its_collection *collection;
1960         struct kvm *kvm = its->dev->kvm;
1961         struct kvm_vcpu *vcpu = NULL;
1962         u64 val;
1963         u64 *p = (u64 *)ptr;
1964         struct vgic_irq *irq;
1965         u32 coll_id, lpi_id;
1966         struct its_ite *ite;
1967         u32 offset;
1968
1969         val = *p;
1970
1971         val = le64_to_cpu(val);
1972
1973         coll_id = val & KVM_ITS_ITE_ICID_MASK;
1974         lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT;
1975
1976         if (!lpi_id)
1977                 return 1; /* invalid entry, no choice but to scan next entry */
1978
1979         if (lpi_id < VGIC_MIN_LPI)
1980                 return -EINVAL;
1981
1982         offset = val >> KVM_ITS_ITE_NEXT_SHIFT;
1983         if (event_id + offset >= BIT_ULL(dev->num_eventid_bits))
1984                 return -EINVAL;
1985
1986         collection = find_collection(its, coll_id);
1987         if (!collection)
1988                 return -EINVAL;
1989
1990         ite = vgic_its_alloc_ite(dev, collection, event_id);
1991         if (IS_ERR(ite))
1992                 return PTR_ERR(ite);
1993
1994         if (its_is_collection_mapped(collection))
1995                 vcpu = kvm_get_vcpu(kvm, collection->target_addr);
1996
1997         irq = vgic_add_lpi(kvm, lpi_id, vcpu);
1998         if (IS_ERR(irq))
1999                 return PTR_ERR(irq);
2000         ite->irq = irq;
2001
2002         return offset;
2003 }
2004
2005 static int vgic_its_ite_cmp(void *priv, struct list_head *a,
2006                             struct list_head *b)
2007 {
2008         struct its_ite *itea = container_of(a, struct its_ite, ite_list);
2009         struct its_ite *iteb = container_of(b, struct its_ite, ite_list);
2010
2011         if (itea->event_id < iteb->event_id)
2012                 return -1;
2013         else
2014                 return 1;
2015 }
2016
2017 static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
2018 {
2019         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2020         gpa_t base = device->itt_addr;
2021         struct its_ite *ite;
2022         int ret;
2023         int ite_esz = abi->ite_esz;
2024
2025         list_sort(NULL, &device->itt_head, vgic_its_ite_cmp);
2026
2027         list_for_each_entry(ite, &device->itt_head, ite_list) {
2028                 gpa_t gpa = base + ite->event_id * ite_esz;
2029
2030                 /*
2031                  * If an LPI carries the HW bit, this means that this
2032                  * interrupt is controlled by GICv4, and we do not
2033                  * have direct access to that state. Let's simply fail
2034                  * the save operation...
2035                  */
2036                 if (ite->irq->hw)
2037                         return -EACCES;
2038
2039                 ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);
2040                 if (ret)
2041                         return ret;
2042         }
2043         return 0;
2044 }
2045
2046 /**
2047  * vgic_its_restore_itt - restore the ITT of a device
2048  *
2049  * @its: its handle
2050  * @dev: device handle
2051  *
2052  * Return 0 on success, < 0 on error
2053  */
2054 static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
2055 {
2056         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2057         gpa_t base = dev->itt_addr;
2058         int ret;
2059         int ite_esz = abi->ite_esz;
2060         size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz;
2061
2062         ret = scan_its_table(its, base, max_size, ite_esz, 0,
2063                              vgic_its_restore_ite, dev);
2064
2065         /* scan_its_table returns +1 if all ITEs are invalid */
2066         if (ret > 0)
2067                 ret = 0;
2068
2069         return ret;
2070 }
2071
2072 /**
2073  * vgic_its_save_dte - Save a device table entry at a given GPA
2074  *
2075  * @its: ITS handle
2076  * @dev: ITS device
2077  * @ptr: GPA
2078  */
2079 static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
2080                              gpa_t ptr, int dte_esz)
2081 {
2082         struct kvm *kvm = its->dev->kvm;
2083         u64 val, itt_addr_field;
2084         u32 next_offset;
2085
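        /*
         * Pack the valid bit, the offset to the next device ID, the ITT
         * address (256-byte aligned, hence the >> 8) and the number of
         * event ID bits minus one, using the KVM_ITS_DTE_* field
         * definitions (presumably valid[63], next[62:49], ITT_addr[48:5]
         * and size[4:0] in the v0 ABI).
         */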
2086         itt_addr_field = dev->itt_addr >> 8;
2087         next_offset = compute_next_devid_offset(&its->device_list, dev);
2088         val = (1ULL << KVM_ITS_DTE_VALID_SHIFT |
2089                ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) |
2090                (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) |
2091                 (dev->num_eventid_bits - 1));
2092         val = cpu_to_le64(val);
2093         return kvm_write_guest_lock(kvm, ptr, &val, dte_esz);
2094 }
2095
2096 /**
2097  * vgic_its_restore_dte - restore a device table entry
2098  *
2099  * @its: its handle
2100  * @id: device id the DTE corresponds to
2101  * @ptr: kernel VA where the 8-byte DTE is located
2102  * @opaque: unused
2103  *
2104  * Return: < 0 on error, 0 if the dte is the last one, id offset to the
2105  * next dte otherwise
2106  */
2107 static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
2108                                 void *ptr, void *opaque)
2109 {
2110         struct its_device *dev;
2111         gpa_t itt_addr;
2112         u8 num_eventid_bits;
2113         u64 entry = *(u64 *)ptr;
2114         bool valid;
2115         u32 offset;
2116         int ret;
2117
2118         entry = le64_to_cpu(entry);
2119
2120         valid = entry >> KVM_ITS_DTE_VALID_SHIFT;
2121         num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1;
2122         itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK)
2123                         >> KVM_ITS_DTE_ITTADDR_SHIFT) << 8;
2124
2125         if (!valid)
2126                 return 1;
2127
2128         /* dte entry is valid */
2129         offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
2130
2131         dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits);
2132         if (IS_ERR(dev))
2133                 return PTR_ERR(dev);
2134
2135         ret = vgic_its_restore_itt(its, dev);
2136         if (ret) {
2137                 vgic_its_free_device(its->dev->kvm, dev);
2138                 return ret;
2139         }
2140
2141         return offset;
2142 }
2143
2144 static int vgic_its_device_cmp(void *priv, struct list_head *a,
2145                                struct list_head *b)
2146 {
2147         struct its_device *deva = container_of(a, struct its_device, dev_list);
2148         struct its_device *devb = container_of(b, struct its_device, dev_list);
2149
2150         if (deva->device_id < devb->device_id)
2151                 return -1;
2152         else
2153                 return 1;
2154 }
2155
2156 /**
2157  * vgic_its_save_device_tables - Save the device table and all ITT
2158  * into guest RAM
2159  *
2160  * L1/L2 handling is hidden by the vgic_its_check_id() helper, which directly
2161  * returns the GPA of the device entry.
2162  */
2163 static int vgic_its_save_device_tables(struct vgic_its *its)
2164 {
2165         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2166         u64 baser = its->baser_device_table;
2167         struct its_device *dev;
2168         int dte_esz = abi->dte_esz;
2169
2170         if (!(baser & GITS_BASER_VALID))
2171                 return 0;
2172
2173         list_sort(NULL, &its->device_list, vgic_its_device_cmp);
2174
2175         list_for_each_entry(dev, &its->device_list, dev_list) {
2176                 int ret;
2177                 gpa_t eaddr;
2178
2179                 if (!vgic_its_check_id(its, baser,
2180                                        dev->device_id, &eaddr))
2181                         return -EINVAL;
2182
2183                 ret = vgic_its_save_itt(its, dev);
2184                 if (ret)
2185                         return ret;
2186
2187                 ret = vgic_its_save_dte(its, dev, eaddr, dte_esz);
2188                 if (ret)
2189                         return ret;
2190         }
2191         return 0;
2192 }
2193
2194 /**
2195  * handle_l1_dte - callback used for L1 device table entries (two-level case)
2196  *
2197  * @its: its handle
2198  * @id: index of the entry in the L1 table
2199  * @addr: kernel VA
2200  * @opaque: unused
2201  *
2202  * L1 table entries are scanned in steps of 1 entry.
2203  * Return: < 0 on error, 0 if the last DTE was found while scanning the L2
2204  * table, +1 otherwise (meaning the next L1 entry must be scanned)
2205  */
2206 static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr,
2207                          void *opaque)
2208 {
2209         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2210         int l2_start_id = id * (SZ_64K / abi->dte_esz);
2211         u64 entry = *(u64 *)addr;
2212         int dte_esz = abi->dte_esz;
2213         gpa_t gpa;
2214         int ret;
2215
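        /*
         * Each valid L1 entry points to a 64K L2 page holding
         * SZ_64K / dte_esz device table entries, so the first entry of
         * that L2 page corresponds to device ID id * (SZ_64K / dte_esz).
         */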
2216         entry = le64_to_cpu(entry);
2217
2218         if (!(entry & KVM_ITS_L1E_VALID_MASK))
2219                 return 1;
2220
2221         gpa = entry & KVM_ITS_L1E_ADDR_MASK;
2222
2223         ret = scan_its_table(its, gpa, SZ_64K, dte_esz,
2224                              l2_start_id, vgic_its_restore_dte, NULL);
2225
2226         return ret;
2227 }
2228
2229 /**
2230  * vgic_its_restore_device_tables - Restore the device table and all ITT
2231  * from guest RAM to internal data structs
2232  */
2233 static int vgic_its_restore_device_tables(struct vgic_its *its)
2234 {
2235         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2236         u64 baser = its->baser_device_table;
2237         int l1_esz, ret;
2238         int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
2239         gpa_t l1_gpa;
2240
2241         if (!(baser & GITS_BASER_VALID))
2242                 return 0;
2243
2244         l1_gpa = BASER_ADDRESS(baser);
2245
2246         if (baser & GITS_BASER_INDIRECT) {
2247                 l1_esz = GITS_LVL1_ENTRY_SIZE;
2248                 ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
2249                                      handle_l1_dte, NULL);
2250         } else {
2251                 l1_esz = abi->dte_esz;
2252                 ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
2253                                      vgic_its_restore_dte, NULL);
2254         }
2255
2256         /* scan_its_table returns +1 if all entries are invalid */
2257         if (ret > 0)
2258                 ret = 0;
2259
2260         return ret;
2261 }
2262
2263 static int vgic_its_save_cte(struct vgic_its *its,
2264                              struct its_collection *collection,
2265                              gpa_t gpa, int esz)
2266 {
2267         u64 val;
2268
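        /*
         * Pack the valid bit, the target redistributor (stored as the vcpu
         * index kept in target_addr) and the collection ID, using the
         * KVM_ITS_CTE_* field definitions (presumably valid[63], RDBase
         * starting at bit 16 and ICID[15:0] in the v0 ABI).
         */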
2269         val = (1ULL << KVM_ITS_CTE_VALID_SHIFT |
2270                ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) |
2271                collection->collection_id);
2272         val = cpu_to_le64(val);
2273         return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz);
2274 }
2275
2276 static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
2277 {
2278         struct its_collection *collection;
2279         struct kvm *kvm = its->dev->kvm;
2280         u32 target_addr, coll_id;
2281         u64 val;
2282         int ret;
2283
2284         BUG_ON(esz > sizeof(val));
2285         ret = kvm_read_guest_lock(kvm, gpa, &val, esz);
2286         if (ret)
2287                 return ret;
2288         val = le64_to_cpu(val);
2289         if (!(val & KVM_ITS_CTE_VALID_MASK))
2290                 return 0;
2291
2292         target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT);
2293         coll_id = val & KVM_ITS_CTE_ICID_MASK;
2294
2295         if (target_addr >= atomic_read(&kvm->online_vcpus))
2296                 return -EINVAL;
2297
2298         collection = find_collection(its, coll_id);
2299         if (collection)
2300                 return -EEXIST;
2301         ret = vgic_its_alloc_collection(its, &collection, coll_id);
2302         if (ret)
2303                 return ret;
2304         collection->target_addr = target_addr;
2305         return 1;
2306 }
2307
2308 /**
2309  * vgic_its_save_collection_table - Save the collection table into
2310  * guest RAM
2311  */
2312 static int vgic_its_save_collection_table(struct vgic_its *its)
2313 {
2314         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2315         u64 baser = its->baser_coll_table;
2316         gpa_t gpa = BASER_ADDRESS(baser);
2317         struct its_collection *collection;
2318         u64 val;
2319         size_t max_size, filled = 0;
2320         int ret, cte_esz = abi->cte_esz;
2321
2322         if (!(baser & GITS_BASER_VALID))
2323                 return 0;
2324
2325         max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
2326
2327         list_for_each_entry(collection, &its->collection_list, coll_list) {
2328                 ret = vgic_its_save_cte(its, collection, gpa, cte_esz);
2329                 if (ret)
2330                         return ret;
2331                 gpa += cte_esz;
2332                 filled += cte_esz;
2333         }
2334
2335         if (filled == max_size)
2336                 return 0;
2337
2338         /*
2339          * The table is not fully filled; add a final dummy entry
2340          * with the valid bit unset.
2341          */
2342         val = 0;
2343         BUG_ON(cte_esz > sizeof(val));
2344         ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz);
2345         return ret;
2346 }
2347
2348 /**
2349  * vgic_its_restore_collection_table - reads the collection table
2350  * in guest memory and restores the ITS internal state. Requires the
2351  * BASER registers to be restored beforehand.
2352  */
2353 static int vgic_its_restore_collection_table(struct vgic_its *its)
2354 {
2355         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2356         u64 baser = its->baser_coll_table;
2357         int cte_esz = abi->cte_esz;
2358         size_t max_size, read = 0;
2359         gpa_t gpa;
2360         int ret;
2361
2362         if (!(baser & GITS_BASER_VALID))
2363                 return 0;
2364
2365         gpa = BASER_ADDRESS(baser);
2366
2367         max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
2368
2369         while (read < max_size) {
2370                 ret = vgic_its_restore_cte(its, gpa, cte_esz);
2371                 if (ret <= 0)
2372                         break;
2373                 gpa += cte_esz;
2374                 read += cte_esz;
2375         }
2376
2377         if (ret > 0)
2378                 return 0;
2379
2380         return ret;
2381 }
2382
2383 /**
2384  * vgic_its_save_tables_v0 - Save the ITS tables into guest RAM
2385  * according to v0 ABI
2386  */
2387 static int vgic_its_save_tables_v0(struct vgic_its *its)
2388 {
2389         int ret;
2390
2391         ret = vgic_its_save_device_tables(its);
2392         if (ret)
2393                 return ret;
2394
2395         return vgic_its_save_collection_table(its);
2396 }
2397
2398 /**
2399  * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM
2400  * to internal data structs according to v0 ABI
2401  *
2402  */
2403 static int vgic_its_restore_tables_v0(struct vgic_its *its)
2404 {
2405         int ret;
2406
2407         ret = vgic_its_restore_collection_table(its);
2408         if (ret)
2409                 return ret;
2410
2411         return vgic_its_restore_device_tables(its);
2412 }
2413
2414 static int vgic_its_commit_v0(struct vgic_its *its)
2415 {
2416         const struct vgic_its_abi *abi;
2417
2418         abi = vgic_its_get_abi(its);
2419         its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
2420         its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
2421
2422         its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5)
2423                                         << GITS_BASER_ENTRY_SIZE_SHIFT);
2424
2425         its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5)
2426                                         << GITS_BASER_ENTRY_SIZE_SHIFT);
2427         return 0;
2428 }
2429
2430 static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its)
2431 {
2432         /* We need to keep the ABI-specific field values */
2433         its->baser_coll_table &= ~GITS_BASER_VALID;
2434         its->baser_device_table &= ~GITS_BASER_VALID;
2435         its->cbaser = 0;
2436         its->creadr = 0;
2437         its->cwriter = 0;
2438         its->enabled = 0;
2439         vgic_its_free_device_list(kvm, its);
2440         vgic_its_free_collection_list(kvm, its);
2441 }
2442
2443 static int vgic_its_has_attr(struct kvm_device *dev,
2444                              struct kvm_device_attr *attr)
2445 {
2446         switch (attr->group) {
2447         case KVM_DEV_ARM_VGIC_GRP_ADDR:
2448                 switch (attr->attr) {
2449                 case KVM_VGIC_ITS_ADDR_TYPE:
2450                         return 0;
2451                 }
2452                 break;
2453         case KVM_DEV_ARM_VGIC_GRP_CTRL:
2454                 switch (attr->attr) {
2455                 case KVM_DEV_ARM_VGIC_CTRL_INIT:
2456                         return 0;
2457                 case KVM_DEV_ARM_ITS_CTRL_RESET:
2458                         return 0;
2459                 case KVM_DEV_ARM_ITS_SAVE_TABLES:
2460                         return 0;
2461                 case KVM_DEV_ARM_ITS_RESTORE_TABLES:
2462                         return 0;
2463                 }
2464                 break;
2465         case KVM_DEV_ARM_VGIC_GRP_ITS_REGS:
2466                 return vgic_its_has_attr_regs(dev, attr);
2467         }
2468         return -ENXIO;
2469 }
2470
2471 static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
2472 {
2473         const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2474         int ret = 0;
2475
2476         if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */
2477                 return 0;
2478
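        /*
         * Lock ordering: kvm->lock, then its->its_lock, then all vCPU locks
         * via lock_all_vcpus(), so that table save/restore and reset cannot
         * race with running vCPUs.
         */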
2479         mutex_lock(&kvm->lock);
2480         mutex_lock(&its->its_lock);
2481
2482         if (!lock_all_vcpus(kvm)) {
2483                 mutex_unlock(&its->its_lock);
2484                 mutex_unlock(&kvm->lock);
2485                 return -EBUSY;
2486         }
2487
2488         switch (attr) {
2489         case KVM_DEV_ARM_ITS_CTRL_RESET:
2490                 vgic_its_reset(kvm, its);
2491                 break;
2492         case KVM_DEV_ARM_ITS_SAVE_TABLES:
2493                 ret = abi->save_tables(its);
2494                 break;
2495         case KVM_DEV_ARM_ITS_RESTORE_TABLES:
2496                 ret = abi->restore_tables(its);
2497                 break;
2498         }
2499
2500         unlock_all_vcpus(kvm);
2501         mutex_unlock(&its->its_lock);
2502         mutex_unlock(&kvm->lock);
2503         return ret;
2504 }
2505
2506 static int vgic_its_set_attr(struct kvm_device *dev,
2507                              struct kvm_device_attr *attr)
2508 {
2509         struct vgic_its *its = dev->private;
2510         int ret;
2511
2512         switch (attr->group) {
2513         case KVM_DEV_ARM_VGIC_GRP_ADDR: {
2514                 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2515                 unsigned long type = (unsigned long)attr->attr;
2516                 u64 addr;
2517
2518                 if (type != KVM_VGIC_ITS_ADDR_TYPE)
2519                         return -ENODEV;
2520
2521                 if (copy_from_user(&addr, uaddr, sizeof(addr)))
2522                         return -EFAULT;
2523
2524                 ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
2525                                         addr, SZ_64K);
2526                 if (ret)
2527                         return ret;
2528
2529                 return vgic_register_its_iodev(dev->kvm, its, addr);
2530         }
2531         case KVM_DEV_ARM_VGIC_GRP_CTRL:
2532                 return vgic_its_ctrl(dev->kvm, its, attr->attr);
2533         case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
2534                 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2535                 u64 reg;
2536
2537                 if (get_user(reg, uaddr))
2538                         return -EFAULT;
2539
2540                 return vgic_its_attr_regs_access(dev, attr, &reg, true);
2541         }
2542         }
2543         return -ENXIO;
2544 }
2545
2546 static int vgic_its_get_attr(struct kvm_device *dev,
2547                              struct kvm_device_attr *attr)
2548 {
2549         switch (attr->group) {
2550         case KVM_DEV_ARM_VGIC_GRP_ADDR: {
2551                 struct vgic_its *its = dev->private;
2552                 u64 addr = its->vgic_its_base;
2553                 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2554                 unsigned long type = (unsigned long)attr->attr;
2555
2556                 if (type != KVM_VGIC_ITS_ADDR_TYPE)
2557                         return -ENODEV;
2558
2559                 if (copy_to_user(uaddr, &addr, sizeof(addr)))
2560                         return -EFAULT;
2561                 break;
2562         }
2563         case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
2564                 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2565                 u64 reg;
2566                 int ret;
2567
2568                 ret = vgic_its_attr_regs_access(dev, attr, &reg, false);
2569                 if (ret)
2570                         return ret;
2571                 return put_user(reg, uaddr);
2572         }
2573         default:
2574                 return -ENXIO;
2575         }
2576
2577         return 0;
2578 }
2579
2580 static struct kvm_device_ops kvm_arm_vgic_its_ops = {
2581         .name = "kvm-arm-vgic-its",
2582         .create = vgic_its_create,
2583         .destroy = vgic_its_destroy,
2584         .set_attr = vgic_its_set_attr,
2585         .get_attr = vgic_its_get_attr,
2586         .has_attr = vgic_its_has_attr,
2587 };
2588
2589 int kvm_vgic_register_its_device(void)
2590 {
2591         return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
2592                                        KVM_DEV_TYPE_ARM_VGIC_ITS);
2593 }