/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list_sort.h>
#include <linux/nospec.h>

#include <asm/kvm_hyp.h>

#include "vgic.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

struct vgic_global kvm_vgic_global_state __ro_after_init = {
        .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};

/*
 * Locking order is always:
 * kvm->lock (mutex)
 *   its->cmd_lock (mutex)
 *     its->its_lock (mutex)
 *       vgic_cpu->ap_list_lock         must be taken with IRQs disabled
 *         kvm->lpi_list_lock           must be taken with IRQs disabled
 *           vgic_irq->irq_lock         must be taken with IRQs disabled
 *
 * As the ap_list_lock might be taken from the timer interrupt handler,
 * we have to disable IRQs before taking this lock and everything lower
 * than it.
 *
 * If you need to take multiple locks, always take the upper lock first,
 * then the lower ones, e.g. first take the its_lock, then the irq_lock.
 * If you are already holding a lock and need to take a higher one, you
 * have to drop the lower ranking lock first and re-acquire it after having
 * taken the upper one.
 *
 * When taking more than one ap_list_lock at the same time, always take the
 * lowest numbered VCPU's ap_list_lock first, so:
 *   vcpuX->vcpu_id < vcpuY->vcpu_id:
 *     spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
 *     spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
 *
 * Since the VGIC must support injecting virtual interrupts from ISRs, we have
 * to use the spin_lock_irqsave/spin_unlock_irqrestore versions of outer
 * spinlocks for any lock that may be taken while injecting an interrupt.
 */
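
/*
 * A concrete example of the drop-and-re-acquire rule above is
 * vgic_queue_irq_unlock() below: it drops the irq_lock, takes the target
 * VCPU's ap_list_lock, re-takes the irq_lock, and then re-validates its
 * routing decision with vgic_target_oracle(), retrying if anything
 * changed while the irq was unlocked.
 */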

/*
 * Iterate over the VM's list of mapped LPIs to find the one with a
 * matching interrupt ID and return a reference to the IRQ structure.
 */
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct vgic_irq *irq = NULL;
        unsigned long flags;

        spin_lock_irqsave(&dist->lpi_list_lock, flags);

        list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
                if (irq->intid != intid)
                        continue;

                /*
                 * This increases the refcount, the caller is expected to
                 * call vgic_put_irq() later once it's finished with the IRQ.
                 */
                vgic_get_irq_kref(irq);
                goto out_unlock;
        }
        irq = NULL;

out_unlock:
        spin_unlock_irqrestore(&dist->lpi_list_lock, flags);

        return irq;
}

/*
 * This looks up the virtual interrupt ID to get the corresponding
 * struct vgic_irq. It also increases the refcount, so any caller is expected
 * to call vgic_put_irq() once it's finished with this IRQ.
 */
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
                              u32 intid)
{
        /* SGIs and PPIs */
        if (intid <= VGIC_MAX_PRIVATE) {
                /* The check above is inclusive, so the bound is max + 1 */
                intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
                return &vcpu->arch.vgic_cpu.private_irqs[intid];
        }

        /* SPIs */
        if (intid <= VGIC_MAX_SPI) {
                /* Same here: intid == VGIC_MAX_SPI is a valid index */
                intid = array_index_nospec(intid, VGIC_MAX_SPI + 1);
                return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
        }

        /* LPIs */
        if (intid >= VGIC_MIN_LPI)
                return vgic_get_lpi(kvm, intid);

        WARN(1, "Looking up struct vgic_irq for reserved INTID");
        return NULL;
}
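
/*
 * Typical usage of the lookup API above, mirroring the pattern used by
 * kvm_vgic_inject_irq() further down (an illustrative sketch, not a new
 * interface):
 *
 *      struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, intid);
 *
 *      if (!irq)
 *              return -EINVAL;
 *      ... access state under irq->irq_lock ...
 *      vgic_put_irq(kvm, irq);
 *
 * For LPIs the lookup takes a reference, so a matching vgic_put_irq()
 * is mandatory to avoid leaking the dynamically allocated structure.
 */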

/*
 * We can't do anything in here, because we lack the kvm pointer to
 * lock and remove the item from the lpi_list. So we keep this function
 * empty and use the return value of kref_put() to trigger the freeing.
 */
static void vgic_irq_release(struct kref *ref)
{
}

void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
        struct vgic_dist *dist = &kvm->arch.vgic;
        unsigned long flags;

        /* Non-LPI interrupts are statically allocated and never freed. */
        if (irq->intid < VGIC_MIN_LPI)
                return;

        spin_lock_irqsave(&dist->lpi_list_lock, flags);
        if (!kref_put(&irq->refcount, vgic_irq_release)) {
                spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
                return;
        }

        list_del(&irq->lpi_list);
        dist->lpi_list_count--;
        spin_unlock_irqrestore(&dist->lpi_list_lock, flags);

        kfree(irq);
}

void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
{
        WARN_ON(irq_set_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_PENDING,
                                      pending));
}

bool vgic_get_phys_line_level(struct vgic_irq *irq)
{
        bool line_level;

        BUG_ON(!irq->hw);

        if (irq->get_input_level)
                return irq->get_input_level(irq->intid);

        WARN_ON(irq_get_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_PENDING,
                                      &line_level));
        return line_level;
}

/* Set/Clear the physical active state */
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
{
        BUG_ON(!irq->hw);
        WARN_ON(irq_set_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_ACTIVE,
                                      active));
}

/**
 * vgic_target_oracle - compute the target vcpu for an irq
 *
 * @irq:        The irq to route. Must be already locked.
 *
 * Based on the current state of the interrupt (enabled, pending,
 * active, vcpu and target_vcpu), compute the next vcpu this should be
 * given to. Return NULL if this shouldn't be injected at all.
 *
 * Requires the IRQ lock to be held.
 */
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));

        /* If the interrupt is active, it must stay on the current vcpu */
        if (irq->active)
                return irq->vcpu ? : irq->target_vcpu;

        /*
         * If the IRQ is not active but enabled and pending, we should direct
         * it to its configured target VCPU.
         * If the distributor is disabled, pending interrupts shouldn't be
         * forwarded.
         */
        if (irq->enabled && irq_is_pending(irq)) {
                if (unlikely(irq->target_vcpu &&
                             !irq->target_vcpu->kvm->arch.vgic.enabled))
                        return NULL;

                return irq->target_vcpu;
        }

        /*
         * If neither active nor both pending and enabled, then this IRQ
         * should not be queued to any VCPU.
         */
        return NULL;
}
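
/*
 * Summary of the decision table implemented above (restating the code,
 * for readability):
 *
 *   irq active                        -> irq->vcpu (or target_vcpu if
 *                                        not yet queued)
 *   enabled && pending, dist enabled  -> irq->target_vcpu
 *   enabled && pending, dist disabled -> NULL
 *   anything else                     -> NULL (nothing to inject)
 */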

/*
 * The order of items in the ap_lists defines how we'll pack things in LRs as
 * well, the first items in the list being the first things populated in the
 * LRs.
 *
 * A hard rule is that active interrupts can never be pushed out of the LRs
 * (and therefore take priority) since we cannot reliably trap on deactivation
 * of IRQs and therefore they have to be present in the LRs.
 *
 * Otherwise things should be sorted by the priority field and the GIC
 * hardware support will take care of preemption of priority groups etc.
 *
 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 * to sort "b" before "a".
 */
static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
{
        struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
        struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
        bool penda, pendb;
        int ret;

        spin_lock(&irqa->irq_lock);
        spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);

        if (irqa->active || irqb->active) {
                ret = (int)irqb->active - (int)irqa->active;
                goto out;
        }

        penda = irqa->enabled && irq_is_pending(irqa);
        pendb = irqb->enabled && irq_is_pending(irqb);

        if (!penda || !pendb) {
                ret = (int)pendb - (int)penda;
                goto out;
        }

        /* Both pending and enabled, sort by priority */
        ret = irqa->priority - irqb->priority;
out:
        spin_unlock(&irqb->irq_lock);
        spin_unlock(&irqa->irq_lock);
        return ret;
}

/* Must be called with the ap_list_lock held */
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));

        list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
}

/*
 * Only valid injection if changing level for level-triggered IRQs or for a
 * rising edge, and in-kernel connected IRQ lines can only be controlled by
 * their owner.
 */
static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
        if (irq->owner != owner)
                return false;

        switch (irq->config) {
        case VGIC_CONFIG_LEVEL:
                return irq->line_level != level;
        case VGIC_CONFIG_EDGE:
                return level;
        }

        return false;
}
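
/*
 * In table form, vgic_validate_injection() accepts an injection when:
 *
 *   config             valid when
 *   VGIC_CONFIG_LEVEL  level != irq->line_level (the input actually changes)
 *   VGIC_CONFIG_EDGE   level == true            (a rising edge)
 *
 * and, in both cases, only if @owner matches the registered irq->owner.
 */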

/*
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 * Do the queuing if necessary, taking the right locks in the right order.
 * Returns true when the IRQ was queued, false otherwise.
 *
 * Needs to be entered with the IRQ lock already held, but will return
 * with all locks dropped.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
                           unsigned long flags)
{
        struct kvm_vcpu *vcpu;

        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));

retry:
        vcpu = vgic_target_oracle(irq);
        if (irq->vcpu || !vcpu) {
                /*
                 * If this IRQ is already on a VCPU's ap_list, then it
                 * cannot be moved or modified and there is no more work for
                 * us to do.
                 *
                 * Otherwise, if the irq is not pending and enabled, it does
                 * not need to be inserted into an ap_list and there is also
                 * no more work for us to do.
                 */
                spin_unlock_irqrestore(&irq->irq_lock, flags);

                /*
                 * We have to kick the VCPU here, because we could be
                 * queueing an edge-triggered interrupt for which we
                 * get no EOI maintenance interrupt. In that case,
                 * while the IRQ is already on the VCPU's AP list, the
                 * VCPU could have EOI'ed the original interrupt and
                 * won't see this one until it exits for some other
                 * reason.
                 */
                if (vcpu) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                return false;
        }

        /*
         * We must unlock the irq lock to take the ap_list_lock where
         * we are going to insert this new pending interrupt.
         */
        spin_unlock_irqrestore(&irq->irq_lock, flags);

        /* someone can do stuff here, which we re-check below */

        spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
        spin_lock(&irq->irq_lock);

        /*
         * Did something change behind our backs?
         *
         * There are two cases:
         * 1) The irq lost its pending state or was disabled behind our
         *    backs and/or it was queued to another VCPU's ap_list.
         * 2) Someone changed the affinity on this irq behind our
         *    backs and we are now holding the wrong ap_list_lock.
         *
         * In both cases, drop the locks and retry.
         */
        if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
                spin_unlock(&irq->irq_lock);
                spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

                spin_lock_irqsave(&irq->irq_lock, flags);
                goto retry;
        }

        /*
         * Grab a reference to the irq to reflect the fact that it is
         * now in the ap_list.
         */
        vgic_get_irq_kref(irq);
        list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
        irq->vcpu = vcpu;

        spin_unlock(&irq->irq_lock);
        spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
        kvm_vcpu_kick(vcpu);

        return true;
}

/**
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 * @kvm:     The VM structure pointer
 * @cpuid:   The CPU for PPIs
 * @intid:   The INTID to inject a new state to.
 * @level:   Edge-triggered:  true:  to trigger the interrupt
 *                            false: to ignore the call
 *           Level-sensitive: true:  raise the input signal
 *                            false: lower the input signal
 * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
 *           that the caller is allowed to inject this IRQ.  Userspace
 *           injections will have owner == NULL.
 *
 * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 * level-sensitive interrupts.  You can think of the level parameter as 1
 * being HIGH and 0 being LOW and all devices being active-HIGH.
 */
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
                        bool level, void *owner)
{
        struct kvm_vcpu *vcpu;
        struct vgic_irq *irq;
        unsigned long flags;
        int ret;

        trace_vgic_update_irq_pending(cpuid, intid, level);

        ret = vgic_lazy_init(kvm);
        if (ret)
                return ret;

        vcpu = kvm_get_vcpu(kvm, cpuid);
        if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
                return -EINVAL;

        irq = vgic_get_irq(kvm, vcpu, intid);
        if (!irq)
                return -EINVAL;

        spin_lock_irqsave(&irq->irq_lock, flags);

        if (!vgic_validate_injection(irq, level, owner)) {
                /* Nothing to see here, move along... */
                spin_unlock_irqrestore(&irq->irq_lock, flags);
                vgic_put_irq(kvm, irq);
                return 0;
        }

        if (irq->config == VGIC_CONFIG_LEVEL)
                irq->line_level = level;
        else
                irq->pending_latch = true;

        vgic_queue_irq_unlock(kvm, irq, flags);
        vgic_put_irq(kvm, irq);

        return 0;
}
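
/*
 * Illustrative caller sketch (hypothetical device, not part of this file):
 * an in-kernel emulated device raising and later lowering a level-sensitive
 * SPI would do
 *
 *      kvm_vgic_inject_irq(kvm, 0, spi_intid, true, dev_owner);
 *      ...
 *      kvm_vgic_inject_irq(kvm, 0, spi_intid, false, dev_owner);
 *
 * where dev_owner is the cookie previously registered with
 * kvm_vgic_set_owner(), or NULL for injections on behalf of userspace.
 */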

/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                            unsigned int host_irq,
                            bool (*get_input_level)(int vintid))
{
        struct irq_desc *desc;
        struct irq_data *data;

        /* Find the physical IRQ number corresponding to @host_irq */
        desc = irq_to_desc(host_irq);
        if (!desc) {
                kvm_err("%s: no interrupt descriptor\n", __func__);
                return -EINVAL;
        }
        data = irq_desc_get_irq_data(desc);
        while (data->parent_data)
                data = data->parent_data;

        irq->hw = true;
        irq->host_irq = host_irq;
        irq->hwintid = data->hwirq;
        irq->get_input_level = get_input_level;
        return 0;
}

/* @irq->irq_lock must be held */
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
        irq->hw = false;
        irq->hwintid = 0;
        irq->get_input_level = NULL;
}

int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
                          u32 vintid, bool (*get_input_level)(int vintid))
{
        struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        unsigned long flags;
        int ret;

        BUG_ON(!irq);

        spin_lock_irqsave(&irq->irq_lock, flags);
        ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
        spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return ret;
}

/**
 * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 * @vcpu: The VCPU pointer
 * @vintid: The INTID of the interrupt
 *
 * Reset the active and pending states of a mapped interrupt.  Kernel
 * subsystems injecting mapped interrupts should reset their interrupt lines
 * when we are doing a reset of the VM.
 */
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
{
        struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        unsigned long flags;

        if (!irq->hw)
                goto out;

        spin_lock_irqsave(&irq->irq_lock, flags);
        irq->active = false;
        irq->pending_latch = false;
        irq->line_level = false;
        spin_unlock_irqrestore(&irq->irq_lock, flags);
out:
        vgic_put_irq(vcpu->kvm, irq);
}

int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
        struct vgic_irq *irq;
        unsigned long flags;

        if (!vgic_initialized(vcpu->kvm))
                return -EAGAIN;

        irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        BUG_ON(!irq);

        spin_lock_irqsave(&irq->irq_lock, flags);
        kvm_vgic_unmap_irq(irq);
        spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return 0;
}

/**
 * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 *
 * @vcpu:   Pointer to the VCPU (used for PPIs)
 * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
 * @owner:  Opaque pointer to the owner
 *
 * Returns 0 if intid is not already used by another in-kernel device and the
 * owner is set, otherwise returns an error code.
 */
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
{
        struct vgic_irq *irq;
        unsigned long flags;
        int ret = 0;

        if (!vgic_initialized(vcpu->kvm))
                return -EAGAIN;

        /* SGIs and LPIs cannot be wired up to any device */
        if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
                return -EINVAL;

        irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
        spin_lock_irqsave(&irq->irq_lock, flags);
        if (irq->owner && irq->owner != owner)
                ret = -EEXIST;
        else
                irq->owner = owner;
        spin_unlock_irqrestore(&irq->irq_lock, flags);

        return ret;
}

/**
 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 *
 * @vcpu: The VCPU pointer
 *
 * Go over the list of "interesting" interrupts, and prune those that we
 * won't have to consider in the near future.
 */
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq, *tmp;

        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

retry:
        spin_lock(&vgic_cpu->ap_list_lock);

        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
                bool target_vcpu_needs_kick = false;

                spin_lock(&irq->irq_lock);

                BUG_ON(vcpu != irq->vcpu);

                target_vcpu = vgic_target_oracle(irq);

                if (!target_vcpu) {
                        /*
                         * We don't need to process this interrupt any
                         * further, move it off the list.
                         */
                        list_del(&irq->ap_list);
                        irq->vcpu = NULL;
                        spin_unlock(&irq->irq_lock);

                        /*
                         * This vgic_put_irq call matches the
                         * vgic_get_irq_kref in vgic_queue_irq_unlock,
                         * where we added the LPI to the ap_list. As
                         * we remove the irq from the list, we also
                         * drop the refcount.
                         */
                        vgic_put_irq(vcpu->kvm, irq);
                        continue;
                }

                if (target_vcpu == vcpu) {
                        /* We're on the right CPU */
                        spin_unlock(&irq->irq_lock);
                        continue;
                }

                /* This interrupt looks like it has to be migrated. */

                spin_unlock(&irq->irq_lock);
                spin_unlock(&vgic_cpu->ap_list_lock);

                /*
                 * Ensure locking order by always locking the smallest
                 * ID first.
                 */
                if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
                        vcpuA = vcpu;
                        vcpuB = target_vcpu;
                } else {
                        vcpuA = target_vcpu;
                        vcpuB = vcpu;
                }

                spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
                spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
                                 SINGLE_DEPTH_NESTING);
                spin_lock(&irq->irq_lock);

                /*
                 * If the affinity has been preserved, move the
                 * interrupt around. Otherwise, it means things have
                 * changed while the interrupt was unlocked, and we
                 * need to replay this.
                 *
                 * In all cases, we cannot trust the list not to have
                 * changed, so we restart from the beginning.
                 */
                if (target_vcpu == vgic_target_oracle(irq)) {
                        struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;

                        list_del(&irq->ap_list);
                        irq->vcpu = target_vcpu;
                        list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
                        target_vcpu_needs_kick = true;
                }

                spin_unlock(&irq->irq_lock);
                spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
                spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);

                if (target_vcpu_needs_kick) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
                        kvm_vcpu_kick(target_vcpu);
                }

                goto retry;
        }

        spin_unlock(&vgic_cpu->ap_list_lock);
}

static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_fold_lr_state(vcpu);
        else
                vgic_v3_fold_lr_state(vcpu);
}

/* Requires the irq_lock to be held. */
static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
                                    struct vgic_irq *irq, int lr)
{
        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));

        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_populate_lr(vcpu, irq, lr);
        else
                vgic_v3_populate_lr(vcpu, irq, lr);
}

static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_clear_lr(vcpu, lr);
        else
                vgic_v3_clear_lr(vcpu, lr);
}

static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_set_underflow(vcpu);
        else
                vgic_v3_set_underflow(vcpu);
}

/* Requires the ap_list_lock to be held. */
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
                                 bool *multi_sgi)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count = 0;

        *multi_sgi = false;

        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                int w;

                spin_lock(&irq->irq_lock);
                /* GICv2 SGIs can count for more than one... */
                w = vgic_irq_get_lr_count(irq);
                spin_unlock(&irq->irq_lock);

                count += w;
                *multi_sgi |= (w > 1);
        }
        return count;
}

/* Requires the VCPU's ap_list_lock to be held. */
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count;
        bool multi_sgi;
        u8 prio = 0xff;

        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));

        count = compute_ap_list_depth(vcpu, &multi_sgi);
        if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
                vgic_sort_ap_list(vcpu);

        count = 0;

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                spin_lock(&irq->irq_lock);

                /*
                 * If we have multi-SGIs in the pipeline, we need to
                 * guarantee that they are all seen before any IRQ of
                 * lower priority. In that case, we need to filter out
                 * these interrupts by exiting early. This is easy as
                 * the AP list has been sorted already.
                 */
                if (multi_sgi && irq->priority > prio) {
                        spin_unlock(&irq->irq_lock);
                        break;
                }

                if (likely(vgic_target_oracle(irq) == vcpu)) {
                        vgic_populate_lr(vcpu, irq, count++);

                        if (irq->source)
                                prio = irq->priority;
                }

                spin_unlock(&irq->irq_lock);

                if (count == kvm_vgic_global_state.nr_lr) {
                        if (!list_is_last(&irq->ap_list,
                                          &vgic_cpu->ap_list_head))
                                vgic_set_underflow(vcpu);
                        break;
                }
        }

        vcpu->arch.vgic_cpu.used_lrs = count;

        /* Nuke remaining LRs */
        for ( ; count < kvm_vgic_global_state.nr_lr; count++)
                vgic_clear_lr(vcpu, count);
}

static inline bool can_access_vgic_from_kernel(void)
{
        /*
         * GICv2 can always be accessed from the kernel because it is
         * memory-mapped, and VHE systems can access GICv3 EL2 system
         * registers.
         */
        return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
}

static inline void vgic_save_state(struct kvm_vcpu *vcpu)
{
        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vgic_v2_save_state(vcpu);
        else
                __vgic_v3_save_state(vcpu);
}

/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

        WARN_ON(vgic_v4_sync_hwstate(vcpu));

        /* An empty ap_list_head implies used_lrs == 0 */
        if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
                return;

        if (can_access_vgic_from_kernel())
                vgic_save_state(vcpu);

        if (vgic_cpu->used_lrs)
                vgic_fold_lr_state(vcpu);
        vgic_prune_ap_list(vcpu);
}

static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vgic_v2_restore_state(vcpu);
        else
                __vgic_v3_restore_state(vcpu);
}

/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
        WARN_ON(vgic_v4_flush_hwstate(vcpu));

        /*
         * If there are no virtual interrupts active or pending for this
         * VCPU, then there is no work to do and we can bail out without
         * taking any lock.  There is a potential race with someone injecting
         * interrupts to the VCPU, but it is a benign race as the VCPU will
         * either observe the new interrupt before or after doing this check,
         * and introducing additional synchronization mechanisms doesn't
         * change this.
         */
        if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
                return;

        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

        spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
        vgic_flush_lr_state(vcpu);
        spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);

        if (can_access_vgic_from_kernel())
                vgic_restore_state(vcpu);
}

void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
        if (unlikely(!vgic_initialized(vcpu->kvm)))
                return;

        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_load(vcpu);
        else
                vgic_v3_load(vcpu);
}

void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
        if (unlikely(!vgic_initialized(vcpu->kvm)))
                return;

        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_put(vcpu);
        else
                vgic_v3_put(vcpu);
}

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        bool pending = false;
        unsigned long flags;

        if (!vcpu->kvm->arch.vgic.enabled)
                return false;

        if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
                return true;

        spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                spin_lock(&irq->irq_lock);
                pending = irq_is_pending(irq) && irq->enabled;
                spin_unlock(&irq->irq_lock);

                if (pending)
                        break;
        }

        spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);

        return pending;
}

void vgic_kick_vcpus(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int c;

        /*
         * We've injected an interrupt, time to find out who deserves
         * a good kick...
         */
        kvm_for_each_vcpu(c, vcpu, kvm) {
                if (kvm_vgic_vcpu_pending_irq(vcpu)) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
        }
}

bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
{
        struct vgic_irq *irq;
        bool map_is_active;
        unsigned long flags;

        if (!vgic_initialized(vcpu->kvm))
                return false;

        irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        spin_lock_irqsave(&irq->irq_lock, flags);
        map_is_active = irq->hw && irq->active;
        spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return map_is_active;
}