KVM: SVM: Take and hold ir_list_lock when updating vCPU's Physical ID entry
author	Sean Christopherson <seanjc@google.com>
Tue, 8 Aug 2023 23:31:31 +0000 (16:31 -0700)
committer	Sean Christopherson <seanjc@google.com>
Thu, 17 Aug 2023 18:31:37 +0000 (11:31 -0700)
Hoist the acquisition of ir_list_lock from avic_update_iommu_vcpu_affinity()
to its two callers, avic_vcpu_load() and avic_vcpu_put(), specifically to
encapsulate the write to the vCPU's entry in the AVIC Physical ID table.
This will allow a future fix to pull information from the Physical ID entry
when updating the IRTE, without potentially consuming stale information,
i.e. without racing with the vCPU being (un)loaded.
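
As a rough sketch, the resulting shape is as follows (condensed from the
diff below; declarations and unrelated logic omitted):

	static inline int
	avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
	{
		struct vcpu_svm *svm = to_svm(vcpu);

		/* Callers now own the critical section. */
		lockdep_assert_held(&svm->ir_list_lock);
		...
	}

	void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	{
		...
		spin_lock_irqsave(&svm->ir_list_lock, flags);

		entry = READ_ONCE(*(svm->avic_physical_id_cache));
		...
		WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
		avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);

		spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	}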

Add a comment to call out that ir_list_lock does NOT protect against
multiple writers, specifically that reading the Physical ID entry in
avic_vcpu_put() outside of the lock is safe.

To preserve some semblance of independence from ir_list_lock, keep the
READ_ONCE() in avic_vcpu_load() even though acquiring the spinlock
effectively ensures the load(s) will be generated after the lock is
taken.
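
Concretely, avic_vcpu_put() can continue to read the entry locklessly and
only take ir_list_lock once it knows there is work to do (again condensed
from the diff below):

	void avic_vcpu_put(struct kvm_vcpu *vcpu)
	{
		...
		/*
		 * Safe outside ir_list_lock: only the pCPU that has loaded
		 * (or is loading) the vCPU may modify the entry, and
		 * preemption is disabled.
		 */
		entry = READ_ONCE(*(svm->avic_physical_id_cache));
		if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
			return;

		spin_lock_irqsave(&svm->ir_list_lock, flags);
		avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
		entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
		WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
		spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	}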

Cc: stable@vger.kernel.org
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
Link: https://lore.kernel.org/r/20230808233132.2499764-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/kvm/svm/avic.c

index cfc8ab7..8e041b2 100644
@@ -986,10 +986,11 @@ static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
        int ret = 0;
-       unsigned long flags;
        struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       lockdep_assert_held(&svm->ir_list_lock);
+
        if (!kvm_arch_has_assigned_device(vcpu->kvm))
                return 0;
 
@@ -997,19 +998,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
         * Here, we go through the per-vcpu ir_list to update all existing
         * interrupt remapping table entry targeting this vcpu.
         */
-       spin_lock_irqsave(&svm->ir_list_lock, flags);
-
        if (list_empty(&svm->ir_list))
-               goto out;
+               return 0;
 
        list_for_each_entry(ir, &svm->ir_list, node) {
                ret = amd_iommu_update_ga(cpu, r, ir->data);
                if (ret)
-                       break;
+                       return ret;
        }
-out:
-       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-       return ret;
+       return 0;
 }
 
 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1017,6 +1014,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        u64 entry;
        int h_physical_id = kvm_cpu_get_apicid(cpu);
        struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long flags;
 
        lockdep_assert_preemption_disabled();
 
@@ -1033,6 +1031,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        if (kvm_vcpu_is_blocking(vcpu))
                return;
 
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
        WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 
@@ -1042,25 +1042,40 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
        avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
+
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
 
 void avic_vcpu_put(struct kvm_vcpu *vcpu)
 {
        u64 entry;
        struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long flags;
 
        lockdep_assert_preemption_disabled();
 
+       /*
+        * Note, reading the Physical ID entry outside of ir_list_lock is safe
+        * as only the pCPU that has loaded (or is loading) the vCPU is allowed
+        * to modify the entry, and preemption is disabled.  I.e. the vCPU
+        * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
+        * recursively.
+        */
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
 
        /* Nothing to do if IsRunning == '0' due to vCPU blocking. */
        if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
                return;
 
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+
        avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
 
        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+
 }
 
 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)