KVM: VMX: avoid double list add with VT-d posted interrupts
authorPaolo Bonzini <pbonzini@redhat.com>
Tue, 6 Jun 2017 10:57:05 +0000 (12:57 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 5 Oct 2017 07:44:02 +0000 (09:44 +0200)
commit 8b306e2f3c41939ea528e6174c88cfbfff893ce1 upstream.

In some cases, for example involving hot-unplug of assigned
devices, pi_post_block can forget to remove the vCPU from the
blocked_vcpu_list.  When this happens, the next call to
pi_pre_block corrupts the list.

Fix this in two ways.  First, check vcpu->pre_pcpu in pi_pre_block
and WARN instead of adding the element twice in the list.  Second,
always do the list removal in pi_post_block if vcpu->pre_pcpu is
set (not -1).

The new code keeps interrupts disabled for the whole duration of
pi_pre_block/pi_post_block.  This is not strictly necessary, but
easier to follow.  For the same reason, PI.ON is checked only
after the cmpxchg, and to handle it we just call the post-block
code.  This removes duplication of the list removal code.

Cc: Huangweidong <weidong.huang@huawei.com>
Cc: Gonglei <arei.gonglei@huawei.com>
Cc: wangxin <wangxinxin.wang@huawei.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Tested-by: Longpeng (Mike) <longpeng2@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
arch/x86/kvm/vmx.c

index 98bacab..400ea91 100644 (file)
@@ -11005,10 +11005,11 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
        struct pi_desc old, new;
        unsigned int dest;
-       unsigned long flags;
 
        do {
                old.control = new.control = pi_desc->control;
+               WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+                    "Wakeup handler not enabled while the VCPU is blocked\n");
 
                dest = cpu_physical_id(vcpu->cpu);
 
@@ -11025,14 +11026,10 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
        } while (cmpxchg(&pi_desc->control, old.control,
                        new.control) != old.control);
 
-       if(vcpu->pre_pcpu != -1) {
-               spin_lock_irqsave(
-                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                       vcpu->pre_pcpu), flags);
+       if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
                list_del(&vcpu->blocked_vcpu_list);
-               spin_unlock_irqrestore(
-                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                       vcpu->pre_pcpu), flags);
+               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
                vcpu->pre_pcpu = -1;
        }
 }
@@ -11052,7 +11049,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
  */
 static int pi_pre_block(struct kvm_vcpu *vcpu)
 {
-       unsigned long flags;
        unsigned int dest;
        struct pi_desc old, new;
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11062,34 +11058,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
                !kvm_vcpu_apicv_active(vcpu))
                return 0;
 
-       vcpu->pre_pcpu = vcpu->cpu;
-       spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-                         vcpu->pre_pcpu), flags);
-       list_add_tail(&vcpu->blocked_vcpu_list,
-                     &per_cpu(blocked_vcpu_on_cpu,
-                     vcpu->pre_pcpu));
-       spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
-                              vcpu->pre_pcpu), flags);
+       WARN_ON(irqs_disabled());
+       local_irq_disable();
+       if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+               vcpu->pre_pcpu = vcpu->cpu;
+               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               list_add_tail(&vcpu->blocked_vcpu_list,
+                             &per_cpu(blocked_vcpu_on_cpu,
+                                      vcpu->pre_pcpu));
+               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+       }
 
        do {
                old.control = new.control = pi_desc->control;
 
-               /*
-                * We should not block the vCPU if
-                * an interrupt is posted for it.
-                */
-               if (pi_test_on(pi_desc) == 1) {
-                       spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-                                         vcpu->pre_pcpu), flags);
-                       list_del(&vcpu->blocked_vcpu_list);
-                       spin_unlock_irqrestore(
-                                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                                       vcpu->pre_pcpu), flags);
-                       vcpu->pre_pcpu = -1;
-
-                       return 1;
-               }
-
                WARN((pi_desc->sn == 1),
                     "Warning: SN field of posted-interrupts "
                     "is set before blocking\n");
@@ -11114,7 +11096,12 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
        } while (cmpxchg(&pi_desc->control, old.control,
                        new.control) != old.control);
 
-       return 0;
+       /* We should not block the vCPU if an interrupt is posted for it.  */
+       if (pi_test_on(pi_desc) == 1)
+               __pi_post_block(vcpu);
+
+       local_irq_enable();
+       return (vcpu->pre_pcpu == -1);
 }
 
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11130,12 +11117,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 
 static void pi_post_block(struct kvm_vcpu *vcpu)
 {
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
+       if (vcpu->pre_pcpu == -1)
                return;
 
+       WARN_ON(irqs_disabled());
+       local_irq_disable();
        __pi_post_block(vcpu);
+       local_irq_enable();
 }
 
 static void vmx_post_block(struct kvm_vcpu *vcpu)