KVM: x86: Prevent deadlock against tk_core.seq
authorThomas Gleixner <tglx@linutronix.de>
Thu, 6 May 2021 13:21:37 +0000 (15:21 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 May 2021 08:13:12 +0000 (10:13 +0200)
[ Upstream commit 3f804f6d201ca93adf4c3df04d1bfd152c1129d6 ]

syzbot reported a possible deadlock in pvclock_gtod_notify():

CPU 0                   CPU 1
write_seqcount_begin(&tk_core.seq);
  pvclock_gtod_notify()     spin_lock(&pool->lock);
    queue_work(..., &pvclock_gtod_work)     ktime_get()
     spin_lock(&pool->lock);       do {
      seq = read_seqcount_begin(tk_core.seq)
...
              } while (read_seqcount_retry(&tk_core.seq, seq);

While this is unlikely to happen, it's possible.

Delegate queue_work() to irq_work() which postpones it until the
tk_core.seq write held region is left and interrupts are reenabled.

Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
Reported-by: syzbot+6beae4000559d41d80f8@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Message-Id: <87h7jgm1zy.ffs@nanos.tec.linutronix.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
arch/x86/kvm/x86.c

index 4bd99f0..39ab3d7 100644 (file)
@@ -7849,6 +7849,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
 
 /*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+       queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
+/*
  * Notification about pvclock gtod data update.
  */
 static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
@@ -7859,13 +7871,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
 
        update_pvclock_gtod(tk);
 
-       /* disable master clock if host does not trust, or does not
-        * use, TSC based clocksource.
+       /*
+        * Disable master clock if host does not trust, or does not use,
+        * TSC based clocksource. Delegate queue_work() to irq_work as
+        * this is invoked with tk_core.seq write held.
         */
        if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
            atomic_read(&kvm_guest_has_master_clock) != 0)
-               queue_work(system_long_wq, &pvclock_gtod_work);
-
+               irq_work_queue(&pvclock_irq_work);
        return 0;
 }
 
@@ -7981,6 +7994,7 @@ void kvm_arch_exit(void)
        cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
        pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+       irq_work_sync(&pvclock_irq_work);
        cancel_work_sync(&pvclock_gtod_work);
 #endif
        kvm_x86_ops.hardware_enable = NULL;