Drivers: hv: vmbus: Resolve race between init_vp_index() and CPU hotplug
author Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Fri, 22 May 2020 17:19:00 +0000 (19:19 +0200)
committer Wei Liu <wei.liu@kernel.org>
Sat, 23 May 2020 09:06:42 +0000 (09:06 +0000)
vmbus_process_offer() does two things (among others):

 1) first, it sets the channel's target CPU with cpu_hotplug_lock;
 2) it then adds the channel to the channel list(s) with channel_mutex.

Since cpu_hotplug_lock is released before (2), the channel's target CPU
(as designated in (1)) can be deemed "free" by hv_synic_cleanup() and go
offline before the channel is added to the list.

Fix the race condition by "extending" the cpu_hotplug_lock critical
section to include (2) (and (1)), nesting the channel_mutex critical
section within the cpu_hotplug_lock critical section as done elsewhere
(hv_synic_cleanup(), target_cpu_store()) in the hyperv drivers code.

Move even further by extending the channel_mutex critical section to
include (1) (and (2)): this change allows removing (the now redundant)
bind_channel_to_cpu_lock, and generally simplifies the handling of the
target CPUs (that are now always modified with channel_mutex held).

Fixes: d570aec0f2154e ("Drivers: hv: vmbus: Synchronize init_vp_index() vs. CPU hotplug")
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20200522171901.204127-2-parri.andrea@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
drivers/hv/channel_mgmt.c

index fde806d..89eaacf 100644 (file)
@@ -554,26 +554,34 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
        bool fnew = true;
 
        /*
-        * Initialize the target_CPU before inserting the channel in
-        * the chn_list and sc_list lists, within the channel_mutex
-        * critical section:
+        * Synchronize vmbus_process_offer() and CPU hotplugging:
         *
         * CPU1                         CPU2
         *
-        * [vmbus_process_offer()]      [hv_syninc_cleanup()]
+        * [vmbus_process_offer()]      [Hot removal of the CPU]
         *
-        * STORE target_cpu             LOCK channel_mutex
-        * LOCK channel_mutex           SEARCH chn_list
-        * INSERT chn_list              LOAD target_cpu
-        * UNLOCK channel_mutex         UNLOCK channel_mutex
+        * CPUS_READ_LOCK               CPUS_WRITE_LOCK
+        * LOAD cpu_online_mask         SEARCH chn_list
+        * STORE target_cpu             LOAD target_cpu
+        * INSERT chn_list              STORE cpu_online_mask
+        * CPUS_READ_UNLOCK             CPUS_WRITE_UNLOCK
+        *
+        * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
+        *              CPU2's SEARCH from *not* seeing CPU1's INSERT
         *
         * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
         *              CPU2's LOAD from *not* seing CPU1's STORE
         */
-       init_vp_index(newchannel, hv_get_dev_type(newchannel));
+       cpus_read_lock();
 
+       /*
+        * Serializes the modifications of the chn_list list as well as
+        * the accesses to next_numa_node_id in init_vp_index().
+        */
        mutex_lock(&vmbus_connection.channel_mutex);
 
+       init_vp_index(newchannel, hv_get_dev_type(newchannel));
+
        /* Remember the channels that should be cleaned up upon suspend. */
        if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
                atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
@@ -623,6 +631,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
        vmbus_channel_map_relid(newchannel);
 
        mutex_unlock(&vmbus_connection.channel_mutex);
+       cpus_read_unlock();
 
        /*
         * vmbus_process_offer() mustn't call channel->sc_creation_callback()
@@ -655,13 +664,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
  * We use this state to statically distribute the channel interrupt load.
  */
 static int next_numa_node_id;
-/*
- * init_vp_index() accesses global variables like next_numa_node_id, and
- * it can run concurrently for primary channels and sub-channels: see
- * vmbus_process_offer(), so we need the lock to protect the global
- * variables.
- */
-static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
 
 /*
  * Starting with Win8, we can statically distribute the incoming
@@ -700,15 +702,6 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
                return;
        }
 
-       /* No CPUs can come up or down during this. */
-       cpus_read_lock();
-
-       /*
-        * Serializes the accesses to the global variable next_numa_node_id.
-        * See also the header comment of the spin lock declaration.
-        */
-       spin_lock(&bind_channel_to_cpu_lock);
-
        while (true) {
                numa_node = next_numa_node_id++;
                if (numa_node == nr_node_ids) {
@@ -739,9 +732,6 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
        channel->target_cpu = target_cpu;
        channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
 
-       spin_unlock(&bind_channel_to_cpu_lock);
-       cpus_read_unlock();
-
        free_cpumask_var(available_mask);
 }