PCI: hv: Fix a race condition when removing the device
author Long Li <longli@microsoft.com>
Wed, 12 May 2021 08:06:40 +0000 (01:06 -0700)
committer Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Thu, 3 Jun 2021 17:28:48 +0000 (18:28 +0100)
On removing the device, any work item (hv_pci_devices_present() or
hv_pci_eject_device()) scheduled on workqueue hbus->wq may still be running
and race with hv_pci_remove().

This can happen because the host may send PCI_EJECT or PCI_BUS_RELATIONS(2)
and decide to rescind the channel immediately after that.

Fix this by flushing/destroying the workqueue of hbus before doing hbus remove.

Link: https://lore.kernel.org/r/1620806800-30983-1-git-send-email-longli@linuxonhyperv.com
Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
drivers/pci/controller/pci-hyperv.c

index 6511648..272c63a 100644 (file)
@@ -444,7 +444,6 @@ enum hv_pcibus_state {
        hv_pcibus_probed,
        hv_pcibus_installed,
        hv_pcibus_removing,
-       hv_pcibus_removed,
        hv_pcibus_maximum
 };
 
@@ -3243,8 +3242,9 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
                struct pci_packet teardown_packet;
                u8 buffer[sizeof(struct pci_message)];
        } pkt;
-       struct hv_dr_state *dr;
        struct hv_pci_compl comp_pkt;
+       struct hv_pci_dev *hpdev, *tmp;
+       unsigned long flags;
        int ret;
 
        /*
@@ -3256,9 +3256,16 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
 
        if (!keep_devs) {
                /* Delete any children which might still exist. */
-               dr = kzalloc(sizeof(*dr), GFP_KERNEL);
-               if (dr && hv_pci_start_relations_work(hbus, dr))
-                       kfree(dr);
+               spin_lock_irqsave(&hbus->device_list_lock, flags);
+               list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) {
+                       list_del(&hpdev->list_entry);
+                       if (hpdev->pci_slot)
+                               pci_destroy_slot(hpdev->pci_slot);
+                       /* For the two refs got in new_pcichild_device() */
+                       put_pcichild(hpdev);
+                       put_pcichild(hpdev);
+               }
+               spin_unlock_irqrestore(&hbus->device_list_lock, flags);
        }
 
        ret = hv_send_resources_released(hdev);
@@ -3301,13 +3308,23 @@ static int hv_pci_remove(struct hv_device *hdev)
 
        hbus = hv_get_drvdata(hdev);
        if (hbus->state == hv_pcibus_installed) {
+               tasklet_disable(&hdev->channel->callback_event);
+               hbus->state = hv_pcibus_removing;
+               tasklet_enable(&hdev->channel->callback_event);
+               destroy_workqueue(hbus->wq);
+               hbus->wq = NULL;
+               /*
+                * At this point, no work is running or can be scheduled
+                * on hbus->wq. We can't race with hv_pci_devices_present()
+                * or hv_pci_eject_device(), so it's safe to proceed.
+                */
+
                /* Remove the bus from PCI's point of view. */
                pci_lock_rescan_remove();
                pci_stop_root_bus(hbus->pci_bus);
                hv_pci_remove_slots(hbus);
                pci_remove_root_bus(hbus->pci_bus);
                pci_unlock_rescan_remove();
-               hbus->state = hv_pcibus_removed;
        }
 
        ret = hv_pci_bus_exit(hdev, false);
@@ -3322,7 +3339,6 @@ static int hv_pci_remove(struct hv_device *hdev)
        irq_domain_free_fwnode(hbus->sysdata.fwnode);
        put_hvpcibus(hbus);
        wait_for_completion(&hbus->remove_event);
-       destroy_workqueue(hbus->wq);
 
        hv_put_dom_num(hbus->sysdata.domain);