nvme: Wait for reset state when required
authorKeith Busch <kbusch@kernel.org>
Wed, 4 Sep 2019 16:06:11 +0000 (10:06 -0600)
committerKeith Busch <kbusch@kernel.org>
Mon, 14 Oct 2019 14:22:00 +0000 (23:22 +0900)
Prevent simultaneous controller disabling/enabling tasks from interfering
with each other through a function to wait until the task successfully
transitioned the controller to the RESETTING state. This ensures disabling
the controller will not be interrupted by another reset path, otherwise
a concurrent reset may leave the controller in the wrong state.

Tested-by: Edmund Nadolski <edmund.nadolski@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c

index e94fa69..fa7ba09 100644 (file)
@@ -126,7 +126,7 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
  * code paths that can't be interrupted by other reset attempts. A hot removal
  * may prevent this from succeeding.
  */
-static int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
 {
        if (ctrl->state != NVME_CTRL_RESETTING)
                return -EBUSY;
@@ -134,6 +134,7 @@ static int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
                return -EBUSY;
        return 0;
 }
+EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
 
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -384,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                break;
        }
 
-       if (changed)
+       if (changed) {
                ctrl->state = new_state;
+               wake_up_all(&ctrl->state_wq);
+       }
 
        spin_unlock_irqrestore(&ctrl->lock, flags);
        if (changed && ctrl->state == NVME_CTRL_LIVE)
@@ -394,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
 
+/*
+ * Returns true for sink states that can't ever transition back to live.
+ */
+static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+{
+       switch (ctrl->state) {
+       case NVME_CTRL_NEW:
+       case NVME_CTRL_LIVE:
+       case NVME_CTRL_RESETTING:
+       case NVME_CTRL_CONNECTING:
+               return false;
+       case NVME_CTRL_DELETING:
+       case NVME_CTRL_DEAD:
+               return true;
+       default:
+               WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
+               return true;
+       }
+}
+
+/*
+ * Waits for the controller state to be resetting, or returns false if it is
+ * not possible to ever transition to that state.
+ */
+bool nvme_wait_reset(struct nvme_ctrl *ctrl)
+{
+       wait_event(ctrl->state_wq,
+                  nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
+                  nvme_state_terminal(ctrl));
+       return ctrl->state == NVME_CTRL_RESETTING;
+}
+EXPORT_SYMBOL_GPL(nvme_wait_reset);
+
 static void nvme_free_ns_head(struct kref *ref)
 {
        struct nvme_ns_head *head =
@@ -3998,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
        INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
        INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
+       init_waitqueue_head(&ctrl->state_wq);
 
        INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
        memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
index 2ba5772..22e8401 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/sed-opal.h>
 #include <linux/fault-inject.h>
 #include <linux/rcupdate.h>
+#include <linux/wait.h>
 
 #include <trace/events/block.h>
 
@@ -198,6 +199,7 @@ struct nvme_ctrl {
        struct cdev cdev;
        struct work_struct reset_work;
        struct work_struct delete_work;
+       wait_queue_head_t state_wq;
 
        struct nvme_subsystem *subsys;
        struct list_head subsys_entry;
@@ -448,6 +450,7 @@ void nvme_complete_rq(struct request *req);
 bool nvme_cancel_request(struct request *req, void *data, bool reserved);
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                enum nvme_ctrl_state new_state);
+bool nvme_wait_reset(struct nvme_ctrl *ctrl);
 int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
@@ -498,6 +501,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
 
 int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
index e7e79d6..4b18196 100644 (file)
@@ -2463,6 +2463,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
        mutex_unlock(&dev->shutdown_lock);
 }
 
+static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
+{
+       if (!nvme_wait_reset(&dev->ctrl))
+               return -EBUSY;
+       nvme_dev_disable(dev, shutdown);
+       return 0;
+}
+
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
 {
        dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
@@ -2510,6 +2518,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
 {
+       /*
+        * Set state to deleting now to avoid blocking nvme_wait_reset(), which
+        * may be holding this pci_dev's device lock.
+        */
+       nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
        nvme_get_ctrl(&dev->ctrl);
        nvme_dev_disable(dev, false);
        nvme_kill_queues(&dev->ctrl);
@@ -2835,19 +2848,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 static void nvme_reset_prepare(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
-       nvme_dev_disable(dev, false);
+
+       /*
+        * We don't need to check the return value from waiting for the reset
+        * state as pci_dev device lock is held, making it impossible to race
+        * with ->remove().
+        */
+       nvme_disable_prepare_reset(dev, false);
+       nvme_sync_queues(&dev->ctrl);
 }
 
 static void nvme_reset_done(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
-       nvme_reset_ctrl_sync(&dev->ctrl);
+
+       if (!nvme_try_sched_reset(&dev->ctrl))
+               flush_work(&dev->ctrl.reset_work);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
-       nvme_dev_disable(dev, true);
+       nvme_disable_prepare_reset(dev, true);
 }
 
 /*
@@ -2900,7 +2922,7 @@ static int nvme_resume(struct device *dev)
 
        if (ndev->last_ps == U32_MAX ||
            nvme_set_power_state(ctrl, ndev->last_ps) != 0)
-               nvme_reset_ctrl(ctrl);
+               return nvme_try_sched_reset(&ndev->ctrl);
        return 0;
 }
 
@@ -2928,10 +2950,8 @@ static int nvme_suspend(struct device *dev)
         */
        if (pm_suspend_via_firmware() || !ctrl->npss ||
            !pcie_aspm_enabled(pdev) ||
-           (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) {
-               nvme_dev_disable(ndev, true);
-               return 0;
-       }
+           (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
+               return nvme_disable_prepare_reset(ndev, true);
 
        nvme_start_freeze(ctrl);
        nvme_wait_freeze(ctrl);
@@ -2963,9 +2983,8 @@ static int nvme_suspend(struct device *dev)
                 * Clearing npss forces a controller reset on resume. The
                 * correct value will be resdicovered then.
                 */
-               nvme_dev_disable(ndev, true);
+               ret = nvme_disable_prepare_reset(ndev, true);
                ctrl->npss = 0;
-               ret = 0;
        }
 unfreeze:
        nvme_unfreeze(ctrl);
@@ -2975,9 +2994,7 @@ unfreeze:
 static int nvme_simple_suspend(struct device *dev)
 {
        struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
-
-       nvme_dev_disable(ndev, true);
-       return 0;
+       return nvme_disable_prepare_reset(ndev, true);
 }
 
 static int nvme_simple_resume(struct device *dev)
@@ -2985,8 +3002,7 @@ static int nvme_simple_resume(struct device *dev)
        struct pci_dev *pdev = to_pci_dev(dev);
        struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-       nvme_reset_ctrl(&ndev->ctrl);
-       return 0;
+       return nvme_try_sched_reset(&ndev->ctrl);
 }
 
 static const struct dev_pm_ops nvme_dev_pm_ops = {