nvme-fc: fix race between error recovery and creating association
author Michael Liang <mliang@purestorage.com>
Fri, 7 Jul 2023 21:21:57 +0000 (15:21 -0600)
committer Keith Busch <kbusch@kernel.org>
Wed, 12 Jul 2023 16:29:51 +0000 (09:29 -0700)
There is a small race window between nvme-fc association creation and error
recovery. Fix this race condition by protecting access to the controller
state and the ASSOC_FAILED flag with the nvme-fc controller lock.

Signed-off-by: Michael Liang <mliang@purestorage.com>
Reviewed-by: Caleb Sander <csander@purestorage.com>
Reviewed-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/host/fc.c

index ad93363..1cd2bf8 100644 (file)
@@ -2548,17 +2548,24 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
         * the controller.  Abort any ios on the association and let the
         * create_association error path resolve things.
         */
-       if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
-               __nvme_fc_abort_outstanding_ios(ctrl, true);
+       enum nvme_ctrl_state state;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ctrl->lock, flags);
+       state = ctrl->ctrl.state;
+       if (state == NVME_CTRL_CONNECTING) {
                set_bit(ASSOC_FAILED, &ctrl->flags);
+               spin_unlock_irqrestore(&ctrl->lock, flags);
+               __nvme_fc_abort_outstanding_ios(ctrl, true);
                dev_warn(ctrl->ctrl.device,
                        "NVME-FC{%d}: transport error during (re)connect\n",
                        ctrl->cnum);
                return;
        }
+       spin_unlock_irqrestore(&ctrl->lock, flags);
 
        /* Otherwise, only proceed if in LIVE state - e.g. on first error */
-       if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+       if (state != NVME_CTRL_LIVE)
                return;
 
        dev_warn(ctrl->ctrl.device,
@@ -3172,12 +3179,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                else
                        ret = nvme_fc_recreate_io_queues(ctrl);
        }
+
+       spin_lock_irqsave(&ctrl->lock, flags);
        if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
                ret = -EIO;
-       if (ret)
+       if (ret) {
+               spin_unlock_irqrestore(&ctrl->lock, flags);
                goto out_term_aen_ops;
-
+       }
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+       spin_unlock_irqrestore(&ctrl->lock, flags);
 
        ctrl->ctrl.nr_reconnects = 0;