nvme-fc: track error_recovery while connecting
author James Smart <james.smart@broadcom.com>
Fri, 23 Oct 2020 22:26:04 +0000 (15:26 -0700)
committer Christoph Hellwig <hch@lst.de>
Tue, 27 Oct 2020 09:01:30 +0000 (10:01 +0100)
Whenever there are errors during CONNECTING, the driver recovers by
aborting all outstanding ios and counts on the io completions to fail them
and thus the connection/association they are on.  However, the connection
failure depends on a failure status being returned by the core routines.
Not all commands issued by a core routine are guaranteed to cause that
routine to fail: a command may complete with a failure status that the
core routine then ignores.

As such, whenever the transport enters error_recovery while CONNECTING,
it will set a new flag indicating that the association failed. The
create_association routine, which creates and initializes the controller,
will monitor the state of the flag as well as the core routine error
status and ensure the association fails if there was an error.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/host/fc.c

index 3c002bd..cfb6ef7 100644 (file)
@@ -146,7 +146,8 @@ struct nvme_fc_rport {
 
 /* fc_ctrl flags values - specified as bit positions */
 #define ASSOC_ACTIVE           0
-#define FCCTRL_TERMIO          1
+#define ASSOC_FAILED           1
+#define FCCTRL_TERMIO          2
 
 struct nvme_fc_ctrl {
        spinlock_t              lock;
@@ -2988,6 +2989,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                ctrl->cnum, ctrl->lport->localport.port_name,
                ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn);
 
+       clear_bit(ASSOC_FAILED, &ctrl->flags);
+
        /*
         * Create the admin queue
         */
@@ -3016,7 +3019,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
         */
 
        ret = nvme_enable_ctrl(&ctrl->ctrl);
-       if (ret)
+       if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
                goto out_disconnect_admin_queue;
 
        ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
@@ -3026,7 +3029,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
        ret = nvme_init_identify(&ctrl->ctrl);
-       if (ret)
+       if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
                goto out_disconnect_admin_queue;
 
        /* sanity checks */
@@ -3071,9 +3074,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                        ret = nvme_fc_create_io_queues(ctrl);
                else
                        ret = nvme_fc_recreate_io_queues(ctrl);
-               if (ret)
-                       goto out_term_aen_ops;
        }
+       if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
+               goto out_term_aen_ops;
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 
@@ -3301,6 +3304,7 @@ __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
         */
        if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
                __nvme_fc_abort_outstanding_ios(ctrl, true);
+               set_bit(ASSOC_FAILED, &ctrl->flags);
                return;
        }