nvmet: fix false keep-alive timeout when a controller is torn down
authorSagi Grimberg <sagi@grimberg.me>
Tue, 25 May 2021 15:49:05 +0000 (08:49 -0700)
committerChristoph Hellwig <hch@lst.de>
Wed, 26 May 2021 14:18:22 +0000 (16:18 +0200)
Controller teardown flow may take some time in case it has many I/O
queues, and the host may not send us keep-alive during this period.
Hence reset the traffic based keep-alive timer so we don't trigger
a controller teardown as a result of a keep-alive expiration.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/target/core.c
drivers/nvme/target/nvmet.h

index 1853db3..4b29a5b 100644 (file)
@@ -388,10 +388,10 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
 {
        struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
                        struct nvmet_ctrl, ka_work);
-       bool cmd_seen = ctrl->cmd_seen;
+       bool reset_tbkas = ctrl->reset_tbkas;
 
-       ctrl->cmd_seen = false;
-       if (cmd_seen) {
+       ctrl->reset_tbkas = false;
+       if (reset_tbkas) {
                pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
                        ctrl->cntlid);
                schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
@@ -804,6 +804,13 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
        percpu_ref_exit(&sq->ref);
 
        if (ctrl) {
+               /*
+                * The teardown flow may take some time, and the host may not
+                * send us keep-alive during this period, hence reset the
+                * traffic based keep-alive timer so we don't trigger a
+                * controller teardown as a result of a keep-alive expiration.
+                */
+               ctrl->reset_tbkas = true;
                nvmet_ctrl_put(ctrl);
                sq->ctrl = NULL; /* allows reusing the queue later */
        }
@@ -952,7 +959,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
        }
 
        if (sq->ctrl)
-               sq->ctrl->cmd_seen = true;
+               sq->ctrl->reset_tbkas = true;
 
        return true;
 
index d69a409..53aea9a 100644 (file)
@@ -167,7 +167,7 @@ struct nvmet_ctrl {
        struct nvmet_subsys     *subsys;
        struct nvmet_sq         **sqs;
 
-       bool                    cmd_seen;
+       bool                    reset_tbkas;
 
        struct mutex            lock;
        u64                     cap;