nvmet: fix false keep-alive timeout when a controller is torn down
author: Sagi Grimberg <sagi@grimberg.me>
Tue, 25 May 2021 15:49:05 +0000 (08:49 -0700)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 16 Jun 2021 10:01:38 +0000 (12:01 +0200)
[ Upstream commit aaeadd7075dc9e184bc7876e9dd7b3bada771df2 ]

Controller teardown flow may take some time in case it has many I/O
queues, and the host may not send us keep-alive during this period.
Hence reset the traffic based keep-alive timer so we don't trigger
a controller teardown as a result of a keep-alive expiration.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/nvme/target/core.c
drivers/nvme/target/nvmet.h

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 8b939e9db470cd5849abde1428dfd9012f885b73..9a8fa2e582d5be9dd080337bed03c5840a6d653e 100644 (file)
@@ -379,10 +379,10 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
 {
        struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
                        struct nvmet_ctrl, ka_work);
-       bool cmd_seen = ctrl->cmd_seen;
+       bool reset_tbkas = ctrl->reset_tbkas;
 
-       ctrl->cmd_seen = false;
-       if (cmd_seen) {
+       ctrl->reset_tbkas = false;
+       if (reset_tbkas) {
                pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
                        ctrl->cntlid);
                schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
@@ -792,6 +792,13 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
        percpu_ref_exit(&sq->ref);
 
        if (ctrl) {
+               /*
+                * The teardown flow may take some time, and the host may not
+                * send us keep-alive during this period, hence reset the
+                * traffic based keep-alive timer so we don't trigger a
+                * controller teardown as a result of a keep-alive expiration.
+                */
+               ctrl->reset_tbkas = true;
                nvmet_ctrl_put(ctrl);
                sq->ctrl = NULL; /* allows reusing the queue later */
        }
@@ -942,7 +949,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
        }
 
        if (sq->ctrl)
-               sq->ctrl->cmd_seen = true;
+               sq->ctrl->reset_tbkas = true;
 
        return true;
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index ea96487b5424e82181091336979180f4edff3447..4bf6d21290c235c35733a635e0f63c9c33a147ad 100644 (file)
@@ -166,7 +166,7 @@ struct nvmet_ctrl {
        struct nvmet_subsys     *subsys;
        struct nvmet_sq         **sqs;
 
-       bool                    cmd_seen;
+       bool                    reset_tbkas;
 
        struct mutex            lock;
        u64                     cap;