nbd: don't clear 'NBD_CMD_INFLIGHT' flag if request is not completed
author Yu Kuai <yukuai3@huawei.com>
Sat, 21 May 2022 07:37:46 +0000 (15:37 +0800)
committer Jens Axboe <axboe@kernel.dk>
Sat, 28 May 2022 02:39:33 +0000 (20:39 -0600)
Otherwise I/O will hang, because a request is only completed if its
cmd has the 'NBD_CMD_INFLIGHT' flag set.

Fixes: 07175cb1baf4 ("nbd: make sure request completion won't concurrent")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20220521073749.3146892-4-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/block/nbd.c

index 2ee1e37..a0d0910 100644 (file)
@@ -403,13 +403,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
        if (!mutex_trylock(&cmd->lock))
                return BLK_EH_RESET_TIMER;
 
-       if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+       if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
                mutex_unlock(&cmd->lock);
                return BLK_EH_DONE;
        }
 
        if (!refcount_inc_not_zero(&nbd->config_refs)) {
                cmd->status = BLK_STS_TIMEOUT;
+               __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
                mutex_unlock(&cmd->lock);
                goto done;
        }
@@ -478,6 +479,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
        dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
        set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
        cmd->status = BLK_STS_IOERR;
+       __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
        mutex_unlock(&cmd->lock);
        sock_shutdown(nbd);
        nbd_config_put(nbd);
@@ -745,7 +747,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
        cmd = blk_mq_rq_to_pdu(req);
 
        mutex_lock(&cmd->lock);
-       if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+       if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
                dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
                        tag, cmd->status, cmd->flags);
                ret = -ENOENT;
@@ -854,8 +856,16 @@ static void recv_work(struct work_struct *work)
                }
 
                rq = blk_mq_rq_from_pdu(cmd);
-               if (likely(!blk_should_fake_timeout(rq->q)))
-                       blk_mq_complete_request(rq);
+               if (likely(!blk_should_fake_timeout(rq->q))) {
+                       bool complete;
+
+                       mutex_lock(&cmd->lock);
+                       complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
+                                                       &cmd->flags);
+                       mutex_unlock(&cmd->lock);
+                       if (complete)
+                               blk_mq_complete_request(rq);
+               }
                percpu_ref_put(&q->q_usage_counter);
        }