nvme: implement Enhanced Command Retry
author	Keith Busch <keith.busch@intel.com>
Tue, 27 Nov 2018 16:40:57 +0000 (09:40 -0700)
committer	Jens Axboe <axboe@kernel.dk>
Sat, 8 Dec 2018 05:26:58 +0000 (22:26 -0700)
A controller may enter an internal state in which it is unable to successfully
process commands for a short duration. In such states, an immediate command
requeue is expected to fail. The driver may then exceed its maximum retry
count, permanently failing a command that would have succeeded once the
controller is ready again.

The NVMe-ratified TP 4033 (Enhanced Command Retry) provides a delay hint in
the completion status of failed commands. Implement the retry delay based on
the command's completion status and the controller's requested delay.
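
For illustration only, a minimal userspace sketch of the arithmetic this adds
(the crdt[] values and sample status below are made up, not taken from the
patch): the two CRD bits (mask 0x1800 in the driver's status encoding) select
one of the three CRDT values reported by Identify Controller, each expressed
in units of 100 milliseconds.

  #include <stdint.h>
  #include <stdio.h>

  /* Hypothetical CRDT values from Identify Controller, in 100 ms units. */
  static const uint16_t crdt[3] = { 5, 20, 100 };

  /* Suggested retry delay in milliseconds for a completed command. */
  static unsigned int crd_delay_ms(uint16_t status)
  {
          unsigned int crd = (status & 0x1800) >> 11; /* CRD field, 0..3 */

          if (!crd)
                  return 0;                   /* no delay requested */
          return crdt[crd - 1] * 100;         /* CRDT is in 100 ms units */
  }

  int main(void)
  {
          /* CRD == 2 selects crdt[1]: 20 * 100 ms = 2000 ms. */
          printf("%u ms\n", crd_delay_ms(0x1000));
          return 0;
  }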

Note that requeued commands are handled per request_queue, not per individual
request. If multiple commands fail, the controller should consistently report
the desired delay time for retryable commands in all CQEs; otherwise the
requeue list may be kicked too soon.
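
To see why, here is a toy model (plain C, not kernel code; all names are made
up) of the per-queue behaviour: the delayed kick is a single timer per
request_queue that every call re-arms, so the most recent delay wins and a
zero-delay CQE can fire the list before a longer hint has elapsed.

  #include <stdio.h>

  /* Toy stand-in for a request_queue's single delayed requeue kick. */
  struct toy_queue {
          unsigned long kick_at_ms;   /* when the requeue list will run */
  };

  static void toy_delay_kick(struct toy_queue *q, unsigned long now_ms,
                             unsigned long delay_ms)
  {
          /* Each kick re-arms the one timer, whether shorter or longer. */
          q->kick_at_ms = now_ms + delay_ms;
  }

  int main(void)
  {
          struct toy_queue q = { 0 };

          toy_delay_kick(&q, 0, 2000);    /* first CQE: CRD hint of 2000 ms */
          toy_delay_kick(&q, 1, 0);       /* second CQE: no CRD hint */
          /* Both requeued commands now run at ~1 ms, not 2000 ms. */
          printf("requeue list kicked at %lu ms\n", q.kick_at_ms);
          return 0;
  }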

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
include/linux/nvme.h

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 71d2a89..f905768 100644
@@ -244,6 +244,22 @@ static inline bool nvme_req_needs_retry(struct request *req)
        return true;
 }
 
+static void nvme_retry_req(struct request *req)
+{
+       struct nvme_ns *ns = req->q->queuedata;
+       unsigned long delay = 0;
+       u16 crd;
+
+       /* The mask and shift result must be <= 3 */
+       crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11;
+       if (ns && crd)
+               delay = ns->ctrl->crdt[crd - 1] * 100;
+
+       nvme_req(req)->retries++;
+       blk_mq_requeue_request(req, false);
+       blk_mq_delay_kick_requeue_list(req->q, delay);
+}
+
 void nvme_complete_rq(struct request *req)
 {
        blk_status_t status = nvme_error_status(req);
@@ -261,8 +277,7 @@ void nvme_complete_rq(struct request *req)
                }
 
                if (!blk_queue_dying(req->q)) {
-                       nvme_req(req)->retries++;
-                       blk_mq_requeue_request(req, true);
+                       nvme_retry_req(req);
                        return;
                }
        }
@@ -1883,6 +1898,26 @@ static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
        return ret;
 }
 
+static int nvme_configure_acre(struct nvme_ctrl *ctrl)
+{
+       struct nvme_feat_host_behavior *host;
+       int ret;
+
+       /* Don't bother enabling the feature if retry delay is not reported */
+       if (!ctrl->crdt[0])
+               return 0;
+
+       host = kzalloc(sizeof(*host), GFP_KERNEL);
+       if (!host)
+               return 0;
+
+       host->acre = NVME_ENABLE_ACRE;
+       ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
+                               host, sizeof(*host), NULL);
+       kfree(host);
+       return ret;
+}
+
 static int nvme_configure_apst(struct nvme_ctrl *ctrl)
 {
        /*
@@ -2404,6 +2439,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
        }
 
+       ctrl->crdt[0] = le16_to_cpu(id->crdt1);
+       ctrl->crdt[1] = le16_to_cpu(id->crdt2);
+       ctrl->crdt[2] = le16_to_cpu(id->crdt3);
+
        ctrl->oacs = le16_to_cpu(id->oacs);
        ctrl->oncs = le16_to_cpup(&id->oncs);
        ctrl->oaes = le32_to_cpu(id->oaes);
@@ -2504,6 +2543,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        if (ret < 0)
                return ret;
 
+       ret = nvme_configure_acre(ctrl);
+       if (ret < 0)
+               return ret;
+
        ctrl->identified = true;
 
        return 0;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f2594d4..79e621f 100644
@@ -181,6 +181,7 @@ struct nvme_ctrl {
        u32 page_size;
        u32 max_hw_sectors;
        u32 max_segments;
+       u16 crdt[3];
        u16 oncs;
        u16 oacs;
        u16 nssa;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c03973c..88812cb 100644
@@ -223,7 +223,11 @@ struct nvme_id_ctrl {
        __le32                  rtd3e;
        __le32                  oaes;
        __le32                  ctratt;
-       __u8                    rsvd100[156];
+       __u8                    rsvd100[28];
+       __le16                  crdt1;
+       __le16                  crdt2;
+       __le16                  crdt3;
+       __u8                    rsvd134[122];
        __le16                  oacs;
        __u8                    acl;
        __u8                    aerl;
@@ -756,6 +760,15 @@ enum {
        NVME_HOST_MEM_RETURN    = (1 << 1),
 };
 
+struct nvme_feat_host_behavior {
+       __u8 acre;
+       __u8 resv1[511];
+};
+
+enum {
+       NVME_ENABLE_ACRE        = 1,
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
@@ -810,6 +823,7 @@ enum {
        NVME_FEAT_RRL           = 0x12,
        NVME_FEAT_PLM_CONFIG    = 0x13,
        NVME_FEAT_PLM_WINDOW    = 0x14,
+       NVME_FEAT_HOST_BEHAVIOR = 0x16,
        NVME_FEAT_SW_PROGRESS   = 0x80,
        NVME_FEAT_HOST_ID       = 0x81,
        NVME_FEAT_RESV_MASK     = 0x82,
@@ -1265,6 +1279,7 @@ enum {
        NVME_SC_ANA_TRANSITION          = 0x303,
        NVME_SC_HOST_PATH_ERROR         = 0x370,
 
+       NVME_SC_CRD                     = 0x1800,
        NVME_SC_DNR                     = 0x4000,
 };