scsi: core: Add limitless cmd retry support
author Mike Christie <michael.christie@oracle.com>
Thu, 1 Oct 2020 15:35:53 +0000 (10:35 -0500)
committer Martin K. Petersen <martin.petersen@oracle.com>
Fri, 2 Oct 2020 22:53:06 +0000 (18:53 -0400)
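Add infinite retry support to the SCSI midlayer by combining the common
retry checks into helper functions, and then checking in those helpers
whether the command's allowed retry count is SCSI_CMD_RETRIES_NO_LIMIT (-1).
When it is, the helpers never report the retry count or the total runtime
as exceeded.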

Link: https://lore.kernel.org/r/1601566554-26752-2-git-send-email-michael.christie@oracle.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/scsi_error.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_priv.h

index 5f3726abed78fdb7851d9cbb4cdc3bba390bc89f..ae80daa5d831e4e797ec3a42da6cb24f328fcbf2 100644 (file)
@@ -116,6 +116,14 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
        return 1;
 }
 
+static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
+{
+       if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+               return true;
+
+       return ++cmd->retries <= cmd->allowed;
+}
+
 /**
  * scmd_eh_abort_handler - Handle command aborts
  * @work:      command to be aborted.
@@ -151,7 +159,7 @@ scmd_eh_abort_handler(struct work_struct *work)
                                                    "eh timeout, not retrying "
                                                    "aborted command\n"));
                        } else if (!scsi_noretry_cmd(scmd) &&
-                           (++scmd->retries <= scmd->allowed)) {
+                                  scsi_cmd_retry_allowed(scmd)) {
                                SCSI_LOG_ERROR_RECOVERY(3,
                                        scmd_printk(KERN_WARNING, scmd,
                                                    "retry aborted command\n"));
@@ -1264,11 +1272,18 @@ int scsi_eh_get_sense(struct list_head *work_q,
                 * upper level.
                 */
                if (rtn == SUCCESS)
-                       /* we don't want this command reissued, just
-                        * finished with the sense data, so set
-                        * retries to the max allowed to ensure it
-                        * won't get reissued */
-                       scmd->retries = scmd->allowed;
+                       /*
+                        * We don't want this command reissued, just finished
+                        * with the sense data, so set retries to the max
+                        * allowed to ensure it won't get reissued. If the user
+                        * has requested infinite retries, we also want to
+                        * finish this command, so force completion by setting
+                        * retries and allowed to the same value.
+                        */
+                       if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+                               scmd->retries = scmd->allowed = 1;
+                       else
+                               scmd->retries = scmd->allowed;
                else if (rtn != NEEDS_RETRY)
                        continue;
 
@@ -1944,8 +1959,7 @@ maybe_retry:
         * the request was not marked fast fail.  Note that above,
         * even if the request is marked fast fail, we still requeue
         * for queue congestion conditions (QUEUE_FULL or BUSY) */
-       if ((++scmd->retries) <= scmd->allowed
-           && !scsi_noretry_cmd(scmd)) {
+       if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
                return NEEDS_RETRY;
        } else {
                /*
@@ -2091,8 +2105,7 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
        list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
                list_del_init(&scmd->eh_entry);
                if (scsi_device_online(scmd->device) &&
-                   !scsi_noretry_cmd(scmd) &&
-                   (++scmd->retries <= scmd->allowed)) {
+                   !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd)) {
                        SCSI_LOG_ERROR_RECOVERY(3,
                                scmd_printk(KERN_INFO, scmd,
                                             "%s: flush retry cmd\n",
index f0ee11dc07e4b093001391ae08800fe78b25ffd8..4e49469b6c5386d3cbdcdcccef46555453811e15 100644 (file)
@@ -669,6 +669,23 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
        scsi_mq_requeue_cmd(cmd);
 }
 
+static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
+{
+       struct request *req = cmd->request;
+       unsigned long wait_for;
+
+       if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+               return false;
+
+       wait_for = (cmd->allowed + 1) * req->timeout;
+       if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
+               scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n",
+                           wait_for/HZ);
+               return true;
+       }
+       return false;
+}
+
 /* Helper for scsi_io_completion() when special action required. */
 static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 {
@@ -677,7 +694,6 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
        int level = 0;
        enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
              ACTION_DELAYED_RETRY} action;
-       unsigned long wait_for = (cmd->allowed + 1) * req->timeout;
        struct scsi_sense_hdr sshdr;
        bool sense_valid;
        bool sense_current = true;      /* false implies "deferred sense" */
@@ -782,8 +798,7 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
        } else
                action = ACTION_FAIL;
 
-       if (action != ACTION_FAIL &&
-           time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
+       if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd))
                action = ACTION_FAIL;
 
        switch (action) {
@@ -1456,7 +1471,6 @@ static bool scsi_mq_lld_busy(struct request_queue *q)
 static void scsi_softirq_done(struct request *rq)
 {
        struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
-       unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
        int disposition;
 
        INIT_LIST_HEAD(&cmd->eh_entry);
@@ -1466,13 +1480,8 @@ static void scsi_softirq_done(struct request *rq)
                atomic_inc(&cmd->device->ioerr_cnt);
 
        disposition = scsi_decide_disposition(cmd);
-       if (disposition != SUCCESS &&
-           time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
-               scmd_printk(KERN_ERR, cmd,
-                           "timing out command, waited %lus\n",
-                           wait_for/HZ);
+       if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
                disposition = SUCCESS;
-       }
 
        scsi_log_completion(cmd, disposition);
 
index d12ada03596136a08a7ef07f1bfe646bf80c39fc..180636d54982d0db670ecaf1b3b158f1eca08956 100644 (file)
@@ -15,6 +15,7 @@ struct scsi_host_template;
 struct Scsi_Host;
 struct scsi_nl_hdr;
 
+#define SCSI_CMD_RETRIES_NO_LIMIT -1
 
 /*
  * Scsi Error Handler Flags