scsi: core: Add limitless cmd retry support

author Mike Christie <michael.christie@oracle.com>

Thu, 1 Oct 2020 15:35:53 +0000 (10:35 -0500)

committer Martin K. Petersen <martin.petersen@oracle.com>

Fri, 2 Oct 2020 22:53:06 +0000 (18:53 -0400)
author Mike Christie <michael.christie@oracle.com>
Thu, 1 Oct 2020 15:35:53 +0000 (10:35 -0500)
committer Martin K. Petersen <martin.petersen@oracle.com>
Fri, 2 Oct 2020 22:53:06 +0000 (18:53 -0400)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index 5f3726abed78fdb7851d9cbb4cdc3bba390bc89f..ae80daa5d831e4e797ec3a42da6cb24f328fcbf2 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -116,6 +116,14 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
         return 1;
  }
  
+static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
+{
+       /* Bump and check the retry count unless retries are unlimited. */
+       if (cmd->allowed != SCSI_CMD_RETRIES_NO_LIMIT)
+               return ++cmd->retries <= cmd->allowed;
+       return true;
+}
+
  /**
   * scmd_eh_abort_handler - Handle command aborts
   * @work:      command to be aborted.
@@ -151,7 +159,7 @@ scmd_eh_abort_handler(struct work_struct *work)
                                                     "eh timeout, not retrying "
                                                     "aborted command\n"));
                         } else if (!scsi_noretry_cmd(scmd) &&
-                           (++scmd->retries <= scmd->allowed)) {
+                                  scsi_cmd_retry_allowed(scmd)) {
                                 SCSI_LOG_ERROR_RECOVERY(3,
                                         scmd_printk(KERN_WARNING, scmd,
                                                     "retry aborted command\n"));
@@ -1264,11 +1272,18 @@ int scsi_eh_get_sense(struct list_head *work_q,
                  * upper level.
                  */
                 if (rtn == SUCCESS)
-                       /* we don't want this command reissued, just
-                        * finished with the sense data, so set
-                        * retries to the max allowed to ensure it
-                        * won't get reissued */
-                       scmd->retries = scmd->allowed;
+                       /*
+                        * We don't want this command reissued, just finished
+                        * with the sense data, so set retries to the max
+                        * allowed to ensure it won't get reissued. If the user
+                        * has requested infinite retries, we also want to
+                        * finish this command, so force completion by setting
+                        * retries and allowed to the same value.
+                        */
+                       if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+                               scmd->retries = scmd->allowed = 1;
+                       else
+                               scmd->retries = scmd->allowed;
                 else if (rtn != NEEDS_RETRY)
                         continue;
  
@@ -1944,8 +1959,7 @@ maybe_retry:
          * the request was not marked fast fail.  Note that above,
          * even if the request is marked fast fail, we still requeue
          * for queue congestion conditions (QUEUE_FULL or BUSY) */
-       if ((++scmd->retries) <= scmd->allowed
-           && !scsi_noretry_cmd(scmd)) {
+       if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
                 return NEEDS_RETRY;
         } else {
                 /*
@@ -2091,8 +2105,7 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
         list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
                 list_del_init(&scmd->eh_entry);
                 if (scsi_device_online(scmd->device) &&
-                   !scsi_noretry_cmd(scmd) &&
-                   (++scmd->retries <= scmd->allowed)) {
+                   !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd)) {
                         SCSI_LOG_ERROR_RECOVERY(3,
                                 scmd_printk(KERN_INFO, scmd,
                                              "%s: flush retry cmd\n",
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c

index f0ee11dc07e4b093001391ae08800fe78b25ffd8..4e49469b6c5386d3cbdcdcccef46555453811e15 100644 (file)
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -669,6 +669,23 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
         scsi_mq_requeue_cmd(cmd);
  }
  
+static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
+{
+       unsigned long wait_for;
+
+       /* Commands with no retry limit are never timed out by this check. */
+       if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+               return false;
+
+       wait_for = (cmd->allowed + 1) * cmd->request->timeout;
+       if (!time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
+               return false;
+
+       scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n",
+                   wait_for/HZ);
+       return true;
+}
+
  /* Helper for scsi_io_completion() when special action required. */
  static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
  {
@@ -677,7 +694,6 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
         int level = 0;
         enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
               ACTION_DELAYED_RETRY} action;
-       unsigned long wait_for = (cmd->allowed + 1) * req->timeout;
         struct scsi_sense_hdr sshdr;
         bool sense_valid;
         bool sense_current = true;      /* false implies "deferred sense" */
@@ -782,8 +798,7 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
         } else
                 action = ACTION_FAIL;
  
-       if (action != ACTION_FAIL &&
-           time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
+       if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd))
                 action = ACTION_FAIL;
  
         switch (action) {
@@ -1456,7 +1471,6 @@ static bool scsi_mq_lld_busy(struct request_queue *q)
  static void scsi_softirq_done(struct request *rq)
  {
         struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
-       unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
         int disposition;
  
         INIT_LIST_HEAD(&cmd->eh_entry);
@@ -1466,13 +1480,8 @@ static void scsi_softirq_done(struct request *rq)
                 atomic_inc(&cmd->device->ioerr_cnt);
  
         disposition = scsi_decide_disposition(cmd);
-       if (disposition != SUCCESS &&
-           time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
-               scmd_printk(KERN_ERR, cmd,
-                           "timing out command, waited %lus\n",
-                           wait_for/HZ);
+       if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
                 disposition = SUCCESS;
-       }
  
         scsi_log_completion(cmd, disposition);
  
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h

index d12ada03596136a08a7ef07f1bfe646bf80c39fc..180636d54982d0db670ecaf1b3b158f1eca08956 100644 (file)
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -15,6 +15,7 @@ struct scsi_host_template;
  struct Scsi_Host;
  struct scsi_nl_hdr;
  
+#define SCSI_CMD_RETRIES_NO_LIMIT (-1) /* cmd->allowed value: no retry cap */
  
  /*
   * Scsi Error Handler Flags
author	Mike Christie <michael.christie@oracle.com>
	Thu, 1 Oct 2020 15:35:53 +0000 (10:35 -0500)
committer	Martin K. Petersen <martin.petersen@oracle.com>
	Fri, 2 Oct 2020 22:53:06 +0000 (18:53 -0400)
drivers/scsi/scsi_error.c		patch \| blob \| history
drivers/scsi/scsi_lib.c		patch \| blob \| history
drivers/scsi/scsi_priv.h		patch \| blob \| history