scsi: mpi3mr: Fault IOC when internal command gets timeout
author Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Mon, 20 Dec 2021 14:11:45 +0000 (19:41 +0530)
committer Martin K. Petersen <martin.petersen@oracle.com>
Thu, 23 Dec 2021 05:04:23 +0000 (00:04 -0500)
Save snapdump and fault the controller with the given reason code if it is
not already in the fault state or in asynchronous reset. This ensures that
soft reset is issued from the watchdog thread.  This will also be used to
handle initialization time faults/resets/timeouts, as in those cases
immediate soft reset invocation is not required.

Link: https://lore.kernel.org/r/20211220141159.16117-12-sreekanth.reddy@broadcom.com
Signed-off-by: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/mpi3mr/mpi3mr.h
drivers/scsi/mpi3mr/mpi3mr_fw.c

index fdbedf2..55a07f9 100644 (file)
@@ -916,5 +916,6 @@ void mpi3mr_flush_host_io(struct mpi3mr_ioc *mrioc);
 void mpi3mr_invalidate_devhandles(struct mpi3mr_ioc *mrioc);
 void mpi3mr_rfresh_tgtdevs(struct mpi3mr_ioc *mrioc);
 void mpi3mr_flush_delayed_rmhs_list(struct mpi3mr_ioc *mrioc);
+void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code);
 
 #endif /*MPI3MR_H_INCLUDED*/
index 6b534ed..b6d4e9d 100644 (file)
@@ -1391,13 +1391,9 @@ static int mpi3mr_delete_op_reply_q(struct mpi3mr_ioc *mrioc, u16 qidx)
        wait_for_completion_timeout(&mrioc->init_cmds.done,
            (MPI3MR_INTADMCMD_TIMEOUT * HZ));
        if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
-               ioc_err(mrioc, "Issue DelRepQ: command timed out\n");
-               mpi3mr_set_diagsave(mrioc);
-               mpi3mr_issue_reset(mrioc,
-                   MPI3_SYSIF_HOST_DIAG_RESET_ACTION_DIAG_FAULT,
+               ioc_err(mrioc, "delete reply queue timed out\n");
+               mpi3mr_check_rh_fault_ioc(mrioc,
                    MPI3MR_RESET_FROM_DELREPQ_TIMEOUT);
-               mrioc->unrecoverable = 1;
-
                retval = -1;
                goto out_unlock;
        }
@@ -1617,12 +1613,9 @@ static int mpi3mr_create_op_reply_q(struct mpi3mr_ioc *mrioc, u16 qidx)
        wait_for_completion_timeout(&mrioc->init_cmds.done,
            (MPI3MR_INTADMCMD_TIMEOUT * HZ));
        if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
-               ioc_err(mrioc, "CreateRepQ: command timed out\n");
-               mpi3mr_set_diagsave(mrioc);
-               mpi3mr_issue_reset(mrioc,
-                   MPI3_SYSIF_HOST_DIAG_RESET_ACTION_DIAG_FAULT,
+               ioc_err(mrioc, "create reply queue timed out\n");
+               mpi3mr_check_rh_fault_ioc(mrioc,
                    MPI3MR_RESET_FROM_CREATEREPQ_TIMEOUT);
-               mrioc->unrecoverable = 1;
                retval = -1;
                goto out_unlock;
        }
@@ -1724,12 +1717,9 @@ static int mpi3mr_create_op_req_q(struct mpi3mr_ioc *mrioc, u16 idx,
        wait_for_completion_timeout(&mrioc->init_cmds.done,
            (MPI3MR_INTADMCMD_TIMEOUT * HZ));
        if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
-               ioc_err(mrioc, "CreateReqQ: command timed out\n");
-               mpi3mr_set_diagsave(mrioc);
-               if (mpi3mr_issue_reset(mrioc,
-                   MPI3_SYSIF_HOST_DIAG_RESET_ACTION_DIAG_FAULT,
-                   MPI3MR_RESET_FROM_CREATEREQQ_TIMEOUT))
-                       mrioc->unrecoverable = 1;
+               ioc_err(mrioc, "create request queue timed out\n");
+               mpi3mr_check_rh_fault_ioc(mrioc,
+                   MPI3MR_RESET_FROM_CREATEREQQ_TIMEOUT);
                retval = -1;
                goto out_unlock;
        }
@@ -1903,6 +1893,42 @@ out:
 }
 
 /**
+ * mpi3mr_check_rh_fault_ioc - check reset history and fault
+ * controller
+ * @mrioc: Adapter instance reference
+ * @reason_code: Reason code for the fault.
+ *
+ * This routine will save snapdump and fault the controller with
+ * the given reason code if it is not already in the fault state
+ * or asynchronously reset. This will be used to handle
+ * initialization time faults/resets/timeouts as in those cases
+ * immediate soft reset invocation is not required.
+ *
+ * Return:  None.
+ */
+void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
+{
+       u32 ioc_status, host_diagnostic, timeout;
+
+       ioc_status = readl(&mrioc->sysif_regs->ioc_status);
+       if ((ioc_status & MPI3_SYSIF_IOC_STATUS_RESET_HISTORY) ||
+           (ioc_status & MPI3_SYSIF_IOC_STATUS_FAULT)) {
+               mpi3mr_print_fault_info(mrioc);
+               return;
+       }
+       mpi3mr_set_diagsave(mrioc);
+       mpi3mr_issue_reset(mrioc, MPI3_SYSIF_HOST_DIAG_RESET_ACTION_DIAG_FAULT,
+           reason_code);
+       timeout = MPI3_SYSIF_DIAG_SAVE_TIMEOUT * 10;
+       do { /* poll until SAVE_IN_PROGRESS clears, sleeping 100 ms per try */
+               host_diagnostic = readl(&mrioc->sysif_regs->host_diagnostic);
+               if (!(host_diagnostic & MPI3_SYSIF_HOST_DIAG_SAVE_IN_PROGRESS))
+                       break;
+               msleep(100);
+       } while (--timeout);
+}
+
+/**
  * mpi3mr_sync_timestamp - Issue time stamp sync request
  * @mrioc: Adapter reference
  *
@@ -2025,6 +2051,8 @@ static int mpi3mr_print_pkg_ver(struct mpi3mr_ioc *mrioc)
            (MPI3MR_INTADMCMD_TIMEOUT * HZ));
        if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
                ioc_err(mrioc, "get package version timed out\n");
+               mpi3mr_check_rh_fault_ioc(mrioc,
+                   MPI3MR_RESET_FROM_GETPKGVER_TIMEOUT);
                retval = -1;
                goto out_unlock;
        }
@@ -2344,12 +2372,9 @@ static int mpi3mr_issue_iocfacts(struct mpi3mr_ioc *mrioc,
        wait_for_completion_timeout(&mrioc->init_cmds.done,
            (MPI3MR_INTADMCMD_TIMEOUT * HZ));
        if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
-               ioc_err(mrioc, "Issue IOCFacts: command timed out\n");
-               mpi3mr_set_diagsave(mrioc);
-               mpi3mr_issue_reset(mrioc,
-                   MPI3_SYSIF_HOST_DIAG_RESET_ACTION_DIAG_FAULT,
+               ioc_err(mrioc, "ioc_facts timed out\n");
+               mpi3mr_check_rh_fault_ioc(mrioc,
                    MPI3MR_RESET_FROM_IOCFACTS_TIMEOUT);
-               mrioc->unrecoverable = 1;
                retval = -1;
                goto out_unlock;
        }
@@ -2743,12 +2768,9 @@ static int mpi3mr_issue_iocinit(struct mpi3mr_ioc *mrioc)
        wait_for_completion_timeout(&mrioc->init_cmds.done,
            (MPI3MR_INTADMCMD_TIMEOUT * HZ));
        if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
-               mpi3mr_set_diagsave(mrioc);
-               mpi3mr_issue_reset(mrioc,
-                   MPI3_SYSIF_HOST_DIAG_RESET_ACTION_DIAG_FAULT,
+               mpi3mr_check_rh_fault_ioc(mrioc,
                    MPI3MR_RESET_FROM_IOCINIT_TIMEOUT);
-               mrioc->unrecoverable = 1;
-               ioc_err(mrioc, "Issue IOCInit: command timed out\n");
+               ioc_err(mrioc, "ioc_init timed out\n");
                retval = -1;
                goto out_unlock;
        }
@@ -2839,12 +2861,9 @@ static int mpi3mr_issue_event_notification(struct mpi3mr_ioc *mrioc)
        wait_for_completion_timeout(&mrioc->init_cmds.done,
            (MPI3MR_INTADMCMD_TIMEOUT * HZ));
        if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
-               ioc_err(mrioc, "Issue EvtNotify: command timed out\n");
-               mpi3mr_set_diagsave(mrioc);
-               mpi3mr_issue_reset(mrioc,
-                   MPI3_SYSIF_HOST_DIAG_RESET_ACTION_DIAG_FAULT,
+               ioc_err(mrioc, "event notification timed out\n");
+               mpi3mr_check_rh_fault_ioc(mrioc,
                    MPI3MR_RESET_FROM_EVTNOTIFY_TIMEOUT);
-               mrioc->unrecoverable = 1;
                retval = -1;
                goto out_unlock;
        }
@@ -3051,29 +3070,28 @@ int mpi3mr_issue_port_enable(struct mpi3mr_ioc *mrioc, u8 async)
                ioc_err(mrioc, "Issue PortEnable: Admin Post failed\n");
                goto out_unlock;
        }
-       if (!async) {
-               wait_for_completion_timeout(&mrioc->init_cmds.done,
-                   (pe_timeout * HZ));
-               if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
-                       ioc_err(mrioc, "Issue PortEnable: command timed out\n");
-                       retval = -1;
-                       mrioc->scan_failed = MPI3_IOCSTATUS_INTERNAL_ERROR;
-                       mpi3mr_set_diagsave(mrioc);
-                       mpi3mr_issue_reset(mrioc,
-                           MPI3_SYSIF_HOST_DIAG_RESET_ACTION_DIAG_FAULT,
-                           MPI3MR_RESET_FROM_PE_TIMEOUT);
-                       mrioc->unrecoverable = 1;
-                       goto out_unlock;
-               }
-               mpi3mr_port_enable_complete(mrioc, &mrioc->init_cmds);
+       if (async) {
+               mutex_unlock(&mrioc->init_cmds.mutex);
+               goto out;
        }
+
+       wait_for_completion_timeout(&mrioc->init_cmds.done, (pe_timeout * HZ));
+       if (!(mrioc->init_cmds.state & MPI3MR_CMD_COMPLETE)) {
+               ioc_err(mrioc, "port enable timed out\n");
+               retval = -1;
+               mpi3mr_check_rh_fault_ioc(mrioc, MPI3MR_RESET_FROM_PE_TIMEOUT);
+               goto out_unlock;
+       }
+       mpi3mr_port_enable_complete(mrioc, &mrioc->init_cmds);
+
 out_unlock:
+       mrioc->init_cmds.state = MPI3MR_CMD_NOTUSED;
        mutex_unlock(&mrioc->init_cmds.mutex);
 out:
        return retval;
 }
 
-/* Protocol type to name mapper structure*/
+/* Protocol type to name mapper structure */
 static const struct {
        u8 protocol;
        char *name;