scsi: mpt3sas: Perform additional retries if doorbell read returns 0
author Ranjan Kumar <ranjan.kumar@broadcom.com>
Tue, 29 Aug 2023 09:00:19 +0000 (14:30 +0530)
committer Martin K. Petersen <martin.petersen@oracle.com>
Thu, 31 Aug 2023 01:39:06 +0000 (21:39 -0400)
The driver retries certain register reads 3 times if the returned value is
0. This was done because the controller could return 0 for certain
registers if other registers were being accessed concurrently by the BMC.

In certain systems with increased BMC interactions, the register values
returned can be 0 for longer than 3 retries. Change the retry count from 3
to 30 for the affected registers to prevent problems with out-of-band
management.

Fixes: b899202901a8 ("scsi: mpt3sas: Add separate function for aero doorbell reads")
Cc: stable@vger.kernel.org
Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Link: https://lore.kernel.org/r/20230829090020.5417-2-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h

index 53f5492..5284584 100644 (file)
@@ -138,6 +138,9 @@ _base_get_ioc_facts(struct MPT3SAS_ADAPTER *ioc);
 static void
 _base_clear_outstanding_commands(struct MPT3SAS_ADAPTER *ioc);
 
+static u32
+_base_readl_ext_retry(const volatile void __iomem *addr);
+
 /**
  * mpt3sas_base_check_cmd_timeout - Function
  *             to check timeout and command termination due
@@ -213,6 +216,20 @@ _base_readl_aero(const volatile void __iomem *addr)
        return ret_val;
 }
 
+static u32
+_base_readl_ext_retry(const volatile void __iomem *addr)
+{
+       u32 i, ret_val;
+
+       for (i = 0 ; i < 30 ; i++) { /* read up to 30 times while value is 0 */
+               ret_val = readl(addr);
+               if (ret_val != 0) /* non-zero value read: stop retrying */
+                       break;
+       }
+
+       return ret_val; /* first non-zero value, or 0 if all 30 reads were 0 */
+}
+
 static inline u32
 _base_readl(const volatile void __iomem *addr)
 {
@@ -940,7 +957,7 @@ mpt3sas_halt_firmware(struct MPT3SAS_ADAPTER *ioc)
 
        dump_stack();
 
-       doorbell = ioc->base_readl(&ioc->chip->Doorbell);
+       doorbell = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
        if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
                mpt3sas_print_fault_code(ioc, doorbell &
                    MPI2_DOORBELL_DATA_MASK);
@@ -6686,7 +6703,7 @@ mpt3sas_base_get_iocstate(struct MPT3SAS_ADAPTER *ioc, int cooked)
 {
        u32 s, sc;
 
-       s = ioc->base_readl(&ioc->chip->Doorbell);
+       s = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
        sc = s & MPI2_IOC_STATE_MASK;
        return cooked ? sc : s;
 }
@@ -6831,7 +6848,7 @@ _base_wait_for_doorbell_ack(struct MPT3SAS_ADAPTER *ioc, int timeout)
                                           __func__, count, timeout));
                        return 0;
                } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
-                       doorbell = ioc->base_readl(&ioc->chip->Doorbell);
+                       doorbell = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
                        if ((doorbell & MPI2_IOC_STATE_MASK) ==
                            MPI2_IOC_STATE_FAULT) {
                                mpt3sas_print_fault_code(ioc, doorbell);
@@ -6871,7 +6888,7 @@ _base_wait_for_doorbell_not_used(struct MPT3SAS_ADAPTER *ioc, int timeout)
        count = 0;
        cntdn = 1000 * timeout;
        do {
-               doorbell_reg = ioc->base_readl(&ioc->chip->Doorbell);
+               doorbell_reg = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
                if (!(doorbell_reg & MPI2_DOORBELL_USED)) {
                        dhsprintk(ioc,
                                  ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n",
@@ -7019,7 +7036,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
        __le32 *mfp;
 
        /* make sure doorbell is not in use */
-       if ((ioc->base_readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) {
+       if ((ioc->base_readl_ext_retry(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) {
                ioc_err(ioc, "doorbell is in use (line=%d)\n", __LINE__);
                return -EFAULT;
        }
@@ -7068,7 +7085,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
        }
 
        /* read the first two 16-bits, it gives the total length of the reply */
-       reply[0] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell)
+       reply[0] = le16_to_cpu(ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
            & MPI2_DOORBELL_DATA_MASK);
        writel(0, &ioc->chip->HostInterruptStatus);
        if ((_base_wait_for_doorbell_int(ioc, 5))) {
@@ -7076,7 +7093,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
                        __LINE__);
                return -EFAULT;
        }
-       reply[1] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell)
+       reply[1] = le16_to_cpu(ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
            & MPI2_DOORBELL_DATA_MASK);
        writel(0, &ioc->chip->HostInterruptStatus);
 
@@ -7087,10 +7104,10 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
                        return -EFAULT;
                }
                if (i >=  reply_bytes/2) /* overflow case */
-                       ioc->base_readl(&ioc->chip->Doorbell);
+                       ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
                else
                        reply[i] = le16_to_cpu(
-                           ioc->base_readl(&ioc->chip->Doorbell)
+                           ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
                            & MPI2_DOORBELL_DATA_MASK);
                writel(0, &ioc->chip->HostInterruptStatus);
        }
@@ -7949,7 +7966,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
                        goto out;
                }
 
-               host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
+               host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
                drsprintk(ioc,
                          ioc_info(ioc, "wrote magic sequence: count(%d), host_diagnostic(0x%08x)\n",
                                   count, host_diagnostic));
@@ -7969,7 +7986,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
        for (count = 0; count < (300000000 /
                MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC); count++) {
 
-               host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
+               host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
 
                if (host_diagnostic == 0xFFFFFFFF) {
                        ioc_info(ioc,
@@ -8359,10 +8376,13 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
        ioc->rdpq_array_enable_assigned = 0;
        ioc->use_32bit_dma = false;
        ioc->dma_mask = 64;
-       if (ioc->is_aero_ioc)
+       if (ioc->is_aero_ioc) {
                ioc->base_readl = &_base_readl_aero;
-       else
+               ioc->base_readl_ext_retry = &_base_readl_ext_retry;
+       } else {
                ioc->base_readl = &_base_readl;
+               ioc->base_readl_ext_retry = &_base_readl;
+       }
        r = mpt3sas_base_map_resources(ioc);
        if (r)
                goto out_free_resources;
index 05364aa..10055c7 100644 (file)
@@ -1618,6 +1618,7 @@ struct MPT3SAS_ADAPTER {
        u8              diag_trigger_active;
        u8              atomic_desc_capable;
        BASE_READ_REG   base_readl;
+       BASE_READ_REG   base_readl_ext_retry;
        struct SL_WH_MASTER_TRIGGER_T diag_trigger_master;
        struct SL_WH_EVENT_TRIGGERS_T diag_trigger_event;
        struct SL_WH_SCSI_TRIGGERS_T diag_trigger_scsi;