scsi: lpfc: Trigger SLI4 firmware dump before doing driver cleanup
author James Smart <jsmart2021@gmail.com>
Sat, 4 Dec 2021 00:26:40 +0000 (16:26 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Jan 2022 10:05:00 +0000 (11:05 +0100)
[ Upstream commit 7dd2e2a923173d637c272e483966be8e96a72b64 ]

Extraneous teardown routines are present in the firmware dump path, causing
altered states in firmware captures.

When a firmware dump is requested via sysfs, trigger the dump immediately
without tearing down structures and changing adapter state.

The driver shall rely on pre-existing firmware error state cleanup
handlers to restore the adapter.

Link: https://lore.kernel.org/r/20211204002644.116455-6-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_sli.c

index befeb7c34290378820647d5012e70c663fc74347..19fd9d263f47f76a2c9badd34591974eaf8ec001 100644 (file)
@@ -1022,7 +1022,6 @@ struct lpfc_hba {
 #define HBA_DEVLOSS_TMO         0x2000 /* HBA in devloss timeout */
 #define HBA_RRQ_ACTIVE         0x4000 /* process the rrq active list */
 #define HBA_IOQ_FLUSH          0x8000 /* FCP/NVME I/O queues being flushed */
-#define HBA_FW_DUMP_OP         0x10000 /* Skips fn reset before FW dump */
 #define HBA_RECOVERABLE_UE     0x20000 /* Firmware supports recoverable UE */
 #define HBA_FORCED_LINK_SPEED  0x40000 /*
                                         * Firmware supports Forced Link Speed
@@ -1038,6 +1037,7 @@ struct lpfc_hba {
 #define HBA_HBEAT_TMO          0x8000000 /* HBEAT initiated after timeout */
 #define HBA_FLOGI_OUTSTANDING  0x10000000 /* FLOGI is outstanding */
 
+       struct completion *fw_dump_cmpl; /* cmpl event tracker for fw_dump */
        uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
        struct lpfc_dmabuf slim2p;
 
index ebe417921dac05257d7ff0a58c6c7991464d79c9..f20c4fe1fb8b951f1690735b96131a3464732aa2 100644 (file)
@@ -1709,25 +1709,25 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
        before_fc_flag = phba->pport->fc_flag;
        sriov_nr_virtfn = phba->cfg_sriov_nr_virtfn;
 
-       /* Disable SR-IOV virtual functions if enabled */
-       if (phba->cfg_sriov_nr_virtfn) {
-               pci_disable_sriov(pdev);
-               phba->cfg_sriov_nr_virtfn = 0;
-       }
+       if (opcode == LPFC_FW_DUMP) {
+               init_completion(&online_compl);
+               phba->fw_dump_cmpl = &online_compl;
+       } else {
+               /* Disable SR-IOV virtual functions if enabled */
+               if (phba->cfg_sriov_nr_virtfn) {
+                       pci_disable_sriov(pdev);
+                       phba->cfg_sriov_nr_virtfn = 0;
+               }
 
-       if (opcode == LPFC_FW_DUMP)
-               phba->hba_flag |= HBA_FW_DUMP_OP;
+               status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
 
-       status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
+               if (status != 0)
+                       return status;
 
-       if (status != 0) {
-               phba->hba_flag &= ~HBA_FW_DUMP_OP;
-               return status;
+               /* wait for the device to be quiesced before firmware reset */
+               msleep(100);
        }
 
-       /* wait for the device to be quiesced before firmware reset */
-       msleep(100);
-
        reg_val = readl(phba->sli4_hba.conf_regs_memmap_p +
                        LPFC_CTL_PDEV_CTL_OFFSET);
 
@@ -1756,24 +1756,42 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
                lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
                                "3153 Fail to perform the requested "
                                "access: x%x\n", reg_val);
+               if (phba->fw_dump_cmpl)
+                       phba->fw_dump_cmpl = NULL;
                return rc;
        }
 
        /* keep the original port state */
-       if (before_fc_flag & FC_OFFLINE_MODE)
-               goto out;
-
-       init_completion(&online_compl);
-       job_posted = lpfc_workq_post_event(phba, &status, &online_compl,
-                                          LPFC_EVT_ONLINE);
-       if (!job_posted)
+       if (before_fc_flag & FC_OFFLINE_MODE) {
+               if (phba->fw_dump_cmpl)
+                       phba->fw_dump_cmpl = NULL;
                goto out;
+       }
 
-       wait_for_completion(&online_compl);
+       /* Firmware dump will trigger an HA_ERATT event, and
+        * lpfc_handle_eratt_s4 routine already handles bringing the port back
+        * online.
+        */
+       if (opcode == LPFC_FW_DUMP) {
+               wait_for_completion(phba->fw_dump_cmpl);
+       } else  {
+               init_completion(&online_compl);
+               job_posted = lpfc_workq_post_event(phba, &status, &online_compl,
+                                                  LPFC_EVT_ONLINE);
+               if (!job_posted)
+                       goto out;
 
+               wait_for_completion(&online_compl);
+       }
 out:
        /* in any case, restore the virtual functions enabled as before */
        if (sriov_nr_virtfn) {
+               /* If fw_dump was performed, first disable to clean up */
+               if (opcode == LPFC_FW_DUMP) {
+                       pci_disable_sriov(pdev);
+                       phba->cfg_sriov_nr_virtfn = 0;
+               }
+
                sriov_err =
                        lpfc_sli_probe_sriov_nr_virtfn(phba, sriov_nr_virtfn);
                if (!sriov_err)
index 9ccb904e35fcf6a7b9957b70072ecc97e0995f43..3bb7c2aa949f79c891f84a56b911fd9353b84138 100644 (file)
@@ -869,10 +869,16 @@ lpfc_work_done(struct lpfc_hba *phba)
        if (phba->pci_dev_grp == LPFC_PCI_DEV_OC)
                lpfc_sli4_post_async_mbox(phba);
 
-       if (ha_copy & HA_ERATT)
+       if (ha_copy & HA_ERATT) {
                /* Handle the error attention event */
                lpfc_handle_eratt(phba);
 
+               if (phba->fw_dump_cmpl) {
+                       complete(phba->fw_dump_cmpl);
+                       phba->fw_dump_cmpl = NULL;
+               }
+       }
+
        if (ha_copy & HA_MBATT)
                lpfc_sli_handle_mb_event(phba);
 
index 9c1f485952ef775b1d1cf1267cbd38a4bc950f41..e5009f21d97e136c0637d1bd4a289d9e1a7b885a 100644 (file)
@@ -5043,12 +5043,6 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba)
        phba->fcf.fcf_flag = 0;
        spin_unlock_irq(&phba->hbalock);
 
-       /* SLI4 INTF 2: if FW dump is being taken skip INIT_PORT */
-       if (phba->hba_flag & HBA_FW_DUMP_OP) {
-               phba->hba_flag &= ~HBA_FW_DUMP_OP;
-               return rc;
-       }
-
        /* Now physically reset the device */
        lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                        "0389 Performing PCI function reset!\n");