bnxt_en: fix the handling of PCIE-AER
author Vikas Gupta <vikas.gupta@broadcom.com>
Thu, 3 Nov 2022 23:33:25 +0000 (19:33 -0400)
committer Jakub Kicinski <kuba@kernel.org>
Sat, 5 Nov 2022 02:29:02 +0000 (19:29 -0700)
Fix the sequence required for PCIE-AER. When a slot reset occurs, firmware
might not be ready yet, so the driver needs to check that it has recovered.  We
also need to remap the health registers for some chips and clear the
resource reservations.  The resources will be allocated again during
bnxt_io_resume().

Fixes: fb1e6e562b37 ("bnxt_en: Fix AER recovery.")
Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c

index 3743d97..f44f936 100644 (file)
@@ -13922,7 +13922,9 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
        pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct bnxt *bp = netdev_priv(netdev);
-       int err = 0, off;
+       int retry = 0;
+       int err = 0;
+       int off;
 
        netdev_info(bp->dev, "PCI Slot Reset\n");
 
@@ -13950,11 +13952,36 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
                pci_restore_state(pdev);
                pci_save_state(pdev);
 
+               bnxt_inv_fw_health_reg(bp);
+               bnxt_try_map_fw_health_reg(bp);
+
+               /* In some PCIe AER scenarios, firmware may take up to
+                * 10 seconds to become ready in the worst case.
+                */
+               do {
+                       err = bnxt_try_recover_fw(bp);
+                       if (!err)
+                               break;
+                       retry++;
+               } while (retry < BNXT_FW_SLOT_RESET_RETRY);
+
+               if (err) {
+                       dev_err(&pdev->dev, "Firmware not ready\n");
+                       goto reset_exit;
+               }
+
                err = bnxt_hwrm_func_reset(bp);
                if (!err)
                        result = PCI_ERS_RESULT_RECOVERED;
+
+               bnxt_ulp_irq_stop(bp);
+               bnxt_clear_int_mode(bp);
+               err = bnxt_init_int_mode(bp);
+               bnxt_ulp_irq_restart(bp, err);
        }
 
+reset_exit:
+       bnxt_clear_reservations(bp, true);
        rtnl_unlock();
 
        return result;
index b1b17f9..d5fa43c 100644 (file)
@@ -1621,6 +1621,7 @@ struct bnxt_fw_health {
 
 #define BNXT_FW_RETRY                  5
 #define BNXT_FW_IF_RETRY               10
+#define BNXT_FW_SLOT_RESET_RETRY       4
 
 enum board_idx {
        BCM57301,
index b01d429..132442f 100644 (file)
@@ -476,7 +476,8 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
                memset(ctx->resp, 0, PAGE_SIZE);
 
        req_type = le16_to_cpu(ctx->req->req_type);
-       if (BNXT_NO_FW_ACCESS(bp) && req_type != HWRM_FUNC_RESET) {
+       if (BNXT_NO_FW_ACCESS(bp) &&
+           (req_type != HWRM_FUNC_RESET && req_type != HWRM_VER_GET)) {
                netdev_dbg(bp->dev, "hwrm req_type 0x%x skipped, FW channel down\n",
                           req_type);
                goto exit;