bnx2x: new flag to track HW resource allocation
author Thinh Tran <thinhtr@linux.vnet.ibm.com>
Fri, 18 Aug 2023 16:14:40 +0000 (11:14 -0500)
committer Jakub Kicinski <kuba@kernel.org>
Wed, 23 Aug 2023 00:07:40 +0000 (17:07 -0700)
While injecting PCIe errors to the upstream PCIe switch of
a BCM57810 NIC, system hangs/crashes were observed.

After several calls to bnx2x_tx_timeout() complete,
bnx2x_nic_unload() is called to free up HW resources
and bnx2x_napi_disable() is called to release NAPI objects.
Later, when the EEH driver calls bnx2x_io_slot_reset() to
complete the recovery process, bnx2x attempts to disable
NAPI again by calling bnx2x_napi_disable() and freeing
resources which have already been freed, resulting in a
hang or crash.

Introduce a new flag to track the HW resource and NAPI
allocation state, refactor duplicated code into a single
function, check page pool allocation status before freeing,
and reduce debug output when a TX timeout event occurs.

Reviewed-by: Manish Chopra <manishc@marvell.com>
Tested-by: Abdul Haleem <abdhalee@in.ibm.com>
Tested-by: David Christensen <drc@linux.vnet.ibm.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Tested-by: Venkata Sai Duggi <venkata.sai.duggi@ibm.com>
Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
Link: https://lore.kernel.org/r/20230818161443.708785-2-thinhtr@linux.vnet.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c

index 8bcde0a..e2a4e10 100644 (file)
@@ -1508,6 +1508,8 @@ struct bnx2x {
        bool                    cnic_loaded;
        struct cnic_eth_dev     *(*cnic_probe)(struct net_device *);
 
+       bool                    nic_stopped;
+
        /* Flag that indicates that we can start looking for FCoE L2 queue
         * completions in the default status block.
         */
index 6ea5521..e9c1e1b 100644 (file)
@@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        bnx2x_add_all_napi(bp);
        DP(NETIF_MSG_IFUP, "napi added\n");
        bnx2x_napi_enable(bp);
+       bp->nic_stopped = false;
 
        if (IS_PF(bp)) {
                /* set pf load just before approaching the MCP */
@@ -2960,6 +2961,7 @@ load_error2:
 load_error1:
        bnx2x_napi_disable(bp);
        bnx2x_del_all_napi(bp);
+       bp->nic_stopped = true;
 
        /* clear pf_load status, as it was already set */
        if (IS_PF(bp))
@@ -3095,14 +3097,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
                if (!CHIP_IS_E1x(bp))
                        bnx2x_pf_disable(bp);
 
-               /* Disable HW interrupts, NAPI */
-               bnx2x_netif_stop(bp, 1);
-               /* Delete all NAPI objects */
-               bnx2x_del_all_napi(bp);
-               if (CNIC_LOADED(bp))
-                       bnx2x_del_all_napi_cnic(bp);
-               /* Release IRQs */
-               bnx2x_free_irq(bp);
+               if (!bp->nic_stopped) {
+                       /* Disable HW interrupts, NAPI */
+                       bnx2x_netif_stop(bp, 1);
+                       /* Delete all NAPI objects */
+                       bnx2x_del_all_napi(bp);
+                       if (CNIC_LOADED(bp))
+                               bnx2x_del_all_napi_cnic(bp);
+                       /* Release IRQs */
+                       bnx2x_free_irq(bp);
+                       bp->nic_stopped = true;
+               }
 
                /* Report UNLOAD_DONE to MCP */
                bnx2x_send_unload_done(bp, false);
index 1e7a6f1..0d8e61c 100644 (file)
@@ -9474,15 +9474,18 @@ unload_error:
                }
        }
 
-       /* Disable HW interrupts, NAPI */
-       bnx2x_netif_stop(bp, 1);
-       /* Delete all NAPI objects */
-       bnx2x_del_all_napi(bp);
-       if (CNIC_LOADED(bp))
-               bnx2x_del_all_napi_cnic(bp);
+       if (!bp->nic_stopped) {
+               /* Disable HW interrupts, NAPI */
+               bnx2x_netif_stop(bp, 1);
+               /* Delete all NAPI objects */
+               bnx2x_del_all_napi(bp);
+               if (CNIC_LOADED(bp))
+                       bnx2x_del_all_napi_cnic(bp);
 
-       /* Release IRQs */
-       bnx2x_free_irq(bp);
+               /* Release IRQs */
+               bnx2x_free_irq(bp);
+               bp->nic_stopped = true;
+       }
 
        /* Reset the chip, unless PCI function is offline. If we reach this
         * point following a PCI error handling, it means device is really
@@ -14238,13 +14241,16 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
                }
                bnx2x_drain_tx_queues(bp);
                bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
-               bnx2x_netif_stop(bp, 1);
-               bnx2x_del_all_napi(bp);
+               if (!bp->nic_stopped) {
+                       bnx2x_netif_stop(bp, 1);
+                       bnx2x_del_all_napi(bp);
 
-               if (CNIC_LOADED(bp))
-                       bnx2x_del_all_napi_cnic(bp);
+                       if (CNIC_LOADED(bp))
+                               bnx2x_del_all_napi_cnic(bp);
 
-               bnx2x_free_irq(bp);
+                       bnx2x_free_irq(bp);
+                       bp->nic_stopped = true;
+               }
 
                /* Report UNLOAD_DONE to MCP */
                bnx2x_send_unload_done(bp, true);
index 0657a0f..8946a93 100644 (file)
@@ -529,13 +529,16 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)
        bnx2x_vfpf_finalize(bp, &req->first_tlv);
 
 free_irq:
-       /* Disable HW interrupts, NAPI */
-       bnx2x_netif_stop(bp, 0);
-       /* Delete all NAPI objects */
-       bnx2x_del_all_napi(bp);
-
-       /* Release IRQs */
-       bnx2x_free_irq(bp);
+       if (!bp->nic_stopped) {
+               /* Disable HW interrupts, NAPI */
+               bnx2x_netif_stop(bp, 0);
+               /* Delete all NAPI objects */
+               bnx2x_del_all_napi(bp);
+
+               /* Release IRQs */
+               bnx2x_free_irq(bp);
+               bp->nic_stopped = true;
+       }
 }
 
 static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf,