bnxt_en: Discover firmware error recovery capabilities.
author Michael Chan <michael.chan@broadcom.com>
Fri, 30 Aug 2019 03:54:53 +0000 (23:54 -0400)
committer David S. Miller <davem@davemloft.net>
Fri, 30 Aug 2019 21:02:18 +0000 (14:02 -0700)
Call the new firmware API HWRM_ERROR_RECOVERY_QCFG if it is supported
to discover the firmware health and recovery capabilities and settings.
This feature allows the driver to reset the chip if firmware crashes and
becomes unresponsive.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h

index 303933b8c44e14d9b9158656c456a225c8e0a845..825a7f945e517aa9dedf2943c4684927e3aa8cce 100644 (file)
@@ -6847,6 +6847,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
                bp->fw_cap |= BNXT_FW_CAP_PCIE_STATS_SUPPORTED;
        if (flags & FUNC_QCAPS_RESP_FLAGS_EXT_STATS_SUPPORTED)
                bp->fw_cap |= BNXT_FW_CAP_EXT_STATS_SUPPORTED;
+       if (flags &  FUNC_QCAPS_RESP_FLAGS_ERROR_RECOVERY_CAPABLE)
+               bp->fw_cap |= BNXT_FW_CAP_ERROR_RECOVERY;
 
        bp->tx_push_thresh = 0;
        if (flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)
@@ -6948,6 +6950,74 @@ hwrm_cfa_adv_qcaps_exit:
        return rc;
 }
 
+static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
+{
+       struct hwrm_error_recovery_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+       struct bnxt_fw_health *fw_health = bp->fw_health;
+       struct hwrm_error_recovery_qcfg_input req = {0};
+       int rc, i;
+
+       if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
+               return 0;
+
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_ERROR_RECOVERY_QCFG, -1, -1);
+       mutex_lock(&bp->hwrm_cmd_lock);
+       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       if (rc)
+               goto err_recovery_out;
+       if (!fw_health) {
+               fw_health = kzalloc(sizeof(*fw_health), GFP_KERNEL);
+               bp->fw_health = fw_health;
+               if (!fw_health) {
+                       rc = -ENOMEM;
+                       goto err_recovery_out;
+               }
+       }
+       fw_health->flags = le32_to_cpu(resp->flags);
+       if ((fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) &&
+           !(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) {
+               rc = -EINVAL;
+               goto err_recovery_out;
+       }
+       fw_health->polling_dsecs = le32_to_cpu(resp->driver_polling_freq);
+       fw_health->master_func_wait_dsecs =
+               le32_to_cpu(resp->master_func_wait_period);
+       fw_health->normal_func_wait_dsecs =
+               le32_to_cpu(resp->normal_func_wait_period);
+       fw_health->post_reset_wait_dsecs =
+               le32_to_cpu(resp->master_func_wait_period_after_reset);
+       fw_health->post_reset_max_wait_dsecs =
+               le32_to_cpu(resp->max_bailout_time_after_reset);
+       fw_health->regs[BNXT_FW_HEALTH_REG] =
+               le32_to_cpu(resp->fw_health_status_reg);
+       fw_health->regs[BNXT_FW_HEARTBEAT_REG] =
+               le32_to_cpu(resp->fw_heartbeat_reg);
+       fw_health->regs[BNXT_FW_RESET_CNT_REG] =
+               le32_to_cpu(resp->fw_reset_cnt_reg);
+       fw_health->regs[BNXT_FW_RESET_INPROG_REG] =
+               le32_to_cpu(resp->reset_inprogress_reg);
+       fw_health->fw_reset_inprog_reg_mask =
+               le32_to_cpu(resp->reset_inprogress_reg_mask);
+       fw_health->fw_reset_seq_cnt = resp->reg_array_cnt;
+       if (fw_health->fw_reset_seq_cnt >= 16) {
+               rc = -EINVAL;
+               goto err_recovery_out;
+       }
+       for (i = 0; i < fw_health->fw_reset_seq_cnt; i++) {
+               fw_health->fw_reset_seq_regs[i] =
+                       le32_to_cpu(resp->reset_reg[i]);
+               fw_health->fw_reset_seq_vals[i] =
+                       le32_to_cpu(resp->reset_reg_val[i]);
+               fw_health->fw_reset_seq_delay_msec[i] =
+                       resp->delay_after_reset[i];
+       }
+err_recovery_out:
+       mutex_unlock(&bp->hwrm_cmd_lock);
+       if (rc)
+               bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
+       return rc;
+}
+
 static int bnxt_hwrm_func_reset(struct bnxt *bp)
 {
        struct hwrm_func_reset_input req = {0};
@@ -10058,6 +10128,11 @@ static int bnxt_fw_init_one_p2(struct bnxt *bp)
                netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
                            rc);
 
+       rc = bnxt_hwrm_error_recovery_qcfg(bp);
+       if (rc)
+               netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n",
+                           rc);
+
        rc = bnxt_hwrm_func_drv_rgtr(bp);
        if (rc)
                return -ENODEV;
@@ -11238,6 +11313,8 @@ init_err_pci_clean:
        bnxt_free_ctx_mem(bp);
        kfree(bp->ctx);
        bp->ctx = NULL;
+       kfree(bp->fw_health);
+       bp->fw_health = NULL;
        bnxt_cleanup_pci(bp);
 
 init_err_free:
index 3e0fcc21fc9c28ac02355d271d64b49e66eac272..ce535e52eaa9a8e136ba89260ac368bb404ff0a6 100644 (file)
@@ -1333,6 +1333,41 @@ struct bnxt_ctx_mem_info {
        struct bnxt_ctx_pg_info *tqm_mem[9];
 };
 
+struct bnxt_fw_health {        /* cached HWRM_ERROR_RECOVERY_QCFG settings */
+       u32 flags;      /* resp->flags; checked for ..._FLAGS_CO_CPU */
+       u32 polling_dsecs;      /* resp->driver_polling_freq */
+       u32 master_func_wait_dsecs;     /* resp->master_func_wait_period */
+       u32 normal_func_wait_dsecs;     /* resp->normal_func_wait_period */
+       u32 post_reset_wait_dsecs; /* resp->master_func_wait_period_after_reset */
+       u32 post_reset_max_wait_dsecs;  /* resp->max_bailout_time_after_reset */
+       u32 regs[4];    /* register descriptors from FW, indexed by BNXT_FW_*_REG */
+       u32 mapped_regs[4];     /* NOTE(review): not written by the query code; presumably mapped counterparts of regs[] -- confirm */
+#define BNXT_FW_HEALTH_REG             0       /* resp->fw_health_status_reg */
+#define BNXT_FW_HEARTBEAT_REG          1       /* resp->fw_heartbeat_reg */
+#define BNXT_FW_RESET_CNT_REG          2       /* resp->fw_reset_cnt_reg */
+#define BNXT_FW_RESET_INPROG_REG       3       /* resp->reset_inprogress_reg */
+       u32 fw_reset_inprog_reg_mask;   /* resp->reset_inprogress_reg_mask */
+       u32 last_fw_heartbeat;  /* NOTE(review): not set by the query code; presumably last heartbeat value read -- confirm */
+       u32 last_fw_reset_cnt;  /* NOTE(review): not set by the query code; presumably last reset count read -- confirm */
+       u8 enabled:1;
+       u8 master:1;
+       u8 tmr_multiplier;
+       u8 tmr_counter;
+       u8 fw_reset_seq_cnt;    /* resp->reg_array_cnt; query fails with -EINVAL if >= 16 */
+       u32 fw_reset_seq_regs[16];      /* resp->reset_reg[]; first fw_reset_seq_cnt entries filled */
+       u32 fw_reset_seq_vals[16];      /* resp->reset_reg_val[]; same indexing */
+       u32 fw_reset_seq_delay_msec[16]; /* resp->delay_after_reset[]; same indexing */
+};
+
+#define BNXT_FW_HEALTH_REG_TYPE_MASK   3       /* low two bits select the type */
+#define BNXT_FW_HEALTH_REG_TYPE_CFG    0       /* presumably PCI config space -- TODO confirm */
+#define BNXT_FW_HEALTH_REG_TYPE_GRC    1       /* presumably GRC register space -- TODO confirm */
+#define BNXT_FW_HEALTH_REG_TYPE_BAR0   2       /* presumably offset into BAR0 -- TODO confirm */
+#define BNXT_FW_HEALTH_REG_TYPE_BAR1   3       /* presumably offset into BAR1 -- TODO confirm */
+
+#define BNXT_FW_HEALTH_REG_TYPE(reg)   ((reg) & BNXT_FW_HEALTH_REG_TYPE_MASK)  /* type code: low two bits of reg */
+#define BNXT_FW_HEALTH_REG_OFF(reg)    ((reg) & ~BNXT_FW_HEALTH_REG_TYPE_MASK) /* reg with the two type bits cleared */
+
 struct bnxt {
        void __iomem            *bar0;
        void __iomem            *bar1;
@@ -1581,6 +1616,7 @@ struct bnxt {
        #define BNXT_FW_CAP_KONG_MB_CHNL                0x00000080
        #define BNXT_FW_CAP_OVS_64BIT_HANDLE            0x00000400
        #define BNXT_FW_CAP_TRUSTED_VF                  0x00000800
+       #define BNXT_FW_CAP_ERROR_RECOVERY              0x00002000
        #define BNXT_FW_CAP_PKG_VER                     0x00004000
        #define BNXT_FW_CAP_CFA_ADV_FLOW                0x00008000
        #define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX        0x00010000
@@ -1666,6 +1702,8 @@ struct bnxt {
 #define BNXT_UPDATE_PHY_SP_EVENT       16
 #define BNXT_RING_COAL_NOW_SP_EVENT    17
 
+       struct bnxt_fw_health   *fw_health;
+
        struct bnxt_hw_resc     hw_resc;
        struct bnxt_pf_info     pf;
        struct bnxt_ctx_mem_info        *ctx;