ice: Handle critical FW error during admin queue initialization
author Evan Swanson <evan.swanson@intel.com>
Sat, 16 May 2020 00:42:14 +0000 (17:42 -0700)
committer Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Thu, 28 May 2020 00:48:23 +0000 (17:48 -0700)
A race condition between FW and SW can occur between admin queue setup and
the first command sent. If a link event occurs, FW attempts to notify a
non-existent queue; FW then sets the critical error bit and disables the
queue. When this happens, retry queue setup.

Signed-off-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
drivers/net/ethernet/intel/ice/ice_controlq.c
drivers/net/ethernet/intel/ice/ice_controlq.h
drivers/net/ethernet/intel/ice/ice_hw_autogen.h
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_status.h

index 62c2c1e..479a74e 100644 (file)
@@ -12,6 +12,7 @@ do {                                                          \
        (qinfo)->sq.bal = prefix##_ATQBAL;                      \
        (qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M;        \
        (qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M; \
+       (qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M;  \
        (qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M;           \
        (qinfo)->rq.head = prefix##_ARQH;                       \
        (qinfo)->rq.tail = prefix##_ARQT;                       \
@@ -20,6 +21,7 @@ do {                                                          \
        (qinfo)->rq.bal = prefix##_ARQBAL;                      \
        (qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M;        \
        (qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M; \
+       (qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M;  \
        (qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M;           \
 } while (0)
 
@@ -642,6 +644,50 @@ init_ctrlq_free_sq:
 }
 
 /**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * NOTE: this function does not destroy the control queue locks.
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+       struct ice_ctl_q_info *cq;
+
+       switch (q_type) {
+       case ICE_CTL_Q_ADMIN:
+               cq = &hw->adminq;
+               if (ice_check_sq_alive(hw, cq))
+                       ice_aq_q_shutdown(hw, true);
+               break;
+       case ICE_CTL_Q_MAILBOX:
+               cq = &hw->mailboxq;
+               break;
+       default:
+               return;
+       }
+
+       ice_shutdown_sq(hw, cq);
+       ice_shutdown_rq(hw, cq);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * NOTE: this function does not destroy the control queue locks. The driver
+ * may call this at runtime to shutdown and later restart control queues, such
+ * as in response to a reset event.
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+       /* Shutdown FW admin queue */
+       ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+       /* Shutdown PF-VF Mailbox */
+       ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
  * ice_init_all_ctrlq - main initialization routine for all control queues
  * @hw: pointer to the hardware structure
  *
@@ -656,17 +702,27 @@ init_ctrlq_free_sq:
  */
 enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
 {
-       enum ice_status ret_code;
+       enum ice_status status;
+       u32 retry = 0;
 
        /* Init FW admin queue */
-       ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
-       if (ret_code)
-               return ret_code;
+       do {
+               status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+               if (status)
+                       return status;
 
-       ret_code = ice_init_check_adminq(hw);
-       if (ret_code)
-               return ret_code;
+               status = ice_init_check_adminq(hw);
+               if (status != ICE_ERR_AQ_FW_CRITICAL)
+                       break;
 
+               ice_debug(hw, ICE_DBG_AQ_MSG,
+                         "Retry Admin Queue init due to FW critical error\n");
+               ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+               msleep(ICE_CTL_Q_ADMIN_INIT_MSEC);
+       } while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);
+
+       if (status)
+               return status;
        /* Init Mailbox queue */
        return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
 }
@@ -708,50 +764,6 @@ enum ice_status ice_create_all_ctrlq(struct ice_hw *hw)
 }
 
 /**
- * ice_shutdown_ctrlq - shutdown routine for any control queue
- * @hw: pointer to the hardware structure
- * @q_type: specific Control queue type
- *
- * NOTE: this function does not destroy the control queue locks.
- */
-static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
-{
-       struct ice_ctl_q_info *cq;
-
-       switch (q_type) {
-       case ICE_CTL_Q_ADMIN:
-               cq = &hw->adminq;
-               if (ice_check_sq_alive(hw, cq))
-                       ice_aq_q_shutdown(hw, true);
-               break;
-       case ICE_CTL_Q_MAILBOX:
-               cq = &hw->mailboxq;
-               break;
-       default:
-               return;
-       }
-
-       ice_shutdown_sq(hw, cq);
-       ice_shutdown_rq(hw, cq);
-}
-
-/**
- * ice_shutdown_all_ctrlq - shutdown routine for all control queues
- * @hw: pointer to the hardware structure
- *
- * NOTE: this function does not destroy the control queue locks. The driver
- * may call this at runtime to shutdown and later restart control queues, such
- * as in response to a reset event.
- */
-void ice_shutdown_all_ctrlq(struct ice_hw *hw)
-{
-       /* Shutdown FW admin queue */
-       ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
-       /* Shutdown PF-VF Mailbox */
-       ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
-}
-
-/**
  * ice_destroy_ctrlq_locks - Destroy locks for a control queue
  * @cq: pointer to the control queue
  *
@@ -1049,9 +1061,15 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 
        /* update the error if time out occurred */
        if (!cmd_completed) {
-               ice_debug(hw, ICE_DBG_AQ_MSG,
-                         "Control Send Queue Writeback timeout.\n");
-               status = ICE_ERR_AQ_TIMEOUT;
+               if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask ||
+                   rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) {
+                       ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n");
+                       status = ICE_ERR_AQ_FW_CRITICAL;
+               } else {
+                       ice_debug(hw, ICE_DBG_AQ_MSG,
+                                 "Control Send Queue Writeback timeout.\n");
+                       status = ICE_ERR_AQ_TIMEOUT;
+               }
        }
 
 sq_send_command_error:
index bf0ebe6..faaa08e 100644 (file)
@@ -34,6 +34,8 @@ enum ice_ctl_q {
 /* Control Queue timeout settings - max delay 250ms */
 #define ICE_CTL_Q_SQ_CMD_TIMEOUT       2500  /* Count 2500 times */
 #define ICE_CTL_Q_SQ_CMD_USEC          100   /* Check every 100usec */
+#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT   10    /* Count 10 times */
+#define ICE_CTL_Q_ADMIN_INIT_MSEC      100   /* Check every 100msec */
 
 struct ice_ctl_q_ring {
        void *dma_head;                 /* Virtual address to DMA head */
@@ -59,6 +61,7 @@ struct ice_ctl_q_ring {
        u32 bal;
        u32 len_mask;
        u32 len_ena_mask;
+       u32 len_crit_mask;
        u32 head_mask;
 };
 
index 2f1c776..1086c9f 100644 (file)
@@ -39,6 +39,7 @@
 #define PF_MBX_ARQH_ARQH_M                     ICE_M(0x3FF, 0)
 #define PF_MBX_ARQLEN                          0x0022E480
 #define PF_MBX_ARQLEN_ARQLEN_M                 ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN_ARQCRIT_M                        BIT(30)
 #define PF_MBX_ARQLEN_ARQENABLE_M              BIT(31)
 #define PF_MBX_ARQT                            0x0022E580
 #define PF_MBX_ATQBAH                          0x0022E180
@@ -47,6 +48,7 @@
 #define PF_MBX_ATQH_ATQH_M                     ICE_M(0x3FF, 0)
 #define PF_MBX_ATQLEN                          0x0022E200
 #define PF_MBX_ATQLEN_ATQLEN_M                 ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN_ATQCRIT_M                        BIT(30)
 #define PF_MBX_ATQLEN_ATQENABLE_M              BIT(31)
 #define PF_MBX_ATQT                            0x0022E300
 #define PRTDCB_GENC                            0x00083000
index 93a42ff..247e7b1 100644 (file)
@@ -5207,6 +5207,8 @@ const char *ice_stat_str(enum ice_status stat_err)
                return "ICE_ERR_AQ_NO_WORK";
        case ICE_ERR_AQ_EMPTY:
                return "ICE_ERR_AQ_EMPTY";
+       case ICE_ERR_AQ_FW_CRITICAL:
+               return "ICE_ERR_AQ_FW_CRITICAL";
        }
 
        return "ICE_ERR_UNKNOWN";
index 546a028..4028c63 100644 (file)
@@ -37,6 +37,7 @@ enum ice_status {
        ICE_ERR_AQ_FULL                         = -102,
        ICE_ERR_AQ_NO_WORK                      = -103,
        ICE_ERR_AQ_EMPTY                        = -104,
+       ICE_ERR_AQ_FW_CRITICAL                  = -105,
 };
 
 #endif /* _ICE_STATUS_H_ */