net: hns3: log detail error info of ROCEE ECC and AXI errors
author Xiaofei Tan <tanxiaofei@huawei.com>
Fri, 7 Jun 2019 02:03:02 +0000 (10:03 +0800)
committer David S. Miller <davem@davemloft.net>
Sun, 9 Jun 2019 20:20:58 +0000 (13:20 -0700)
This patch logs detailed error info for ROCEE ECC and AXI errors for
debugging purposes, and removes the unnecessary reset for ROCEE
overflow errors.

Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h

index 7a14d80..29af1b4 100644 (file)
@@ -268,6 +268,8 @@ enum hclge_opcode_type {
        HCLGE_CONFIG_ROCEE_RAS_INT_EN   = 0x1580,
        HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
        HCLGE_ROCEE_PF_RAS_INT_CMD      = 0x1584,
+       HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD      = 0x1585,
+       HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD      = 0x1586,
        HCLGE_IGU_EGU_TNL_INT_EN        = 0x1803,
        HCLGE_IGU_COMMON_INT_EN         = 0x1806,
        HCLGE_TM_QCN_MEM_INT_CFG        = 0x1A14,
index 784512d..4f2af3d 100644 (file)
@@ -1388,6 +1388,66 @@ static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
        return ret;
 }
 
+/*
+ * Query the ROCEE AXI RAS info with a three-descriptor command and log
+ * the returned data words: six from each of the first two descriptors
+ * and four from the third.
+ *
+ * Returns 0 on success, or the non-zero value returned by hclge_cmd_send().
+ */
+static int hclge_log_rocee_axi_error(struct hclge_dev *hdev)
+{
+       struct hclge_desc bd[3];
+       struct device *dev = &hdev->pdev->dev;
+       int bd_num = sizeof(bd) / sizeof(bd[0]);
+       int rc;
+       int i;
+
+       for (i = 0; i < bd_num; i++) {
+               hclge_cmd_setup_basic_desc(&bd[i],
+                                          HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
+                                          true);
+       }
+
+       /* every descriptor except the last gets HCLGE_CMD_FLAG_NEXT */
+       for (i = 0; i < bd_num - 1; i++)
+               bd[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+       rc = hclge_cmd_send(&hdev->hw, bd, bd_num);
+       if (rc) {
+               dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", rc);
+               return rc;
+       }
+
+       dev_info(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n",
+                le32_to_cpu(bd[0].data[0]), le32_to_cpu(bd[0].data[1]),
+                le32_to_cpu(bd[0].data[2]), le32_to_cpu(bd[0].data[3]),
+                le32_to_cpu(bd[0].data[4]), le32_to_cpu(bd[0].data[5]));
+       dev_info(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n",
+                le32_to_cpu(bd[1].data[0]), le32_to_cpu(bd[1].data[1]),
+                le32_to_cpu(bd[1].data[2]), le32_to_cpu(bd[1].data[3]),
+                le32_to_cpu(bd[1].data[4]), le32_to_cpu(bd[1].data[5]));
+       dev_info(dev, "AXI3: %08X %08X %08X %08X\n",
+                le32_to_cpu(bd[2].data[0]), le32_to_cpu(bd[2].data[1]),
+                le32_to_cpu(bd[2].data[2]), le32_to_cpu(bd[2].data[3]));
+
+       return 0;
+}
+
+/*
+ * Query the ROCEE ECC RAS info through hclge_cmd_query_error() and log
+ * the returned data words: six from the first descriptor and three from
+ * the second.
+ *
+ * Returns 0 on success, or the non-zero value returned by
+ * hclge_cmd_query_error().
+ */
+static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev)
+{
+       struct hclge_desc bd[2];
+       struct device *dev = &hdev->pdev->dev;
+       int rc;
+
+       /*
+        * NOTE(review): bd[1] is only written by the helper; presumably
+        * passing HCLGE_CMD_FLAG_NEXT makes it fill both descriptors -
+        * confirm against hclge_cmd_query_error().
+        */
+       rc = hclge_cmd_query_error(hdev, bd,
+                                  HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD,
+                                  HCLGE_CMD_FLAG_NEXT, 0, 0);
+       if (rc) {
+               dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", rc);
+               return rc;
+       }
+
+       dev_info(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n",
+                le32_to_cpu(bd[0].data[0]), le32_to_cpu(bd[0].data[1]),
+                le32_to_cpu(bd[0].data[2]), le32_to_cpu(bd[0].data[3]),
+                le32_to_cpu(bd[0].data[4]), le32_to_cpu(bd[0].data[5]));
+       dev_info(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(bd[1].data[0]),
+                le32_to_cpu(bd[1].data[1]), le32_to_cpu(bd[1].data[2]));
+
+       return 0;
+}
+
 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
 {
        struct device *dev = &hdev->pdev->dev;
@@ -1456,19 +1516,27 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
 
        status = le32_to_cpu(desc[0].data[0]);
 
-       if (status & HCLGE_ROCEE_RERR_INT_MASK) {
-               dev_warn(dev, "ROCEE RAS AXI rresp error\n");
-               reset_type = HNAE3_FUNC_RESET;
-       }
+       if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
+               if (status & HCLGE_ROCEE_RERR_INT_MASK)
+                       dev_warn(dev, "ROCEE RAS AXI rresp error\n");
+
+               if (status & HCLGE_ROCEE_BERR_INT_MASK)
+                       dev_warn(dev, "ROCEE RAS AXI bresp error\n");
 
-       if (status & HCLGE_ROCEE_BERR_INT_MASK) {
-               dev_warn(dev, "ROCEE RAS AXI bresp error\n");
                reset_type = HNAE3_FUNC_RESET;
+
+               ret = hclge_log_rocee_axi_error(hdev);
+               if (ret)
+                       return HNAE3_GLOBAL_RESET;
        }
 
        if (status & HCLGE_ROCEE_ECC_INT_MASK) {
                dev_warn(dev, "ROCEE RAS 2bit ECC error\n");
                reset_type = HNAE3_GLOBAL_RESET;
+
+               ret = hclge_log_rocee_ecc_error(hdev);
+               if (ret)
+                       return HNAE3_GLOBAL_RESET;
        }
 
        if (status & HCLGE_ROCEE_OVF_INT_MASK) {
@@ -1478,7 +1546,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
                        /* reset everything for now */
                        return HNAE3_GLOBAL_RESET;
                }
-               reset_type = HNAE3_FUNC_RESET;
        }
 
        /* clear error status */
index 81d115a..6684733 100644 (file)
@@ -94,6 +94,7 @@
 #define HCLGE_ROCEE_RAS_CE_INT_EN_MASK         0x1
 #define HCLGE_ROCEE_RERR_INT_MASK              BIT(0)
 #define HCLGE_ROCEE_BERR_INT_MASK              BIT(1)
+#define HCLGE_ROCEE_AXI_ERR_INT_MASK           GENMASK(1, 0)
 #define HCLGE_ROCEE_ECC_INT_MASK               BIT(2)
 #define HCLGE_ROCEE_OVF_INT_MASK               BIT(3)
 #define HCLGE_ROCEE_OVF_ERR_INT_MASK           0x10000