From 542f07430eb564229e6f9aa32b709194c1a23896 Mon Sep 17 00:00:00 2001 From: Xiaofei Tan Date: Fri, 7 Jun 2019 10:03:02 +0800 Subject: [PATCH] net: hns3: log detail error info of ROCEE ECC and AXI errors This patch logs detail error info of ROCEE ECC and AXI errors for debug purpose, and remove unnecessary reset for ROCEE overflow errors. Signed-off-by: Xiaofei Tan Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 2 + .../hisilicon/hns3/hns3pf/hclge_err.c | 81 +++++++++++++++++-- .../hisilicon/hns3/hns3pf/hclge_err.h | 1 + 3 files changed, 77 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 7a14d806744c0..29af1b444fa39 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -268,6 +268,8 @@ enum hclge_opcode_type { HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580, HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581, HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584, + HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD = 0x1585, + HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD = 0x1586, HCLGE_IGU_EGU_TNL_INT_EN = 0x1803, HCLGE_IGU_COMMON_INT_EN = 0x1806, HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 784512d5f3959..4f2af3d527936 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1388,6 +1388,66 @@ static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) return ret; } +static int hclge_log_rocee_axi_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[3]; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], 3); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); + return ret; + } + + dev_info(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), + le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), + le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); + dev_info(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), + le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), + le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); + dev_info(dev, "AXI3: %08X %08X %08X %08X\n", + le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]), + le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); + + return 0; +} + +static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + int ret; + + ret = hclge_cmd_query_error(hdev, &desc[0], + HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, + HCLGE_CMD_FLAG_NEXT, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); + return ret; + } + + dev_info(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), + le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), + le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); + dev_info(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), + le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); + + return 0; +} + static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) { struct device *dev = &hdev->pdev->dev; @@ -1456,19 +1516,27 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) status = le32_to_cpu(desc[0].data[0]); - if (status & HCLGE_ROCEE_RERR_INT_MASK) { - dev_warn(dev, "ROCEE RAS AXI rresp error\n"); - reset_type = HNAE3_FUNC_RESET; - } + if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { + if (status & HCLGE_ROCEE_RERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI rresp error\n"); + + if (status & HCLGE_ROCEE_BERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI bresp error\n"); - if (status & HCLGE_ROCEE_BERR_INT_MASK) { - dev_warn(dev, "ROCEE RAS AXI bresp error\n"); reset_type = HNAE3_FUNC_RESET; + + ret = hclge_log_rocee_axi_error(hdev); + if (ret) + return HNAE3_GLOBAL_RESET; } if (status & HCLGE_ROCEE_ECC_INT_MASK) { dev_warn(dev, "ROCEE RAS 2bit ECC error\n"); reset_type = HNAE3_GLOBAL_RESET; + + ret = hclge_log_rocee_ecc_error(hdev); + if (ret) + return HNAE3_GLOBAL_RESET; } if (status & HCLGE_ROCEE_OVF_INT_MASK) { @@ -1478,7 +1546,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) /* reset everything for now */ return HNAE3_GLOBAL_RESET; } - reset_type = HNAE3_FUNC_RESET; } /* clear error status */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 81d115ac13db7..668473306cb34 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -94,6 +94,7 @@ #define HCLGE_ROCEE_RAS_CE_INT_EN_MASK 0x1 #define HCLGE_ROCEE_RERR_INT_MASK BIT(0) #define HCLGE_ROCEE_BERR_INT_MASK BIT(1) +#define HCLGE_ROCEE_AXI_ERR_INT_MASK GENMASK(1, 0) #define HCLGE_ROCEE_ECC_INT_MASK BIT(2) #define HCLGE_ROCEE_OVF_INT_MASK BIT(3) #define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000 -- 2.39.5