Merge tag 'ras_core_for_v5.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Jun 2021 18:19:40 +0000 (11:19 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Jun 2021 18:19:40 +0000 (11:19 -0700)
Pull x86 RAS updates from Borislav Petkov:

 - Add the required information to the faked APEI-reported mem error so
   that the kernel properly attempts to offline the corresponding page,
   as it does for kernel-detected correctable errors.

 - Fix a typo in AMD's error descriptions.

* tag 'ras_core_for_v5.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/mce_amd: Fix typo "FIfo" -> "Fifo"
  x86/mce: Include a MCi_MISC value in faked mce logs
  x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types

arch/x86/include/asm/mce.h
arch/x86/kernel/cpu/mce/amd.c
arch/x86/kernel/cpu/mce/apei.c
drivers/edac/mce_amd.c

index ddfb3ca..0607ec4 100644 (file)
@@ -305,7 +305,7 @@ extern void apei_mce_report_mem_error(int corrected,
 /* These may be used by multiple smca_hwid_mcatypes */
 enum smca_bank_types {
        SMCA_LS = 0,    /* Load Store */
-       SMCA_LS_V2,     /* Load Store */
+       SMCA_LS_V2,
        SMCA_IF,        /* Instruction Fetch */
        SMCA_L2_CACHE,  /* L2 Cache */
        SMCA_DE,        /* Decoder Unit */
@@ -314,17 +314,22 @@ enum smca_bank_types {
        SMCA_FP,        /* Floating Point */
        SMCA_L3_CACHE,  /* L3 Cache */
        SMCA_CS,        /* Coherent Slave */
-       SMCA_CS_V2,     /* Coherent Slave */
+       SMCA_CS_V2,
        SMCA_PIE,       /* Power, Interrupts, etc. */
        SMCA_UMC,       /* Unified Memory Controller */
+       SMCA_UMC_V2,
        SMCA_PB,        /* Parameter Block */
        SMCA_PSP,       /* Platform Security Processor */
-       SMCA_PSP_V2,    /* Platform Security Processor */
+       SMCA_PSP_V2,
        SMCA_SMU,       /* System Management Unit */
-       SMCA_SMU_V2,    /* System Management Unit */
+       SMCA_SMU_V2,
        SMCA_MP5,       /* Microprocessor 5 Unit */
        SMCA_NBIO,      /* Northbridge IO Unit */
        SMCA_PCIE,      /* PCI Express Unit */
+       SMCA_PCIE_V2,
+       SMCA_XGMI_PCS,  /* xGMI PCS Unit */
+       SMCA_XGMI_PHY,  /* xGMI PHY Unit */
+       SMCA_WAFL_PHY,  /* WAFL PHY Unit */
        N_SMCA_BANK_TYPES
 };
 
index e486f96..08831ac 100644 (file)
@@ -77,27 +77,29 @@ struct smca_bank_name {
 };
 
 static struct smca_bank_name smca_names[] = {
-       [SMCA_LS]       = { "load_store",       "Load Store Unit" },
-       [SMCA_LS_V2]    = { "load_store",       "Load Store Unit" },
-       [SMCA_IF]       = { "insn_fetch",       "Instruction Fetch Unit" },
-       [SMCA_L2_CACHE] = { "l2_cache",         "L2 Cache" },
-       [SMCA_DE]       = { "decode_unit",      "Decode Unit" },
-       [SMCA_RESERVED] = { "reserved",         "Reserved" },
-       [SMCA_EX]       = { "execution_unit",   "Execution Unit" },
-       [SMCA_FP]       = { "floating_point",   "Floating Point Unit" },
-       [SMCA_L3_CACHE] = { "l3_cache",         "L3 Cache" },
-       [SMCA_CS]       = { "coherent_slave",   "Coherent Slave" },
-       [SMCA_CS_V2]    = { "coherent_slave",   "Coherent Slave" },
-       [SMCA_PIE]      = { "pie",              "Power, Interrupts, etc." },
-       [SMCA_UMC]      = { "umc",              "Unified Memory Controller" },
-       [SMCA_PB]       = { "param_block",      "Parameter Block" },
-       [SMCA_PSP]      = { "psp",              "Platform Security Processor" },
-       [SMCA_PSP_V2]   = { "psp",              "Platform Security Processor" },
-       [SMCA_SMU]      = { "smu",              "System Management Unit" },
-       [SMCA_SMU_V2]   = { "smu",              "System Management Unit" },
-       [SMCA_MP5]      = { "mp5",              "Microprocessor 5 Unit" },
-       [SMCA_NBIO]     = { "nbio",             "Northbridge IO Unit" },
-       [SMCA_PCIE]     = { "pcie",             "PCI Express Unit" },
+       [SMCA_LS ... SMCA_LS_V2]        = { "load_store",       "Load Store Unit" },
+       [SMCA_IF]                       = { "insn_fetch",       "Instruction Fetch Unit" },
+       [SMCA_L2_CACHE]                 = { "l2_cache",         "L2 Cache" },
+       [SMCA_DE]                       = { "decode_unit",      "Decode Unit" },
+       [SMCA_RESERVED]                 = { "reserved",         "Reserved" },
+       [SMCA_EX]                       = { "execution_unit",   "Execution Unit" },
+       [SMCA_FP]                       = { "floating_point",   "Floating Point Unit" },
+       [SMCA_L3_CACHE]                 = { "l3_cache",         "L3 Cache" },
+       [SMCA_CS ... SMCA_CS_V2]        = { "coherent_slave",   "Coherent Slave" },
+       [SMCA_PIE]                      = { "pie",              "Power, Interrupts, etc." },
+
+       /* UMC v2 is separate because both of them can exist in a single system. */
+       [SMCA_UMC]                      = { "umc",              "Unified Memory Controller" },
+       [SMCA_UMC_V2]                   = { "umc_v2",           "Unified Memory Controller v2" },
+       [SMCA_PB]                       = { "param_block",      "Parameter Block" },
+       [SMCA_PSP ... SMCA_PSP_V2]      = { "psp",              "Platform Security Processor" },
+       [SMCA_SMU ... SMCA_SMU_V2]      = { "smu",              "System Management Unit" },
+       [SMCA_MP5]                      = { "mp5",              "Microprocessor 5 Unit" },
+       [SMCA_NBIO]                     = { "nbio",             "Northbridge IO Unit" },
+       [SMCA_PCIE ... SMCA_PCIE_V2]    = { "pcie",             "PCI Express Unit" },
+       [SMCA_XGMI_PCS]                 = { "xgmi_pcs",         "Ext Global Memory Interconnect PCS Unit" },
+       [SMCA_XGMI_PHY]                 = { "xgmi_phy",         "Ext Global Memory Interconnect PHY Unit" },
+       [SMCA_WAFL_PHY]                 = { "wafl_phy",         "WAFL PHY Unit" },
 };
 
 static const char *smca_get_name(enum smca_bank_types t)
@@ -155,6 +157,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
        /* Unified Memory Controller MCA type */
        { SMCA_UMC,      HWID_MCATYPE(0x96, 0x0)        },
+       { SMCA_UMC_V2,   HWID_MCATYPE(0x96, 0x1)        },
 
        /* Parameter Block MCA type */
        { SMCA_PB,       HWID_MCATYPE(0x05, 0x0)        },
@@ -175,6 +178,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
        /* PCI Express Unit MCA type */
        { SMCA_PCIE,     HWID_MCATYPE(0x46, 0x0)        },
+       { SMCA_PCIE_V2,  HWID_MCATYPE(0x46, 0x1)        },
+
+       /* xGMI PCS MCA type */
+       { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0)        },
+
+       /* xGMI PHY MCA type */
+       { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0)       },
+
+       /* WAFL PHY MCA type */
+       { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0)       },
 };
 
 struct smca_bank smca_banks[MAX_NR_BANKS];
index b58b853..0e3ae64 100644 (file)
@@ -36,7 +36,8 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
        mce_setup(&m);
        m.bank = -1;
        /* Fake a memory read error with unknown channel */
-       m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
+       m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
+       m.misc = (MCI_MISC_ADDR_PHYS << 6) | PAGE_SHIFT;
 
        if (severity >= GHES_SEV_RECOVERABLE)
                m.status |= MCI_STATUS_UC;
index 5dd905a..27d5692 100644 (file)
@@ -323,6 +323,21 @@ static const char * const smca_umc_mce_desc[] = {
        "AES SRAM ECC error",
 };
 
+static const char * const smca_umc2_mce_desc[] = {
+       "DRAM ECC error",
+       "Data poison error",
+       "SDP parity error",
+       "Reserved",
+       "Address/Command parity error",
+       "Write data parity error",
+       "DCQ SRAM ECC error",
+       "Reserved",
+       "Read data parity error",
+       "Rdb SRAM ECC error",
+       "RdRsp SRAM ECC error",
+       "LM32 MP errors",
+};
+
 static const char * const smca_pb_mce_desc[] = {
        "An ECC error in the Parameter Block RAM array",
 };
@@ -400,6 +415,56 @@ static const char * const smca_pcie_mce_desc[] = {
        "CCIX Non-okay write response with data error",
 };
 
+static const char * const smca_pcie2_mce_desc[] = {
+       "SDP Parity Error logging",
+};
+
+static const char * const smca_xgmipcs_mce_desc[] = {
+       "Data Loss Error",
+       "Training Error",
+       "Flow Control Acknowledge Error",
+       "Rx Fifo Underflow Error",
+       "Rx Fifo Overflow Error",
+       "CRC Error",
+       "BER Exceeded Error",
+       "Tx Vcid Data Error",
+       "Replay Buffer Parity Error",
+       "Data Parity Error",
+       "Replay Fifo Overflow Error",
+       "Replay Fifo Underflow Error",
+       "Elastic Fifo Overflow Error",
+       "Deskew Error",
+       "Flow Control CRC Error",
+       "Data Startup Limit Error",
+       "FC Init Timeout Error",
+       "Recovery Timeout Error",
+       "Ready Serial Timeout Error",
+       "Ready Serial Attempt Error",
+       "Recovery Attempt Error",
+       "Recovery Relock Attempt Error",
+       "Replay Attempt Error",
+       "Sync Header Error",
+       "Tx Replay Timeout Error",
+       "Rx Replay Timeout Error",
+       "LinkSub Tx Timeout Error",
+       "LinkSub Rx Timeout Error",
+       "Rx CMD Pocket Error",
+};
+
+static const char * const smca_xgmiphy_mce_desc[] = {
+       "RAM ECC Error",
+       "ARC instruction buffer parity error",
+       "ARC data buffer parity error",
+       "PHY APB error",
+};
+
+static const char * const smca_waflphy_mce_desc[] = {
+       "RAM ECC Error",
+       "ARC instruction buffer parity error",
+       "ARC data buffer parity error",
+       "PHY APB error",
+};
+
 struct smca_mce_desc {
        const char * const *descs;
        unsigned int num_descs;
@@ -418,6 +483,7 @@ static struct smca_mce_desc smca_mce_descs[] = {
        [SMCA_CS_V2]    = { smca_cs2_mce_desc,  ARRAY_SIZE(smca_cs2_mce_desc)   },
        [SMCA_PIE]      = { smca_pie_mce_desc,  ARRAY_SIZE(smca_pie_mce_desc)   },
        [SMCA_UMC]      = { smca_umc_mce_desc,  ARRAY_SIZE(smca_umc_mce_desc)   },
+       [SMCA_UMC_V2]   = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc)  },
        [SMCA_PB]       = { smca_pb_mce_desc,   ARRAY_SIZE(smca_pb_mce_desc)    },
        [SMCA_PSP]      = { smca_psp_mce_desc,  ARRAY_SIZE(smca_psp_mce_desc)   },
        [SMCA_PSP_V2]   = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)  },
@@ -426,6 +492,10 @@ static struct smca_mce_desc smca_mce_descs[] = {
        [SMCA_MP5]      = { smca_mp5_mce_desc,  ARRAY_SIZE(smca_mp5_mce_desc)   },
        [SMCA_NBIO]     = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)  },
        [SMCA_PCIE]     = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)  },
+       [SMCA_PCIE_V2]  = { smca_pcie2_mce_desc,   ARRAY_SIZE(smca_pcie2_mce_desc)      },
+       [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc)    },
+       [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc)    },
+       [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc)    },
 };
 
 static bool f12h_mc0_mce(u16 ec, u8 xec)