EDAC, i10nm: Add a driver for Intel 10nm server processors
author Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Wed, 30 Jan 2019 19:15:19 +0000 (11:15 -0800)
committer Borislav Petkov <bp@suse.de>
Sat, 2 Feb 2019 12:33:18 +0000 (13:33 +0100)
This driver supports the Intel 10nm series server integrated memory
controller. It gets the memory capacity and topology information by
reading the registers in PCI configuration space and memory-mapped I/O.

It decodes the memory error address to the platform specific address
by using the ACPI Address Translation (ADXL) Device Specific Method
(DSM).

Co-developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: James Morse <james.morse@arm.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: https://lkml.kernel.org/r/20190130191519.15393-5-tony.luck@intel.com
drivers/edac/Kconfig
drivers/edac/Makefile
drivers/edac/i10nm_base.c [new file with mode: 0644]

index 480fa79..0825c9c 100644 (file)
@@ -241,6 +241,18 @@ config EDAC_SKX
          system has non-volatile DIMMs you should also manually
          select CONFIG_ACPI_NFIT.
 
+config EDAC_I10NM
+       tristate "Intel 10nm server Integrated MC"
+       depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
+       depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_I10NM can't be y
+       select DMI
+       select ACPI_ADXL if ACPI
+       help
+         Support for error detection and correction on the Intel
+         10nm server Integrated Memory Controllers. If your
+         system has non-volatile DIMMs you should also manually
+         select CONFIG_ACPI_NFIT.
+
 config EDAC_PND2
        tristate "Intel Pondicherry2"
        depends on PCI && X86_64 && X86_MCE_INTEL
index 1c985fa..89ad4a8 100644 (file)
@@ -60,6 +60,9 @@ obj-$(CONFIG_EDAC_LAYERSCAPE)         += layerscape_edac_mod.o
 skx_edac-y                             := skx_common.o skx_base.o
 obj-$(CONFIG_EDAC_SKX)                 += skx_edac.o
 
+i10nm_edac-y                           := skx_common.o i10nm_base.o
+obj-$(CONFIG_EDAC_I10NM)               += i10nm_edac.o
+
 obj-$(CONFIG_EDAC_MV64X60)             += mv64x60_edac.o
 obj-$(CONFIG_EDAC_CELL)                        += cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)              += ppc4xx_edac.o
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
new file mode 100644 (file)
index 0000000..c334fb7
--- /dev/null
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Intel(R) 10nm server memory controller.
+ * Copyright (c) 2019, Intel Corporation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/mce.h>
+#include "edac_module.h"
+#include "skx_common.h"
+
+/* Driver revision string; logged by i10nm_init() once setup has succeeded. */
+#define I10NM_REVISION "v0.0.3"
+/* Module name: compared against the EDAC owner and passed to skx_* helpers. */
+#define EDAC_MOD_STR   "i10nm_edac"
+
+/* Debug macros: edac_printk() wrapper that tags every message with "i10nm". */
+#define i10nm_printk(level, fmt, arg...)       \
+       edac_printk(level, "i10nm", fmt, ##arg)
+
+/*
+ * Register accessors.
+ *
+ * I10NM_GET_SCK_BAR / I10NM_GET_IMC_BAR read a 32-bit value from the PCI
+ * configuration space of the socket's "uracu" device into @reg and evaluate
+ * to the pci_read_config_dword() return code (non-zero on failure).  The
+ * per-IMC BARs are consecutive dwords starting at 0xd8.
+ *
+ * I10NM_GET_DIMMMTR / I10NM_GET_MCDDRTCFG read a 32-bit register from the
+ * memory controller's MMIO window (@m->mbase).  @i selects a 0x4000-byte
+ * stride (the callers pass the channel number) and @j a 4-byte slot (the
+ * callers pass the DIMM number).  mbase is an ioremap()ed __iomem pointer,
+ * so it is accessed with readl() rather than dereferenced directly.
+ */
+#define I10NM_GET_SCK_BAR(d, reg)              \
+       pci_read_config_dword((d)->uracu, 0xd0, &(reg))
+#define I10NM_GET_IMC_BAR(d, i, reg)   \
+       pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
+#define I10NM_GET_DIMMMTR(m, i, j)     \
+       readl((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4)
+#define I10NM_GET_MCDDRTCFG(m, i, j)   \
+       readl((m)->mbase + 0x20970 + (i) * 0x4000 + (j) * 4)
+
+/*
+ * Field decoders for the BAR values read above: the socket MMIO base is
+ * bits 0-28 of the socket BAR shifted left by 23; the IMC window offset is
+ * bits 0-10 of the IMC BAR shifted left by 12; the IMC window size is
+ * (bits 13-23 minus bits 0-10, plus one) shifted left by 12.
+ */
+#define I10NM_GET_SCK_MMIO_BASE(reg)   (GET_BITFIELD(reg, 0, 28) << 23)
+#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
+#define I10NM_GET_IMC_MMIO_SIZE(reg)   ((GET_BITFIELD(reg, 13, 23) - \
+                                        GET_BITFIELD(reg, 0, 10) + 1) << 12)
+
+/*
+ * List of struct skx_dev entries (linked through their "list" member).
+ * The pointer is filled in by skx_get_all_bus_mappings() in i10nm_init().
+ */
+static struct list_head *i10nm_edac_list;
+
+/*
+ * Look up PCI function @dev.@fun on @bus in domain @dom and enable it.
+ *
+ * Returns the device holding exactly the one reference taken by
+ * pci_get_domain_bus_and_slot(); the caller owns that reference and must
+ * drop it with pci_dev_put().  Returns NULL, with no reference held, if
+ * the device does not exist or cannot be enabled.
+ */
+static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
+                                          unsigned int dev, unsigned int fun)
+{
+       struct pci_dev *pdev;
+
+       pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
+       if (!pdev) {
+               edac_dbg(2, "No device %02x:%02x.%x\n",
+                        bus, dev, fun);
+               return NULL;
+       }
+
+       if (unlikely(pci_enable_device(pdev) < 0)) {
+               edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
+                        bus, dev, fun);
+               /* The caller never sees pdev: drop the lookup reference. */
+               pci_dev_put(pdev);
+               return NULL;
+       }
+
+       /*
+        * No extra pci_dev_get() here: the lookup above already returned
+        * the device with its reference count incremented.
+        */
+       return pdev;
+}
+
+/*
+ * Discover the memory-controller units of every socket on i10nm_edac_list.
+ *
+ * For each socket this grabs the "util_all" and "uracu" PCI functions,
+ * derives the socket MMIO base from the socket BAR, and then, for each of
+ * the I10NM_NUM_IMC possible controllers, grabs the controller's PCI
+ * device and ioremap()s its register window, recording both in d->imc[i].
+ * A missing controller other than the first one is skipped.
+ *
+ * Returns 0 on success or -ENODEV on any failure.  Devices and mappings
+ * recorded before a failure are not released here.
+ * NOTE(review): presumably the caller's skx_remove() releases them - confirm.
+ */
+static int i10nm_get_all_munits(void)
+{
+       struct pci_dev *mdev;
+       void __iomem *mbase;
+       unsigned long size;
+       struct skx_dev *d;
+       int i, j = 0;
+       u32 reg, off;
+       u64 base;
+
+       list_for_each_entry(d, i10nm_edac_list, list) {
+               d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1);
+               if (!d->util_all)
+                       return -ENODEV;
+
+               d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1);
+               if (!d->uracu)
+                       return -ENODEV;
+
+               if (I10NM_GET_SCK_BAR(d, reg)) {
+                       i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
+                       return -ENODEV;
+               }
+
+               base = I10NM_GET_SCK_MMIO_BASE(reg);
+               /* j only numbers the sockets in the debug output. */
+               edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
+                        j++, base, reg);
+
+               for (i = 0; i < I10NM_NUM_IMC; i++) {
+                       mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
+                                                  12 + i, 0);
+                       /* The first controller must exist; later ones are optional. */
+                       if (i == 0 && !mdev) {
+                               i10nm_printk(KERN_ERR, "No IMC found\n");
+                               return -ENODEV;
+                       }
+                       if (!mdev)
+                               continue;
+
+                       d->imc[i].mdev = mdev;
+
+                       if (I10NM_GET_IMC_BAR(d, i, reg)) {
+                               i10nm_printk(KERN_ERR, "Failed to get mc bar\n");
+                               return -ENODEV;
+                       }
+
+                       /* The controller window is an offset within the socket MMIO range. */
+                       off  = I10NM_GET_IMC_MMIO_OFFSET(reg);
+                       size = I10NM_GET_IMC_MMIO_SIZE(reg);
+                       edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
+                                i, base + off, size, reg);
+
+                       mbase = ioremap(base + off, size);
+                       if (!mbase) {
+                               i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n",
+                                            base + off);
+                               return -ENODEV;
+                       }
+
+                       d->imc[i].mbase = mbase;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * CPU models this driver loads on; checked with x86_match_cpu() in
+ * i10nm_init() and exported for module autoloading.  The empty entry
+ * terminates the table.
+ */
+static const struct x86_cpu_id i10nm_cpuids[] = {
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_TREMONT_X, 0, 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
+
+/*
+ * Report whether ECC is enabled on channel @chan of memory controller @imc.
+ *
+ * Reads the channel's MCMTR register (offset 0x20ef8, 0x4000 bytes per
+ * channel) from the controller's MMIO window and returns true when bit 2
+ * is set.
+ */
+static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
+{
+       u32 mcmtr;
+
+       /* mbase is an ioremap()ed mapping: use readl(), not a plain load. */
+       mcmtr = readl(imc->mbase + 0x20ef8 + chan * 0x4000);
+       edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr);
+
+       return !!GET_BITFIELD(mcmtr, 2, 2);
+}
+
+/*
+ * Fill in the dimm_info entries of @mci from the DIMMMTR and MCDDRTCFG
+ * registers of every channel and DIMM slot of its memory controller.
+ *
+ * A controller without an MMIO mapping is left untouched.
+ *
+ * Returns 0 on success, or -ENODEV when a channel that has DIMMs populated
+ * does not have ECC enabled.
+ */
+static int i10nm_get_dimm_config(struct mem_ctl_info *mci)
+{
+       struct skx_pvt *pvt = mci->pvt_info;
+       struct skx_imc *imc = pvt->imc;
+       struct dimm_info *dimm;
+       u32 mtr, mcddrtcfg;
+       int i, j, ndimms;
+
+       for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
+               if (!imc->mbase)
+                       continue;
+
+               ndimms = 0;
+               for (j = 0; j < I10NM_NUM_DIMMS; j++) {
+                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+                                            mci->n_layers, i, j, 0);
+                       mtr = I10NM_GET_DIMMMTR(imc, i, j);
+                       mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
+                       edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
+                                mtr, mcddrtcfg, imc->mc, i, j);
+
+                       /* A slot holds either a regular DIMM or an NVDIMM. */
+                       if (IS_DIMM_PRESENT(mtr))
+                               ndimms += skx_get_dimm_info(mtr, 0, dimm,
+                                                           imc, i, j);
+                       else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
+                               ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
+                                                             EDAC_MOD_STR);
+               }
+               /* Check ECC on the channel just scanned, not always channel 0. */
+               if (ndimms && !i10nm_check_ecc(imc, i)) {
+                       i10nm_printk(KERN_ERR, "ECC is disabled on imc %d\n",
+                                    imc->mc);
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Notifier hooked into the MCE decode chain by i10nm_init() and removed
+ * again by i10nm_exit(); events are handled by skx_mce_check_error().
+ */
+static struct notifier_block i10nm_mce_dec = {
+       .notifier_call  = skx_mce_check_error,
+       .priority       = MCE_PRIO_EDAC,
+};
+
+/*
+ * Module entry point.
+ *
+ * Refuses to load when edac_get_owner() reports an owner other than this
+ * module (-EBUSY) or when the CPU is not listed in i10nm_cpuids (-ENODEV).
+ * Otherwise it discovers the sockets and their memory controllers,
+ * registers one EDAC memory controller per IMC found, acquires the ADXL
+ * address-translation support and finally hooks into the MCE decode chain.
+ *
+ * Returns 0 on success or a negative errno.  Failures after bus discovery
+ * go through the "fail" label, which calls skx_remove().
+ */
+static int __init i10nm_init(void)
+{
+       u8 mc = 0, src_id = 0, node_id = 0;
+       const struct x86_cpu_id *id;
+       const char *owner;
+       struct skx_dev *d;
+       int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
+       u64 tolm, tohm;
+
+       edac_dbg(2, "\n");
+
+       /* Only proceed if no owner is set or the owner is this module. */
+       owner = edac_get_owner();
+       if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
+               return -EBUSY;
+
+       id = x86_match_cpu(i10nm_cpuids);
+       if (!id)
+               return -ENODEV;
+
+       rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
+       if (rc)
+               return rc;
+
+       /* Negative rc is an error; zero means no entries were found. */
+       rc = skx_get_all_bus_mappings(0x3452, 0xcc, I10NM, &i10nm_edac_list);
+       if (rc < 0)
+               goto fail;
+       if (rc == 0) {
+               i10nm_printk(KERN_ERR, "No memory controllers found\n");
+               return -ENODEV;
+       }
+
+       rc = i10nm_get_all_munits();
+       if (rc < 0)
+               goto fail;
+
+       list_for_each_entry(d, i10nm_edac_list, list) {
+               rc = skx_get_src_id(d, &src_id);
+               if (rc < 0)
+                       goto fail;
+
+               rc = skx_get_node_id(d, &node_id);
+               if (rc < 0)
+                       goto fail;
+
+               edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
+               for (i = 0; i < I10NM_NUM_IMC; i++) {
+                       /* Skip controllers that i10nm_get_all_munits() did not find. */
+                       if (!d->imc[i].mdev)
+                               continue;
+
+                       /* mc numbers controllers across all sockets; lmc is the index within the socket. */
+                       d->imc[i].mc  = mc++;
+                       d->imc[i].lmc = i;
+                       d->imc[i].src_id  = src_id;
+                       d->imc[i].node_id = node_id;
+
+                       rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
+                                             "Intel_10nm Socket", EDAC_MOD_STR,
+                                             i10nm_get_dimm_config);
+                       if (rc < 0)
+                               goto fail;
+               }
+       }
+
+       rc = skx_adxl_get();
+       if (rc)
+               goto fail;
+
+       opstate_init();
+       mce_register_decode_chain(&i10nm_mce_dec);
+       setup_skx_debug("i10nm_test");
+
+       i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
+
+       return 0;
+fail:
+       skx_remove();
+       return rc;
+}
+
+/*
+ * Module exit: undo i10nm_init() in reverse order - remove the debug
+ * hook, leave the MCE decode chain, release the ADXL support and finally
+ * call skx_remove().
+ */
+static void __exit i10nm_exit(void)
+{
+       edac_dbg(2, "\n");
+       teardown_skx_debug();
+       mce_unregister_decode_chain(&i10nm_mce_dec);
+       skx_adxl_put();
+       skx_remove();
+}
+
+module_init(i10nm_init);
+module_exit(i10nm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");