From 5790862255028c831761e13014ee87a06df828f1 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 20 Nov 2020 16:10:33 -0800 Subject: [PATCH] PCI/ERR: Recover from RCiEP AER errors Add support for handling AER errors detected by Root Complex Integrated Endpoints (RCiEPs). These errors are signaled to software natively via a Root Complex Event Collector (RCEC) or non-natively via ACPI APEI if the platform retains control of AER or uses a non-standard RCEC-like device. When recovering from RCiEP errors, the Root Error Command and Status registers are in the AER Capability of an associated RCEC (if any), not in a Root Port. In the non-native case, the platform is responsible for those registers and we can't touch them. [bhelgaas: commit log, etc] Co-developed-by: Sean V Kelley Link: https://lore.kernel.org/r/20201121001036.8560-13-sean.v.kelley@intel.com Signed-off-by: Sean V Kelley Signed-off-by: Qiuxu Zhuo Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 24 +++++++++++++++++++----- drivers/pci/pcie/err.c | 15 ++++++++------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 84a785f..e682df5 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1359,8 +1359,8 @@ static int aer_probe(struct pcie_device *dev) } /** - * aer_root_reset - reset Root Port hierarchy or RCEC - * @dev: pointer to Root Port or RCEC + * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP + * @dev: pointer to Root Port, RCEC, or RCiEP * * Invoked by Port Bus driver when performing reset. */ @@ -1373,8 +1373,22 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) u32 reg32; int rc; - root = dev; /* device with Root Error registers */ - aer = root->aer_cap; + /* + * Only Root Ports and RCECs have AER Root Command and Root Status + * registers. If "dev" is an RCiEP, the relevant registers are in + * the RCEC. + */ + if (type == PCI_EXP_TYPE_RC_END) + root = dev->rcec; + else + root = dev; + + /* + * If the platform retained control of AER, an RCiEP may not have + * an RCEC visible to us, so dev->rcec ("root") may be NULL. In + * that case, firmware is responsible for these registers. + */ + aer = root ? root->aer_cap : 0; if ((host->native_aer || pcie_ports_native) && aer) { /* Disable Root's interrupt in response to error messages */ @@ -1383,7 +1397,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); } - if (type == PCI_EXP_TYPE_RC_EC) { + if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) { if (pcie_has_flr(dev)) { rc = pcie_flr(dev); pci_info(dev, "has been reset (%d)\n", rc); diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 87a2dc8..510f31f 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -148,7 +148,7 @@ out: /** * pci_walk_bridge - walk bridges potentially AER affected - * @bridge: bridge which may be a Port or an RCEC + * @bridge: bridge which may be a Port, an RCEC, or an RCiEP * @cb: callback to be called for each device found * @userdata: arbitrary pointer to be passed to callback * @@ -156,8 +156,8 @@ out: * any bridged devices on buses under this bus. Call the provided callback * on each device found. * - * If the device provided has no subordinate bus, e.g., an RCEC, call the - * callback on the device itself. + * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP, + * call the callback on the device itself. */ static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev *, void *), @@ -179,9 +179,9 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); /* - * If the error was detected by a Root Port, Downstream Port, or - * RCEC, recovery runs on the device itself. For Ports, that also - * includes any subordinate devices. + * If the error was detected by a Root Port, Downstream Port, RCEC, + * or RCiEP, recovery runs on the device itself. For Ports, that + * also includes any subordinate devices. * * If it was detected by another device (Endpoint, etc), recovery * runs on the device and anything else under the same Port, i.e., @@ -189,7 +189,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, */ if (type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_DOWNSTREAM || - type == PCI_EXP_TYPE_RC_EC) + type == PCI_EXP_TYPE_RC_EC || + type == PCI_EXP_TYPE_RC_END) bridge = dev; else bridge = pci_upstream_bridge(dev); -- 2.7.4