PCI/AER: Add uevents in AER and EEH error/resume
author: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Fri, 5 Jan 2018 16:45:47 +0000 (10:45 -0600)
committer: Michael Ellerman <mpe@ellerman.id.au>
Sat, 27 Jan 2018 09:02:51 +0000 (20:02 +1100)
Devices can go offline when errors are reported. This patch emits a
change uevent on the device's kernel object so that udev learns of the
error. When the device resumes, another change uevent is emitted
reporting the device as online. Therefore, EEH and AER events are
better propagated to user space for PCI devices in all arches.

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Juan J. Alvarez <jjalvare@linux.vnet.ibm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/kernel/eeh_driver.c
drivers/pci/pcie/aer/aerdrv_core.c
include/linux/pci.h

index 3c0fa99..beea218 100644 (file)
@@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata)
 
        edev->in_error = true;
        eeh_pcid_put(dev);
+       pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
        return NULL;
 }
 
@@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata)
        driver->err_handler->resume(dev);
 
        eeh_pcid_put(dev);
+       pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+#ifdef CONFIG_PCI_IOV
+       eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+#endif
        return NULL;
 }
 
@@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata)
        driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
 
        eeh_pcid_put(dev);
+       pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
        return NULL;
 }
 
index 7448052..8d74480 100644 (file)
@@ -278,6 +278,7 @@ static int report_error_detected(struct pci_dev *dev, void *data)
        } else {
                err_handler = dev->driver->err_handler;
                vote = err_handler->error_detected(dev, result_data->state);
+               pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
        }
 
        result_data->result = merge_result(result_data->result, vote);
@@ -341,6 +342,7 @@ static int report_resume(struct pci_dev *dev, void *data)
 
        err_handler = dev->driver->err_handler;
        err_handler->resume(dev);
+       pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
 out:
        device_unlock(&dev->dev);
        return 0;
@@ -541,6 +543,7 @@ static void do_recovery(struct pci_dev *dev, int severity)
        return;
 
 failed:
+       pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
        /* TODO: Should kernel panic here? */
        dev_info(&dev->dev, "AER: Device recovery failed\n");
 }
index e3e9446..4056304 100644 (file)
@@ -2277,6 +2277,42 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev)
        return false;
 }
 
+/**
+ * pci_uevent_ers - emit a KOBJ_CHANGE uevent for a PCI error-recovery step
+ * @pdev: PCI device whose kobject the uevent is emitted on
+ * @err_type: recovery result that selects the uevent environment strings
+ *
+ * Translates @err_type into a pair of environment strings and hands them
+ * to kobject_uevent_env() together with KOBJ_CHANGE:
+ *
+ *   PCI_ERS_RESULT_NONE, PCI_ERS_RESULT_CAN_RECOVER:
+ *     ERROR_EVENT=BEGIN_RECOVERY, DEVICE_ONLINE=0
+ *   PCI_ERS_RESULT_RECOVERED:
+ *     ERROR_EVENT=SUCCESSFUL_RECOVERY, DEVICE_ONLINE=1
+ *   PCI_ERS_RESULT_DISCONNECT:
+ *     ERROR_EVENT=FAILED_RECOVERY, DEVICE_ONLINE=0
+ *
+ * Any other @err_type value emits no uevent. The return value of
+ * kobject_uevent_env() is not checked, so a failure to deliver the event
+ * is not reported to the caller.
+ */
+static inline void pci_uevent_ers(struct pci_dev *pdev,
+                                 enum  pci_ers_result err_type)
+{
+       int idx = 0;
+       char *envp[3]; /* at most two KEY=value strings plus the NULL terminator */
+
+       switch (err_type) {
+       case PCI_ERS_RESULT_NONE:
+       case PCI_ERS_RESULT_CAN_RECOVER:
+               envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY";
+               envp[idx++] = "DEVICE_ONLINE=0";
+               break;
+       case PCI_ERS_RESULT_RECOVERED:
+               envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY";
+               envp[idx++] = "DEVICE_ONLINE=1";
+               break;
+       case PCI_ERS_RESULT_DISCONNECT:
+               envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY";
+               envp[idx++] = "DEVICE_ONLINE=0";
+               break;
+       default:
+               break;
+       }
+
+       if (idx > 0) { /* idx stays 0 when err_type matched no case above */
+               envp[idx++] = NULL;
+               kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp);
+       }
+}
+
 /* provide the legacy pci_dma_* API */
 #include <linux/pci-dma-compat.h>