powerpc/eeh: Hotplug improvement
authorGavin Shan <shangw@linux.vnet.ibm.com>
Sun, 12 Jan 2014 06:13:45 +0000 (14:13 +0800)
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 15 Jan 2014 02:58:29 +0000 (13:58 +1100)
When EEH error comes to one specific PCI device before its driver
is loaded, we will apply hotplug to recover the error. During the
plug time, the PCI device will be probed and its driver is loaded.
Then we wrongly calls to the error handlers if the driver supports
EEH explicitly.

The patch intends to fix by introducing flag EEH_DEV_NO_HANDLER and
set it before we remove the PCI device. In turn, we can avoid wrongly
calls the error handlers of the PCI device after its driver loaded.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/include/asm/eeh.h
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_driver.c

index 7f8adc8..8b4b8e4 100644 (file)
@@ -90,7 +90,8 @@ struct eeh_pe {
 #define EEH_DEV_IRQ_DISABLED   (1 << 3)        /* Interrupt disabled   */
 #define EEH_DEV_DISCONNECTED   (1 << 4)        /* Removing from PE     */
 
-#define EEH_DEV_SYSFS          (1 << 8)        /* Sysfs created        */
+#define EEH_DEV_NO_HANDLER     (1 << 8)        /* No error handler     */
+#define EEH_DEV_SYSFS          (1 << 9)        /* Sysfs created        */
 
 struct eeh_dev {
        int mode;                       /* EEH mode                     */
index f4b7a22..148db72 100644 (file)
@@ -921,6 +921,13 @@ void eeh_add_device_late(struct pci_dev *dev)
                eeh_sysfs_remove_device(edev->pdev);
                edev->mode &= ~EEH_DEV_SYSFS;
 
+               /*
+                * We definitely should have the PCI device removed
+                * though it wasn't correctly. So we needn't call
+                * into error handler afterwards.
+                */
+               edev->mode |= EEH_DEV_NO_HANDLER;
+
                edev->pdev = NULL;
                dev->dev.archdata.edev = NULL;
        }
@@ -1023,6 +1030,14 @@ void eeh_remove_device(struct pci_dev *dev)
        else
                edev->mode |= EEH_DEV_DISCONNECTED;
 
+       /*
+        * We're removing from the PCI subsystem, that means
+        * the PCI device driver can't support EEH or not
+        * well. So we rely on hotplug completely to do recovery
+        * for the specific PCI device.
+        */
+       edev->mode |= EEH_DEV_NO_HANDLER;
+
        eeh_addr_cache_rmv_dev(dev);
        eeh_sysfs_remove_device(dev);
        edev->mode &= ~EEH_DEV_SYSFS;
index 4ef59c3..7db3920 100644 (file)
@@ -217,7 +217,8 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
        if (!driver) return NULL;
 
        if (!driver->err_handler ||
-           !driver->err_handler->mmio_enabled) {
+           !driver->err_handler->mmio_enabled ||
+           (edev->mode & EEH_DEV_NO_HANDLER)) {
                eeh_pcid_put(dev);
                return NULL;
        }
@@ -258,7 +259,8 @@ static void *eeh_report_reset(void *data, void *userdata)
        eeh_enable_irq(dev);
 
        if (!driver->err_handler ||
-           !driver->err_handler->slot_reset) {
+           !driver->err_handler->slot_reset ||
+           (edev->mode & EEH_DEV_NO_HANDLER)) {
                eeh_pcid_put(dev);
                return NULL;
        }
@@ -297,7 +299,9 @@ static void *eeh_report_resume(void *data, void *userdata)
        eeh_enable_irq(dev);
 
        if (!driver->err_handler ||
-           !driver->err_handler->resume) {
+           !driver->err_handler->resume ||
+           (edev->mode & EEH_DEV_NO_HANDLER)) {
+               edev->mode &= ~EEH_DEV_NO_HANDLER;
                eeh_pcid_put(dev);
                return NULL;
        }