powerpc/eeh: Avoid event on passed PE

author Gavin Shan <gwshan@linux.vnet.ibm.com>

Tue, 10 Jun 2014 01:41:55 +0000 (11:41 +1000)

committer Benjamin Herrenschmidt <benh@kernel.crashing.org>

Tue, 5 Aug 2014 05:28:47 +0000 (15:28 +1000)
author Gavin Shan <gwshan@linux.vnet.ibm.com>
Tue, 10 Jun 2014 01:41:55 +0000 (11:41 +1000)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tue, 5 Aug 2014 05:28:47 +0000 (15:28 +1000)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h

index fab7743..9537d83 100644 (file)
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -25,6 +25,7 @@
  #include <linux/list.h>
  #include <linux/string.h>
  #include <linux/time.h>
+#include <linux/atomic.h>
  
  struct pci_dev;
  struct pci_bus;
@@ -84,6 +85,7 @@ struct eeh_pe {
         int freeze_count;               /* Times of froze up            */
         struct timeval tstamp;          /* Time on first-time freeze    */
         int false_positives;            /* Times of reported #ff's      */
+       atomic_t pass_dev_cnt;          /* Count of passed through devs */
         struct eeh_pe *parent;          /* Parent PE                    */
         struct list_head child_list;    /* Link PE to the child list    */
         struct list_head edevs;         /* Link list of EEH devices     */
@@ -93,6 +95,11 @@ struct eeh_pe {
  #define eeh_pe_for_each_dev(pe, edev, tmp) \
                 list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
  
+static inline bool eeh_pe_passed(struct eeh_pe *pe)
+{
+       return pe ? !!atomic_read(&pe->pass_dev_cnt) : false;
+}
+
  /*
   * The struct is used to trace EEH state for the associated
   * PCI device node or PCI device. In future, it might
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c

index 86e2570..c8f1a9d 100644 (file)
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -400,6 +400,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
         if (ret > 0)
                 return ret;
  
+       /*
+        * If the PE isn't owned by us, we shouldn't check the
+        * state. Instead, let the owner handle it if the PE has
+        * been frozen.
+        */
+       if (eeh_pe_passed(pe))
+               return 0;
+
         /* If we already have a pending isolation event for this
          * slot, we know it's bad already, we don't need to check.
          * Do this checking under a lock; as multiple PCI devices
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c

index 8ad0c5b..f6abdb1 100644 (file)
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -812,7 +812,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
                                 opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
                                         OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
                                 ret = EEH_NEXT_ERR_NONE;
-                       } else if ((*pe)->state & EEH_PE_ISOLATED) {
+                       } else if ((*pe)->state & EEH_PE_ISOLATED ||
+                                  eeh_pe_passed(*pe)) {
                                 ret = EEH_NEXT_ERR_NONE;
                         } else {
                                 pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
author	Gavin Shan <gwshan@linux.vnet.ibm.com>
	Tue, 10 Jun 2014 01:41:55 +0000 (11:41 +1000)
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>
	Tue, 5 Aug 2014 05:28:47 +0000 (15:28 +1000)
arch/powerpc/include/asm/eeh.h		patch | blob | history
arch/powerpc/kernel/eeh.c		patch | blob | history
arch/powerpc/platforms/powernv/eeh-ioda.c		patch | blob | history