{
struct eeh_pe *pe, *phb_pe;
struct pci_bus *bus;
- struct pci_controller *hose, *tmp;
+ struct pci_controller *hose;
unsigned long flags;
- int rc = 0;
+ int rc;
- /*
- * The return value from next_error() has been classified as follows.
- * It might be good to enumerate them. However, next_error() is only
- * supported by PowerNV platform for now. So it would be fine to use
- * integer directly:
- *
- * 4 - Dead IOC 3 - Dead PHB
- * 2 - Fenced PHB 1 - Frozen PE
- * 0 - No error found
- *
- */
- rc = eeh_ops->next_error(&pe);
- if (rc <= 0)
- return;
- switch (rc) {
- case 4:
- /* Mark all PHBs in dead state */
- eeh_serialize_lock(&flags);
- list_for_each_entry_safe(hose, tmp,
- &hose_list, list_node) {
- phb_pe = eeh_phb_pe_get(hose);
- if (!phb_pe) continue;
-
- eeh_pe_state_mark(phb_pe,
- EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+ do {
+ rc = eeh_ops->next_error(&pe);
+
+ switch (rc) {
+ case EEH_NEXT_ERR_DEAD_IOC:
+ /* Mark all PHBs in dead state */
+ eeh_serialize_lock(&flags);
+
+ /* Purge all events */
+ eeh_remove_event(NULL);
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe) continue;
+
+ eeh_pe_state_mark(phb_pe,
+ EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+ }
+
+ eeh_serialize_unlock(flags);
+
+ break;
+ case EEH_NEXT_ERR_FROZEN_PE:
+ case EEH_NEXT_ERR_FENCED_PHB:
+ case EEH_NEXT_ERR_DEAD_PHB:
+ /* Mark the PE in fenced state */
+ eeh_serialize_lock(&flags);
+
+ /* Purge all events of the PHB */
+ eeh_remove_event(pe);
+
+ if (rc == EEH_NEXT_ERR_DEAD_PHB)
+ eeh_pe_state_mark(pe,
+ EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+ else
+ eeh_pe_state_mark(pe,
+ EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+
+ eeh_serialize_unlock(flags);
+
+ break;
+ case EEH_NEXT_ERR_NONE:
+ return;
+ default:
+ pr_warn("%s: Invalid value %d from next_error()\n",
+ __func__, rc);
+ return;
}
- eeh_serialize_unlock(flags);
-
- /* Purge all events */
- eeh_remove_event(NULL);
- break;
- case 3:
- case 2:
- case 1:
- /* Mark the PE in fenced state */
- eeh_serialize_lock(&flags);
- if (rc == 3)
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
- else
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_RECOVERING);
- eeh_serialize_unlock(flags);
-
- /* Purge all events of the PHB */
- eeh_remove_event(pe);
- break;
- default:
- pr_err("%s: Invalid value %d from next_error()\n",
- __func__, rc);
- return;
- }
- /*
- * For fenced PHB and frozen PE, it's handled as normal
- * event. We have to remove the affected PHBs for dead
- * PHB and IOC
- */
- if (rc == 2 || rc == 1)
- eeh_handle_normal_event(pe);
- else {
- list_for_each_entry_safe(hose, tmp,
- &hose_list, list_node) {
- phb_pe = eeh_phb_pe_get(hose);
- if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD))
- continue;
-
- bus = eeh_pe_bus_get(phb_pe);
- /* Notify all devices that they're about to go down. */
- eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
- pcibios_remove_pci_devices(bus);
+ /*
+ * For fenced PHB and frozen PE, it's handled as normal
+ * event. We have to remove the affected PHBs for dead
+ * PHB and IOC
+ */
+ if (rc == EEH_NEXT_ERR_FROZEN_PE ||
+ rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_handle_normal_event(pe);
+ } else {
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe ||
+ !(phb_pe->state & EEH_PE_PHB_DEAD))
+ continue;
+
+ /* Notify all devices to be down */
+ bus = eeh_pe_bus_get(phb_pe);
+ eeh_pe_dev_traverse(pe,
+ eeh_report_failure, NULL);
+ pcibios_remove_pci_devices(bus);
+ }
}
- }
+
+ /*
+ * If we have detected dead IOC, we needn't proceed
+ * any more since all PHBs would have been removed
+ */
+ if (rc == EEH_NEXT_ERR_DEAD_IOC)
+ break;
+ } while (rc != EEH_NEXT_ERR_NONE);
}
/**
*/
static int ioda_eeh_next_error(struct eeh_pe **pe)
{
- struct pci_controller *hose, *tmp;
+ struct pci_controller *hose;
struct pnv_phb *phb;
u64 frozen_pe_no;
u16 err_type, severity;
long rc;
- int ret = 1;
+ int ret = EEH_NEXT_ERR_NONE;
/*
* While running here, it's safe to purge the event queue.
eeh_remove_event(NULL);
opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry(hose, &hose_list, list_node) {
/*
* If the subordinate PCI buses of the PHB has been
* removed, we needn't take care of it any more.
switch (err_type) {
case OPAL_EEH_IOC_ERROR:
if (severity == OPAL_EEH_SEV_IOC_DEAD) {
- list_for_each_entry_safe(hose, tmp,
- &hose_list, list_node) {
+ list_for_each_entry(hose, &hose_list,
+ list_node) {
phb = hose->private_data;
phb->eeh_state |= PNV_EEH_STATE_REMOVED;
}
pr_err("EEH: dead IOC detected\n");
- ret = 4;
- goto out;
+ ret = EEH_NEXT_ERR_DEAD_IOC;
} else if (severity == OPAL_EEH_SEV_INF) {
pr_info("EEH: IOC informative error "
"detected\n");
ioda_eeh_hub_diag(hose);
+ ret = EEH_NEXT_ERR_NONE;
}
break;
pr_err("EEH: dead PHB#%x detected\n",
hose->global_number);
phb->eeh_state |= PNV_EEH_STATE_REMOVED;
- ret = 3;
- goto out;
+ ret = EEH_NEXT_ERR_DEAD_PHB;
} else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
if (ioda_eeh_get_phb_pe(hose, pe))
break;
pr_err("EEH: fenced PHB#%x detected\n",
hose->global_number);
- ret = 2;
- goto out;
+ ret = EEH_NEXT_ERR_FENCED_PHB;
} else if (severity == OPAL_EEH_SEV_INF) {
pr_info("EEH: PHB#%x informative error "
"detected\n",
hose->global_number);
ioda_eeh_phb_diag(hose);
+ ret = EEH_NEXT_ERR_NONE;
}
break;
pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
(*pe)->addr, (*pe)->phb->global_number);
- ret = 1;
- goto out;
+ ret = EEH_NEXT_ERR_FROZEN_PE;
+ break;
+ default:
+ pr_warn("%s: Unexpected error type %d\n",
+ __func__, err_type);
}
+
+ /*
+ * If we have no errors on the specific PHB or only
+ * informative error there, we continue poking it.
+ * Otherwise, we need actions to be taken by upper
+ * layer.
+ */
+ if (ret > EEH_NEXT_ERR_INF)
+ break;
}
- ret = 0;
-out:
return ret;
}