powerpc/eeh: Refactor the output message

author Gavin Shan <shangw@linux.vnet.ibm.com>

Thu, 27 Jun 2013 05:46:46 +0000 (13:46 +0800)

committer Benjamin Herrenschmidt <benh@kernel.crashing.org>

Mon, 1 Jul 2013 01:10:33 +0000 (11:10 +1000)
author Gavin Shan <shangw@linux.vnet.ibm.com>
Thu, 27 Jun 2013 05:46:46 +0000 (13:46 +0800)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Mon, 1 Jul 2013 01:10:33 +0000 (11:10 +1000)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c

index 416fb432d7e284721641318ff6becc659555fd29..3a8f82fd9005eba14b3528b3ff2f7385c5459ea3 100644 (file)
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -329,7 +329,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
         eeh_serialize_unlock(flags);
         eeh_send_failure_event(phb_pe);
  
-       WARN(1, "EEH: PHB failure detected\n");
+       pr_err("EEH: PHB#%x failure detected\n",
+               phb_pe->phb->global_number);
+       dump_stack();
  
         return 1;
  out:
@@ -458,7 +460,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
          * a stack trace will help the device-driver authors figure
          * out what happened.  So print that out.
          */
-       WARN(1, "EEH: failure detected\n");
+       pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
+               pe->addr, pe->phb->global_number);
+       dump_stack();
+
         return 1;
  
  dn_unlock:
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c

index 0974e13268423efeb6bdb950fbaceefe777363b6..2b1ce17cae504d95a5be5e04e16828a5e3f7516e 100644 (file)
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -425,6 +425,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
          * status ... if any child can't handle the reset, then the entire
          * slot is dlpar removed and added.
          */
+       pr_info("EEH: Notify device drivers to shutdown\n");
         eeh_pe_dev_traverse(pe, eeh_report_error, &result);
  
         /* Get the current PCI slot state. This can take a long time,
@@ -432,7 +433,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
          */
         rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
         if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
-               printk(KERN_WARNING "EEH: Permanent failure\n");
+               pr_warning("EEH: Permanent failure\n");
                 goto hard_fail;
         }
  
@@ -440,6 +441,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
          * don't post the error log until after all dev drivers
          * have been informed.
          */
+       pr_info("EEH: Collect temporary log\n");
         eeh_slot_error_detail(pe, EEH_LOG_TEMP);
  
         /* If all device drivers were EEH-unaware, then shut
@@ -447,15 +449,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
          * go down willingly, without panicing the system.
          */
         if (result == PCI_ERS_RESULT_NONE) {
+               pr_info("EEH: Reset with hotplug activity\n");
                 rc = eeh_reset_device(pe, frozen_bus);
                 if (rc) {
-                       printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
+                       pr_warning("%s: Unable to reset, err=%d\n",
+                                  __func__, rc);
                         goto hard_fail;
                 }
         }
  
         /* If all devices reported they can proceed, then re-enable MMIO */
         if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+               pr_info("EEH: Enable I/O for affected devices\n");
                 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
  
                 if (rc < 0)
@@ -463,6 +468,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
                 if (rc) {
                         result = PCI_ERS_RESULT_NEED_RESET;
                 } else {
+                       pr_info("EEH: Notify device drivers to resume I/O\n");
                         result = PCI_ERS_RESULT_NONE;
                         eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
                 }
@@ -470,6 +476,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
  
         /* If all devices reported they can proceed, then re-enable DMA */
         if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+               pr_info("EEH: Enabled DMA for affected devices\n");
                 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
  
                 if (rc < 0)
@@ -482,17 +489,22 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
  
         /* If any device has a hard failure, then shut off everything. */
         if (result == PCI_ERS_RESULT_DISCONNECT) {
-               printk(KERN_WARNING "EEH: Device driver gave up\n");
+               pr_warning("EEH: Device driver gave up\n");
                 goto hard_fail;
         }
  
         /* If any device called out for a reset, then reset the slot */
         if (result == PCI_ERS_RESULT_NEED_RESET) {
+               pr_info("EEH: Reset without hotplug activity\n");
                 rc = eeh_reset_device(pe, NULL);
                 if (rc) {
-                       printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
+                       pr_warning("%s: Cannot reset, err=%d\n",
+                                  __func__, rc);
                         goto hard_fail;
                 }
+
+               pr_info("EEH: Notify device drivers "
+                       "the completion of reset\n");
                 result = PCI_ERS_RESULT_NONE;
                 eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
         }
@@ -500,11 +512,12 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
         /* All devices should claim they have recovered by now. */
         if ((result != PCI_ERS_RESULT_RECOVERED) &&
             (result != PCI_ERS_RESULT_NONE)) {
-               printk(KERN_WARNING "EEH: Not recovered\n");
+               pr_warning("EEH: Not recovered\n");
                 goto hard_fail;
         }
  
         /* Tell all device drivers that they can resume operations */
+       pr_info("EEH: Notify device driver to resume\n");
         eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
  
         return;
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c

index 85025d7e639623f7c0815ad337f96884941db1c7..0cd1c4a717550d150ade10ec180c405cec48aa76 100644 (file)
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -853,11 +853,14 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
                                         phb->eeh_state |= PNV_EEH_STATE_REMOVED;
                                 }
  
-                               WARN(1, "EEH: dead IOC detected\n");
+                               pr_err("EEH: dead IOC detected\n");
                                 ret = 4;
                                 goto out;
-                       } else if (severity == OPAL_EEH_SEV_INF)
+                       } else if (severity == OPAL_EEH_SEV_INF) {
+                               pr_info("EEH: IOC informative error "
+                                       "detected\n");
                                 ioda_eeh_hub_diag(hose);
+                       }
  
                         break;
                 case OPAL_EEH_PHB_ERROR:
@@ -865,8 +868,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
                                 if (ioda_eeh_get_phb_pe(hose, pe))
                                         break;
  
-                               WARN(1, "EEH: dead PHB#%x detected\n",
-                                    hose->global_number);
+                               pr_err("EEH: dead PHB#%x detected\n",
+                                       hose->global_number);
                                 phb->eeh_state |= PNV_EEH_STATE_REMOVED;
                                 ret = 3;
                                 goto out;
@@ -874,20 +877,24 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
                                 if (ioda_eeh_get_phb_pe(hose, pe))
                                         break;
  
-                               WARN(1, "EEH: fenced PHB#%x detected\n",
-                                    hose->global_number);
+                               pr_err("EEH: fenced PHB#%x detected\n",
+                                       hose->global_number);
                                 ret = 2;
                                 goto out;
-                       } else if (severity == OPAL_EEH_SEV_INF)
+                       } else if (severity == OPAL_EEH_SEV_INF) {
+                               pr_info("EEH: PHB#%x informative error "
+                                       "detected\n",
+                                       hose->global_number);
                                 ioda_eeh_phb_diag(hose);
+                       }
  
                         break;
                 case OPAL_EEH_PE_ERROR:
                         if (ioda_eeh_get_pe(hose, frozen_pe_no, pe))
                                 break;
  
-                       WARN(1, "EEH: Frozen PE#%x on PHB#%x detected\n",
-                            (*pe)->addr, (*pe)->phb->global_number);
+                       pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
+                               (*pe)->addr, (*pe)->phb->global_number);
                         ret = 1;
                         goto out;
                 }
author	Gavin Shan <shangw@linux.vnet.ibm.com>
	Thu, 27 Jun 2013 05:46:46 +0000 (13:46 +0800)
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>
	Mon, 1 Jul 2013 01:10:33 +0000 (11:10 +1000)
arch/powerpc/kernel/eeh.c		patch | blob | history
arch/powerpc/kernel/eeh_driver.c		patch | blob | history
arch/powerpc/platforms/powernv/eeh-ioda.c		patch | blob | history