scsi: pm80xx: Fixed system hang issue during kexec boot
author Deepak Ukey <deepak.ukey@microchip.com>
Tue, 11 Sep 2018 08:48:04 +0000 (14:18 +0530)
committer Martin K. Petersen <martin.petersen@oracle.com>
Wed, 12 Sep 2018 01:14:38 +0000 (21:14 -0400)
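When the firmware is not responding, executing a kexec boot causes a system
hang. When a firmware assertion happens, the driver is notified through the
fatal error interrupt vector configured in the MPI configuration table. The
driver then reads the scratchpad register and sets the controller_fatal_error
flag to true. While the flag is set, new tasks are completed as
SAS_TASK_UNDELIVERED and the soft reset skips the MPI uninitialization check.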

Signed-off-by: Deepak Ukey <deepak.ukey@microchip.com>
Signed-off-by: Viswas G <Viswas.G@microchip.com>
Acked-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/pm8001/pm8001_hwi.c
drivers/scsi/pm8001/pm8001_sas.c
drivers/scsi/pm8001/pm8001_sas.h
drivers/scsi/pm8001/pm80xx_hwi.c
drivers/scsi/pm8001/pm80xx_hwi.h

index a14bf50..e37ab97 100644 (file)
@@ -1479,6 +1479,12 @@ u32 pm8001_mpi_msg_consume(struct pm8001_hba_info *pm8001_ha,
                } else {
                        u32 producer_index;
                        void *pi_virt = circularQ->pi_virt;
+                       /* spurious interrupt during setup if
+                        * kexec-ing and driver doing a doorbell access
+                        * with the pre-kexec oq interrupt setup
+                        */
+                       if (!pi_virt)
+                               break;
                        /* Update the producer index from SPC */
                        producer_index = pm8001_read_32(pi_virt);
                        circularQ->producer_index = cpu_to_le32(producer_index);
index e063faa..b1e7d26 100644 (file)
@@ -396,6 +396,13 @@ static int pm8001_task_exec(struct sas_task *task,
                return 0;
        }
        pm8001_ha = pm8001_find_ha_by_dev(task->dev);
+       if (pm8001_ha->controller_fatal_error) {
+               struct task_status_struct *ts = &t->task_status;
+
+               ts->resp = SAS_TASK_UNDELIVERED;
+               t->task_done(t);
+               return 0;
+       }
        PM8001_IO_DBG(pm8001_ha, pm8001_printk("pm8001_task_exec device \n "));
        spin_lock_irqsave(&pm8001_ha->lock, flags);
        do {
index 80b4dd6..1816e35 100644 (file)
@@ -538,6 +538,7 @@ struct pm8001_hba_info {
        u32                     logging_level;
        u32                     fw_status;
        u32                     smp_exp_mode;
+       bool                    controller_fatal_error;
        const struct firmware   *fw_image;
        struct isr_param irq_vector[PM8001_MAX_MSIX_VEC];
        u32                     reset_in_progress;
index 91ff6a4..b641875 100644 (file)
@@ -577,6 +577,9 @@ static void update_main_config_table(struct pm8001_hba_info *pm8001_ha)
                pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_size);
        pm8001_mw32(address, MAIN_PCS_EVENT_LOG_OPTION,
                pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity);
+       /* Update Fatal error interrupt vector */
+       pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |=
+                                       ((pm8001_ha->number_of_intr - 1) << 8);
        pm8001_mw32(address, MAIN_FATAL_ERROR_INTERRUPT,
                pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt);
        pm8001_mw32(address, MAIN_EVENT_CRC_CHECK,
@@ -1110,6 +1113,9 @@ static int pm80xx_chip_init(struct pm8001_hba_info *pm8001_ha)
                return -EBUSY;
        }
 
+       /* Initialize the controller fatal error flag */
+       pm8001_ha->controller_fatal_error = false;
+
        /* Initialize pci space address eg: mpi offset */
        init_pci_device_addresses(pm8001_ha);
        init_default_table_values(pm8001_ha);
@@ -1218,13 +1224,17 @@ pm80xx_chip_soft_rst(struct pm8001_hba_info *pm8001_ha)
        u32 bootloader_state;
        u32 ibutton0, ibutton1;
 
-       /* Check if MPI is in ready state to reset */
-       if (mpi_uninit_check(pm8001_ha) != 0) {
-               PM8001_FAIL_DBG(pm8001_ha,
-                       pm8001_printk("MPI state is not ready\n"));
-               return -1;
+       /* Process MPI table uninitialization only if FW is ready */
+       if (!pm8001_ha->controller_fatal_error) {
+               /* Check if MPI is in ready state to reset */
+               if (mpi_uninit_check(pm8001_ha) != 0) {
+                       regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
+                       PM8001_FAIL_DBG(pm8001_ha, pm8001_printk(
+                               "MPI state is not ready scratch1 :0x%x\n",
+                               regval));
+                       return -1;
+               }
        }
-
        /* checked for reset register normal state; 0x0 */
        regval = pm8001_cr32(pm8001_ha, 0, SPC_REG_SOFT_RESET);
        PM8001_INIT_DBG(pm8001_ha,
@@ -3754,6 +3764,46 @@ static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb)
        }
 }
 
+static void print_scratchpad_registers(struct pm8001_hba_info *pm8001_ha)
+{
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_SCRATCH_PAD_0: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_SCRATCH_PAD_1:0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_SCRATCH_PAD_2: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_SCRATCH_PAD_3: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_3)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_HOST_SCRATCH_PAD_0: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_0)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_HOST_SCRATCH_PAD_1: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_1)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_HOST_SCRATCH_PAD_2: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_2)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_HOST_SCRATCH_PAD_3: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_3)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_HOST_SCRATCH_PAD_4: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_4)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_HOST_SCRATCH_PAD_5: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_5)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_RSVD_SCRATCH_PAD_0: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_6)));
+       PM8001_FAIL_DBG(pm8001_ha,
+               pm8001_printk("MSGU_RSVD_SCRATCH_PAD_1: 0x%x\n",
+                       pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_7)));
+}
+
 static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec)
 {
        struct outbound_queue_table *circularQ;
@@ -3761,10 +3811,28 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec)
        u8 uninitialized_var(bc);
        u32 ret = MPI_IO_STATUS_FAIL;
        unsigned long flags;
+       u32 regval;
 
+       if (vec == (pm8001_ha->number_of_intr - 1)) {
+               regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
+               if ((regval & SCRATCH_PAD_MIPSALL_READY) !=
+                                       SCRATCH_PAD_MIPSALL_READY) {
+                       pm8001_ha->controller_fatal_error = true;
+                       PM8001_FAIL_DBG(pm8001_ha, pm8001_printk(
+                               "Firmware Fatal error! Regval:0x%x\n", regval));
+                       print_scratchpad_registers(pm8001_ha);
+                       return ret;
+               }
+       }
        spin_lock_irqsave(&pm8001_ha->lock, flags);
        circularQ = &pm8001_ha->outbnd_q_tbl[vec];
        do {
+               /* spurious interrupt during setup if kexec-ing and
+                * driver doing a doorbell access w/ the pre-kexec oq
+                * interrupt setup.
+                */
+               if (!circularQ->pi_virt)
+                       break;
                ret = pm8001_mpi_msg_consume(pm8001_ha, circularQ, &pMsg1, &bc);
                if (MPI_IO_STATUS_SUCCESS == ret) {
                        /* process the outbound message */
index dead05a..84d7426 100644 (file)
@@ -1386,6 +1386,9 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t;
 #define SCRATCH_PAD_BOOT_LOAD_SUCCESS  0x0
 #define SCRATCH_PAD_IOP0_READY         0xC00
 #define SCRATCH_PAD_IOP1_READY         0x3000
+#define SCRATCH_PAD_MIPSALL_READY      (SCRATCH_PAD_IOP1_READY | \
+                                       SCRATCH_PAD_IOP0_READY | \
+                                       SCRATCH_PAD_RAAE_READY)
 
 /* boot loader state */
 #define SCRATCH_PAD1_BOOTSTATE_MASK            0x70    /* Bit 4-6 */