scsi: qedf: Add schedule_hw_err_handler callback for fan failure
author: Saurav Kashyap <skashyap@marvell.com>
Mon, 7 Sep 2020 12:14:41 +0000 (05:14 -0700)
committer: Martin K. Petersen <martin.petersen@oracle.com>
Wed, 9 Sep 2020 03:14:19 +0000 (23:14 -0400)
On fan failure, disable the PCI function. On ramrod failure, initiate
recovery.

Link: https://lore.kernel.org/r/20200907121443.5150-7-jhasan@marvell.com
Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
Signed-off-by: Javed Hasan <jhasan@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/qedf/qedf.h
drivers/scsi/qedf/qedf_main.c

index 15d6cbef7459ca11542a6b657c825f487ab0e914..0e2cbb164eeba894a6fc651aef8e2cfbb2461f89 100644 (file)
@@ -389,6 +389,7 @@ struct qedf_ctx {
        mempool_t *io_mempool;
        struct workqueue_struct *dpc_wq;
        struct delayed_work recovery_work;
+       struct delayed_work board_disable_work;
        struct delayed_work grcdump_work;
        struct delayed_work stag_work;
 
@@ -541,6 +542,9 @@ extern void qedf_get_generic_tlv_data(void *dev, struct qed_generic_tlvs *data);
 extern void qedf_wq_grcdump(struct work_struct *work);
 void qedf_stag_change_work(struct work_struct *work);
 void qedf_ctx_soft_reset(struct fc_lport *lport);
+extern void qedf_board_disable_work(struct work_struct *work);
+extern void qedf_schedule_hw_err_handler(void *dev,
+               enum qed_hw_err_type err_type);
 
 #define FCOE_WORD_TO_BYTE  4
 #define QEDF_MAX_TASK_NUM      0xFFFF
index 073de50dfbe818184e9f27d0167d9b464f45a269..ed595c83be3d89c42bf49f98e7e5e41493d7f1db 100644 (file)
@@ -105,6 +105,12 @@ module_param_named(dp_level, qedf_dp_level, uint, S_IRUGO);
 MODULE_PARM_DESC(dp_level, " printk verbosity control passed to qed module  "
        "during probe (0-3: 0 more verbose).");
 
+static bool qedf_enable_recovery = true;
+module_param_named(enable_recovery, qedf_enable_recovery,
+               bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(enable_recovery, "Enable/disable recovery on driver/firmware "
+               "interface level errors 0 = Disabled, 1 = Enabled (Default: 1).");
+
 struct workqueue_struct *qedf_io_wq;
 
 static struct fcoe_percpu_s qedf_global;
@@ -690,6 +696,7 @@ static struct qed_fcoe_cb_ops qedf_cb_ops = {
                .dcbx_aen = qedf_dcbx_handler,
                .get_generic_tlv_data = qedf_get_generic_tlv_data,
                .get_protocol_tlv_data = qedf_get_protocol_tlv_data,
+               .schedule_hw_err_handler = qedf_schedule_hw_err_handler,
        }
 };
 
@@ -3799,6 +3806,44 @@ void qedf_wq_grcdump(struct work_struct *work)
        qedf_capture_grc_dump(qedf);
 }
 
+void qedf_schedule_hw_err_handler(void *dev, enum qed_hw_err_type err_type)
+{
+       struct qedf_ctx *qedf = dev;
+
+       QEDF_ERR(&(qedf->dbg_ctx),
+                       "Hardware error handler scheduled, event=%d.\n",
+                       err_type);
+
+       if (test_bit(QEDF_IN_RECOVERY, &qedf->flags)) {
+               QEDF_ERR(&(qedf->dbg_ctx),
+                               "Already in recovery, not scheduling board disable work.\n");
+               return;
+       }
+
+       switch (err_type) {
+       case QED_HW_ERR_FAN_FAIL:
+               schedule_delayed_work(&qedf->board_disable_work, 0);
+               break;
+       case QED_HW_ERR_MFW_RESP_FAIL:
+       case QED_HW_ERR_HW_ATTN:
+       case QED_HW_ERR_DMAE_FAIL:
+       case QED_HW_ERR_FW_ASSERT:
+               /* Prevent HW attentions from being reasserted */
+               qed_ops->common->attn_clr_enable(qedf->cdev, true);
+               break;
+       case QED_HW_ERR_RAMROD_FAIL:
+               /* Prevent HW attentions from being reasserted */
+               qed_ops->common->attn_clr_enable(qedf->cdev, true);
+
+               if (qedf_enable_recovery)
+                       qed_ops->common->recovery_process(qedf->cdev);
+
+               break;
+       default:
+               break;
+       }
+}
+
 /*
  * Protocol TLV handler
  */