scsi: qla2xxx: Fix NVMe session down detection
author Quinn Tran <qutran@marvell.com>
Tue, 17 Aug 2021 05:13:13 +0000 (22:13 -0700)
committer Martin K. Petersen <martin.petersen@oracle.com>
Tue, 24 Aug 2021 02:36:54 +0000 (22:36 -0400)
When a target port transitions its personality from one protocol to the
other (NVMe <--> FCP), the two can briefly overlap: one layer is going down
while the other layer is coming up. This overlap can cause temporary I/O
errors. Detect those errors/transitions and recover from them. Trigger
session teardown and allow relogin to re-drive the connection under the
following conditions:

 - NVMe command error

 - On PRLO + N2N (rida format 2)

Link: https://lore.kernel.org/r/20210817051315.2477-11-njavali@marvell.com
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_mbx.c

index 4a0a5b4..d09776b 100644 (file)
@@ -2733,12 +2733,14 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
         * final cleanup of firmware resources (PCBs and XCBs).
         */
        if (fcport->loop_id != FC_NO_LOOP_ID) {
-               if (IS_FWI2_CAPABLE(fcport->vha->hw))
-                       fcport->vha->hw->isp_ops->fabric_logout(fcport->vha,
-                           fcport->loop_id, fcport->d_id.b.domain,
-                           fcport->d_id.b.area, fcport->d_id.b.al_pa);
-               else
+               if (IS_FWI2_CAPABLE(fcport->vha->hw)) {
+                       if (fcport->loop_id != FC_NO_LOOP_ID)
+                               fcport->logout_on_delete = 1;
+
+                       qlt_schedule_sess_for_deletion(fcport);
+               } else {
                        qla2x00_port_logout(fcport->vha, fcport);
+               }
        }
 }
 
index c2fc75a..ece6026 100644 (file)
@@ -2652,6 +2652,15 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
        case CS_PORT_UNAVAILABLE:
        case CS_PORT_LOGGED_OUT:
                fcport->nvme_flag |= NVME_FLAG_RESETTING;
+               if (atomic_read(&fcport->state) == FCS_ONLINE) {
+                       ql_dbg(ql_dbg_disc, fcport->vha, 0x3021,
+                              "Port to be marked lost on fcport=%06x, current "
+                              "port state= %s comp_status %x.\n",
+                              fcport->d_id.b24, port_state_str[FCS_ONLINE],
+                              comp_status);
+
+                       qlt_schedule_sess_for_deletion(fcport);
+               }
                fallthrough;
        case CS_ABORTED:
        case CS_PORT_BUSY:
index 438af0d..7811c49 100644 (file)
@@ -4190,6 +4190,16 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
                                rptid_entry->u.f2.remote_nport_id[1];
                        fcport->d_id.b.al_pa =
                                rptid_entry->u.f2.remote_nport_id[0];
+
+                       /*
+                        * For the case where remote port sending PRLO, FW
+                        * sends up RIDA Format 2 as an indication of session
+                        * loss. In other word, FW state change from PRLI
+                        * complete back to PLOGI complete. Delete the
+                        * session and let relogin drive the reconnect.
+                        */
+                       if (atomic_read(&fcport->state) == FCS_ONLINE)
+                               qlt_schedule_sess_for_deletion(fcport);
                }
        }
 }