scsi: lpfc: Fix crash when nvmet transport calls host_release
author James Smart <jsmart2021@gmail.com>
Mon, 4 Jan 2021 18:02:37 +0000 (10:02 -0800)
committer Martin K. Petersen <martin.petersen@oracle.com>
Fri, 8 Jan 2021 04:02:37 +0000 (23:02 -0500)
When lpfc is running in NVMET mode and supports the NVME-1 addendum
changes, a LIP on either a bound NVME Initiator or the lpfc NVMET's link
results in an Oops in lpfc_nvmet_host_release.

The fix requires lpfc NVMET to maintain an additional reference on any node
structure that acts as the hosthandle for the NVMET transport.  This
reference is taken only once per hosthandle, prior to the upcall of an
unsolicited LS_REQ, and is released when the NVMET transport releases the
hosthandle during the host_release downcall.

Link: https://lore.kernel.org/r/20210104180240.46824-13-jsmart2021@gmail.com
Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_disc.h
drivers/scsi/lpfc/lpfc_nvmet.c
drivers/scsi/lpfc/lpfc_sli.c

index 4cea61b..8ce13ef 100644 (file)
@@ -77,6 +77,13 @@ struct lpfc_node_rrqs {
        unsigned long xri_bitmap[XRI_BITMAP_ULONGS];
 };
 
+enum lpfc_fc4_xpt_flags {
+       NLP_WAIT_FOR_UNREG = 0x1,
+       SCSI_XPT_REGD      = 0x2,
+       NVME_XPT_REGD      = 0x4,
+       NLP_XPT_HAS_HH     = 0x8,
+};
+
 struct lpfc_nodelist {
        struct list_head nlp_listp;
        struct lpfc_name nlp_portname;
@@ -134,13 +141,10 @@ struct lpfc_nodelist {
        unsigned long *active_rrqs_xri_bitmap;
        struct lpfc_scsicmd_bkt *lat_data;      /* Latency data */
        uint32_t fc4_prli_sent;
-       uint32_t fc4_xpt_flags;
-       uint32_t upcall_flags;
-#define NLP_WAIT_FOR_UNREG    0x1
-#define SCSI_XPT_REGD         0x2
-#define NVME_XPT_REGD         0x4
-#define NLP_WAIT_FOR_LOGO     0x2
+       u32 upcall_flags;
+#define        NLP_WAIT_FOR_LOGO 0x2
 
+       enum lpfc_fc4_xpt_flags fc4_xpt_flags;
 
        uint32_t nvme_fb_size; /* NVME target's supported byte cnt */
 #define NVME_FB_BIT_SHIFT 9    /* PRLI Rsp first burst in 512B units. */
index a71df87..bb2a4a0 100644 (file)
@@ -1367,17 +1367,22 @@ static void
 lpfc_nvmet_host_release(void *hosthandle)
 {
        struct lpfc_nodelist *ndlp = hosthandle;
-       struct lpfc_hba *phba = NULL;
+       struct lpfc_hba *phba = ndlp->phba;
        struct lpfc_nvmet_tgtport *tgtp;
 
-       phba = ndlp->phba;
        if (!phba->targetport || !phba->targetport->private)
                return;
 
        lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-                       "6202 NVMET XPT releasing hosthandle x%px\n",
-                       hosthandle);
+                       "6202 NVMET XPT releasing hosthandle x%px "
+                       "DID x%x xflags x%x refcnt %d\n",
+                       hosthandle, ndlp->nlp_DID, ndlp->fc4_xpt_flags,
+                       kref_read(&ndlp->kref));
        tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+       spin_lock_irq(&ndlp->lock);
+       ndlp->fc4_xpt_flags &= ~NLP_XPT_HAS_HH;
+       spin_unlock_irq(&ndlp->lock);
+       lpfc_nlp_put(ndlp);
        atomic_set(&tgtp->state, 0);
 }
 
@@ -3644,15 +3649,33 @@ out:
 void
 lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 {
+       u32 ndlp_has_hh;
        struct lpfc_nvmet_tgtport *tgtp;
 
-       lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_NVME_ABTS,
+       lpfc_printf_log(phba, KERN_INFO,
+                       LOG_NVME | LOG_NVME_ABTS | LOG_NVME_DISC,
                        "6203 Invalidating hosthandle x%px\n",
                        ndlp);
 
        tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
        atomic_set(&tgtp->state, LPFC_NVMET_INV_HOST_ACTIVE);
 
+       spin_lock_irq(&ndlp->lock);
+       ndlp_has_hh = ndlp->fc4_xpt_flags & NLP_XPT_HAS_HH;
+       spin_unlock_irq(&ndlp->lock);
+
+       /* Do not invalidate any nodes that do not have a hosthandle.
+        * The host_release callbk will cause a node reference
+        * count imbalance and a crash.
+        */
+       if (!ndlp_has_hh) {
+               lpfc_printf_log(phba, KERN_INFO,
+                               LOG_NVME | LOG_NVME_ABTS | LOG_NVME_DISC,
+                               "6204 Skip invalidate on node x%px DID x%x\n",
+                               ndlp, ndlp->nlp_DID);
+               return;
+       }
+
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
        /* Need to get the nvmet_fc_target_port pointer here.*/
        nvmet_fc_invalidate_host(phba->targetport, ndlp);
index dedea5d..176706a 100644 (file)
@@ -3011,23 +3011,44 @@ lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
        axchg->payload = nvmebuf->dbuf.virt;
        INIT_LIST_HEAD(&axchg->list);
 
-       if (phba->nvmet_support)
+       if (phba->nvmet_support) {
                ret = lpfc_nvmet_handle_lsreq(phba, axchg);
-       else
+               spin_lock_irq(&ndlp->lock);
+               if (!ret && !(ndlp->fc4_xpt_flags & NLP_XPT_HAS_HH)) {
+                       ndlp->fc4_xpt_flags |= NLP_XPT_HAS_HH;
+                       spin_unlock_irq(&ndlp->lock);
+
+                       /* This reference is a single occurrence to hold the
+                        * node valid until the nvmet transport calls
+                        * host_release.
+                        */
+                       if (!lpfc_nlp_get(ndlp))
+                               goto out_fail;
+
+                       lpfc_printf_log(phba, KERN_ERR, LOG_NODE,
+                                       "6206 NVMET unsol ls_req ndlp %p "
+                                       "DID x%x xflags x%x refcnt %d\n",
+                                       ndlp, ndlp->nlp_DID,
+                                       ndlp->fc4_xpt_flags,
+                                       kref_read(&ndlp->kref));
+               } else {
+                       spin_unlock_irq(&ndlp->lock);
+               }
+       } else {
                ret = lpfc_nvme_handle_lsreq(phba, axchg);
+       }
 
        /* if zero, LS was successfully handled. If non-zero, LS not handled */
        if (!ret)
                return;
 
+out_fail:
        lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
                        "6155 Drop NVME LS from DID %06X: SID %06X OXID x%X "
                        "NVMe%s handler failed %d\n",
                        did, sid, oxid,
                        (phba->nvmet_support) ? "T" : "I", ret);
 
-out_fail:
-
        /* recycle receive buffer */
        lpfc_in_buf_free(phba, &nvmebuf->dbuf);