scsi: lpfc: Streamline NVME Initiator WQE setup
author James Smart <jsmart2021@gmail.com>
Mon, 5 Mar 2018 20:04:04 +0000 (12:04 -0800)
committer Martin K. Petersen <martin.petersen@oracle.com>
Tue, 13 Mar 2018 01:55:23 +0000 (21:55 -0400)
To reduce latency when initializing WQE content, create templates for the
most common WQEs. This reduces the number of operations taken to set the
content. It's not a lot of speedup, but every bit helps.

This patch updates the NVME initiator path.

[mkp: fixed typo]

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_hw4.h
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nvme.c

index 14a86b5..c7df226 100644 (file)
@@ -565,6 +565,7 @@ void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba);
 void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
                                struct lpfc_iocbq *cmdiocb,
                                struct lpfc_wcqe_complete *abts_cmpl);
+void lpfc_nvme_cmd_template(void);
 extern int lpfc_enable_nvmet_cnt;
 extern unsigned long long lpfc_enable_nvmet[];
 extern int lpfc_no_hba_reset_cnt;
index a2f372d..98b8055 100644 (file)
@@ -4183,6 +4183,7 @@ struct wqe_common {
 #define wqe_iod_SHIFT         13
 #define wqe_iod_MASK          0x00000001
 #define wqe_iod_WORD          word10
+#define LPFC_WQE_IOD_NONE      0
 #define LPFC_WQE_IOD_WRITE     0
 #define LPFC_WQE_IOD_READ      1
 #define wqe_dbde_SHIFT        14
index 50bc6c6..68adea8 100644 (file)
@@ -12583,6 +12583,7 @@ lpfc_init(void)
                fc_release_transport(lpfc_transport_template);
                return -ENOMEM;
        }
+       lpfc_nvme_cmd_template();
 
        /* Initialize in case vector mapping is needed */
        lpfc_used_cpu = NULL;
index 6d215f2..52dd947 100644 (file)
@@ -65,6 +65,136 @@ lpfc_release_nvme_buf(struct lpfc_hba *, struct lpfc_nvme_buf *);
 
 static struct nvme_fc_port_template lpfc_nvme_template;
 
+/*
+ * Pre-built WQE images for the three NVME initiator command types.
+ * They are filled in by lpfc_nvme_cmd_template() and copied from by
+ * lpfc_nvme_prep_io_cmd(); nothing outside this file is given a
+ * declaration for them, so keep them file-local.
+ */
+static union lpfc_wqe128 lpfc_iread_cmd_template;
+static union lpfc_wqe128 lpfc_iwrite_cmd_template;
+static union lpfc_wqe128 lpfc_icmnd_cmd_template;
+
+/**
+ * lpfc_nvme_cmd_template - Setup WQE templates for NVME IOs
+ *
+ * Zeroes each template and then sets the fields of Words 7, 10 and 11
+ * that are the same for every I/O of that type. lpfc_nvme_prep_io_cmd()
+ * copies words 7 - 11 of the IREAD/IWRITE templates, or words 4 - 11 of
+ * the ICMND template, into the per-I/O WQE. Fields marked "variable"
+ * below are written per I/O after that copy.
+ *
+ * Called from lpfc_init().
+ **/
+void
+lpfc_nvme_cmd_template(void)
+{
+       union lpfc_wqe128 *wqe;
+
+       /* IREAD template */
+       wqe = &lpfc_iread_cmd_template;
+       memset(wqe, 0, sizeof(*wqe));
+
+       /* Word 0, 1, 2 - BDE is variable */
+
+       /* Word 3 - cmd_buff_len, payload_offset_len is zero */
+
+       /* Word 4 - total_xfer_len is variable */
+
+       /* Word 5 - is zero */
+
+       /* Word 6 - ctxt_tag, xri_tag is variable */
+
+       /* Word 7 */
+       bf_set(wqe_cmnd, &wqe->fcp_iread.wqe_com, CMD_FCP_IREAD64_WQE);
+       bf_set(wqe_pu, &wqe->fcp_iread.wqe_com, PARM_READ_CHECK);
+       bf_set(wqe_class, &wqe->fcp_iread.wqe_com, CLASS3);
+       bf_set(wqe_ct, &wqe->fcp_iread.wqe_com, SLI4_CT_RPI);
+
+       /* Word 8 - abort_tag is variable */
+
+       /* Word 9  - reqtag is variable */
+
+       /*
+        * Word 10 - dbde, wqes is variable
+        * The dbde = 0 / wqes = 1 defaults match the embedded-command case;
+        * lpfc_nvme_adj_fcp_sgls() sets dbde = 1 / wqes = 0 otherwise.
+        */
+       bf_set(wqe_qosd, &wqe->fcp_iread.wqe_com, 0);
+       bf_set(wqe_nvme, &wqe->fcp_iread.wqe_com, 1);
+       bf_set(wqe_iod, &wqe->fcp_iread.wqe_com, LPFC_WQE_IOD_READ);
+       bf_set(wqe_lenloc, &wqe->fcp_iread.wqe_com, LPFC_WQE_LENLOC_WORD4);
+       bf_set(wqe_dbde, &wqe->fcp_iread.wqe_com, 0);
+       bf_set(wqe_wqes, &wqe->fcp_iread.wqe_com, 1);
+
+       /*
+        * Word 11 - pbde is variable
+        * Defaults to 1; lpfc_nvme_prep_io_dma() clears it when no PBDE
+        * is supplied.
+        */
+       bf_set(wqe_cmd_type, &wqe->fcp_iread.wqe_com, NVME_READ_CMD);
+       bf_set(wqe_cqid, &wqe->fcp_iread.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
+       bf_set(wqe_pbde, &wqe->fcp_iread.wqe_com, 1);
+
+       /* Word 12 - is zero */
+
+       /* Word 13, 14, 15 - PBDE is variable */
+
+       /* IWRITE template */
+       wqe = &lpfc_iwrite_cmd_template;
+       memset(wqe, 0, sizeof(*wqe));
+
+       /* Word 0, 1, 2 - BDE is variable */
+
+       /* Word 3 - cmd_buff_len, payload_offset_len is zero */
+
+       /* Word 4 - total_xfer_len is variable */
+
+       /* Word 5 - initial_xfer_len is variable */
+
+       /* Word 6 - ctxt_tag, xri_tag is variable */
+
+       /* Word 7 */
+       bf_set(wqe_cmnd, &wqe->fcp_iwrite.wqe_com, CMD_FCP_IWRITE64_WQE);
+       bf_set(wqe_pu, &wqe->fcp_iwrite.wqe_com, PARM_READ_CHECK);
+       bf_set(wqe_class, &wqe->fcp_iwrite.wqe_com, CLASS3);
+       bf_set(wqe_ct, &wqe->fcp_iwrite.wqe_com, SLI4_CT_RPI);
+
+       /* Word 8 - abort_tag is variable */
+
+       /* Word 9  - reqtag is variable */
+
+       /*
+        * Word 10 - dbde, wqes is variable
+        * The dbde = 0 / wqes = 1 defaults match the embedded-command case;
+        * lpfc_nvme_adj_fcp_sgls() sets dbde = 1 / wqes = 0 otherwise.
+        */
+       bf_set(wqe_qosd, &wqe->fcp_iwrite.wqe_com, 0);
+       bf_set(wqe_nvme, &wqe->fcp_iwrite.wqe_com, 1);
+       bf_set(wqe_iod, &wqe->fcp_iwrite.wqe_com, LPFC_WQE_IOD_WRITE);
+       bf_set(wqe_lenloc, &wqe->fcp_iwrite.wqe_com, LPFC_WQE_LENLOC_WORD4);
+       bf_set(wqe_dbde, &wqe->fcp_iwrite.wqe_com, 0);
+       bf_set(wqe_wqes, &wqe->fcp_iwrite.wqe_com, 1);
+
+       /*
+        * Word 11 - pbde is variable
+        * Defaults to 1; lpfc_nvme_prep_io_dma() clears it when no PBDE
+        * is supplied.
+        */
+       bf_set(wqe_cmd_type, &wqe->fcp_iwrite.wqe_com, NVME_WRITE_CMD);
+       bf_set(wqe_cqid, &wqe->fcp_iwrite.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
+       bf_set(wqe_pbde, &wqe->fcp_iwrite.wqe_com, 1);
+
+       /* Word 12 - is zero */
+
+       /* Word 13, 14, 15 - PBDE is variable */
+
+       /* ICMND template */
+       wqe = &lpfc_icmnd_cmd_template;
+       memset(wqe, 0, sizeof(*wqe));
+
+       /* Word 0, 1, 2 - BDE is variable */
+
+       /* Word 3 - payload_offset_len is variable */
+
+       /* Word 4, 5 - is zero */
+
+       /* Word 6 - ctxt_tag, xri_tag is variable */
+
+       /* Word 7 */
+       bf_set(wqe_cmnd, &wqe->fcp_icmd.wqe_com, CMD_FCP_ICMND64_WQE);
+       bf_set(wqe_pu, &wqe->fcp_icmd.wqe_com, 0);
+       bf_set(wqe_class, &wqe->fcp_icmd.wqe_com, CLASS3);
+       bf_set(wqe_ct, &wqe->fcp_icmd.wqe_com, SLI4_CT_RPI);
+
+       /* Word 8 - abort_tag is variable */
+
+       /* Word 9  - reqtag is variable */
+
+       /*
+        * Word 10 - dbde, wqes is variable
+        * The dbde = 0 / wqes = 1 defaults match the embedded-command case;
+        * lpfc_nvme_adj_fcp_sgls() sets dbde = 1 / wqes = 0 otherwise.
+        */
+       bf_set(wqe_qosd, &wqe->fcp_icmd.wqe_com, 1);
+       bf_set(wqe_nvme, &wqe->fcp_icmd.wqe_com, 1);
+       bf_set(wqe_iod, &wqe->fcp_icmd.wqe_com, LPFC_WQE_IOD_NONE);
+       bf_set(wqe_lenloc, &wqe->fcp_icmd.wqe_com, LPFC_WQE_LENLOC_NONE);
+       bf_set(wqe_dbde, &wqe->fcp_icmd.wqe_com, 0);
+       bf_set(wqe_wqes, &wqe->fcp_icmd.wqe_com, 1);
+
+       /* Word 11 */
+       bf_set(wqe_cmd_type, &wqe->fcp_icmd.wqe_com, FCP_COMMAND);
+       bf_set(wqe_cqid, &wqe->fcp_icmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
+       bf_set(wqe_pbde, &wqe->fcp_icmd.wqe_com, 0);
+
+       /* Word 12, 13, 14, 15 - is zero */
+}
+
 /**
  * lpfc_nvme_create_queue -
  * @lpfc_pnvme: Pointer to the driver's nvme instance data
@@ -612,7 +742,7 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
 }
 
 /* Fix up the existing sgls for NVME IO. */
-static void
+static inline void
 lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
                       struct lpfc_nvme_buf *lpfc_ncmd,
                       struct nvmefc_fcp_req *nCmd)
@@ -648,6 +778,37 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
                wqe->generic.bde.tus.f.bdeSize = 56;
                wqe->generic.bde.addrHigh = 0;
                wqe->generic.bde.addrLow =  64;  /* Word 16 */
+
+               /* Word 10  - dbde is 0, wqes is 1 in template */
+
+               /*
+                * Embed the payload in the last half of the WQE
+                * WQE words 16-30 get the NVME CMD IU payload
+                *
+                * WQE words 16-19 get payload Words 1-4
+                * WQE words 20-21 get payload Words 6-7
+                * WQE words 22-29 get payload Words 16-23
+                */
+               wptr = &wqe->words[16];  /* WQE ptr */
+               dptr = (uint32_t *)nCmd->cmdaddr;  /* payload ptr */
+               dptr++;                 /* Skip Word 0 in payload */
+
+               *wptr++ = *dptr++;      /* Word 1 */
+               *wptr++ = *dptr++;      /* Word 2 */
+               *wptr++ = *dptr++;      /* Word 3 */
+               *wptr++ = *dptr++;      /* Word 4 */
+               dptr++;                 /* Skip Word 5 in payload */
+               *wptr++ = *dptr++;      /* Word 6 */
+               *wptr++ = *dptr++;      /* Word 7 */
+               dptr += 8;              /* Skip Words 8-15 in payload */
+               *wptr++ = *dptr++;      /* Word 16 */
+               *wptr++ = *dptr++;      /* Word 17 */
+               *wptr++ = *dptr++;      /* Word 18 */
+               *wptr++ = *dptr++;      /* Word 19 */
+               *wptr++ = *dptr++;      /* Word 20 */
+               *wptr++ = *dptr++;      /* Word 21 */
+               *wptr++ = *dptr++;      /* Word 22 */
+               *wptr   = *dptr;        /* Word 23 */
        } else {
                sgl->addr_hi = cpu_to_le32(putPaddrHigh(nCmd->cmddma));
                sgl->addr_lo = cpu_to_le32(putPaddrLow(nCmd->cmddma));
@@ -657,6 +818,10 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
                wqe->generic.bde.tus.f.bdeSize = nCmd->cmdlen;
                wqe->generic.bde.addrHigh = sgl->addr_hi;
                wqe->generic.bde.addrLow =  sgl->addr_lo;
+
+               /* Word 10 */
+               bf_set(wqe_dbde, &wqe->generic.wqe_com, 1);
+               bf_set(wqe_wqes, &wqe->generic.wqe_com, 0);
        }
 
        sgl++;
@@ -671,50 +836,6 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
                bf_set(lpfc_sli4_sge_last, sgl, 1);
        sgl->word2 = cpu_to_le32(sgl->word2);
        sgl->sge_len = cpu_to_le32(nCmd->rsplen);
-
-       /* Word 3 */
-       bf_set(payload_offset_len, &wqe->fcp_icmd,
-              (nCmd->rsplen + nCmd->cmdlen));
-
-       /* Word 10 */
-       bf_set(wqe_nvme, &wqe->fcp_icmd.wqe_com, 1);
-
-       if (!phba->cfg_nvme_embed_cmd) {
-               bf_set(wqe_dbde, &wqe->generic.wqe_com, 1);
-               bf_set(wqe_wqes, &wqe->fcp_icmd.wqe_com, 0);
-               return;
-       }
-       bf_set(wqe_dbde, &wqe->generic.wqe_com, 0);
-       bf_set(wqe_wqes, &wqe->fcp_icmd.wqe_com, 1);
-
-       /*
-        * Embed the payload in the last half of the WQE
-        * WQE words 16-30 get the NVME CMD IU payload
-        *
-        * WQE words 16-19 get payload Words 1-4
-        * WQE words 20-21 get payload Words 6-7
-        * WQE words 22-29 get payload Words 16-23
-        */
-       wptr = &wqe->words[16];  /* WQE ptr */
-       dptr = (uint32_t *)nCmd->cmdaddr;  /* payload ptr */
-       dptr++;                 /* Skip Word 0 in payload */
-
-       *wptr++ = *dptr++;      /* Word 1 */
-       *wptr++ = *dptr++;      /* Word 2 */
-       *wptr++ = *dptr++;      /* Word 3 */
-       *wptr++ = *dptr++;      /* Word 4 */
-       dptr++;                 /* Skip Word 5 in payload */
-       *wptr++ = *dptr++;      /* Word 6 */
-       *wptr++ = *dptr++;      /* Word 7 */
-       dptr += 8;              /* Skip Words 8-15 in payload */
-       *wptr++ = *dptr++;      /* Word 16 */
-       *wptr++ = *dptr++;      /* Word 17 */
-       *wptr++ = *dptr++;      /* Word 18 */
-       *wptr++ = *dptr++;      /* Word 19 */
-       *wptr++ = *dptr++;      /* Word 20 */
-       *wptr++ = *dptr++;      /* Word 21 */
-       *wptr++ = *dptr++;      /* Word 22 */
-       *wptr   = *dptr;        /* Word 23 */
 }
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -1057,9 +1178,16 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
         * There are three possibilities here - use scatter-gather segment, use
         * the single mapping, or neither.
         */
-       wqe->fcp_iwrite.initial_xfer_len = 0;
        if (nCmd->sg_cnt) {
                if (nCmd->io_dir == NVMEFC_FCP_WRITE) {
+                       /* From the iwrite template, initialize words 7 - 11 */
+                       memcpy(&wqe->words[7],
+                              &lpfc_iwrite_cmd_template.words[7],
+                              sizeof(uint32_t) * 5);
+
+                       /* Word 4 */
+                       wqe->fcp_iwrite.total_xfer_len = nCmd->payload_length;
+
                        /* Word 5 */
                        if ((phba->cfg_nvme_enable_fb) &&
                            (pnode->nlp_flag & NLP_FIRSTBURST)) {
@@ -1070,69 +1198,28 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
                                else
                                        wqe->fcp_iwrite.initial_xfer_len =
                                                pnode->nvme_fb_size;
+                       } else {
+                               wqe->fcp_iwrite.initial_xfer_len = 0;
                        }
-
-                       /* Word 7 */
-                       bf_set(wqe_cmnd, &wqe->generic.wqe_com,
-                              CMD_FCP_IWRITE64_WQE);
-                       bf_set(wqe_pu, &wqe->generic.wqe_com,
-                              PARM_READ_CHECK);
-
-                       /* Word 10 */
-                       bf_set(wqe_qosd, &wqe->fcp_iwrite.wqe_com, 0);
-                       bf_set(wqe_iod, &wqe->fcp_iwrite.wqe_com,
-                              LPFC_WQE_IOD_WRITE);
-                       bf_set(wqe_lenloc, &wqe->fcp_iwrite.wqe_com,
-                              LPFC_WQE_LENLOC_WORD4);
-                       if (phba->cfg_nvme_oas)
-                               bf_set(wqe_oas, &wqe->fcp_iwrite.wqe_com, 1);
-
-                       /* Word 11 */
-                       bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
-                              NVME_WRITE_CMD);
-
                        atomic_inc(&phba->fc4NvmeOutputRequests);
                } else {
-                       /* Word 7 */
-                       bf_set(wqe_cmnd, &wqe->generic.wqe_com,
-                              CMD_FCP_IREAD64_WQE);
-                       bf_set(wqe_pu, &wqe->generic.wqe_com,
-                              PARM_READ_CHECK);
-
-                       /* Word 10 */
-                       bf_set(wqe_qosd, &wqe->fcp_iread.wqe_com, 0);
-                       bf_set(wqe_iod, &wqe->fcp_iread.wqe_com,
-                              LPFC_WQE_IOD_READ);
-                       bf_set(wqe_lenloc, &wqe->fcp_iread.wqe_com,
-                              LPFC_WQE_LENLOC_WORD4);
-                       if (phba->cfg_nvme_oas)
-                               bf_set(wqe_oas, &wqe->fcp_iread.wqe_com, 1);
-
-                       /* Word 11 */
-                       bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
-                              NVME_READ_CMD);
+                       /* From the iread template, initialize words 7 - 11 */
+                       memcpy(&wqe->words[7],
+                              &lpfc_iread_cmd_template.words[7],
+                              sizeof(uint32_t) * 5);
+
+                       /* Word 4 */
+                       wqe->fcp_iread.total_xfer_len = nCmd->payload_length;
+
+                       /* Word 5 */
+                       wqe->fcp_iread.rsrvd5 = 0;
 
                        atomic_inc(&phba->fc4NvmeInputRequests);
                }
        } else {
-               /* Word 4 */
-               wqe->fcp_icmd.rsrvd4 = 0;
-
-               /* Word 7 */
-               bf_set(wqe_cmnd, &wqe->generic.wqe_com, CMD_FCP_ICMND64_WQE);
-               bf_set(wqe_pu, &wqe->generic.wqe_com, 0);
-
-               /* Word 10 */
-               bf_set(wqe_qosd, &wqe->fcp_icmd.wqe_com, 1);
-               bf_set(wqe_iod, &wqe->fcp_icmd.wqe_com, LPFC_WQE_IOD_WRITE);
-               bf_set(wqe_lenloc, &wqe->fcp_icmd.wqe_com,
-                      LPFC_WQE_LENLOC_NONE);
-               if (phba->cfg_nvme_oas)
-                       bf_set(wqe_oas, &wqe->fcp_icmd.wqe_com, 1);
-
-               /* Word 11 */
-               bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD);
-
+               /* From the icmnd template, initialize words 4 - 11 */
+               memcpy(&wqe->words[4], &lpfc_icmnd_cmd_template.words[4],
+                      sizeof(uint32_t) * 8);
                atomic_inc(&phba->fc4NvmeControlRequests);
        }
        /*
@@ -1140,25 +1227,21 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
         * of the nvme_cmnd request_buffer
         */
 
+       /* Word 3 */
+       bf_set(payload_offset_len, &wqe->fcp_icmd,
+              (nCmd->rsplen + nCmd->cmdlen));
+
        /* Word 6 */
        bf_set(wqe_ctxt_tag, &wqe->generic.wqe_com,
               phba->sli4_hba.rpi_ids[pnode->nlp_rpi]);
        bf_set(wqe_xri_tag, &wqe->generic.wqe_com, pwqeq->sli4_xritag);
 
-       /* Word 7 */
-       /* Preserve Class data in the ndlp. */
-       bf_set(wqe_class, &wqe->generic.wqe_com,
-              (pnode->nlp_fcp_info & 0x0f));
-
        /* Word 8 */
        wqe->generic.wqe_com.abort_tag = pwqeq->iotag;
 
        /* Word 9 */
        bf_set(wqe_reqtag, &wqe->generic.wqe_com, pwqeq->iotag);
 
-       /* Word 11 */
-       bf_set(wqe_cqid, &wqe->generic.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
-
        pwqeq->vport = vport;
        return 0;
 }
@@ -1269,12 +1352,14 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
                                le32_to_cpu(first_data_sgl->sge_len);
                        bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
                        bde->tus.w = cpu_to_le32(bde->tus.w);
-                       bf_set(wqe_pbde, &wqe->generic.wqe_com, 1);
-               } else
+                       /* wqe_pbde is 1 in template */
+               } else {
+                       memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3));
                        bf_set(wqe_pbde, &wqe->generic.wqe_com, 0);
-
+               }
        } else {
                bf_set(wqe_pbde, &wqe->generic.wqe_com, 0);
+               memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3));
 
                /* For this clause to be valid, the payload_length
                 * and sg_cnt must zero.
@@ -1287,12 +1372,6 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
                        return 1;
                }
        }
-
-       /*
-        * Due to difference in data length between DIF/non-DIF paths,
-        * we need to set word 4 of WQE here
-        */
-       wqe->fcp_iread.total_xfer_len = nCmd->payload_length;
        return 0;
 }
 
@@ -2175,14 +2254,8 @@ lpfc_new_nvme_buf(struct lpfc_vport *vport, int num_to_alloc)
 
                lpfc_ncmd->cur_iocbq.context1 = lpfc_ncmd;
 
-               /* Word 7 */
-               bf_set(wqe_erp, &wqe->generic.wqe_com, 0);
-               /* NVME upper layers will time things out, if needed */
-               bf_set(wqe_tmo, &wqe->generic.wqe_com, 0);
-
-               /* Word 10 */
-               bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
-               bf_set(wqe_dbde, &wqe->generic.wqe_com, 1);
+               /* Initialize WQE */
+               memset(wqe, 0, sizeof(union lpfc_wqe));
 
                /* add the nvme buffer to a post list */
                list_add_tail(&lpfc_ncmd->list, &post_nblist);