scsi: lpfc: Add if_type=6 support for cycling valid bits
author James Smart <jsmart2021@gmail.com>
Thu, 22 Feb 2018 16:18:46 +0000 (08:18 -0800)
committer Martin K. Petersen <martin.petersen@oracle.com>
Fri, 23 Feb 2018 01:39:29 +0000 (20:39 -0500)
Traditional SLI4 required the driver to clear Valid bits on
EQEs and CQEs after consuming them.

The new if_type=6 hardware cycles the value that marks an entry
as valid on each iteration through the queue, so the driver no
longer has to touch the valid bits. This also means that all the
CPU cache dirtying, and perhaps the flushes/refills done by the
hardware in accessing the EQ/CQ elements, are eliminated.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_hw4.h
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli4.h

index 0c33510..dba724e 100644 (file)
@@ -1040,6 +1040,9 @@ struct eq_context {
 #define lpfc_eq_context_valid_SHIFT    29
 #define lpfc_eq_context_valid_MASK     0x00000001
 #define lpfc_eq_context_valid_WORD     word0
+#define lpfc_eq_context_autovalid_SHIFT 28
+#define lpfc_eq_context_autovalid_MASK  0x00000001
+#define lpfc_eq_context_autovalid_WORD  word0
        uint32_t word1;
 #define lpfc_eq_context_count_SHIFT    26
 #define lpfc_eq_context_count_MASK     0x00000003
@@ -1173,6 +1176,9 @@ struct cq_context {
 #define LPFC_CQ_CNT_512                0x1
 #define LPFC_CQ_CNT_1024       0x2
 #define LPFC_CQ_CNT_WORD7      0x3
+#define lpfc_cq_context_autovalid_SHIFT 15
+#define lpfc_cq_context_autovalid_MASK  0x00000001
+#define lpfc_cq_context_autovalid_WORD  word0
        uint32_t word1;
 #define lpfc_cq_eq_id_SHIFT            22      /* Version 0 Only */
 #define lpfc_cq_eq_id_MASK             0x000000FF
@@ -1231,9 +1237,9 @@ struct lpfc_mbx_cq_create_set {
 #define lpfc_mbx_cq_create_set_cqe_size_SHIFT  25
 #define lpfc_mbx_cq_create_set_cqe_size_MASK   0x00000003
 #define lpfc_mbx_cq_create_set_cqe_size_WORD   word1
-#define lpfc_mbx_cq_create_set_auto_SHIFT      15
-#define lpfc_mbx_cq_create_set_auto_MASK       0x0000001
-#define lpfc_mbx_cq_create_set_auto_WORD       word1
+#define lpfc_mbx_cq_create_set_autovalid_SHIFT 15
+#define lpfc_mbx_cq_create_set_autovalid_MASK  0x0000001
+#define lpfc_mbx_cq_create_set_autovalid_WORD  word1
 #define lpfc_mbx_cq_create_set_nodelay_SHIFT   14
 #define lpfc_mbx_cq_create_set_nodelay_MASK    0x00000001
 #define lpfc_mbx_cq_create_set_nodelay_WORD    word1
@@ -3288,6 +3294,9 @@ struct lpfc_sli4_parameters {
 #define cfg_sli_hint_2_MASK                    0x0000001f
 #define cfg_sli_hint_2_WORD                    word1
        uint32_t word2;
+#define cfg_eqav_SHIFT                         31
+#define cfg_eqav_MASK                          0x00000001
+#define cfg_eqav_WORD                          word2
        uint32_t word3;
        uint32_t word4;
 #define cfg_cqv_SHIFT                          14
@@ -3296,6 +3305,9 @@ struct lpfc_sli4_parameters {
 #define cfg_cqpsize_SHIFT                      16
 #define cfg_cqpsize_MASK                       0x000000ff
 #define cfg_cqpsize_WORD                       word4
+#define cfg_cqav_SHIFT                         31
+#define cfg_cqav_MASK                          0x00000001
+#define cfg_cqav_WORD                          word4
        uint32_t word5;
        uint32_t word6;
 #define cfg_mqv_SHIFT                          14
index 576ab7e..96a37e4 100644 (file)
@@ -8063,6 +8063,7 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
                                wqidx);
                return 1;
        }
+       qdesc->qe_valid = 1;
        phba->sli4_hba.nvme_cq[wqidx] = qdesc;
 
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
@@ -8100,6 +8101,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
                        "0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx);
                return 1;
        }
+       qdesc->qe_valid = 1;
        phba->sli4_hba.fcp_cq[wqidx] = qdesc;
 
        /* Create Fast Path FCP WQs */
@@ -8293,6 +8295,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                        "0497 Failed allocate EQ (%d)\n", idx);
                        goto out_error;
                }
+               qdesc->qe_valid = 1;
                phba->sli4_hba.hba_eq[idx] = qdesc;
        }
 
@@ -8318,6 +8321,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                        "CQ Set (%d)\n", idx);
                                goto out_error;
                        }
+                       qdesc->qe_valid = 1;
                        phba->sli4_hba.nvmet_cqset[idx] = qdesc;
                }
        }
@@ -8335,6 +8339,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                "0500 Failed allocate slow-path mailbox CQ\n");
                goto out_error;
        }
+       qdesc->qe_valid = 1;
        phba->sli4_hba.mbx_cq = qdesc;
 
        /* Create slow-path ELS Complete Queue */
@@ -8346,6 +8351,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                "0501 Failed allocate slow-path ELS CQ\n");
                goto out_error;
        }
+       qdesc->qe_valid = 1;
        phba->sli4_hba.els_cq = qdesc;
 
 
@@ -8391,6 +8397,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                                        "6079 Failed allocate NVME LS CQ\n");
                        goto out_error;
                }
+               qdesc->qe_valid = 1;
                phba->sli4_hba.nvmels_cq = qdesc;
 
                /* Create NVME LS Work Queue */
@@ -10569,6 +10576,8 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
        sli4_params->mqv = bf_get(cfg_mqv, mbx_sli4_parameters);
        sli4_params->wqv = bf_get(cfg_wqv, mbx_sli4_parameters);
        sli4_params->rqv = bf_get(cfg_rqv, mbx_sli4_parameters);
+       sli4_params->eqav = bf_get(cfg_eqav, mbx_sli4_parameters);
+       sli4_params->cqav = bf_get(cfg_cqav, mbx_sli4_parameters);
        sli4_params->wqsize = bf_get(cfg_wqsize, mbx_sli4_parameters);
        sli4_params->sgl_pages_max = bf_get(cfg_sgl_page_cnt,
                                            mbx_sli4_parameters);
@@ -12387,6 +12396,7 @@ lpfc_fof_queue_create(struct lpfc_hba *phba)
        if (!qdesc)
                goto out_error;
 
+       qdesc->qe_valid = 1;
        phba->sli4_hba.fof_eq = qdesc;
 
        if (phba->cfg_fof) {
@@ -12405,6 +12415,7 @@ lpfc_fof_queue_create(struct lpfc_hba *phba)
                if (!qdesc)
                        goto out_error;
 
+               qdesc->qe_valid = 1;
                phba->sli4_hba.oas_cq = qdesc;
 
                /* Create OAS WQ */
index 265f1fa..925a40d 100644 (file)
@@ -283,16 +283,18 @@ lpfc_sli4_mq_release(struct lpfc_queue *q)
 static struct lpfc_eqe *
 lpfc_sli4_eq_get(struct lpfc_queue *q)
 {
+       struct lpfc_hba *phba;
        struct lpfc_eqe *eqe;
        uint32_t idx;
 
        /* sanity check on queue memory */
        if (unlikely(!q))
                return NULL;
+       phba = q->phba;
        eqe = q->qe[q->hba_index].eqe;
 
        /* If the next EQE is not valid then we are done */
-       if (!bf_get_le32(lpfc_eqe_valid, eqe))
+       if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
                return NULL;
        /* If the host has not yet processed the next entry then we are done */
        idx = ((q->hba_index + 1) % q->entry_count);
@@ -300,6 +302,10 @@ lpfc_sli4_eq_get(struct lpfc_queue *q)
                return NULL;
 
        q->hba_index = idx;
+       /* if the index wrapped around, toggle the valid bit */
+       if (phba->sli4_hba.pc_sli4_params.eqav && !q->hba_index)
+               q->qe_valid = (q->qe_valid) ? 0 : 1;
+
 
        /*
         * insert barrier for instruction interlock : data from the hardware
@@ -371,17 +377,21 @@ uint32_t
 lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
 {
        uint32_t released = 0;
+       struct lpfc_hba *phba;
        struct lpfc_eqe *temp_eqe;
        struct lpfc_register doorbell;
 
        /* sanity check on queue memory */
        if (unlikely(!q))
                return 0;
+       phba = q->phba;
 
        /* while there are valid entries */
        while (q->hba_index != q->host_index) {
-               temp_eqe = q->qe[q->host_index].eqe;
-               bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
+               if (!phba->sli4_hba.pc_sli4_params.eqav) {
+                       temp_eqe = q->qe[q->host_index].eqe;
+                       bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
+               }
                released++;
                q->host_index = ((q->host_index + 1) % q->entry_count);
        }
@@ -425,17 +435,21 @@ uint32_t
 lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
 {
        uint32_t released = 0;
+       struct lpfc_hba *phba;
        struct lpfc_eqe *temp_eqe;
        struct lpfc_register doorbell;
 
        /* sanity check on queue memory */
        if (unlikely(!q))
                return 0;
+       phba = q->phba;
 
        /* while there are valid entries */
        while (q->hba_index != q->host_index) {
-               temp_eqe = q->qe[q->host_index].eqe;
-               bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
+               if (!phba->sli4_hba.pc_sli4_params.eqav) {
+                       temp_eqe = q->qe[q->host_index].eqe;
+                       bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
+               }
                released++;
                q->host_index = ((q->host_index + 1) % q->entry_count);
        }
@@ -467,23 +481,28 @@ lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
 static struct lpfc_cqe *
 lpfc_sli4_cq_get(struct lpfc_queue *q)
 {
+       struct lpfc_hba *phba;
        struct lpfc_cqe *cqe;
        uint32_t idx;
 
        /* sanity check on queue memory */
        if (unlikely(!q))
                return NULL;
+       phba = q->phba;
+       cqe = q->qe[q->hba_index].cqe;
 
        /* If the next CQE is not valid then we are done */
-       if (!bf_get_le32(lpfc_cqe_valid, q->qe[q->hba_index].cqe))
+       if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
                return NULL;
        /* If the host has not yet processed the next entry then we are done */
        idx = ((q->hba_index + 1) % q->entry_count);
        if (idx == q->host_index)
                return NULL;
 
-       cqe = q->qe[q->hba_index].cqe;
        q->hba_index = idx;
+       /* if the index wrapped around, toggle the valid bit */
+       if (phba->sli4_hba.pc_sli4_params.cqav && !q->hba_index)
+               q->qe_valid = (q->qe_valid) ? 0 : 1;
 
        /*
         * insert barrier for instruction interlock : data from the hardware
@@ -516,16 +535,21 @@ uint32_t
 lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm)
 {
        uint32_t released = 0;
+       struct lpfc_hba *phba;
        struct lpfc_cqe *temp_qe;
        struct lpfc_register doorbell;
 
        /* sanity check on queue memory */
        if (unlikely(!q))
                return 0;
+       phba = q->phba;
+
        /* while there are valid entries */
        while (q->hba_index != q->host_index) {
-               temp_qe = q->qe[q->host_index].cqe;
-               bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
+               if (!phba->sli4_hba.pc_sli4_params.cqav) {
+                       temp_qe = q->qe[q->host_index].cqe;
+                       bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
+               }
                released++;
                q->host_index = ((q->host_index + 1) % q->entry_count);
        }
@@ -564,16 +588,21 @@ uint32_t
 lpfc_sli4_if6_cq_release(struct lpfc_queue *q, bool arm)
 {
        uint32_t released = 0;
+       struct lpfc_hba *phba;
        struct lpfc_cqe *temp_qe;
        struct lpfc_register doorbell;
 
        /* sanity check on queue memory */
        if (unlikely(!q))
                return 0;
+       phba = q->phba;
+
        /* while there are valid entries */
        while (q->hba_index != q->host_index) {
-               temp_qe = q->qe[q->host_index].cqe;
-               bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
+               if (!phba->sli4_hba.pc_sli4_params.cqav) {
+                       temp_qe = q->qe[q->host_index].cqe;
+                       bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
+               }
                released++;
                q->host_index = ((q->host_index + 1) % q->entry_count);
        }
@@ -7367,6 +7396,7 @@ lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba)
        struct lpfc_queue *mcq;
        struct lpfc_mcqe *mcqe;
        bool pending_completions = false;
+       uint8_t qe_valid;
 
        if (unlikely(!phba) || (phba->sli_rev != LPFC_SLI_REV4))
                return false;
@@ -7375,7 +7405,8 @@ lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba)
 
        mcq = phba->sli4_hba.mbx_cq;
        idx = mcq->hba_index;
-       while (bf_get_le32(lpfc_cqe_valid, mcq->qe[idx].cqe)) {
+       qe_valid = mcq->qe_valid;
+       while (bf_get_le32(lpfc_cqe_valid, mcq->qe[idx].cqe) == qe_valid) {
                mcqe = (struct lpfc_mcqe *)mcq->qe[idx].cqe;
                if (bf_get_le32(lpfc_trailer_completed, mcqe) &&
                    (!bf_get_le32(lpfc_trailer_async, mcqe))) {
@@ -7385,6 +7416,10 @@ lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba)
                idx = (idx + 1) % mcq->entry_count;
                if (mcq->hba_index == idx)
                        break;
+
+               /* if the index wrapped around, toggle the valid bit */
+               if (phba->sli4_hba.pc_sli4_params.cqav && !idx)
+                       qe_valid = (qe_valid) ? 0 : 1;
        }
        return pending_completions;
 
@@ -8258,7 +8293,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq,
        } else if (flag == MBX_POLL) {
                lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
                                "(%d):2542 Try to issue mailbox command "
-                               "x%x (x%x/x%x) synchronously ahead of async"
+                               "x%x (x%x/x%x) synchronously ahead of async "
                                "mailbox command queue: x%x x%x\n",
                                mboxq->vport ? mboxq->vport->vpi : 0,
                                mboxq->u.mb.mbxCommand,
@@ -14335,11 +14370,21 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
                         LPFC_MBOX_OPCODE_EQ_CREATE,
                         length, LPFC_SLI4_MBX_EMBED);
        eq_create = &mbox->u.mqe.un.eq_create;
+       shdr = (union lpfc_sli4_cfg_shdr *) &eq_create->header.cfg_shdr;
        bf_set(lpfc_mbx_eq_create_num_pages, &eq_create->u.request,
               eq->page_count);
        bf_set(lpfc_eq_context_size, &eq_create->u.request.context,
               LPFC_EQE_SIZE);
        bf_set(lpfc_eq_context_valid, &eq_create->u.request.context, 1);
+
+       /* Use version 2 of CREATE_EQ if eqav is set */
+       if (phba->sli4_hba.pc_sli4_params.eqav) {
+               bf_set(lpfc_mbox_hdr_version, &shdr->request,
+                      LPFC_Q_CREATE_VERSION_2);
+               bf_set(lpfc_eq_context_autovalid, &eq_create->u.request.context,
+                      phba->sli4_hba.pc_sli4_params.eqav);
+       }
+
        /* don't setup delay multiplier using EQ_CREATE */
        dmult = 0;
        bf_set(lpfc_eq_context_delay_multi, &eq_create->u.request.context,
@@ -14384,7 +14429,6 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
        mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
        mbox->context1 = NULL;
        rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
-       shdr = (union lpfc_sli4_cfg_shdr *) &eq_create->header.cfg_shdr;
        shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
        shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
        if (shdr_status || shdr_add_status || rc) {
@@ -14467,6 +14511,8 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
                       (cq->page_size / SLI4_PAGE_SIZE));
                bf_set(lpfc_cq_eq_id_2, &cq_create->u.request.context,
                       eq->queue_id);
+               bf_set(lpfc_cq_context_autovalid, &cq_create->u.request.context,
+                      phba->sli4_hba.pc_sli4_params.cqav);
        } else {
                bf_set(lpfc_cq_eq_id, &cq_create->u.request.context,
                       eq->queue_id);
@@ -14638,6 +14684,9 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
                               &cq_set->u.request, 0);
                        bf_set(lpfc_mbx_cq_create_set_num_cq,
                               &cq_set->u.request, numcq);
+                       bf_set(lpfc_mbx_cq_create_set_autovalid,
+                              &cq_set->u.request,
+                              phba->sli4_hba.pc_sli4_params.cqav);
                        switch (cq->entry_count) {
                        case 2048:
                        case 4096:
index 708167b..cf64aca 100644 (file)
@@ -216,6 +216,7 @@ struct lpfc_queue {
        struct work_struct spwork;
 
        uint64_t isr_timestamp;
+       uint8_t qe_valid;
        struct lpfc_queue *assoc_qp;
        union sli4_qe qe[1];    /* array to index entries (must be last) */
 };
@@ -486,6 +487,8 @@ struct lpfc_pc_sli4_params {
        uint8_t mqv;
        uint8_t wqv;
        uint8_t rqv;
+       uint8_t eqav;
+       uint8_t cqav;
        uint8_t wqsize;
 #define LPFC_WQ_SZ64_SUPPORT   1
 #define LPFC_WQ_SZ128_SUPPORT  2