[SCSI] lpfc 8.3.33: Parallelize SLI-4 Q distribution
author: James Smart <james.smart@emulex.com>
Fri, 3 Aug 2012 16:35:54 +0000 (12:35 -0400)
committer: James Bottomley <JBottomley@Parallels.com>
Fri, 14 Sep 2012 13:39:22 +0000 (14:39 +0100)
Commonize the SLI-3/SLI-4 ring/queue framework while keeping SLI-3 compatibility.
Parallelize SLI-4 queue distribution so that multiple posting/completion queues are used.

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_hw.h
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli.h
drivers/scsi/lpfc/lpfc_sli4.h

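diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index a65c05a..a870af1 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h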
@@ -732,7 +732,7 @@ struct lpfc_hba {
        uint32_t hbq_count;             /* Count of configured HBQs */
        struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies  */
 
-       uint32_t fcp_qidx;              /* next work queue to post work to */
+       atomic_t fcp_qidx;              /* next work queue to post work to */
 
        unsigned long pci_bar0_map;     /* Physical address for PCI BAR0 */
        unsigned long pci_bar1_map;     /* Physical address for PCI BAR1 */
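diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 4ab0e35..d744704 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h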
@@ -1188,8 +1188,8 @@ typedef struct {
  */
 
 /* Number of rings currently used and available. */
-#define MAX_CONFIGURED_RINGS     3
-#define MAX_RINGS                4
+#define MAX_SLI3_CONFIGURED_RINGS     3
+#define MAX_SLI3_RINGS                4
 
 /* IOCB / Mailbox is owned by FireFly */
 #define OWN_CHIP        1
@@ -2993,7 +2993,7 @@ typedef struct _PCB {
 
        uint32_t pgpAddrLow;
        uint32_t pgpAddrHigh;
-       SLI2_RDSC rdsc[MAX_RINGS];
+       SLI2_RDSC rdsc[MAX_SLI3_RINGS];
 } PCB_t;
 
 /* NEW_FEATURE */
@@ -3103,18 +3103,18 @@ struct lpfc_pgp {
 
 struct sli2_desc {
        uint32_t unused1[16];
-       struct lpfc_hgp host[MAX_RINGS];
-       struct lpfc_pgp port[MAX_RINGS];
+       struct lpfc_hgp host[MAX_SLI3_RINGS];
+       struct lpfc_pgp port[MAX_SLI3_RINGS];
 };
 
 struct sli3_desc {
-       struct lpfc_hgp host[MAX_RINGS];
+       struct lpfc_hgp host[MAX_SLI3_RINGS];
        uint32_t reserved[8];
        uint32_t hbq_put[16];
 };
 
 struct sli3_pgp {
-       struct lpfc_pgp port[MAX_RINGS];
+       struct lpfc_pgp port[MAX_SLI3_RINGS];
        uint32_t hbq_get[16];
 };
 
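diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 9efe5f8..176302f 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c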
@@ -4551,6 +4551,13 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
                        phba->cfg_sg_seg_cnt = LPFC_DEFAULT_MENLO_SG_SEG_CNT;
        }
 
+       if (!phba->sli.ring)
+               phba->sli.ring = (struct lpfc_sli_ring *)
+                       kzalloc(LPFC_SLI3_MAX_RING *
+                       sizeof(struct lpfc_sli_ring), GFP_KERNEL);
+       if (!phba->sli.ring)
+               return -ENOMEM;
+
        /*
         * Since the sg_tablesize is module parameter, the sg_dma_buf_size
         * used to create the sg_dma_buf_pool must be dynamically calculated.
@@ -4710,6 +4717,16 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                sges_per_segment = 2;
 
        /*
+        * For SLI4, instead of using ring 0 (LPFC_FCP_RING) for FCP commands
+        * we will associate a new ring, for each FCP fastpath EQ/CQ/WQ tuple.
+        */
+       if (!phba->sli.ring)
+               phba->sli.ring = kzalloc(
+                       (LPFC_SLI3_MAX_RING + phba->cfg_fcp_eq_count) *
+                       sizeof(struct lpfc_sli_ring), GFP_KERNEL);
+       if (!phba->sli.ring)
+               return -ENOMEM;
+       /*
         * Since the sg_tablesize is module parameter, the sg_dma_buf_size
         * used to create the sg_dma_buf_pool must be dynamically calculated.
         * 2 segments are added since the IOCB needs a command and response bde.
@@ -5555,6 +5572,10 @@ lpfc_hba_free(struct lpfc_hba *phba)
        /* Release the driver assigned board number */
        idr_remove(&lpfc_hba_index, phba->brd_no);
 
+       /* Free memory allocated with sli rings */
+       kfree(phba->sli.ring);
+       phba->sli.ring = NULL;
+
        kfree(phba);
        return;
 }
@@ -6924,6 +6945,8 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 int
 lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 {
+       struct lpfc_sli *psli = &phba->sli;
+       struct lpfc_sli_ring *pring;
        int rc = -ENOMEM;
        int fcp_eqidx, fcp_cqidx, fcp_wqidx;
        int fcp_cq_index = 0;
@@ -7107,6 +7130,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
                                "rc = 0x%x\n", rc);
                goto out_destroy_mbx_wq;
        }
+
+       /* Bind this WQ to the ELS ring */
+       pring = &psli->ring[LPFC_ELS_RING];
+       pring->sli.sli4.wqp = (void *)phba->sli4_hba.els_wq;
+       phba->sli4_hba.els_cq->pring = pring;
+
        lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                        "2590 ELS WQ setup: wq-id=%d, parent cq-id=%d\n",
                        phba->sli4_hba.els_wq->queue_id,
@@ -7137,6 +7166,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
                                        "WQ (%d), rc = 0x%x\n", fcp_wqidx, rc);
                        goto out_destroy_fcp_wq;
                }
+
+               /* Bind this WQ to the next FCP ring */
+               pring = &psli->ring[MAX_SLI3_CONFIGURED_RINGS + fcp_wqidx];
+               pring->sli.sli4.wqp = (void *)phba->sli4_hba.fcp_wq[fcp_wqidx];
+               phba->sli4_hba.fcp_cq[fcp_cq_index]->pring = pring;
+
                lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                                "2591 FCP WQ setup: wq[%d]-id=%d, "
                                "parent cq[%d]-id=%d\n",
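diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index d7afd0f..982bd40 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c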
@@ -7796,14 +7796,14 @@ lpfc_sli4_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
  *
  * Return: index into SLI4 fast-path FCP queue index.
  **/
-static uint32_t
+static inline uint32_t
 lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba)
 {
-       ++phba->fcp_qidx;
-       if (phba->fcp_qidx >= phba->cfg_fcp_wq_count)
-               phba->fcp_qidx = 0;
+       int i;
 
-       return phba->fcp_qidx;
+       i = atomic_add_return(1, &phba->fcp_qidx);
+       i = (i % phba->cfg_fcp_wq_count);
+       return i;
 }
 
 /**
@@ -8323,16 +8323,6 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
 
        if ((piocb->iocb_flag & LPFC_IO_FCP) ||
                (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
-               /*
-                * For FCP command IOCB, get a new WQ index to distribute
-                * WQE across the WQsr. On the other hand, for abort IOCB,
-                * it carries the same WQ index to the original command
-                * IOCB.
-                */
-               if (piocb->iocb_flag & LPFC_IO_FCP)
-                       piocb->fcp_wqidx = lpfc_sli4_scmd_to_wqidx_distr(phba);
-               if (unlikely(!phba->sli4_hba.fcp_wq))
-                       return IOCB_ERROR;
                if (lpfc_sli4_wq_put(phba->sli4_hba.fcp_wq[piocb->fcp_wqidx],
                                     &wqe))
                        return IOCB_ERROR;
@@ -8413,11 +8403,18 @@ int
 lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
                    struct lpfc_iocbq *piocb, uint32_t flag)
 {
-       struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+       struct lpfc_sli_ring *pring;
        unsigned long iflags;
-       int rc;
+       int rc, idx;
 
        if (phba->sli_rev == LPFC_SLI_REV4) {
+               if (piocb->iocb_flag &  LPFC_IO_FCP) {
+                       if (unlikely(!phba->sli4_hba.fcp_wq))
+                               return IOCB_ERROR;
+                       idx = lpfc_sli4_scmd_to_wqidx_distr(phba);
+                       piocb->fcp_wqidx = idx;
+                       ring_number = MAX_SLI3_CONFIGURED_RINGS + idx;
+               }
                pring = &phba->sli.ring[ring_number];
                spin_lock_irqsave(&pring->ring_lock, iflags);
                rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
@@ -8712,7 +8709,9 @@ lpfc_sli_setup(struct lpfc_hba *phba)
        struct lpfc_sli *psli = &phba->sli;
        struct lpfc_sli_ring *pring;
 
-       psli->num_rings = MAX_CONFIGURED_RINGS;
+       psli->num_rings = MAX_SLI3_CONFIGURED_RINGS;
+       if (phba->sli_rev == LPFC_SLI_REV4)
+               psli->num_rings += phba->cfg_fcp_eq_count;
        psli->sli_flag = 0;
        psli->fcp_ring = LPFC_FCP_RING;
        psli->next_ring = LPFC_FCP_NEXT_RING;
@@ -11191,6 +11190,7 @@ lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
 /**
  * lpfc_sli4_sp_handle_els_wcqe - Handle els work-queue completion event
  * @phba: Pointer to HBA context object.
+ * @cq: Pointer to associated CQ
  * @wcqe: Pointer to work-queue completion queue entry.
  *
  * This routine handles an ELS work-queue completion event.
@@ -11198,12 +11198,12 @@ lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
  * Return: true if work posted to worker thread, otherwise false.
  **/
 static bool
-lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba,
+lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
                             struct lpfc_wcqe_complete *wcqe)
 {
        struct lpfc_iocbq *irspiocbq;
        unsigned long iflags;
-       struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+       struct lpfc_sli_ring *pring = cq->pring;
 
        /* Get an irspiocbq for later ELS response processing use */
        irspiocbq = lpfc_sli_get_iocbq(phba);
@@ -11408,7 +11408,7 @@ lpfc_sli4_sp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
        case CQE_CODE_COMPL_WQE:
                /* Process the WQ/RQ complete event */
                phba->last_completion_time = jiffies;
-               workposted = lpfc_sli4_sp_handle_els_wcqe(phba,
+               workposted = lpfc_sli4_sp_handle_els_wcqe(phba, cq,
                                (struct lpfc_wcqe_complete *)&cqevt);
                break;
        case CQE_CODE_RELEASE_WQE:
@@ -11540,16 +11540,18 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
 
 /**
  * lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
- * @eqe: Pointer to fast-path completion queue entry.
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to associated CQ
+ * @wcqe: Pointer to work-queue completion queue entry.
  *
  * This routine process a fast-path work queue completion entry from fast-path
  * event queue for FCP command response completion.
  **/
 static void
-lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba,
+lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
                             struct lpfc_wcqe_complete *wcqe)
 {
-       struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_FCP_RING];
+       struct lpfc_sli_ring *pring = cq->pring;
        struct lpfc_iocbq *cmdiocbq;
        struct lpfc_iocbq irspiocbq;
        unsigned long iflags;
@@ -11667,7 +11669,7 @@ lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
                cq->CQ_wq++;
                /* Process the WQ complete event */
                phba->last_completion_time = jiffies;
-               lpfc_sli4_fp_handle_fcp_wcqe(phba,
+               lpfc_sli4_fp_handle_fcp_wcqe(phba, cq,
                                (struct lpfc_wcqe_complete *)&wcqe);
                break;
        case CQE_CODE_RELEASE_WQE:
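diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 2d64a2b..4b9b44e 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h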
@@ -131,7 +131,9 @@ typedef struct lpfcMboxq {
 
 #define LPFC_MAX_RING_MASK  5  /* max num of rctl/type masks allowed per
                                   ring */
-#define LPFC_MAX_RING       4  /* max num of SLI rings used by driver */
+#define LPFC_SLI3_MAX_RING  4  /* Max num of SLI3 rings used by driver.
+                                  For SLI4, an additional ring for each
+                                  FCP WQ will be allocated.  */
 
 struct lpfc_sli_ring;
 
@@ -172,7 +174,7 @@ struct lpfc_sli3_ring {
 };
 
 struct lpfc_sli4_ring {
-       void *wqp;      /* Pointer to associated WQ */
+       struct lpfc_queue *wqp; /* Pointer to associated WQ */
 };
 
 
@@ -284,7 +286,7 @@ struct lpfc_sli {
 #define LPFC_MENLO_MAINT          0x1000 /* need for menl fw download */
 #define LPFC_SLI_ASYNC_MBX_BLK    0x2000 /* Async mailbox is blocked */
 
-       struct lpfc_sli_ring ring[LPFC_MAX_RING];
+       struct lpfc_sli_ring *ring;
        int fcp_ring;           /* ring used for FCP initiator commands */
        int next_ring;
 
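diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index f4b5765..e7d8413 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h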
@@ -142,6 +142,8 @@ struct lpfc_queue {
        uint32_t host_index;    /* The host's index for putting or getting */
        uint32_t hba_index;     /* The last known hba index for get or put */
 
+       struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
+
        /* For q stats */
        uint32_t q_cnt_1;
        uint32_t q_cnt_2;