IB/srpt: Change default behavior from using SRQ to using RC
authorBart Van Assche <bart.vanassche@wdc.com>
Wed, 11 Oct 2017 17:27:25 +0000 (10:27 -0700)
committerDoug Ledford <dledford@redhat.com>
Wed, 18 Oct 2017 14:49:54 +0000 (10:49 -0400)
Although the RC mode needs more resources than the SRQ mode, the RC
mode has three advantages over SRQ:
- It works with all RDMA adapters, even those that do not support
  SRQ.
- Posting WRs and polling WCs does not trigger lock contention
  because only one thread at a time accesses a WR or WC queue in
  non-SRQ mode.
- The end-to-end flow control mechanism is used.

From the IB spec:

    C9-150.2.1: For QPs that are not associated with an SRQ, each HCA
    receive queue shall generate end-to-end flow control credits. If
    a QP is associated with an SRQ, the HCA receive queue shall not
    generate end-to-end flow control credits.

Add a new configfs attribute that allows configuring which mode to use
(/sys/kernel/config/target/srpt/$GUID/$GUID/attrib/use_srq). Note:
only the attribute for port 1 is relevant on multi-port adapters.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/infiniband/ulp/srpt/ib_srpt.h

index 6cf95ad..304855b 100644 (file)
@@ -295,6 +295,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
 {
        struct srpt_device *sdev = sport->sdev;
        struct ib_dm_ioc_profile *iocp;
+       int send_queue_depth;
 
        iocp = (struct ib_dm_ioc_profile *)mad->data;
 
@@ -310,6 +311,12 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
                return;
        }
 
+       if (sdev->use_srq)
+               send_queue_depth = sdev->srq_size;
+       else
+               send_queue_depth = min(SRPT_RQ_SIZE,
+                                      sdev->device->attrs.max_qp_wr);
+
        memset(iocp, 0, sizeof(*iocp));
        strcpy(iocp->id_string, SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
@@ -322,7 +329,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
-       iocp->send_queue_depth = cpu_to_be16(sdev->srq_size);
+       iocp->send_queue_depth = cpu_to_be16(send_queue_depth);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(srp_max_req_size);
        iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size,
@@ -686,6 +693,9 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
 {
        int i;
 
+       if (!ioctx_ring)
+               return;
+
        for (i = 0; i < ring_size; ++i)
                srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
        kfree(ioctx_ring);
@@ -757,7 +767,7 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx,
 /**
  * srpt_post_recv() - Post an IB receive request.
  */
-static int srpt_post_recv(struct srpt_device *sdev,
+static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
                          struct srpt_recv_ioctx *ioctx)
 {
        struct ib_sge list;
@@ -774,7 +784,10 @@ static int srpt_post_recv(struct srpt_device *sdev,
        wr.sg_list = &list;
        wr.num_sge = 1;
 
-       return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
+       if (sdev->use_srq)
+               return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
+       else
+               return ib_post_recv(ch->qp, &wr, &bad_wr);
 }
 
 /**
@@ -1517,7 +1530,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                break;
        }
 
-       srpt_post_recv(ch->sport->sdev, recv_ioctx);
+       srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
        return;
 
 out_wait:
@@ -1616,7 +1629,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
        struct srpt_device *sdev = sport->sdev;
        const struct ib_device_attr *attrs = &sdev->device->attrs;
        u32 srp_sq_size = sport->port_attrib.srp_sq_size;
-       int ret;
+       int i, ret;
 
        WARN_ON(ch->rq_size < 1);
 
@@ -1640,7 +1653,6 @@ retry:
                = (void(*)(struct ib_event *, void*))srpt_qp_event;
        qp_init->send_cq = ch->cq;
        qp_init->recv_cq = ch->cq;
-       qp_init->srq = sdev->srq;
        qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
        qp_init->qp_type = IB_QPT_RC;
        /*
@@ -1654,6 +1666,12 @@ retry:
        qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
        qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
        qp_init->port_num = ch->sport->port;
+       if (sdev->use_srq) {
+               qp_init->srq = sdev->srq;
+       } else {
+               qp_init->cap.max_recv_wr = ch->rq_size;
+               qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge;
+       }
 
        ch->qp = ib_create_qp(sdev->pd, qp_init);
        if (IS_ERR(ch->qp)) {
@@ -1669,6 +1687,10 @@ retry:
                goto err_destroy_cq;
        }
 
+       if (!sdev->use_srq)
+               for (i = 0; i < ch->rq_size; i++)
+                       srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]);
+
        atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr);
 
        pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
@@ -1818,6 +1840,10 @@ static void srpt_release_channel_work(struct work_struct *w)
                             ch->sport->sdev, ch->rq_size,
                             ch->rsp_size, DMA_TO_DEVICE);
 
+       srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
+                            sdev, ch->rq_size,
+                            srp_max_req_size, DMA_FROM_DEVICE);
+
        mutex_lock(&sdev->mutex);
        list_del_init(&ch->list);
        if (ch->release_done)
@@ -1975,6 +2001,19 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                ch->ioctx_ring[i]->ch = ch;
                list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
        }
+       if (!sdev->use_srq) {
+               ch->ioctx_recv_ring = (struct srpt_recv_ioctx **)
+                       srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
+                                             sizeof(*ch->ioctx_recv_ring[0]),
+                                             srp_max_req_size,
+                                             DMA_FROM_DEVICE);
+               if (!ch->ioctx_recv_ring) {
+                       pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n");
+                       rej->reason =
+                           cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+                       goto free_ring;
+               }
+       }
 
        ret = srpt_create_ch_ib(ch);
        if (ret) {
@@ -1982,7 +2021,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                              SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
                pr_err("rejected SRP_LOGIN_REQ because creating"
                       " a new RDMA channel failed.\n");
-               goto free_ring;
+               goto free_recv_ring;
        }
 
        ret = srpt_ch_qp_rtr(ch, ch->qp);
@@ -2073,6 +2112,11 @@ release_channel:
 destroy_ib:
        srpt_destroy_ch_ib(ch);
 
+free_recv_ring:
+       srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
+                            ch->sport->sdev, ch->rq_size,
+                            srp_max_req_size, DMA_FROM_DEVICE);
+
 free_ring:
        srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
                             ch->sport->sdev, ch->rq_size,
@@ -2502,20 +2546,38 @@ static void srpt_add_one(struct ib_device *device)
        srq_attr.attr.srq_limit = 0;
        srq_attr.srq_type = IB_SRQT_BASIC;
 
-       sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
-       if (IS_ERR(sdev->srq))
-               goto err_pd;
+       sdev->srq = sdev->port[0].port_attrib.use_srq ?
+               ib_create_srq(sdev->pd, &srq_attr) : ERR_PTR(-ENOTSUPP);
+       if (IS_ERR(sdev->srq)) {
+               pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(sdev->srq));
+
+               /* SRQ not supported. */
+               sdev->use_srq = false;
+       } else {
+               pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n",
+                        sdev->srq_size, sdev->device->attrs.max_srq_wr,
+                        device->name);
+
+               sdev->use_srq = true;
 
-       pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
-                __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
-                device->name);
+               sdev->ioctx_ring = (struct srpt_recv_ioctx **)
+                       srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
+                                             sizeof(*sdev->ioctx_ring[0]),
+                                             srp_max_req_size,
+                                             DMA_FROM_DEVICE);
+               if (!sdev->ioctx_ring)
+                       goto err_pd;
+
+               for (i = 0; i < sdev->srq_size; ++i)
+                       srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]);
+       }
 
        if (!srpt_service_guid)
                srpt_service_guid = be64_to_cpu(device->node_guid);
 
        sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
        if (IS_ERR(sdev->cm_id))
-               goto err_srq;
+               goto err_ring;
 
        /* print out target login information */
        pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,"
@@ -2535,16 +2597,6 @@ static void srpt_add_one(struct ib_device *device)
                              srpt_event_handler);
        ib_register_event_handler(&sdev->event_handler);
 
-       sdev->ioctx_ring = (struct srpt_recv_ioctx **)
-               srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
-                                     sizeof(*sdev->ioctx_ring[0]),
-                                     srp_max_req_size, DMA_FROM_DEVICE);
-       if (!sdev->ioctx_ring)
-               goto err_event;
-
-       for (i = 0; i < sdev->srq_size; ++i)
-               srpt_post_recv(sdev, sdev->ioctx_ring[i]);
-
        WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
 
        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
@@ -2554,12 +2606,13 @@ static void srpt_add_one(struct ib_device *device)
                sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
                sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE;
                sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE;
+               sport->port_attrib.use_srq = false;
                INIT_WORK(&sport->work, srpt_refresh_port_work);
 
                if (srpt_refresh_port(sport)) {
                        pr_err("MAD registration failed for %s-%d.\n",
                               sdev->device->name, i);
-                       goto err_ring;
+                       goto err_event;
                }
        }
 
@@ -2572,16 +2625,16 @@ out:
        pr_debug("added %s.\n", device->name);
        return;
 
-err_ring:
-       srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
-                            sdev->srq_size, srp_max_req_size,
-                            DMA_FROM_DEVICE);
 err_event:
        ib_unregister_event_handler(&sdev->event_handler);
 err_cm:
        ib_destroy_cm_id(sdev->cm_id);
-err_srq:
-       ib_destroy_srq(sdev->srq);
+err_ring:
+       if (sdev->use_srq)
+               ib_destroy_srq(sdev->srq);
+       srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
+                            sdev->srq_size, srp_max_req_size,
+                            DMA_FROM_DEVICE);
 err_pd:
        ib_dealloc_pd(sdev->pd);
 free_dev:
@@ -2625,12 +2678,12 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
        spin_unlock(&srpt_dev_lock);
        srpt_release_sdev(sdev);
 
-       ib_destroy_srq(sdev->srq);
-       ib_dealloc_pd(sdev->pd);
-
+       if (sdev->use_srq)
+               ib_destroy_srq(sdev->srq);
        srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
                             sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE);
-       sdev->ioctx_ring = NULL;
+       ib_dealloc_pd(sdev->pd);
+
        kfree(sdev);
 }
 
@@ -2928,14 +2981,43 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item,
        return count;
 }
 
+static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item,
+                                           char *page)
+{
+       struct se_portal_group *se_tpg = attrib_to_tpg(item);
+       struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
+
+       return sprintf(page, "%d\n", sport->port_attrib.use_srq);
+}
+
+static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item,
+                                            const char *page, size_t count)
+{
+       struct se_portal_group *se_tpg = attrib_to_tpg(item);
+       struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
+       unsigned long val;
+       int ret;
+
+       ret = kstrtoul(page, 0, &val);
+       if (ret < 0)
+               return ret;
+       if (val != !!val)
+               return -EINVAL;
+       sport->port_attrib.use_srq = val;
+
+       return count;
+}
+
 CONFIGFS_ATTR(srpt_tpg_attrib_,  srp_max_rdma_size);
 CONFIGFS_ATTR(srpt_tpg_attrib_,  srp_max_rsp_size);
 CONFIGFS_ATTR(srpt_tpg_attrib_,  srp_sq_size);
+CONFIGFS_ATTR(srpt_tpg_attrib_,  use_srq);
 
 static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
        &srpt_tpg_attrib_attr_srp_max_rdma_size,
        &srpt_tpg_attrib_attr_srp_max_rsp_size,
        &srpt_tpg_attrib_attr_srp_sq_size,
+       &srpt_tpg_attrib_attr_use_srq,
        NULL,
 };
 
index 976e924..673387d 100644 (file)
@@ -252,6 +252,7 @@ enum rdma_ch_state {
  * @free_list:     Head of list with free send I/O contexts.
  * @state:         channel state. See also enum rdma_ch_state.
  * @ioctx_ring:    Send ring.
+ * @ioctx_recv_ring: Receive I/O context ring.
  * @list:          Node for insertion in the srpt_device.rch_list list.
  * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
  *                 list contains struct srpt_ioctx elements and is protected
@@ -281,6 +282,7 @@ struct srpt_rdma_ch {
        struct list_head        free_list;
        enum rdma_ch_state      state;
        struct srpt_send_ioctx  **ioctx_ring;
+       struct srpt_recv_ioctx  **ioctx_recv_ring;
        struct list_head        list;
        struct list_head        cmd_wait_list;
        struct se_session       *sess;
@@ -295,11 +297,13 @@ struct srpt_rdma_ch {
  * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections.
  * @srp_max_rsp_size: Maximum size of SRP response messages in bytes.
  * @srp_sq_size: Shared receive queue (SRQ) size.
+ * @use_srq: Whether or not to use SRQ.
  */
 struct srpt_port_attrib {
        u32                     srp_max_rdma_size;
        u32                     srp_max_rsp_size;
        u32                     srp_sq_size;
+       bool                    use_srq;
 };
 
 /**
@@ -347,6 +351,7 @@ struct srpt_port {
  * @srq:           Per-HCA SRQ (shared receive queue).
  * @cm_id:         Connection identifier.
  * @srq_size:      SRQ size.
+ * @use_srq:       Whether or not to use SRQ.
  * @ioctx_ring:    Per-HCA SRQ.
  * @rch_list:      Per-device channel list -- see also srpt_rdma_ch.list.
  * @ch_releaseQ:   Enables waiting for removal from rch_list.
@@ -362,6 +367,7 @@ struct srpt_device {
        struct ib_srq           *srq;
        struct ib_cm_id         *cm_id;
        int                     srq_size;
+       bool                    use_srq;
        struct srpt_recv_ioctx  **ioctx_ring;
        struct list_head        rch_list;
        wait_queue_head_t       ch_releaseQ;