rdma: Enable ib_alloc_cq to spread work over a device's comp_vectors
author Chuck Lever <chuck.lever@oracle.com>
Mon, 29 Jul 2019 17:22:09 +0000 (13:22 -0400)
committer Doug Ledford <dledford@redhat.com>
Mon, 5 Aug 2019 15:50:32 +0000 (11:50 -0400)
Send and Receive completion is handled on a single CPU selected at
the time each Completion Queue is allocated. Typically this is when
an initiator instantiates an RDMA transport, or when a target
accepts an RDMA connection.

Some ULPs cannot open a connection per CPU to spread completion
workload across available CPUs and MSI vectors. For such ULPs,
provide an API that allows the RDMA core to select a completion
vector based on the device's complement of available comp_vecs.

ULPs that invoke ib_alloc_cq() with only comp_vector 0 are converted
to use the new API so that their completion workloads interfere less
with each other.

Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Cc: <linux-cifs@vger.kernel.org>
Cc: <v9fs-developer@lists.sourceforge.net>
Link: https://lore.kernel.org/r/20190729171923.13428.52555.stgit@manet.1015granger.net
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/core/cq.c
drivers/infiniband/ulp/srpt/ib_srpt.c
fs/cifs/smbdirect.c
include/rdma/ib_verbs.h
net/9p/trans_rdma.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/verbs.c

index 7c59987..bbfded6 100644 (file)
@@ -253,6 +253,34 @@ out_free_cq:
 EXPORT_SYMBOL(__ib_alloc_cq_user);
 
 /**
+ * __ib_alloc_cq_any - allocate a completion queue
+ * @dev:               device to allocate the CQ for
+ * @private:           driver private data, accessible from cq->cq_context
+ * @nr_cqe:            number of CQEs to allocate
+ * @poll_ctx:          context to poll the CQ from
+ * @caller:            module owner name
+ *
+ * Attempt to spread ULP Completion Queues over each device's interrupt
+ * vectors. A simple best-effort mechanism is used.
+ */
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+                               int nr_cqe, enum ib_poll_context poll_ctx,
+                               const char *caller)
+{
+       static atomic_t counter;
+       int comp_vector = 0;
+
+       /* Unsigned modulo: a wrapped counter must not give a negative vector */
+       if (dev->num_comp_vectors > 1)
+               comp_vector = (unsigned int)atomic_inc_return(&counter) %
+                       min_t(int, dev->num_comp_vectors, num_online_cpus());
+
+       return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+                                 caller, NULL);
+}
+EXPORT_SYMBOL(__ib_alloc_cq_any);
+
+/**
  * ib_free_cq_user - free a completion queue
  * @cq:                completion queue to free.
  * @udata:     User data or NULL for kernel object
index 1a039f1..e25c70a 100644 (file)
@@ -1767,8 +1767,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
                goto out;
 
 retry:
-       ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size,
-                       0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
+       ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size,
+                                IB_POLL_WORKQUEUE);
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                pr_err("failed to create CQ cqe= %d ret= %d\n",
index cd07e53..3c91fa9 100644 (file)
@@ -1654,15 +1654,17 @@ static struct smbd_connection *_smbd_get_connection(
 
        info->send_cq = NULL;
        info->recv_cq = NULL;
-       info->send_cq = ib_alloc_cq(info->id->device, info,
-                       info->send_credit_target, 0, IB_POLL_SOFTIRQ);
+       info->send_cq =
+               ib_alloc_cq_any(info->id->device, info,
+                               info->send_credit_target, IB_POLL_SOFTIRQ);
        if (IS_ERR(info->send_cq)) {
                info->send_cq = NULL;
                goto alloc_cq_failed;
        }
 
-       info->recv_cq = ib_alloc_cq(info->id->device, info,
-                       info->receive_credit_max, 0, IB_POLL_SOFTIRQ);
+       info->recv_cq =
+               ib_alloc_cq_any(info->id->device, info,
+                               info->receive_credit_max, IB_POLL_SOFTIRQ);
        if (IS_ERR(info->recv_cq)) {
                info->recv_cq = NULL;
                goto alloc_cq_failed;
index c5f8a9f..2a1523c 100644 (file)
@@ -3711,6 +3711,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
                                NULL);
 }
 
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+                               int nr_cqe, enum ib_poll_context poll_ctx,
+                               const char *caller);
+
+/**
+ * ib_alloc_cq_any: Allocate kernel CQ on a core-selected completion vector
+ * @dev: The IB device
+ * @private: Driver private data, accessible from cq->cq_context
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+                                           void *private, int nr_cqe,
+                                           enum ib_poll_context poll_ctx)
+{
+       return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+                                KBUILD_MODNAME);
+}
+
 /**
  * ib_free_cq_user - Free kernel/user CQ
  * @cq: The CQ to free
index bac8dad..b21c3c2 100644 (file)
@@ -685,9 +685,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
                goto error;
 
        /* Create the Completion Queue */
-       rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
-                       opts.sq_depth + opts.rq_depth + 1,
-                       0, IB_POLL_SOFTIRQ);
+       rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
+                                  opts.sq_depth + opts.rq_depth + 1,
+                                  IB_POLL_SOFTIRQ);
        if (IS_ERR(rdma->cq))
                goto error;
 
index 3fe6651..4d3db6e 100644 (file)
@@ -454,14 +454,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                dprintk("svcrdma: error creating PD for connect request\n");
                goto errout;
        }
-       newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-                                       0, IB_POLL_WORKQUEUE);
+       newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
+                                           IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_sq_cq)) {
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
-       newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
-                                       0, IB_POLL_WORKQUEUE);
+       newxprt->sc_rq_cq =
+               ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_rq_cq)) {
                dprintk("svcrdma: error creating RQ CQ for connect request\n");
                goto errout;
index 805b1f3..b10aa16 100644 (file)
@@ -521,18 +521,17 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
        init_waitqueue_head(&ep->rep_connect_wait);
        ep->rep_receive_count = 0;
 
-       sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
-                            ep->rep_attr.cap.max_send_wr + 1,
-                            ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
-                            IB_POLL_WORKQUEUE);
+       sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+                                ep->rep_attr.cap.max_send_wr + 1,
+                                IB_POLL_WORKQUEUE);
        if (IS_ERR(sendcq)) {
                rc = PTR_ERR(sendcq);
                goto out1;
        }
 
-       recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
-                            ep->rep_attr.cap.max_recv_wr + 1,
-                            0, IB_POLL_WORKQUEUE);
+       recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+                                ep->rep_attr.cap.max_recv_wr + 1,
+                                IB_POLL_WORKQUEUE);
        if (IS_ERR(recvcq)) {
                rc = PTR_ERR(recvcq);
                goto out2;