svcrdma: Relieve contention on sc_send_lock.
authorChuck Lever <chuck.lever@oracle.com>
Tue, 9 Feb 2021 15:32:20 +0000 (10:32 -0500)
committerChuck Lever <chuck.lever@oracle.com>
Tue, 17 Aug 2021 15:47:53 +0000 (11:47 -0400)
/proc/lock_stat indicates that the sc_send_lock is heavily
contended when the server is under load from a single client.

To address this, convert the send_ctxt free list to an llist.
Returning an item to the send_ctxt cache is now waitless, which
reduces the instruction path length in the single-threaded Send
handler (svc_rdma_wc_send).

The goal is to enable the ib_comp_wq worker to handle a higher
RPC/RDMA Send completion rate given the same CPU resources. This
change reduces CPU utilization of Send completion by 2-3% on my
server.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Tom Talpey <tom@talpey.com>
include/linux/sunrpc/svc_rdma.h
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c

index 57c60ff..5f8d5af 100644 (file)
@@ -90,7 +90,7 @@ struct svcxprt_rdma {
        struct ib_pd         *sc_pd;
 
        spinlock_t           sc_send_lock;
-       struct list_head     sc_send_ctxts;
+       struct llist_head    sc_send_ctxts;
        spinlock_t           sc_rw_ctxt_lock;
        struct list_head     sc_rw_ctxts;
 
@@ -150,7 +150,7 @@ struct svc_rdma_recv_ctxt {
 };
 
 struct svc_rdma_send_ctxt {
-       struct list_head        sc_list;
+       struct llist_node       sc_node;
        struct rpc_rdma_cid     sc_cid;
 
        struct ib_send_wr       sc_send_wr;
index fba2ee4..599021b 100644 (file)
 
 static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
 
-static inline struct svc_rdma_send_ctxt *
-svc_rdma_next_send_ctxt(struct list_head *list)
-{
-       return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
-                                       sc_list);
-}
-
 static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
                                   struct rpc_rdma_cid *cid)
 {
@@ -182,9 +175,10 @@ fail0:
 void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_send_ctxt *ctxt;
+       struct llist_node *node;
 
-       while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) {
-               list_del(&ctxt->sc_list);
+       while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) {
+               ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
                ib_dma_unmap_single(rdma->sc_pd->device,
                                    ctxt->sc_sges[0].addr,
                                    rdma->sc_max_req_size,
@@ -204,12 +198,13 @@ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
 struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_send_ctxt *ctxt;
+       struct llist_node *node;
 
        spin_lock(&rdma->sc_send_lock);
-       ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts);
-       if (!ctxt)
+       node = llist_del_first(&rdma->sc_send_ctxts);
+       if (!node)
                goto out_empty;
-       list_del(&ctxt->sc_list);
+       ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
        spin_unlock(&rdma->sc_send_lock);
 
 out:
@@ -253,9 +248,7 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
                                             ctxt->sc_sges[i].length);
        }
 
-       spin_lock(&rdma->sc_send_lock);
-       list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
-       spin_unlock(&rdma->sc_send_lock);
+       llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
 }
 
 /**
index d94b775..9947407 100644 (file)
@@ -136,7 +136,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
        svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
        INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
-       INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
+       init_llist_head(&cma_xprt->sc_send_ctxts);
        init_llist_head(&cma_xprt->sc_recv_ctxts);
        INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
        init_waitqueue_head(&cma_xprt->sc_send_wait);