xprtrdma: Eliminate per-transport "max pages"
author	Chuck Lever <chuck.lever@oracle.com>	Fri, 3 Jan 2020 16:56:43 +0000 (11:56 -0500)
committer	Anna Schumaker <Anna.Schumaker@Netapp.com>	Wed, 15 Jan 2020 15:54:32 +0000 (10:54 -0500)
To support device hotplug and migrating a connection between devices
of different capabilities, we have to guarantee that all in-kernel
devices can support the same max NFS payload size (1 megabyte).

This means that one or two in-tree devices may no longer be
supported for NFS/RDMA because they cannot handle a 1MB rsize/wsize.
The only one I confirmed was cxgb3, and its driver has already been
removed from the kernel.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h

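To make the 1MB guarantee concrete (assuming 4 KB pages): a 1 megabyte
payload spans 256 pages, so RPCRDMA_MAX_DATA_SEGS works out to 256 data
segments. A device qualifies only if the number of chunk segments it can
express, times its FRWR page-list depth, covers that payload plus the
head and tail buffers. Devices that cannot are now rejected when the
transport is set up, instead of being given a smaller per-transport
"max pages" value.
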
net/sunrpc/xprtrdma/frwr_ops.c
index 859c301..032a896 100644
@@ -178,7 +178,7 @@ out_list_err:
  *     ep->rep_attr.cap.max_send_wr
  *     ep->rep_attr.cap.max_recv_wr
  *     ep->rep_max_requests
- *     ia->ri_max_segs
+ *     ia->ri_max_rdma_segs
  *
  * And these FRWR-related fields:
  *     ia->ri_max_frwr_depth
@@ -209,14 +209,12 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
         * capability, but perform optimally when the MRs are not larger
         * than a page.
         */
-       if (attrs->max_sge_rd > 1)
+       if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS)
                ia->ri_max_frwr_depth = attrs->max_sge_rd;
        else
                ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
        if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
                ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
-       dprintk("RPC:       %s: max FR page list depth = %u\n",
-               __func__, ia->ri_max_frwr_depth);
 
        /* Add room for frwr register and invalidate WRs.
         * 1. FRWR reg WR for head
@@ -260,30 +258,22 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
        ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
        ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
 
-       ia->ri_max_segs =
+       ia->ri_max_rdma_segs =
                DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth);
        /* Reply chunks require segments for head and tail buffers */
-       ia->ri_max_segs += 2;
-       if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
-               ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
-       return 0;
-}
-
-/**
- * frwr_maxpages - Compute size of largest payload
- * @r_xprt: transport
- *
- * Returns maximum size of an RPC message, in pages.
- *
- * FRWR mode conveys a list of pages per chunk segment. The
- * maximum length of that list is the FRWR page list depth.
- */
-size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       ia->ri_max_rdma_segs += 2;
+       if (ia->ri_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)
+               ia->ri_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;
+
+       /* Ensure the underlying device is capable of conveying the
+        * largest r/wsize NFS will ask for. This guarantees that
+        * failing over from one RDMA device to another will not
+        * break NFS I/O.
+        */
+       if ((ia->ri_max_rdma_segs * ia->ri_max_frwr_depth) < RPCRDMA_MAX_SEGS)
+               return -ENOMEM;
 
-       return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-                    (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
+       return 0;
 }
 
 /**
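
The rejection logic above is compact, so here is a minimal user-space
sketch of the same math, with illustrative stand-ins for the kernel's
RPCRDMA_* constants (the values assume 4 KB pages; the real definitions
live in xprt_rdma.h):

    #include <stdio.h>

    /* Illustrative stand-ins for the kernel constants; the values
     * assume 4 KB pages. The real definitions live in xprt_rdma.h.
     */
    #define MAX_DATA_SEGS   256                     /* 1 MB / 4 KB page */
    #define MAX_SEGS        (MAX_DATA_SEGS + 2)     /* plus head and tail */
    #define MAX_HDR_SEGS    16                      /* assumed segment cap */

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* Mirrors the reworked frwr_open() arithmetic: derive the segment
     * count from the device's FRWR depth, then reject any device that
     * cannot convey a full-size payload.
     */
    static int check_device(unsigned int max_frwr_depth)
    {
            unsigned int max_rdma_segs;

            if (max_frwr_depth > MAX_DATA_SEGS)
                    max_frwr_depth = MAX_DATA_SEGS;

            max_rdma_segs = DIV_ROUND_UP(MAX_DATA_SEGS, max_frwr_depth);
            max_rdma_segs += 2;     /* Reply chunk head and tail */
            if (max_rdma_segs > MAX_HDR_SEGS)
                    max_rdma_segs = MAX_HDR_SEGS;

            if (max_rdma_segs * max_frwr_depth < MAX_SEGS)
                    return -1;      /* frwr_open() returns -ENOMEM */
            return 0;
    }

    int main(void)
    {
            printf("depth 512: %s\n", check_device(512) ? "rejected" : "ok");
            printf("depth  16: %s\n", check_device(16) ? "rejected" : "ok");
            return 0;
    }

With these assumed values, a depth-16 device is rejected: even at the
segment cap it covers only 16 * 16 = 256 pages, short of the 258
segments (256 data plus head and tail) a maximal request can require,
while a depth-512 device is clamped to 256 and passes easily.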
net/sunrpc/xprtrdma/rpc_rdma.c
index 520323d..c6dcea0 100644
@@ -111,7 +111,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
  */
 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
 {
-       unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
+       unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs;
        struct rpcrdma_ep *ep = &r_xprt->rx_ep;
 
        ep->rep_max_inline_send =
net/sunrpc/xprtrdma/transport.c
index f868a75..3cfeba6 100644
@@ -359,19 +359,13 @@ xprt_setup_rdma(struct xprt_create *args)
        if (rc)
                goto out3;
 
-       INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
-                         xprt_rdma_connect_worker);
-
-       xprt->max_payload = frwr_maxpages(new_xprt);
-       if (xprt->max_payload == 0)
-               goto out4;
-       xprt->max_payload <<= PAGE_SHIFT;
-       dprintk("RPC:       %s: transport data payload maximum: %zu bytes\n",
-               __func__, xprt->max_payload);
-
        if (!try_module_get(THIS_MODULE))
                goto out4;
 
+       INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
+                         xprt_rdma_connect_worker);
+       xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
+
        dprintk("RPC:       %s: %s:%s\n", __func__,
                xprt->address_strings[RPC_DISPLAY_ADDR],
                xprt->address_strings[RPC_DISPLAY_PORT]);
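
With the per-device frwr_maxpages() calculation gone, the advertised
limit is a constant. Assuming 4 KB pages (PAGE_SHIFT = 12) and
RPCRDMA_MAX_DATA_SEGS of 256, xprt->max_payload becomes
256 << 12 = 1,048,576 bytes: exactly the 1 megabyte ceiling the
description promises, independent of which device currently backs the
connection.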
net/sunrpc/xprtrdma/verbs.c
index 766e775..21fc576 100644
@@ -936,7 +936,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        unsigned int count;
 
-       for (count = 0; count < ia->ri_max_segs; count++) {
+       for (count = 0; count < ia->ri_max_rdma_segs; count++) {
                struct rpcrdma_mr *mr;
                int rc;
 
@@ -1018,7 +1018,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
 
        /* Compute maximum header buffer size in bytes */
        maxhdrsize = rpcrdma_fixed_maxsz + 3 +
-                    r_xprt->rx_ia.ri_max_segs * rpcrdma_readchunk_maxsz;
+                    r_xprt->rx_ia.ri_max_rdma_segs * rpcrdma_readchunk_maxsz;
        maxhdrsize *= sizeof(__be32);
        rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
                                  DMA_TO_DEVICE, flags);
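
For a sense of the buffer size this produces, assuming for illustration
rpcrdma_fixed_maxsz of 4 and rpcrdma_readchunk_maxsz of 6 XDR words,
with ri_max_rdma_segs capped at 16: maxhdrsize works out to
(4 + 3 + 16 * 6) * 4 = 412 bytes, which __roundup_pow_of_two() raises
to a 512-byte regbuf.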
net/sunrpc/xprtrdma/xprt_rdma.h
index 0fde694..aac4cf9 100644
@@ -71,7 +71,7 @@ struct rpcrdma_ia {
        struct rdma_cm_id       *ri_id;
        struct ib_pd            *ri_pd;
        int                     ri_async_rc;
-       unsigned int            ri_max_segs;
+       unsigned int            ri_max_rdma_segs;
        unsigned int            ri_max_frwr_depth;
        bool                    ri_implicit_roundup;
        enum ib_mr_type         ri_mrtype;
@@ -539,7 +539,6 @@ void frwr_reset(struct rpcrdma_req *req);
 int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
 void frwr_release_mr(struct rpcrdma_mr *mr);
-size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                                struct rpcrdma_mr_seg *seg,
                                int nsegs, bool writing, __be32 xid,