svcrdma: Use struct xdr_stream to decode ingress transport headers
authorChuck Lever <chuck.lever@oracle.com>
Mon, 2 Mar 2020 20:01:08 +0000 (15:01 -0500)
committerChuck Lever <chuck.lever@oracle.com>
Mon, 16 Mar 2020 16:04:32 +0000 (12:04 -0400)
The logic that checks incoming network headers has to be scrupulous.

De-duplicate: replace open-coded buffer overflow checks with the use
of xdr_stream helpers that are used almost everywhere else XDR
decoding is done.

One minor change to the sanity checks: instead of checking the
length of individual segments, cap the length of the whole chunk
to be sure it can fit in the set of pages available in rq_pages.
This should be a better test of whether the server can handle the
chunks in each request.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
include/linux/sunrpc/rpc_rdma.h
include/linux/sunrpc/svc_rdma.h
include/trace/events/rpcrdma.h
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

index 92d182fd8e3b0d9b655313132f8befaffb32eb78..320c672d84dea386fc5793b925ef077d718bda78 100644 (file)
@@ -58,7 +58,8 @@ enum {
 enum {
        rpcrdma_fixed_maxsz     = 4,
        rpcrdma_segment_maxsz   = 4,
-       rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz,
+       rpcrdma_readseg_maxsz   = 1 + rpcrdma_segment_maxsz,
+       rpcrdma_readchunk_maxsz = 1 + rpcrdma_readseg_maxsz,
 };
 
 /*
index 04e4a34d1c6ae721b8c0600e5da4dd168ea1004a..c790dbb0dd901bb70cd3522a266c938ac620a6c1 100644 (file)
@@ -132,6 +132,7 @@ struct svc_rdma_recv_ctxt {
        struct ib_sge           rc_recv_sge;
        void                    *rc_recv_buf;
        struct xdr_buf          rc_arg;
+       struct xdr_stream       rc_stream;
        bool                    rc_temp;
        u32                     rc_byte_len;
        unsigned int            rc_page_count;
index 545fe936a0cce14c90f2369810dacbb7ca0505fa..814b73bd2cc7f2dd402669731128377510952208 100644 (file)
@@ -1469,7 +1469,7 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
 );
 
 #define DEFINE_SEGMENT_EVENT(name)                                     \
-               DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\
+               DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\
                                TP_PROTO(                               \
                                        u32 handle,                     \
                                        u32 length,                     \
@@ -1477,8 +1477,9 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
                                ),                                      \
                                TP_ARGS(handle, length, offset))
 
-DEFINE_SEGMENT_EVENT(rseg);
-DEFINE_SEGMENT_EVENT(wseg);
+DEFINE_SEGMENT_EVENT(decode_wseg);
+DEFINE_SEGMENT_EVENT(encode_rseg);
+DEFINE_SEGMENT_EVENT(encode_wseg);
 
 DECLARE_EVENT_CLASS(svcrdma_chunk_event,
        TP_PROTO(
index 71127d898562d12d3b3f2eb77b169f20977a7cc4..bd92ed611b4c7c1150ae674299473da6319f0242 100644 (file)
@@ -358,15 +358,14 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
        arg->len = ctxt->rc_byte_len;
 }
 
-/* This accommodates the largest possible Write chunk,
- * in one segment.
+/* This accommodates the largest possible Write chunk.
  */
-#define MAX_BYTES_WRITE_SEG    ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
+#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
 
 /* This accommodates the largest possible Position-Zero
- * Read chunk or Reply chunk, in one segment.
+ * Read chunk or Reply chunk.
  */
-#define MAX_BYTES_SPECIAL_SEG  ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
+#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
 
 /* Sanity check the Read list.
  *
@@ -374,7 +373,7 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
  * - This implementation supports only one Read chunk.
  *
  * Sanity checks:
- * - Read list does not overflow buffer.
+ * - Read list does not overflow Receive buffer.
  * - Segment size limited by largest NFS data payload.
  *
  * The segment count is limited to how many segments can
@@ -382,30 +381,44 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
  * buffer. That's about 40 Read segments for a 1KB inline
  * threshold.
  *
- * Returns pointer to the following Write list.
+ * Return values:
+ *       %true: Read list is valid. @rctxt's xdr_stream is updated
+ *             to point to the first byte past the Read list.
+ *      %false: Read list is corrupt. @rctxt's xdr_stream is left
+ *             in an unknown state.
  */
-static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
+static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
 {
-       u32 position;
+       u32 position, len;
        bool first;
+       __be32 *p;
+
+       p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+       if (!p)
+               return false;
 
+       len = 0;
        first = true;
-       while (*p++ != xdr_zero) {
+       while (*p != xdr_zero) {
+               p = xdr_inline_decode(&rctxt->rc_stream,
+                                     rpcrdma_readseg_maxsz * sizeof(*p));
+               if (!p)
+                       return false;
+
                if (first) {
-                       position = be32_to_cpup(p++);
+                       position = be32_to_cpup(p);
                        first = false;
-               } else if (be32_to_cpup(p++) != position) {
-                       return NULL;
+               } else if (be32_to_cpup(p) != position) {
+                       return false;
                }
-               p++;    /* handle */
-               if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
-                       return NULL;
-               p += 2; /* offset */
+               p += 2;
+               len += be32_to_cpup(p);
 
-               if (p > end)
-                       return NULL;
+               p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+               if (!p)
+                       return false;
        }
-       return p;
+       return len <= MAX_BYTES_SPECIAL_CHUNK;
 }
 
 /* The segment count is limited to how many segments can
@@ -413,67 +426,93 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
  * buffer. That's about 60 Write segments for a 1KB inline
  * threshold.
  */
-static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,
-                                    u32 maxlen)
+static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
 {
-       u32 i, segcount;
+       u32 i, segcount, total;
+       __be32 *p;
+
+       p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+       if (!p)
+               return false;
+       segcount = be32_to_cpup(p);
 
-       segcount = be32_to_cpup(p++);
+       total = 0;
        for (i = 0; i < segcount; i++) {
-               p++;    /* handle */
-               if (be32_to_cpup(p++) > maxlen)
-                       return NULL;
-               p += 2; /* offset */
+               u32 handle, length;
+               u64 offset;
 
-               if (p > end)
-                       return NULL;
-       }
+               p = xdr_inline_decode(&rctxt->rc_stream,
+                                     rpcrdma_segment_maxsz * sizeof(*p));
+               if (!p)
+                       return false;
+
+               handle = be32_to_cpup(p++);
+               length = be32_to_cpup(p++);
+               xdr_decode_hyper(p, &offset);
+               trace_svcrdma_decode_wseg(handle, length, offset);
 
-       return p;
+               total += length;
+       }
+       return total <= maxlen;
 }
 
 /* Sanity check the Write list.
  *
  * Implementation limits:
- * - This implementation supports only one Write chunk.
+ * - This implementation currently supports only one Write chunk.
  *
  * Sanity checks:
- * - Write list does not overflow buffer.
- * - Segment size limited by largest NFS data payload.
- *
- * Returns pointer to the following Reply chunk.
+ * - Write list does not overflow Receive buffer.
+ * - Chunk size limited by largest NFS data payload.
+ *
+ * Return values:
+ *       %true: Write list is valid. @rctxt's xdr_stream is updated
+ *             to point to the first byte past the Write list.
+ *      %false: Write list is corrupt. @rctxt's xdr_stream is left
+ *             in an unknown state.
  */
-static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
+static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
 {
-       u32 chcount;
+       u32 chcount = 0;
+       __be32 *p;
 
-       chcount = 0;
-       while (*p++ != xdr_zero) {
-               p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
+       p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+       if (!p)
+               return false;
+       while (*p != xdr_zero) {
+               if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
+                       return false;
+               ++chcount;
+               p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
                if (!p)
-                       return NULL;
-               if (chcount++ > 1)
-                       return NULL;
+                       return false;
        }
-       return p;
+       return chcount < 2;
 }
 
 /* Sanity check the Reply chunk.
  *
  * Sanity checks:
- * - Reply chunk does not overflow buffer.
- * - Segment size limited by largest NFS data payload.
- *
- * Returns pointer to the following RPC header.
+ * - Reply chunk does not overflow Receive buffer.
+ * - Chunk size limited by largest NFS data payload.
+ *
+ * Return values:
+ *       %true: Reply chunk is valid. @rctxt's xdr_stream is updated
+ *             to point to the first byte past the Reply chunk.
+ *      %false: Reply chunk is corrupt. @rctxt's xdr_stream is left
+ *             in an unknown state.
  */
-static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
+static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
 {
-       if (*p++ != xdr_zero) {
-               p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
-               if (!p)
-                       return NULL;
-       }
-       return p;
+       __be32 *p;
+
+       p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+       if (!p)
+               return false;
+       if (*p != xdr_zero)
+               if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
+                       return false;
+       return true;
 }
 
 /* RPC-over-RDMA Version One private extension: Remote Invalidation.
@@ -538,60 +577,61 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
        ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
 }
 
-/* On entry, xdr->head[0].iov_base points to first byte in the
- * RPC-over-RDMA header.
+/**
+ * svc_rdma_xdr_decode_req - Decode the transport header
+ * @rq_arg: xdr_buf containing ingress RPC/RDMA message
+ * @rctxt: state of decoding
+ *
+ * On entry, xdr->head[0].iov_base points to first byte of the
+ * RPC-over-RDMA transport header.
  *
  * On successful exit, head[0] points to first byte past the
  * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
+ *
  * The length of the RPC-over-RDMA header is returned.
  *
  * Assumptions:
  * - The transport header is entirely contained in the head iovec.
  */
-static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
+static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
+                                  struct svc_rdma_recv_ctxt *rctxt)
 {
-       __be32 *p, *end, *rdma_argp;
+       __be32 *p, *rdma_argp;
        unsigned int hdr_len;
 
-       /* Verify that there's enough bytes for header + something */
-       if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
-               goto out_short;
-
        rdma_argp = rq_arg->head[0].iov_base;
-       if (*(rdma_argp + 1) != rpcrdma_version)
-               goto out_version;
+       xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL);
 
-       switch (*(rdma_argp + 3)) {
+       p = xdr_inline_decode(&rctxt->rc_stream,
+                             rpcrdma_fixed_maxsz * sizeof(*p));
+       if (unlikely(!p))
+               goto out_short;
+       p++;
+       if (*p != rpcrdma_version)
+               goto out_version;
+       p += 2;
+       switch (*p) {
        case rdma_msg:
                break;
        case rdma_nomsg:
                break;
-
        case rdma_done:
                goto out_drop;
-
        case rdma_error:
                goto out_drop;
-
        default:
                goto out_proc;
        }
 
-       end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
-       p = xdr_check_read_list(rdma_argp + 4, end);
-       if (!p)
+       if (!xdr_check_read_list(rctxt))
                goto out_inval;
-       p = xdr_check_write_list(p, end);
-       if (!p)
-               goto out_inval;
-       p = xdr_check_reply_chunk(p, end);
-       if (!p)
+       if (!xdr_check_write_list(rctxt))
                goto out_inval;
-       if (p > end)
+       if (!xdr_check_reply_chunk(rctxt))
                goto out_inval;
 
-       rq_arg->head[0].iov_base = p;
-       hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
+       rq_arg->head[0].iov_base = rctxt->rc_stream.p;
+       hdr_len = xdr_stream_pos(&rctxt->rc_stream);
        rq_arg->head[0].iov_len -= hdr_len;
        rq_arg->len -= hdr_len;
        trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
@@ -786,7 +826,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
        rqstp->rq_next_page = rqstp->rq_respages;
 
        p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
-       ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
+       ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
        if (ret < 0)
                goto out_err;
        if (ret == 0)