nfsd4: allow large readdirs
authorJ. Bruce Fields <bfields@redhat.com>
Mon, 20 Jan 2014 21:37:11 +0000 (16:37 -0500)
committerJ. Bruce Fields <bfields@redhat.com>
Fri, 30 May 2014 21:32:03 +0000 (17:32 -0400)
Currently we limit readdir results to a single page.  This can result in
a performance regression compared to NFSv3 when reading large
directories.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4xdr.c
fs/nfsd/xdr4.h

index 59c3195..d95d901 100644 (file)
@@ -1500,13 +1500,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 
 static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
+       u32 maxcount = svc_max_payload(rqstp);
        u32 rlen = op->u.readdir.rd_maxcount;
 
-       if (rlen > PAGE_SIZE)
-               rlen = PAGE_SIZE;
+       if (rlen > maxcount)
+               rlen = maxcount;
 
-       return (op_encode_hdr_size + op_encode_verifier_maxsz)
-                * sizeof(__be32) + rlen;
+       return (op_encode_hdr_size + op_encode_verifier_maxsz +
+               XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
 static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
index 5e7bac4..4d79e53 100644 (file)
@@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)
 }
 
 static __be32
-nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
-               const char *name, int namlen, __be32 **p, int buflen)
+nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
+                       const char *name, int namlen)
 {
        struct svc_export *exp = cd->rd_fhp->fh_export;
        struct dentry *dentry;
@@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 
        }
 out_encode:
-       nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry,
-                                       cd->rd_bmval,
+       nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
                                        cd->rd_rqstp, ignore_crossmnt);
 out_put:
        dput(dentry);
@@ -2638,9 +2637,12 @@ out_put:
 }
 
 static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
 {
-       if (buflen < 6)
+       __be32 *p;
+
+       p = xdr_reserve_space(xdr, 6);
+       if (!p)
                return NULL;
        *p++ = htonl(2);
        *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 {
        struct readdir_cd *ccd = ccdv;
        struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
-       int buflen;
-       __be32 *p = cd->buffer;
-       __be32 *cookiep;
+       struct xdr_stream *xdr = cd->xdr;
+       int start_offset = xdr->buf->len;
+       int cookie_offset;
+       int entry_bytes;
        __be32 nfserr = nfserr_toosmall;
+       __be64 wire_offset;
+       __be32 *p;
 
        /* In nfsv4, "." and ".." never make it onto the wire.. */
        if (name && isdotent(name, namlen)) {
@@ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
                return 0;
        }
 
-       if (cd->offset)
-               xdr_encode_hyper(cd->offset, (u64) offset);
+       if (cd->cookie_offset) {
+               wire_offset = cpu_to_be64(offset);
+               write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
+                                                       &wire_offset, 8);
+       }
 
-       buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
-       if (buflen < 0)
+       p = xdr_reserve_space(xdr, 4);
+       if (!p)
                goto fail;
-
        *p++ = xdr_one;                             /* mark entry present */
-       cookiep = p;
+       cookie_offset = xdr->buf->len;
+       p = xdr_reserve_space(xdr, 3*4 + namlen);
+       if (!p)
+               goto fail;
        p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
        p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
-       nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
+       nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
        switch (nfserr) {
        case nfs_ok:
                break;
@@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
                 */
                if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
                        goto fail;
-               p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+               p = nfsd4_encode_rdattr_error(xdr, nfserr);
                if (p == NULL) {
                        nfserr = nfserr_toosmall;
                        goto fail;
                }
        }
-       cd->buflen -= (p - cd->buffer);
-       cd->buffer = p;
-       cd->offset = cookiep;
+       nfserr = nfserr_toosmall;
+       entry_bytes = xdr->buf->len - start_offset;
+       if (entry_bytes > cd->rd_maxcount)
+               goto fail;
+       cd->rd_maxcount -= entry_bytes;
+       cd->cookie_offset = cookie_offset;
 skip_entry:
        cd->common.err = nfs_ok;
        return 0;
 fail:
+       xdr_truncate_encode(xdr, start_offset);
        cd->common.err = nfserr;
        return -EINVAL;
 }
@@ -3206,10 +3220,11 @@ static __be32
 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
 {
        int maxcount;
+       int bytes_left;
        loff_t offset;
+       __be64 wire_offset;
        struct xdr_stream *xdr = &resp->xdr;
        int starting_len = xdr->buf->len;
-       __be32 *page, *tailbase;
        __be32 *p;
 
        if (nfserr)
@@ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
        if (!p)
                return nfserr_resource;
 
-       if (resp->xdr.buf->page_len)
-               return nfserr_resource;
-       if (!*resp->rqstp->rq_next_page)
-               return nfserr_resource;
-
        /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
        WRITE32(0);
        WRITE32(0);
        resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
                                - (char *)resp->xdr.buf->head[0].iov_base;
-       tailbase = p;
-
-       maxcount = PAGE_SIZE;
-       if (maxcount > readdir->rd_maxcount)
-               maxcount = readdir->rd_maxcount;
 
        /*
-        * Convert from bytes to words, account for the two words already
-        * written, make sure to leave two words at the end for the next
-        * pointer and eof field.
+        * Number of bytes left for directory entries allowing for the
+        * final 8 bytes of the readdir and a following failed op:
+        */
+       bytes_left = xdr->buf->buflen - xdr->buf->len
+                       - COMPOUND_ERR_SLACK_SPACE - 8;
+       if (bytes_left < 0) {
+               nfserr = nfserr_resource;
+               goto err_no_verf;
+       }
+       maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+       /*
+        * Note the rfc defines rd_maxcount as the size of the
+        * READDIR4resok structure, which includes the verifier above
+        * and the 8 bytes encoded at the end of this function:
         */
-       maxcount = (maxcount >> 2) - 4;
-       if (maxcount < 0) {
-               nfserr =  nfserr_toosmall;
+       if (maxcount < 16) {
+               nfserr = nfserr_toosmall;
                goto err_no_verf;
        }
+       maxcount = min_t(int, maxcount-16, bytes_left);
 
-       page = page_address(*(resp->rqstp->rq_next_page++));
+       readdir->xdr = xdr;
+       readdir->rd_maxcount = maxcount;
        readdir->common.err = 0;
-       readdir->buflen = maxcount;
-       readdir->buffer = page;
-       readdir->offset = NULL;
+       readdir->cookie_offset = 0;
 
        offset = readdir->rd_cookie;
        nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
                              &readdir->common, nfsd4_encode_dirent);
        if (nfserr == nfs_ok &&
            readdir->common.err == nfserr_toosmall &&
-           readdir->buffer == page) 
-               nfserr = nfserr_toosmall;
+           xdr->buf->len == starting_len + 8) {
+               /* nothing encoded; which limit did we hit?: */
+               if (maxcount - 16 < bytes_left)
+                       /* It was the fault of rd_maxcount: */
+                       nfserr = nfserr_toosmall;
+               else
+                       /* We ran out of buffer space: */
+                       nfserr = nfserr_resource;
+       }
        if (nfserr)
                goto err_no_verf;
 
-       if (readdir->offset)
-               xdr_encode_hyper(readdir->offset, offset);
+       if (readdir->cookie_offset) {
+               wire_offset = cpu_to_be64(offset);
+               write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
+                                                       &wire_offset, 8);
+       }
 
-       p = readdir->buffer;
+       p = xdr_reserve_space(xdr, 8);
+       if (!p) {
+               WARN_ON_ONCE(1);
+               goto err_no_verf;
+       }
        *p++ = 0;       /* no more entries */
        *p++ = htonl(readdir->common.err == nfserr_eof);
-       resp->xdr.buf->page_len = ((char *)p) -
-               (char*)page_address(*(resp->rqstp->rq_next_page-1));
-       xdr->buf->len += xdr->buf->page_len;
-
-       xdr->iov = xdr->buf->tail;
-
-       xdr->page_ptr++;
-       xdr->buf->buflen -= PAGE_SIZE;
-       xdr->iov = xdr->buf->tail;
-
-       /* Use rest of head for padding and remaining ops: */
-       resp->xdr.buf->tail[0].iov_base = tailbase;
-       resp->xdr.buf->tail[0].iov_len = 0;
-       resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
-       resp->xdr.end = resp->xdr.p +
-                       (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
 
        return 0;
 err_no_verf:
index 41e5229..18cbb6d 100644 (file)
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
        struct svc_fh * rd_fhp;             /* response */
 
        struct readdir_cd       common;
-       __be32 *                buffer;
-       int                     buflen;
-       __be32 *                offset;
+       struct xdr_stream       *xdr;
+       int                     cookie_offset;
 };
 
 struct nfsd4_release_lockowner {