fuse: writepages: crop secondary requests
author: Maxim Patlasov <MPatlasov@parallels.com>
Wed, 2 Oct 2013 17:38:32 +0000 (21:38 +0400)
committer: Miklos Szeredi <mszeredi@suse.cz>
Tue, 5 Nov 2013 09:11:27 +0000 (10:11 +0100)
If writeback happens while fuse is in the FUSE_NOWRITE condition, the request
will be queued but not processed immediately (see fuse_flush_writepages()).
Until FUSE_NOWRITE is relaxed, more writebacks can happen.  They will
be queued as "secondary" requests to that first ("primary") request.

The existing implementation crops only the primary request.  This is not
correct because a subsequent extending write(2) may increase i_size, and then
secondary requests won't be cropped properly.  The result would be stale
data written to the server at a file offset where zeros must be.

A similar problem may happen if secondary requests are attached to an
in-flight request that was already cropped.

The patch solves the issue by cropping all secondary requests in
fuse_writepage_end().  Thanks to Miklos for the idea.

Signed-off-by: Maxim Patlasov <MPatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
fs/fuse/file.c

index 077b038..1cb303e 100644 (file)
@@ -1436,12 +1436,12 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
 }
 
 /* Called under fc->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
+                               loff_t size)
 __releases(fc->lock)
 __acquires(fc->lock)
 {
        struct fuse_inode *fi = get_fuse_inode(req->inode);
-       loff_t size = i_size_read(req->inode);
        struct fuse_write_in *inarg = &req->misc.write.in;
        __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
 
@@ -1482,12 +1482,13 @@ __acquires(fc->lock)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
+       size_t crop = i_size_read(inode);
        struct fuse_req *req;
 
        while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
                req = list_entry(fi->queued_writes.next, struct fuse_req, list);
                list_del_init(&req->list);
-               fuse_send_writepage(fc, req);
+               fuse_send_writepage(fc, req, crop);
        }
 }
 
@@ -1499,12 +1500,37 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
        mapping_set_error(inode->i_mapping, req->out.h.error);
        spin_lock(&fc->lock);
        while (req->misc.write.next) {
+               struct fuse_conn *fc = get_fuse_conn(inode);
+               struct fuse_write_in *inarg = &req->misc.write.in;
                struct fuse_req *next = req->misc.write.next;
                req->misc.write.next = next->misc.write.next;
                next->misc.write.next = NULL;
                list_add(&next->writepages_entry, &fi->writepages);
-               list_add_tail(&next->list, &fi->queued_writes);
-               fuse_flush_writepages(inode);
+
+               /*
+                * Skip fuse_flush_writepages() to make it easy to crop requests
+                * based on primary request size.
+                *
+                * 1st case (trivial): there are no concurrent activities using
+                * fuse_set/release_nowrite.  Then we're on safe side because
+                * fuse_flush_writepages() would call fuse_send_writepage()
+                * anyway.
+                *
+                * 2nd case: someone called fuse_set_nowrite and it is waiting
+                * now for completion of all in-flight requests.  This happens
+                * rarely and no more than once per page, so this should be
+                * okay.
+                *
+                * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
+                * of fuse_set_nowrite..fuse_release_nowrite section.  The fact
+                * that fuse_set_nowrite returned implies that all in-flight
+                * requests were completed along with all of their secondary
+                * requests.  Further primary requests are blocked by negative
+                * writectr.  Hence there cannot be any in-flight requests and
+                * no invocations of fuse_writepage_end() while we're in
+                * fuse_set_nowrite..fuse_release_nowrite section.
+                */
+               fuse_send_writepage(fc, next, inarg->offset + inarg->size);
        }
        fi->writectr--;
        fuse_writepage_finish(fc, req);