iomap: complete partial direct I/O writes synchronously
Author:     Andreas Gruenbacher <agruenba@redhat.com>
AuthorDate: Tue, 19 Jun 2018 22:10:55 +0000 (15:10 -0700)
Commit:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CommitDate: Thu, 4 Oct 2018 00:00:53 +0000 (17:00 -0700)
[ Upstream commit ebf00be37de35788cad72f4f20b4a39e30c0be4a ]

According to xfstest generic/240, applications seem to expect direct I/O
writes to either complete as a whole or to fail; short direct I/O writes
are apparently not appreciated.  This means that when only part of an
asynchronous direct I/O write succeeds, we can either fail the entire
write, or we can wait for the partial write to complete and retry the
remaining write as buffered I/O.  The old __blockdev_direct_IO helper
has code for waiting for partial writes to complete; the new
iomap_dio_rw iomap helper does not.
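
As an illustration only (the example_* names below are hypothetical and not part of this patch or of any kernel API), the caller-side pattern this enables looks roughly like this: issue the direct write, and if it comes back short, finish the remaining bytes through the page cache.

	/*
	 * Hypothetical ->write_iter() fragment.  With this patch, a partial
	 * asynchronous direct write is completed synchronously, so the caller
	 * sees a short positive byte count instead of -EIOCBQUEUED and can
	 * retry the rest of the range as buffered I/O.
	 */
	static ssize_t example_file_write_iter(struct kiocb *iocb,
					       struct iov_iter *from)
	{
		ssize_t direct, buffered;

		direct = example_direct_write(iocb, from);  /* ends up in iomap_dio_rw() */
		if (direct < 0 || !iov_iter_count(from))
			return direct;	/* hard error, or everything was written */

		/*
		 * Short direct write: the direct part has already completed,
		 * so the remainder can safely go through the page cache.
		 */
		buffered = example_buffered_write(iocb, from);
		if (buffered < 0)
			return direct ? direct : buffered;
		return direct + buffered;
	}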

The above-mentioned fallback mode is needed for gfs2, which doesn't
allow block allocations under direct I/O to avoid taking cluster-wide
exclusive locks.  As a consequence, an asynchronous direct I/O write to
a file range that contains a hole will result in a short write.  In that
case, wait for the short write to complete to allow gfs2 to recover.
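
Putting the hunks below together, the submission side after this patch can be sketched as follows (a simplified reconstruction, not verbatim source; the real code also polls the block layer for IOCB_HIPRI requests before sleeping):

	/*
	 * dio->wait_for_completion starts out as is_sync_kiocb(iocb) and is
	 * forced to true when the actor returns -ENOTBLK, i.e. when only part
	 * of the write could be mapped and the caller should fall back to
	 * buffered I/O for the rest.
	 */
	if (!atomic_dec_and_test(&dio->ref)) {
		if (!dio->wait_for_completion)
			return -EIOCBQUEUED;	/* pure AIO: completed via the end_io path */

		/* Synchronous or partial write: wait for all bios to finish. */
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (!READ_ONCE(dio->submit.waiter))
				break;
			io_schedule();
		}
		__set_current_state(TASK_RUNNING);
	}

	return iomap_dio_complete(dio);	/* returns the (possibly short) byte count */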

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/fs/iomap.c b/fs/iomap.c
index d4801f8dd4fd55a111e647a810c3e0788829897d..8f7673a692736bb7f5d31977797b653bb9de442d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -693,6 +693,7 @@ struct iomap_dio {
        atomic_t                ref;
        unsigned                flags;
        int                     error;
+       bool                    wait_for_completion;
 
        union {
                /* used during submission and for synchronous completion: */
@@ -793,9 +794,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
                iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
        if (atomic_dec_and_test(&dio->ref)) {
-               if (is_sync_kiocb(dio->iocb)) {
+               if (dio->wait_for_completion) {
                        struct task_struct *waiter = dio->submit.waiter;
-
                        WRITE_ONCE(dio->submit.waiter, NULL);
                        wake_up_process(waiter);
                } else if (dio->flags & IOMAP_DIO_WRITE) {
@@ -980,13 +980,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        dio->end_io = end_io;
        dio->error = 0;
        dio->flags = 0;
+       dio->wait_for_completion = is_sync_kiocb(iocb);
 
        dio->submit.iter = iter;
-       if (is_sync_kiocb(iocb)) {
-               dio->submit.waiter = current;
-               dio->submit.cookie = BLK_QC_T_NONE;
-               dio->submit.last_queue = NULL;
-       }
+       dio->submit.waiter = current;
+       dio->submit.cookie = BLK_QC_T_NONE;
+       dio->submit.last_queue = NULL;
 
        if (iov_iter_rw(iter) == READ) {
                if (pos >= dio->i_size)
@@ -1016,7 +1015,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        WARN_ON_ONCE(ret);
        ret = 0;
 
-       if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+       if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
            !inode->i_sb->s_dio_done_wq) {
                ret = sb_init_dio_done_wq(inode->i_sb);
                if (ret < 0)
@@ -1031,8 +1030,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                                iomap_dio_actor);
                if (ret <= 0) {
                        /* magic error code to fall back to buffered I/O */
-                       if (ret == -ENOTBLK)
+                       if (ret == -ENOTBLK) {
+                               dio->wait_for_completion = true;
                                ret = 0;
+                       }
                        break;
                }
                pos += ret;
@@ -1046,7 +1047,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                iomap_dio_set_error(dio, ret);
 
        if (!atomic_dec_and_test(&dio->ref)) {
-               if (!is_sync_kiocb(iocb))
+               if (!dio->wait_for_completion)
                        return -EIOCBQUEUED;
 
                for (;;) {