xfs: update i_size after unwritten conversion in dio completion
author Eryu Guan <eguan@redhat.com>
Thu, 21 Sep 2017 18:26:18 +0000 (11:26 -0700)
committer Darrick J. Wong <darrick.wong@oracle.com>
Tue, 26 Sep 2017 17:55:19 +0000 (10:55 -0700)
Since commit d531d91d6990 ("xfs: always use unwritten extents for
direct I/O writes"), XFS allocates unwritten extents for all direct
writes in order to allow appending aio.

But for dio writes that could extend the file size, we update the
in-core inode size first and only then convert the unwritten extents
to real allocations at dio completion time in xfs_dio_write_end_io().
Thus, if the direct writer also takes a shared iolock, a racing direct
read could see the new i_size, find the still-unwritten extents, and
read zeros instead of the actual data.

Fix it by updating the in-core inode size after the unwritten extent
conversion. To do this, introduce a new boolean argument to
xfs_iomap_write_unwritten() that tells it whether to update the
in-core i_size.

Suggested-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/xfs_aops.c
fs/xfs/xfs_file.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.h
fs/xfs/xfs_pnfs.c

index 2917260..f18e593 100644 (file)
@@ -343,7 +343,8 @@ xfs_end_io(
                error = xfs_reflink_end_cow(ip, offset, size);
                break;
        case XFS_IO_UNWRITTEN:
-               error = xfs_iomap_write_unwritten(ip, offset, size);
+               /* writeback should never update isize */
+               error = xfs_iomap_write_unwritten(ip, offset, size, false);
                break;
        default:
                ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
index 350b6d4..309e26c 100644 (file)
@@ -434,7 +434,6 @@ xfs_dio_write_end_io(
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
        loff_t                  offset = iocb->ki_pos;
-       bool                    update_size = false;
        int                     error = 0;
 
        trace_xfs_end_io_direct_write(ip, offset, size);
@@ -445,6 +444,21 @@ xfs_dio_write_end_io(
        if (size <= 0)
                return size;
 
+       if (flags & IOMAP_DIO_COW) {
+               error = xfs_reflink_end_cow(ip, offset, size);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * Unwritten conversion updates the in-core isize after extent
+        * conversion but before updating the on-disk size. Updating isize any
+        * earlier allows a racing dio read to find unwritten extents before
+        * they are converted.
+        */
+       if (flags & IOMAP_DIO_UNWRITTEN)
+               return xfs_iomap_write_unwritten(ip, offset, size, true);
+
        /*
         * We need to update the in-core inode size here so that we don't end up
         * with the on-disk inode size being outside the in-core inode size. We
@@ -459,20 +473,11 @@ xfs_dio_write_end_io(
        spin_lock(&ip->i_flags_lock);
        if (offset + size > i_size_read(inode)) {
                i_size_write(inode, offset + size);
-               update_size = true;
-       }
-       spin_unlock(&ip->i_flags_lock);
-
-       if (flags & IOMAP_DIO_COW) {
-               error = xfs_reflink_end_cow(ip, offset, size);
-               if (error)
-                       return error;
-       }
-
-       if (flags & IOMAP_DIO_UNWRITTEN)
-               error = xfs_iomap_write_unwritten(ip, offset, size);
-       else if (update_size)
+               spin_unlock(&ip->i_flags_lock);
                error = xfs_setfilesize(ip, offset, size);
+       } else {
+               spin_unlock(&ip->i_flags_lock);
+       }
 
        return error;
 }
index a1909bc..f179bdf 100644 (file)
@@ -829,7 +829,8 @@ int
 xfs_iomap_write_unwritten(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
-       xfs_off_t       count)
+       xfs_off_t       count,
+       bool            update_isize)
 {
        xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   offset_fsb;
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
        struct xfs_defer_ops dfops;
+       struct inode    *inode = VFS_I(ip);
        xfs_fsize_t     i_size;
        uint            resblks;
        int             error;
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
                i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
                if (i_size > offset + count)
                        i_size = offset + count;
-
+               if (update_isize && i_size > i_size_read(inode))
+                       i_size_write(inode, i_size);
                i_size = xfs_new_eof(ip, i_size);
                if (i_size) {
                        ip->i_d.di_size = i_size;
index 00db3ec..ee53506 100644 (file)
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
                        struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
                        struct xfs_bmbt_irec *);
-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
                struct xfs_bmbt_irec *);
index 2f2dc3c..4246876 100644 (file)
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
                                        (end - 1) >> PAGE_SHIFT);
                WARN_ON_ONCE(error);
 
-               error = xfs_iomap_write_unwritten(ip, start, length);
+               error = xfs_iomap_write_unwritten(ip, start, length, false);
                if (error)
                        goto out_drop_iolock;
        }