xfs: rework insert range into an atomic operation
author Brian Foster <bfoster@redhat.com>
Wed, 26 Feb 2020 17:43:16 +0000 (09:43 -0800)
committer Darrick J. Wong <darrick.wong@oracle.com>
Tue, 3 Mar 2020 04:55:51 +0000 (20:55 -0800)
The insert range operation uses a unique transaction and ilock cycle
for the extent split and each extent shift iteration of the overall
operation. While this works, it risks racing with other
operations in subtle ways such as COW writeback modifying an extent
tree in the middle of a shift operation.

To avoid this problem, make insert range atomic with respect to
ilock. Hold the ilock across the entire operation, replace the
individual transactions with a single rolling transaction sequence
and relog the inode to keep it moving in the log. This guarantees
that nothing else can change the extent mapping of an inode while
an insert range operation is in progress.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Allison Collins <allison.henderson@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/xfs_bmap_util.c

index 4a9c037..f6787e8 100644 (file)
@@ -1146,47 +1146,41 @@ xfs_insert_file_space(
        if (error)
                return error;
 
-       /*
-        * The extent shifting code works on extent granularity. So, if stop_fsb
-        * is not the starting block of extent, we need to split the extent at
-        * stop_fsb.
-        */
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
                        XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
        if (error)
                return error;
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, 0);
 
+       /*
+        * The extent shifting code works on extent granularity. So, if stop_fsb
+        * is not the starting block of extent, we need to split the extent at
+        * stop_fsb.
+        */
        error = xfs_bmap_split_extent(tp, ip, stop_fsb);
        if (error)
                goto out_trans_cancel;
 
-       error = xfs_trans_commit(tp);
-       if (error)
-               return error;
-
-       while (!error && !done) {
-               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0,
-                                       &tp);
+       do {
+               error = xfs_trans_roll_inode(&tp, ip);
                if (error)
-                       break;
+                       goto out_trans_cancel;
 
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-               xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
                error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
                                &done, stop_fsb);
                if (error)
                        goto out_trans_cancel;
+       } while (!done);
 
-               error = xfs_trans_commit(tp);
-       }
-
+       error = xfs_trans_commit(tp);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
 
 out_trans_cancel:
        xfs_trans_cancel(tp);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
 }