xfs: reserve enough blocks to handle btree splits when remapping
author Darrick J. Wong <darrick.wong@oracle.com>
Wed, 12 Apr 2017 19:26:07 +0000 (12:26 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 7 Jun 2017 10:07:52 +0000 (12:07 +0200)
commit fe0be23e68200573de027de9b8cc2b27e7fce35e upstream.

In xfs_reflink_end_cow, we erroneously reserve only enough blocks to
handle adding 1 extent.  This is problematic if we fragment free space,
have to do CoW, and then have to perform multiple bmap btree expansions.
Furthermore, the BUI recovery routine doesn't reserve /any/ blocks to
handle btree splits, so log recovery fails after our first error causes
the filesystem to go down.

Therefore, refactor the transaction block reservation macros until we
have a macro that works for our deferred (re)mapping activities, and fix
both problems by using that macro.

With 1k blocks we can hit this fairly often in g/187 if the scratch fs
is big enough.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/xfs/libxfs/xfs_trans_space.h
fs/xfs/xfs_bmap_item.c
fs/xfs/xfs_reflink.c

index 7917f6e..d787c67 100644 (file)
 /*
  * Components of space reservations.
  */
+
+/* Worst case number of rmaps that can be held in a block. */
 #define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)    \
                (((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
+
+/* Adding one rmap could split every level up to the top of the tree. */
+#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels)
+
+/* Blocks we might need to add "b" rmaps to a tree. */
+#define XFS_NRMAPADD_SPACE_RES(mp, b)\
+       (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
+         XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
+         XFS_RMAPADD_SPACE_RES(mp))
+
 #define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)    \
                (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
 #define        XFS_EXTENTADD_SPACE_RES(mp,w)   (XFS_BM_MAXLEVELS(mp,w) - 1)
        (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
          XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
          XFS_EXTENTADD_SPACE_RES(mp,w))
+
+/* Blocks we might need to add "b" mappings & rmappings to a file. */
 #define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\
-       (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
-         XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
-         XFS_EXTENTADD_SPACE_RES(mp,w) + \
-        ((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
-         XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
-         (mp)->m_rmap_maxlevels)
+       (XFS_NEXTENTADD_SPACE_RES((mp), (b), (w)) + \
+        XFS_NRMAPADD_SPACE_RES((mp), (b)))
+
 #define        XFS_DAENTER_1B(mp,w)    \
        ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
 #define        XFS_DAENTER_DBS(mp,w)   \
index 9bf57c7..c4b90e7 100644 (file)
@@ -34,6 +34,8 @@
 #include "xfs_bmap.h"
 #include "xfs_icache.h"
 #include "xfs_trace.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
 
 
 kmem_zone_t    *xfs_bui_zone;
@@ -446,7 +448,8 @@ xfs_bui_recover(
                return -EIO;
        }
 
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+                       XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
        if (error)
                return error;
        budp = xfs_trans_get_bud(tp, buip);
index 2252f16..29a75ec 100644 (file)
@@ -736,8 +736,22 @@ xfs_reflink_end_cow(
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
 
-       /* Start a rolling transaction to switch the mappings */
-       resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
+       /*
+        * Start a rolling transaction to switch the mappings.  We're
+        * unlikely ever to have to remap 16T worth of single-block
+        * extents, so just cap the worst case extent count to 2^32-1.
+        * Stick a warning in just in case, and avoid 64-bit division.
+        */
+       BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX);
+       if (end_fsb - offset_fsb > UINT_MAX) {
+               error = -EFSCORRUPTED;
+               xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE);
+               ASSERT(0);
+               goto out;
+       }
+       resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount,
+                       (unsigned int)(end_fsb - offset_fsb),
+                       XFS_DATA_FORK);
        error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
                        resblks, 0, 0, &tp);
        if (error)