xfs: reserve quota for dir expansion when linking/unlinking files
author: Darrick J. Wong <djwong@kernel.org>
Fri, 19 Aug 2022 18:14:24 +0000 (11:14 -0700)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 25 Aug 2022 09:40:47 +0000 (11:40 +0200)
[ Upstream commit 871b9316e7a778ff97bdc34fdb2f2977f616651d ]

XFS does not reserve quota for directory expansion when linking or
unlinking children from a directory.  This means that we don't reject
the expansion with EDQUOT when we're at or near a hard limit, which
means that unprivileged userspace can use link()/unlink() to exceed
quota.

The fix for this is nuanced -- link operations don't always expand the
directory, and we allow a link to proceed with no space reservation if
we don't need to add a block to the directory to handle the addition.
Unlink operations generally do not expand the directory (you'd have to
free a block and then cause a btree split) and we can defer the
directory block freeing if there is no space reservation.

Moreover, there is a further bug in that we do not trigger the blockgc
workers to try to clear space when we're out of quota.

To fix both cases, create a new xfs_trans_alloc_dir function that
allocates the transaction, locks and joins the inodes, and reserves
quota for the directory.  If there isn't sufficient space or quota,
we'll switch the caller to reservationless mode.  This should prevent
quota usage overruns with the least restriction in functionality.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/xfs/xfs_inode.c
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h

index c19f3ca605af6a5403f23b3f1bc6149865e4f346..f4dec7f6c6d0152cd4fe51065d4cbb06221b30dd 100644 (file)
@@ -1223,7 +1223,7 @@ xfs_link(
 {
        xfs_mount_t             *mp = tdp->i_mount;
        xfs_trans_t             *tp;
-       int                     error;
+       int                     error, nospace_error = 0;
        int                     resblks;
 
        trace_xfs_link(tdp, target_name);
@@ -1242,19 +1242,11 @@ xfs_link(
                goto std_return;
 
        resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
-       if (error == -ENOSPC) {
-               resblks = 0;
-               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
-       }
+       error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks,
+                       &tp, &nospace_error);
        if (error)
                goto std_return;
 
-       xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
-
-       xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
-
        error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
                        XFS_IEXT_DIR_MANIP_CNT(mp));
        if (error)
@@ -1312,6 +1304,8 @@ xfs_link(
  error_return:
        xfs_trans_cancel(tp);
  std_return:
+       if (error == -ENOSPC && nospace_error)
+               error = nospace_error;
        return error;
 }
 
@@ -2761,6 +2755,7 @@ xfs_remove(
        xfs_mount_t             *mp = dp->i_mount;
        xfs_trans_t             *tp = NULL;
        int                     is_dir = S_ISDIR(VFS_I(ip)->i_mode);
+       int                     dontcare;
        int                     error = 0;
        uint                    resblks;
 
@@ -2778,31 +2773,24 @@ xfs_remove(
                goto std_return;
 
        /*
-        * We try to get the real space reservation first,
-        * allowing for directory btree deletion(s) implying
-        * possible bmap insert(s).  If we can't get the space
-        * reservation then we use 0 instead, and avoid the bmap
-        * btree insert(s) in the directory code by, if the bmap
-        * insert tries to happen, instead trimming the LAST
-        * block from the directory.
+        * We try to get the real space reservation first, allowing for
+        * directory btree deletion(s) implying possible bmap insert(s).  If we
+        * can't get the space reservation then we use 0 instead, and avoid the
+        * bmap btree insert(s) in the directory code by, if the bmap insert
+        * tries to happen, instead trimming the LAST block from the directory.
+        *
+        * Ignore EDQUOT and ENOSPC being returned via nospace_error because
+        * the directory code can handle a reservationless update and we don't
+        * want to prevent a user from trying to free space by deleting things.
         */
        resblks = XFS_REMOVE_SPACE_RES(mp);
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
-       if (error == -ENOSPC) {
-               resblks = 0;
-               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
-                               &tp);
-       }
+       error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks,
+                       &tp, &dontcare);
        if (error) {
                ASSERT(error != -ENOSPC);
                goto std_return;
        }
 
-       xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
-
-       xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
        /*
         * If we're removing a directory perform some additional validation.
         */
index 67dec11e34c7e5f8b06ddc413774640fe3dbc555..95c183072e7a2bd7e856d0a0413c08b3ed0fbcae 100644 (file)
@@ -1201,3 +1201,89 @@ out_cancel:
        xfs_trans_cancel(tp);
        return error;
 }
+
+/*
+ * Allocate a transaction, lock and join the directory and child inodes to it,
+ * and reserve quota for a directory update.  If there isn't sufficient space,
+ * @dblocks will be set to zero for a reservationless directory update and
+ * @nospace_error will be set to a negative errno describing the space
+ * constraint we hit.
+ *
+ * @dp:            directory inode; the quota reservation is charged to it.
+ * @resv:          log reservation handed through to xfs_trans_alloc().
+ * @ip:            child inode; locked and joined alongside @dp.
+ * @dblocks:       in: number of blocks the caller would like reserved.
+ *                 out (success only): the number actually reserved, which is
+ *                 either the requested value or zero.
+ * @tpp:           out (success only): the allocated transaction.
+ * @nospace_error: out: cleared to zero at the start of every attempt, then
+ *                 set to -ENOSPC or -EDQUOT if we had to fall back to a
+ *                 zero-block reservation.
+ *
+ * Each attempt proceeds as follows:
+ *
+ *  1. Allocate the transaction with *@dblocks blocks.  On -ENOSPC, record the
+ *     error in @nospace_error and allocate again with zero blocks.
+ *  2. Take XFS_ILOCK_EXCL on both inodes and join them to the transaction.
+ *  3. Attach the dquots of both inodes; any failure cancels the transaction.
+ *  4. If a nonzero block reservation was obtained, reserve the same number of
+ *     quota blocks against @dp.  The first time this fails with -EDQUOT or
+ *     -ENOSPC, the transaction is cancelled, xfs_blockgc_free_quota() is
+ *     called on @dp (its return value is not checked) and the whole sequence
+ *     is restarted from step 1.  If it fails the same way a second time, the
+ *     error is recorded in @nospace_error and we continue with a zero-block
+ *     reservation instead of failing.
+ *
+ * Returns 0 on success.  On failure the error from the failing call is
+ * returned, any transaction that was allocated has already been cancelled,
+ * and neither *@tpp nor *@dblocks is written.
+ *
+ * The caller must ensure that the on-disk dquots attached to this inode have
+ * already been allocated and initialized.  The ILOCKs will be dropped when the
+ * transaction is committed or cancelled.
+ */
+int
+xfs_trans_alloc_dir(
+       struct xfs_inode        *dp,
+       struct xfs_trans_res    *resv,
+       struct xfs_inode        *ip,
+       unsigned int            *dblocks,
+       struct xfs_trans        **tpp,
+       int                     *nospace_error)
+{
+       struct xfs_trans        *tp;
+       struct xfs_mount        *mp = ip->i_mount;
+       unsigned int            resblks;
+       bool                    retried = false;
+       int                     error;
+
+retry:
+       *nospace_error = 0;
+       resblks = *dblocks;
+       error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
+       if (error == -ENOSPC) {
+               *nospace_error = error;
+               resblks = 0;
+               error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
+       }
+       if (error)
+               return error;
+
+       xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
+
+       xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+       error = xfs_qm_dqattach_locked(dp, false);
+       if (error) {
+               /* Caller should have allocated the dquots! */
+               ASSERT(error != -ENOENT);
+               goto out_cancel;
+       }
+
+       error = xfs_qm_dqattach_locked(ip, false);
+       if (error) {
+               /* Caller should have allocated the dquots! */
+               ASSERT(error != -ENOENT);
+               goto out_cancel;
+       }
+
+       if (resblks == 0)
+               goto done;
+
+       error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
+       if (error == -EDQUOT || error == -ENOSPC) {
+               if (!retried) {
+                       xfs_trans_cancel(tp);
+                       xfs_blockgc_free_quota(dp, 0);
+                       retried = true;
+                       goto retry;
+               }
+
+               *nospace_error = error;
+               resblks = 0;
+               error = 0;
+       }
+       if (error)
+               goto out_cancel;
+
+done:
+       *tpp = tp;
+       *dblocks = resblks;
+       return 0;
+
+out_cancel:
+       xfs_trans_cancel(tp);
+       return error;
+}
index 50da47f23a0772af77cd36ec1c4f44fd7ba53d91..faba74d4c70269943e668f639c26db8eed31a235 100644 (file)
@@ -265,6 +265,9 @@ int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv,
 int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
                struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
                struct xfs_trans **tpp);
+int xfs_trans_alloc_dir(struct xfs_inode *dp, struct xfs_trans_res *resv,
+               struct xfs_inode *ip, unsigned int *dblocks,
+               struct xfs_trans **tpp, int *nospace_error);
 
 static inline void
 xfs_trans_set_context(