Merge tag 'xfs-6.5-merge-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 5 Jul 2023 21:08:03 +0000 (14:08 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 5 Jul 2023 21:08:03 +0000 (14:08 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 5 Jul 2023 21:08:03 +0000 (14:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 5 Jul 2023 21:08:03 +0000 (14:08 -0700)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c

index ee84835..e9cc481 100644 (file)
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -985,7 +985,7 @@ xfs_ag_shrink_space(
                         goto resv_err;
  
                 err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
-                               true);
+                               XFS_AG_RESV_NONE, true);
                 if (err2)
                         goto resv_err;
  
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c

index c20fe99..3069194 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1536,7 +1536,8 @@ xfs_alloc_ag_vextent_lastblock(
   */
  STATIC int
  xfs_alloc_ag_vextent_near(
-       struct xfs_alloc_arg    *args)
+       struct xfs_alloc_arg    *args,
+       uint32_t                alloc_flags)
  {
         struct xfs_alloc_cur    acur = {};
         int                     error;          /* error code */
@@ -1555,6 +1556,8 @@ xfs_alloc_ag_vextent_near(
         if (args->agbno > args->max_agbno)
                 args->agbno = args->max_agbno;
  
+       /* Retry once quickly if we find busy extents before blocking. */
+       alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
  restart:
         len = 0;
  
@@ -1610,9 +1613,20 @@ restart:
          */
         if (!acur.len) {
                 if (acur.busy) {
+                       /*
+                        * Our only valid extents must have been busy. Flush and
+                        * retry the allocation again. If we get an -EAGAIN
+                        * error, we're being told that a deadlock was avoided
+                        * and the current transaction needs committing before
+                        * the allocation can be retried.
+                        */
                         trace_xfs_alloc_near_busy(args);
-                       xfs_extent_busy_flush(args->mp, args->pag,
-                                             acur.busy_gen);
+                       error = xfs_extent_busy_flush(args->tp, args->pag,
+                                       acur.busy_gen, alloc_flags);
+                       if (error)
+                               goto out;
+
+                       alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
                         goto restart;
                 }
                 trace_xfs_alloc_size_neither(args);
@@ -1635,22 +1649,25 @@ out:
   * and of the form k * prod + mod unless there's nothing that large.
   * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
   */
-STATIC int                             /* error */
+static int
  xfs_alloc_ag_vextent_size(
-       xfs_alloc_arg_t *args)          /* allocation argument structure */
+       struct xfs_alloc_arg    *args,
+       uint32_t                alloc_flags)
  {
-       struct xfs_agf  *agf = args->agbp->b_addr;
-       struct xfs_btree_cur *bno_cur;  /* cursor for bno btree */
-       struct xfs_btree_cur *cnt_cur;  /* cursor for cnt btree */
-       int             error;          /* error result */
-       xfs_agblock_t   fbno;           /* start of found freespace */
-       xfs_extlen_t    flen;           /* length of found freespace */
-       int             i;              /* temp status variable */
-       xfs_agblock_t   rbno;           /* returned block number */
-       xfs_extlen_t    rlen;           /* length of returned extent */
-       bool            busy;
-       unsigned        busy_gen;
+       struct xfs_agf          *agf = args->agbp->b_addr;
+       struct xfs_btree_cur    *bno_cur;
+       struct xfs_btree_cur    *cnt_cur;
+       xfs_agblock_t           fbno;           /* start of found freespace */
+       xfs_extlen_t            flen;           /* length of found freespace */
+       xfs_agblock_t           rbno;           /* returned block number */
+       xfs_extlen_t            rlen;           /* length of returned extent */
+       bool                    busy;
+       unsigned                busy_gen;
+       int                     error;
+       int                     i;
  
+       /* Retry once quickly if we find busy extents before blocking. */
+       alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
  restart:
         /*
          * Allocate and initialize a cursor for the by-size btree.
@@ -1708,19 +1725,25 @@ restart:
                         error = xfs_btree_increment(cnt_cur, 0, &i);
                         if (error)
                                 goto error0;
-                       if (i == 0) {
-                               /*
-                                * Our only valid extents must have been busy.
-                                * Make it unbusy by forcing the log out and
-                                * retrying.
-                                */
-                               xfs_btree_del_cursor(cnt_cur,
-                                                    XFS_BTREE_NOERROR);
-                               trace_xfs_alloc_size_busy(args);
-                               xfs_extent_busy_flush(args->mp,
-                                                       args->pag, busy_gen);
-                               goto restart;
-                       }
+                       if (i)
+                               continue;
+
+                       /*
+                        * Our only valid extents must have been busy. Flush and
+                        * retry the allocation again. If we get an -EAGAIN
+                        * error, we're being told that a deadlock was avoided
+                        * and the current transaction needs committing before
+                        * the allocation can be retried.
+                        */
+                       trace_xfs_alloc_size_busy(args);
+                       error = xfs_extent_busy_flush(args->tp, args->pag,
+                                       busy_gen, alloc_flags);
+                       if (error)
+                               goto error0;
+
+                       alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       goto restart;
                 }
         }
  
@@ -1800,9 +1823,21 @@ restart:
         args->len = rlen;
         if (rlen < args->minlen) {
                 if (busy) {
-                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       /*
+                        * Our only valid extents must have been busy. Flush and
+                        * retry the allocation again. If we get an -EAGAIN
+                        * error, we're being told that a deadlock was avoided
+                        * and the current transaction needs committing before
+                        * the allocation can be retried.
+                        */
                         trace_xfs_alloc_size_busy(args);
-                       xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
+                       error = xfs_extent_busy_flush(args->tp, args->pag,
+                                       busy_gen, alloc_flags);
+                       if (error)
+                               goto error0;
+
+                       alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                         goto restart;
                 }
                 goto out_nominleft;
@@ -2435,23 +2470,25 @@ static int
  xfs_defer_agfl_block(
         struct xfs_trans                *tp,
         xfs_agnumber_t                  agno,
-       xfs_fsblock_t                   agbno,
+       xfs_agblock_t                   agbno,
         struct xfs_owner_info           *oinfo)
  {
         struct xfs_mount                *mp = tp->t_mountp;
         struct xfs_extent_free_item     *xefi;
+       xfs_fsblock_t                   fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
  
         ASSERT(xfs_extfree_item_cache != NULL);
         ASSERT(oinfo != NULL);
  
+       if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
+               return -EFSCORRUPTED;
+
         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
                                GFP_KERNEL | __GFP_NOFAIL);
-       xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
+       xefi->xefi_startblock = fsbno;
         xefi->xefi_blockcount = 1;
         xefi->xefi_owner = oinfo->oi_owner;
-
-       if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
-               return -EFSCORRUPTED;
+       xefi->xefi_agresv = XFS_AG_RESV_AGFL;
  
         trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
  
@@ -2470,6 +2507,7 @@ __xfs_free_extent_later(
         xfs_fsblock_t                   bno,
         xfs_filblks_t                   len,
         const struct xfs_owner_info     *oinfo,
+       enum xfs_ag_resv_type           type,
         bool                            skip_discard)
  {
         struct xfs_extent_free_item     *xefi;
@@ -2490,6 +2528,7 @@ __xfs_free_extent_later(
         ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
  #endif
         ASSERT(xfs_extfree_item_cache != NULL);
+       ASSERT(type != XFS_AG_RESV_AGFL);
  
         if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
                 return -EFSCORRUPTED;
@@ -2498,6 +2537,7 @@ __xfs_free_extent_later(
                                GFP_KERNEL | __GFP_NOFAIL);
         xefi->xefi_startblock = bno;
         xefi->xefi_blockcount = (xfs_extlen_t)len;
+       xefi->xefi_agresv = type;
         if (skip_discard)
                 xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
         if (oinfo) {
@@ -2568,7 +2608,7 @@ out:
  int                    /* error */
  xfs_alloc_fix_freelist(
         struct xfs_alloc_arg    *args,  /* allocation argument structure */
-       int                     flags)  /* XFS_ALLOC_FLAG_... */
+       uint32_t                alloc_flags)
  {
         struct xfs_mount        *mp = args->mp;
         struct xfs_perag        *pag = args->pag;
@@ -2584,7 +2624,7 @@ xfs_alloc_fix_freelist(
         ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
  
         if (!xfs_perag_initialised_agf(pag)) {
-               error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
+               error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
                 if (error) {
                         /* Couldn't lock the AGF so skip this AG. */
                         if (error == -EAGAIN)
@@ -2600,13 +2640,13 @@ xfs_alloc_fix_freelist(
          */
         if (xfs_perag_prefers_metadata(pag) &&
             (args->datatype & XFS_ALLOC_USERDATA) &&
-           (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
-               ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
+           (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
+               ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
                 goto out_agbp_relse;
         }
  
         need = xfs_alloc_min_freelist(mp, pag);
-       if (!xfs_alloc_space_available(args, need, flags |
+       if (!xfs_alloc_space_available(args, need, alloc_flags |
                         XFS_ALLOC_FLAG_CHECK))
                 goto out_agbp_relse;
  
@@ -2615,7 +2655,7 @@ xfs_alloc_fix_freelist(
          * Can fail if we're not blocking on locks, and it's held.
          */
         if (!agbp) {
-               error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
+               error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
                 if (error) {
                         /* Couldn't lock the AGF so skip this AG. */
                         if (error == -EAGAIN)
@@ -2630,7 +2670,7 @@ xfs_alloc_fix_freelist(
  
         /* If there isn't enough total space or single-extent, reject it. */
         need = xfs_alloc_min_freelist(mp, pag);
-       if (!xfs_alloc_space_available(args, need, flags))
+       if (!xfs_alloc_space_available(args, need, alloc_flags))
                 goto out_agbp_relse;
  
  #ifdef DEBUG
@@ -2668,11 +2708,12 @@ xfs_alloc_fix_freelist(
          */
         memset(&targs, 0, sizeof(targs));
         /* struct copy below */
-       if (flags & XFS_ALLOC_FLAG_NORMAP)
+       if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
                 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
         else
                 targs.oinfo = XFS_RMAP_OINFO_AG;
-       while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
+       while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
+                       pag->pagf_flcount > need) {
                 error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
                 if (error)
                         goto out_agbp_relse;
@@ -2700,7 +2741,7 @@ xfs_alloc_fix_freelist(
                 targs.resv = XFS_AG_RESV_AGFL;
  
                 /* Allocate as many blocks as possible at once. */
-               error = xfs_alloc_ag_vextent_size(&targs);
+               error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
                 if (error)
                         goto out_agflbp_relse;
  
@@ -2710,7 +2751,7 @@ xfs_alloc_fix_freelist(
                  * on a completely full ag.
                  */
                 if (targs.agbno == NULLAGBLOCK) {
-                       if (flags & XFS_ALLOC_FLAG_FREEING)
+                       if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
                                 break;
                         goto out_agflbp_relse;
                 }
@@ -2916,6 +2957,47 @@ xfs_alloc_put_freelist(
  }
  
  /*
+ * Check that this AGF/AGI header's sequence number and length match the AG
+ * number and size in fsblocks.
+ */
+xfs_failaddr_t
+xfs_validate_ag_length(
+       struct xfs_buf          *bp,
+       uint32_t                seqno,
+       uint32_t                length)
+{
+       struct xfs_mount        *mp = bp->b_mount;
+       /*
+        * During growfs operations, the perag is not fully initialised,
+        * so we can't use it for any useful checking. growfs ensures we can't
+        * use it by using uncached buffers that don't have the perag attached
+        * so we can detect and avoid this problem.
+        */
+       if (bp->b_pag && seqno != bp->b_pag->pag_agno)
+               return __this_address;
+
+       /*
+        * Only the last AG in the filesystem is allowed to be shorter
+        * than the AG size recorded in the superblock.
+        */
+       if (length != mp->m_sb.sb_agblocks) {
+               /*
+                * During growfs, the new last AG can get here before we
+                * have updated the superblock. Give it a pass on the seqno
+                * check.
+                */
+               if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
+                       return __this_address;
+               if (length < XFS_MIN_AG_BLOCKS)
+                       return __this_address;
+               if (length > mp->m_sb.sb_agblocks)
+                       return __this_address;
+       }
+
+       return NULL;
+}
+
+/*
   * Verify the AGF is consistent.
   *
   * We do not verify the AGFL indexes in the AGF are fully consistent here
@@ -2934,6 +3016,9 @@ xfs_agf_verify(
  {
         struct xfs_mount        *mp = bp->b_mount;
         struct xfs_agf          *agf = bp->b_addr;
+       xfs_failaddr_t          fa;
+       uint32_t                agf_seqno = be32_to_cpu(agf->agf_seqno);
+       uint32_t                agf_length = be32_to_cpu(agf->agf_length);
  
         if (xfs_has_crc(mp)) {
                 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
@@ -2945,18 +3030,26 @@ xfs_agf_verify(
         if (!xfs_verify_magic(bp, agf->agf_magicnum))
                 return __this_address;
  
-       if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
-             be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
-             be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
-             be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
-             be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
+       if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
                 return __this_address;
  
-       if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
+       /*
+        * Both agf_seqno and agf_length need to be validated before any other
+        * block-number-related field in the AGF or AGFL can be checked.
+        */
+       fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
+       if (fa)
+               return fa;
+
+       if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
+               return __this_address;
+       if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
+               return __this_address;
+       if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
                 return __this_address;
  
         if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
-           be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
+           be32_to_cpu(agf->agf_freeblks) > agf_length)
                 return __this_address;
  
         if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
@@ -2967,38 +3060,28 @@ xfs_agf_verify(
                                                 mp->m_alloc_maxlevels)
                 return __this_address;
  
-       if (xfs_has_rmapbt(mp) &&
-           (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
-            be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
-                                               mp->m_rmap_maxlevels))
-               return __this_address;
-
-       if (xfs_has_rmapbt(mp) &&
-           be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
+       if (xfs_has_lazysbcount(mp) &&
+           be32_to_cpu(agf->agf_btreeblks) > agf_length)
                 return __this_address;
  
-       /*
-        * during growfs operations, the perag is not fully initialised,
-        * so we can't use it for any useful checking. growfs ensures we can't
-        * use it by using uncached buffers that don't have the perag attached
-        * so we can detect and avoid this problem.
-        */
-       if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
-               return __this_address;
+       if (xfs_has_rmapbt(mp)) {
+               if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
+                       return __this_address;
  
-       if (xfs_has_lazysbcount(mp) &&
-           be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
-               return __this_address;
+               if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
+                   be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
+                                                       mp->m_rmap_maxlevels)
+                       return __this_address;
+       }
  
-       if (xfs_has_reflink(mp) &&
-           be32_to_cpu(agf->agf_refcount_blocks) >
-           be32_to_cpu(agf->agf_length))
-               return __this_address;
+       if (xfs_has_reflink(mp)) {
+               if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
+                       return __this_address;
  
-       if (xfs_has_reflink(mp) &&
-           (be32_to_cpu(agf->agf_refcount_level) < 1 ||
-            be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
-               return __this_address;
+               if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
+                   be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
+                       return __this_address;
+       }
  
         return NULL;
  }
@@ -3226,7 +3309,7 @@ xfs_alloc_vextent_check_args(
  static int
  xfs_alloc_vextent_prepare_ag(
         struct xfs_alloc_arg    *args,
-       uint32_t                flags)
+       uint32_t                alloc_flags)
  {
         bool                    need_pag = !args->pag;
         int                     error;
@@ -3235,7 +3318,7 @@ xfs_alloc_vextent_prepare_ag(
                 args->pag = xfs_perag_get(args->mp, args->agno);
  
         args->agbp = NULL;
-       error = xfs_alloc_fix_freelist(args, flags);
+       error = xfs_alloc_fix_freelist(args, alloc_flags);
         if (error) {
                 trace_xfs_alloc_vextent_nofix(args);
                 if (need_pag)
@@ -3357,6 +3440,7 @@ xfs_alloc_vextent_this_ag(
  {
         struct xfs_mount        *mp = args->mp;
         xfs_agnumber_t          minimum_agno;
+       uint32_t                alloc_flags = 0;
         int                     error;
  
         ASSERT(args->pag != NULL);
@@ -3375,9 +3459,9 @@ xfs_alloc_vextent_this_ag(
                 return error;
         }
  
-       error = xfs_alloc_vextent_prepare_ag(args, 0);
+       error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
         if (!error && args->agbp)
-               error = xfs_alloc_ag_vextent_size(args);
+               error = xfs_alloc_ag_vextent_size(args, alloc_flags);
  
         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
  }
@@ -3406,20 +3490,20 @@ xfs_alloc_vextent_iterate_ags(
         xfs_agnumber_t          minimum_agno,
         xfs_agnumber_t          start_agno,
         xfs_agblock_t           target_agbno,
-       uint32_t                flags)
+       uint32_t                alloc_flags)
  {
         struct xfs_mount        *mp = args->mp;
         xfs_agnumber_t          restart_agno = minimum_agno;
         xfs_agnumber_t          agno;
         int                     error = 0;
  
-       if (flags & XFS_ALLOC_FLAG_TRYLOCK)
+       if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
                 restart_agno = 0;
  restart:
         for_each_perag_wrap_range(mp, start_agno, restart_agno,
                         mp->m_sb.sb_agcount, agno, args->pag) {
                 args->agno = agno;
-               error = xfs_alloc_vextent_prepare_ag(args, flags);
+               error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
                 if (error)
                         break;
                 if (!args->agbp) {
@@ -3433,10 +3517,10 @@ restart:
                  */
                 if (args->agno == start_agno && target_agbno) {
                         args->agbno = target_agbno;
-                       error = xfs_alloc_ag_vextent_near(args);
+                       error = xfs_alloc_ag_vextent_near(args, alloc_flags);
                 } else {
                         args->agbno = 0;
-                       error = xfs_alloc_ag_vextent_size(args);
+                       error = xfs_alloc_ag_vextent_size(args, alloc_flags);
                 }
                 break;
         }
@@ -3453,8 +3537,8 @@ restart:
          * constraining flags by the caller, drop them and retry the allocation
          * without any constraints being set.
          */
-       if (flags) {
-               flags = 0;
+       if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
+               alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
                 restart_agno = minimum_agno;
                 goto restart;
         }
@@ -3482,6 +3566,7 @@ xfs_alloc_vextent_start_ag(
         xfs_agnumber_t          start_agno;
         xfs_agnumber_t          rotorstep = xfs_rotorstep;
         bool                    bump_rotor = false;
+       uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
         int                     error;
  
         ASSERT(args->pag == NULL);
@@ -3508,7 +3593,7 @@ xfs_alloc_vextent_start_ag(
  
         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
-                       XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK);
+                       XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
  
         if (bump_rotor) {
                 if (args->agno == start_agno)
@@ -3535,6 +3620,7 @@ xfs_alloc_vextent_first_ag(
         struct xfs_mount        *mp = args->mp;
         xfs_agnumber_t          minimum_agno;
         xfs_agnumber_t          start_agno;
+       uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
         int                     error;
  
         ASSERT(args->pag == NULL);
@@ -3553,7 +3639,7 @@ xfs_alloc_vextent_first_ag(
  
         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
-                       XFS_FSB_TO_AGBNO(mp, target), 0);
+                       XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
  }
  
@@ -3606,6 +3692,7 @@ xfs_alloc_vextent_near_bno(
         struct xfs_mount        *mp = args->mp;
         xfs_agnumber_t          minimum_agno;
         bool                    needs_perag = args->pag == NULL;
+       uint32_t                alloc_flags = 0;
         int                     error;
  
         if (!needs_perag)
@@ -3626,9 +3713,9 @@ xfs_alloc_vextent_near_bno(
         if (needs_perag)
                 args->pag = xfs_perag_grab(mp, args->agno);
  
-       error = xfs_alloc_vextent_prepare_ag(args, 0);
+       error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
         if (!error && args->agbp)
-               error = xfs_alloc_ag_vextent_near(args);
+               error = xfs_alloc_ag_vextent_near(args, alloc_flags);
  
         return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
  }
@@ -3756,15 +3843,11 @@ xfs_alloc_query_range(
         xfs_alloc_query_range_fn                fn,
         void                                    *priv)
  {
-       union xfs_btree_irec                    low_brec;
-       union xfs_btree_irec                    high_brec;
-       struct xfs_alloc_query_range_info       query;
+       union xfs_btree_irec                    low_brec = { .a = *low_rec };
+       union xfs_btree_irec                    high_brec = { .a = *high_rec };
+       struct xfs_alloc_query_range_info       query = { .priv = priv, .fn = fn };
  
         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
-       low_brec.a = *low_rec;
-       high_brec.a = *high_rec;
-       query.priv = priv;
-       query.fn = fn;
         return xfs_btree_query_range(cur, &low_brec, &high_brec,
                         xfs_alloc_query_range_helper, &query);
  }
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h

index 85ac470..6bb8d29 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -19,11 +19,12 @@ unsigned int xfs_agfl_size(struct xfs_mount *mp);
  /*
   * Flags for xfs_alloc_fix_freelist.
   */
-#define        XFS_ALLOC_FLAG_TRYLOCK  0x00000001  /* use trylock for buffer locking */
-#define        XFS_ALLOC_FLAG_FREEING  0x00000002  /* indicate caller is freeing extents*/
-#define        XFS_ALLOC_FLAG_NORMAP   0x00000004  /* don't modify the rmapbt */
-#define        XFS_ALLOC_FLAG_NOSHRINK 0x00000008  /* don't shrink the freelist */
-#define        XFS_ALLOC_FLAG_CHECK    0x00000010  /* test only, don't modify args */
+#define        XFS_ALLOC_FLAG_TRYLOCK  (1U << 0)  /* use trylock for buffer locking */
+#define        XFS_ALLOC_FLAG_FREEING  (1U << 1)  /* indicate caller is freeing extents*/
+#define        XFS_ALLOC_FLAG_NORMAP   (1U << 2)  /* don't modify the rmapbt */
+#define        XFS_ALLOC_FLAG_NOSHRINK (1U << 3)  /* don't shrink the freelist */
+#define        XFS_ALLOC_FLAG_CHECK    (1U << 4)  /* test only, don't modify args */
+#define        XFS_ALLOC_FLAG_TRYFLUSH (1U << 5)  /* don't wait in busy extent flush */
  
  /*
   * Argument structure for xfs_alloc routines.
@@ -195,7 +196,7 @@ int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp,
                 struct xfs_buf **bpp);
  int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t,
                         struct xfs_buf *, struct xfs_owner_info *);
-int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
+int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags);
  int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,
                 struct xfs_buf **agbp);
  
@@ -232,7 +233,7 @@ xfs_buf_to_agfl_bno(
  
  int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
                 xfs_filblks_t len, const struct xfs_owner_info *oinfo,
-               bool skip_discard);
+               enum xfs_ag_resv_type type, bool skip_discard);
  
  /*
   * List of extents to be free "later".
@@ -245,6 +246,7 @@ struct xfs_extent_free_item {
         xfs_extlen_t            xefi_blockcount;/* number of blocks in extent */
         struct xfs_perag        *xefi_pag;
         unsigned int            xefi_flags;
+       enum xfs_ag_resv_type   xefi_agresv;
  };
  
  void xfs_extent_free_get_group(struct xfs_mount *mp,
@@ -259,9 +261,10 @@ xfs_free_extent_later(
         struct xfs_trans                *tp,
         xfs_fsblock_t                   bno,
         xfs_filblks_t                   len,
-       const struct xfs_owner_info     *oinfo)
+       const struct xfs_owner_info     *oinfo,
+       enum xfs_ag_resv_type           type)
  {
-       return __xfs_free_extent_later(tp, bno, len, oinfo, false);
+       return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
  }
  
  
@@ -270,4 +273,7 @@ extern struct kmem_cache    *xfs_extfree_item_cache;
  int __init xfs_extfree_intent_init_cache(void);
  void xfs_extfree_intent_destroy_cache(void);
  
+xfs_failaddr_t xfs_validate_ag_length(struct xfs_buf *bp, uint32_t seqno,
+               uint32_t length);
+
  #endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c

index beee51a..2580ae4 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -2293,8 +2293,6 @@ xfs_attr3_leaf_unbalance(
  
         trace_xfs_attr_leaf_unbalance(state->args);
  
-       drop_leaf = drop_blk->bp->b_addr;
-       save_leaf = save_blk->bp->b_addr;
         xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);
         xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);
         entry = xfs_attr3_leaf_entryp(drop_leaf);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index fef3569..30c931b 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(
                 return error;
  
         xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
-       error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+       error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
+                       XFS_AG_RESV_NONE);
         if (error)
                 return error;
  
@@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real(
                 } else {
                         error = __xfs_free_extent_later(tp, del->br_startblock,
                                         del->br_blockcount, NULL,
-                                       (bflags & XFS_BMAPI_NODISCARD) ||
-                                       del->br_state == XFS_EXT_UNWRITTEN);
+                                       XFS_AG_RESV_NONE,
+                                       ((bflags & XFS_BMAPI_NODISCARD) ||
+                                       del->br_state == XFS_EXT_UNWRITTEN));
                         if (error)
                                 goto done;
                 }
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c

index 36564ae..bf3f1b3 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -271,7 +271,8 @@ xfs_bmbt_free_block(
         int                     error;
  
         xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
-       error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
+       error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
+                       XFS_AG_RESV_NONE);
         if (error)
                 return error;
  
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c

index 34600f9..b83e54c 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk(
                 /* not sparse, calculate extent info directly */
                 return xfs_free_extent_later(tp,
                                 XFS_AGB_TO_FSB(mp, agno, sagbno),
-                               M_IGEO(mp)->ialloc_blks,
-                               &XFS_RMAP_OINFO_INODES);
+                               M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
+                               XFS_AG_RESV_NONE);
         }
  
         /* holemask is only 16-bits (fits in an unsigned long) */
@@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk(
                 ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
                 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
                 error = xfs_free_extent_later(tp,
-                               XFS_AGB_TO_FSB(mp, agno, agbno),
-                               contigblk, &XFS_RMAP_OINFO_INODES);
+                               XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
+                               &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
                 if (error)
                         return error;
  
@@ -2486,11 +2486,14 @@ xfs_ialloc_log_agi(
  
  static xfs_failaddr_t
  xfs_agi_verify(
-       struct xfs_buf  *bp)
+       struct xfs_buf          *bp)
  {
-       struct xfs_mount *mp = bp->b_mount;
-       struct xfs_agi  *agi = bp->b_addr;
-       int             i;
+       struct xfs_mount        *mp = bp->b_mount;
+       struct xfs_agi          *agi = bp->b_addr;
+       xfs_failaddr_t          fa;
+       uint32_t                agi_seqno = be32_to_cpu(agi->agi_seqno);
+       uint32_t                agi_length = be32_to_cpu(agi->agi_length);
+       int                     i;
  
         if (xfs_has_crc(mp)) {
                 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
@@ -2507,6 +2510,10 @@ xfs_agi_verify(
         if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
                 return __this_address;
  
+       fa = xfs_validate_ag_length(bp, agi_seqno, agi_length);
+       if (fa)
+               return fa;
+
         if (be32_to_cpu(agi->agi_level) < 1 ||
             be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
                 return __this_address;
@@ -2516,15 +2523,6 @@ xfs_agi_verify(
              be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
                 return __this_address;
  
-       /*
-        * during growfs operations, the perag is not fully initialised,
-        * so we can't use it for any useful checking. growfs ensures we can't
-        * use it by using uncached buffers that don't have the perag attached
-        * so we can detect and avoid this problem.
-        */
-       if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
-               return __this_address;
-
         for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
                 if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
                         continue;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c

index 5a945ae..9258f01 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -160,8 +160,7 @@ __xfs_inobt_free_block(
  
         xfs_inobt_mod_blockcount(cur, -1);
         fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
-       return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
-                       XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+       return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
                         &XFS_RMAP_OINFO_INOBT, resv);
  }
  
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c

index b6e2143..646b3fa 100644 (file)
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents(
                                                 cur->bc_ag.pag->pag_agno,
                                                 tmp.rc_startblock);
                                 error = xfs_free_extent_later(cur->bc_tp, fsbno,
-                                                 tmp.rc_blockcount, NULL);
+                                                 tmp.rc_blockcount, NULL,
+                                                 XFS_AG_RESV_NONE);
                                 if (error)
                                         goto out_error;
                         }
@@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents(
                                         cur->bc_ag.pag->pag_agno,
                                         ext.rc_startblock);
                         error = xfs_free_extent_later(cur->bc_tp, fsbno,
-                                       ext.rc_blockcount, NULL);
+                                       ext.rc_blockcount, NULL,
+                                       XFS_AG_RESV_NONE);
                         if (error)
                                 goto out_error;
                 }
@@ -1919,8 +1921,13 @@ xfs_refcount_recover_cow_leftovers(
         struct xfs_buf                  *agbp;
         struct xfs_refcount_recovery    *rr, *n;
         struct list_head                debris;
-       union xfs_btree_irec            low;
-       union xfs_btree_irec            high;
+       union xfs_btree_irec            low = {
+               .rc.rc_domain           = XFS_REFC_DOMAIN_COW,
+       };
+       union xfs_btree_irec            high = {
+               .rc.rc_domain           = XFS_REFC_DOMAIN_COW,
+               .rc.rc_startblock       = -1U,
+       };
         xfs_fsblock_t                   fsb;
         int                             error;
  
@@ -1951,10 +1958,6 @@ xfs_refcount_recover_cow_leftovers(
         cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
  
         /* Find all the leftover CoW staging extents. */
-       memset(&low, 0, sizeof(low));
-       memset(&high, 0, sizeof(high));
-       low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
-       high.rc.rc_startblock = -1U;
         error = xfs_btree_query_range(cur, &low, &high,
                         xfs_refcount_recover_extent, &debris);
         xfs_btree_del_cursor(cur, error);
@@ -1981,7 +1984,8 @@ xfs_refcount_recover_cow_leftovers(
  
                 /* Free the block. */
                 error = xfs_free_extent_later(tp, fsb,
-                               rr->rr_rrec.rc_blockcount, NULL);
+                               rr->rr_rrec.rc_blockcount, NULL,
+                               XFS_AG_RESV_NONE);
                 if (error)
                         goto out_trans;
  
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c

index d4afc5f..5c3987d 100644 (file)
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -106,19 +106,13 @@ xfs_refcountbt_free_block(
         struct xfs_buf          *agbp = cur->bc_ag.agbp;
         struct xfs_agf          *agf = agbp->b_addr;
         xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
-       int                     error;
  
         trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
                         XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
         be32_add_cpu(&agf->agf_refcount_blocks, -1);
         xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
-       error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
-                       XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+       return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
                         &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
-       if (error)
-               return error;
-
-       return error;
  }
  
  STATIC int
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c

index f4dc23b..fbb0b26 100644 (file)
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2389,14 +2389,10 @@ xfs_rmap_query_range(
         xfs_rmap_query_range_fn                 fn,
         void                                    *priv)
  {
-       union xfs_btree_irec                    low_brec;
-       union xfs_btree_irec                    high_brec;
-       struct xfs_rmap_query_range_info        query;
+       union xfs_btree_irec                    low_brec = { .r = *low_rec };
+       union xfs_btree_irec                    high_brec = { .r = *high_rec };
+       struct xfs_rmap_query_range_info        query = { .priv = priv, .fn = fn };
  
-       low_brec.r = *low_rec;
-       high_brec.r = *high_rec;
-       query.priv = priv;
-       query.fn = fn;
         return xfs_btree_query_range(cur, &low_brec, &high_brec,
                         xfs_rmap_query_range_helper, &query);
  }
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c

index ba0f17b..5e17468 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -412,7 +412,6 @@ xfs_validate_sb_common(
             sbp->sb_inodelog < XFS_DINODE_MIN_LOG                       ||
             sbp->sb_inodelog > XFS_DINODE_MAX_LOG                       ||
             sbp->sb_inodesize != (1 << sbp->sb_inodelog)                ||
-           sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE                    ||
             sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
             XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES       ||
             XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES       ||
@@ -430,6 +429,61 @@ xfs_validate_sb_common(
                 return -EFSCORRUPTED;
         }
  
+       /*
+        * Logs that are too large are not supported at all. Reject them
+        * outright. Logs that are too small are tolerated on v4 filesystems,
+        * but we can only check that when mounting the log. Hence we skip
+        * those checks here.
+        */
+       if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) {
+               xfs_notice(mp,
+               "Log size 0x%x blocks too large, maximum size is 0x%llx blocks",
+                        sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS);
+               return -EFSCORRUPTED;
+       }
+
+       if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) {
+               xfs_warn(mp,
+               "log size 0x%llx bytes too large, maximum size is 0x%llx bytes",
+                        XFS_FSB_TO_B(mp, sbp->sb_logblocks),
+                        XFS_MAX_LOG_BYTES);
+               return -EFSCORRUPTED;
+       }
+
+       /*
+        * Do not allow filesystems with corrupted log sector or stripe units to
+        * be mounted. We cannot safely size the iclogs or write to the log if
+        * the log stripe unit is not valid.
+        */
+       if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) {
+               if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) {
+                       xfs_notice(mp,
+                       "log sector size in bytes/log2 (0x%x/0x%x) must match",
+                               sbp->sb_logsectsize, 1U << sbp->sb_logsectlog);
+                       return -EFSCORRUPTED;
+               }
+       } else if (sbp->sb_logsectsize || sbp->sb_logsectlog) {
+               xfs_notice(mp,
+               "log sector size in bytes/log2 (0x%x/0x%x) are not zero",
+                       sbp->sb_logsectsize, sbp->sb_logsectlog);
+               return -EFSCORRUPTED;
+       }
+
+       if (sbp->sb_logsunit > 1) {
+               if (sbp->sb_logsunit % sbp->sb_blocksize) {
+                       xfs_notice(mp,
+               "log stripe unit 0x%x bytes must be a multiple of block size",
+                               sbp->sb_logsunit);
+                       return -EFSCORRUPTED;
+               }
+               if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) {
+                       xfs_notice(mp,
+               "log stripe unit 0x%x bytes over maximum size (0x%x bytes)",
+                               sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE);
+                       return -EFSCORRUPTED;
+               }
+       }
+
         /* Validate the realtime geometry; stolen from xfs_repair */
         if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
             sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) {
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c

index f3d328e..7c2fdc7 100644 (file)
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -566,20 +566,45 @@ xfs_extent_busy_clear(
  
  /*
   * Flush out all busy extents for this AG.
+ *
+ * If the current transaction is holding busy extents, the caller may not want
+ * to wait for committed busy extents to resolve. If we are being told just to
+ * try a flush or progress has been made since we last skipped a busy extent,
+ * return immediately to allow the caller to try again.
+ *
+ * If we are freeing extents, we might actually be holding the only free extents
+ * in the transaction busy list and the log force won't resolve that situation.
+ * In this case, we must return -EAGAIN to avoid a deadlock by informing the
+ * caller it needs to commit the busy extents it holds before retrying the
+ * extent free operation.
   */
-void
+int
  xfs_extent_busy_flush(
-       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
         struct xfs_perag        *pag,
-       unsigned                busy_gen)
+       unsigned                busy_gen,
+       uint32_t                alloc_flags)
  {
         DEFINE_WAIT             (wait);
         int                     error;
  
-       error = xfs_log_force(mp, XFS_LOG_SYNC);
+       error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
         if (error)
-               return;
+               return error;
+
+       /* Avoid deadlocks on uncommitted busy extents. */
+       if (!list_empty(&tp->t_busy)) {
+               if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH)
+                       return 0;
+
+               if (busy_gen != READ_ONCE(pag->pagb_gen))
+                       return 0;
+
+               if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
+                       return -EAGAIN;
+       }
  
+       /* Wait for committed busy extents to resolve. */
         do {
                 prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
                 if  (busy_gen != READ_ONCE(pag->pagb_gen))
@@ -588,6 +613,7 @@ xfs_extent_busy_flush(
         } while (1);
  
         finish_wait(&pag->pagb_wait, &wait);
+       return 0;
  }
  
  void
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h

index 4a11813..c37bf87 100644 (file)
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -51,9 +51,9 @@ bool
  xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
                 xfs_extlen_t *len, unsigned *busy_gen);
  
-void
-xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag,
-       unsigned busy_gen);
+int
+xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag,
+               unsigned busy_gen, uint32_t alloc_flags);
  
  void
  xfs_extent_busy_wait_all(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c

index f9e36b8..f1a5ecf 100644 (file)
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -337,6 +337,34 @@ xfs_trans_get_efd(
  }
  
  /*
+ * Fill the EFD with all extents from the EFI when we need to roll the
+ * transaction and continue with a new EFI.
+ *
+ * This simply copies all the extents in the EFI to the EFD rather than make
+ * assumptions about which extents in the EFI have already been processed. We
+ * currently keep the xefi list in the same order as the EFI extent list, but
+ * that may not always be the case. Copying everything avoids leaving a landmine
+ * were we fail to cancel all the extents in an EFI if the xefi list is
+ * processed in a different order to the extents in the EFI.
+ */
+static void
+xfs_efd_from_efi(
+       struct xfs_efd_log_item *efdp)
+{
+       struct xfs_efi_log_item *efip = efdp->efd_efip;
+       uint                    i;
+
+       ASSERT(efip->efi_format.efi_nextents > 0);
+       ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents);
+
+       for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+              efdp->efd_format.efd_extents[i] =
+                      efip->efi_format.efi_extents[i];
+       }
+       efdp->efd_next_extent = efip->efi_format.efi_nextents;
+}
+
+/*
   * Free an extent and log it to the EFD. Note that the transaction is marked
   * dirty regardless of whether the extent free succeeds or fails to support the
   * EFI/EFD lifecycle rules.
@@ -365,7 +393,7 @@ xfs_trans_free_extent(
                         agbno, xefi->xefi_blockcount);
  
         error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
-                       xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
+                       xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
                         xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
  
         /*
@@ -378,6 +406,17 @@ xfs_trans_free_extent(
         tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
         set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
  
+       /*
+        * If we need a new transaction to make progress, the caller will log a
+        * new EFI with the current contents. It will also log an EFD to cancel
+        * the existing EFI, and so we need to copy all the unprocessed extents
+        * in this EFI to the EFD so this works correctly.
+        */
+       if (error == -EAGAIN) {
+               xfs_efd_from_efi(efdp);
+               return error;
+       }
+
         next_extent = efdp->efd_next_extent;
         ASSERT(next_extent < efdp->efd_format.efd_nextents);
         extp = &(efdp->efd_format.efd_extents[next_extent]);
@@ -495,6 +534,13 @@ xfs_extent_free_finish_item(
  
         error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi);
  
+       /*
+        * Don't free the XEFI if we need a new transaction to complete
+        * processing of it.
+        */
+       if (error == -EAGAIN)
+               return error;
+
         xfs_extent_free_put_group(xefi);
         kmem_cache_free(xfs_extfree_item_cache, xefi);
         return error;
@@ -620,6 +666,7 @@ xfs_efi_item_recover(
         struct xfs_trans                *tp;
         int                             i;
         int                             error = 0;
+       bool                            requeue_only = false;
  
         /*
          * First check the validity of the extents described by the
@@ -644,6 +691,7 @@ xfs_efi_item_recover(
         for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                 struct xfs_extent_free_item     fake = {
                         .xefi_owner             = XFS_RMAP_OWN_UNKNOWN,
+                       .xefi_agresv            = XFS_AG_RESV_NONE,
                 };
                 struct xfs_extent               *extp;
  
@@ -652,9 +700,28 @@ xfs_efi_item_recover(
                 fake.xefi_startblock = extp->ext_start;
                 fake.xefi_blockcount = extp->ext_len;
  
-               xfs_extent_free_get_group(mp, &fake);
-               error = xfs_trans_free_extent(tp, efdp, &fake);
-               xfs_extent_free_put_group(&fake);
+               if (!requeue_only) {
+                       xfs_extent_free_get_group(mp, &fake);
+                       error = xfs_trans_free_extent(tp, efdp, &fake);
+                       xfs_extent_free_put_group(&fake);
+               }
+
+               /*
+                * If we can't free the extent without potentially deadlocking,
+                * requeue the rest of the extents to a new EFI so that they get
+                * run again later with a new transaction context.
+                */
+               if (error == -EAGAIN || requeue_only) {
+                       error = xfs_free_extent_later(tp, fake.xefi_startblock,
+                                       fake.xefi_blockcount,
+                                       &XFS_RMAP_OINFO_ANY_OWNER,
+                                       fake.xefi_agresv);
+                       if (!error) {
+                               requeue_only = true;
+                               continue;
+                       }
+               }
+
                 if (error == -EFSCORRUPTED)
                         XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
                                         extp, sizeof(*extp));
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c

index 59e7d1a..10403ba 100644 (file)
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -160,9 +160,18 @@ struct xfs_getfsmap_info {
         struct xfs_buf          *agf_bp;        /* AGF, for refcount queries */
         struct xfs_perag        *pag;           /* AG info, if applicable */
         xfs_daddr_t             next_daddr;     /* next daddr we expect */
+       /* daddr of low fsmap key when we're using the rtbitmap */
+       xfs_daddr_t             low_daddr;
         u64                     missing_owner;  /* owner of holes */
         u32                     dev;            /* device id */
-       struct xfs_rmap_irec    low;            /* low rmap key */
+       /*
+        * Low rmap key for the query.  If low.rm_blockcount is nonzero, this
+        * is the second (or later) call to retrieve the recordset in pieces.
+        * xfs_getfsmap_rec_before_start will compare all records retrieved
+        * by the rmapbt query to filter out any records that start before
+        * the last record.
+        */
+       struct xfs_rmap_irec    low;
         struct xfs_rmap_irec    high;           /* high rmap key */
         bool                    last;           /* last extent? */
  };
@@ -237,16 +246,31 @@ xfs_getfsmap_format(
         xfs_fsmap_from_internal(rec, xfm);
  }
  
+static inline bool
+xfs_getfsmap_rec_before_start(
+       struct xfs_getfsmap_info        *info,
+       const struct xfs_rmap_irec      *rec,
+       xfs_daddr_t                     rec_daddr)
+{
+       if (info->low_daddr != -1ULL)
+               return rec_daddr < info->low_daddr;
+       if (info->low.rm_blockcount)
+               return xfs_rmap_compare(rec, &info->low) < 0;
+       return false;
+}
+
  /*
   * Format a reverse mapping for getfsmap, having translated rm_startblock
- * into the appropriate daddr units.
+ * into the appropriate daddr units.  Pass in a nonzero @len_daddr if the
+ * length could be larger than rm_blockcount in struct xfs_rmap_irec.
   */
  STATIC int
  xfs_getfsmap_helper(
         struct xfs_trans                *tp,
         struct xfs_getfsmap_info        *info,
         const struct xfs_rmap_irec      *rec,
-       xfs_daddr_t                     rec_daddr)
+       xfs_daddr_t                     rec_daddr,
+       xfs_daddr_t                     len_daddr)
  {
         struct xfs_fsmap                fmr;
         struct xfs_mount                *mp = tp->t_mountp;
@@ -256,12 +280,15 @@ xfs_getfsmap_helper(
         if (fatal_signal_pending(current))
                 return -EINTR;
  
+       if (len_daddr == 0)
+               len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+
         /*
          * Filter out records that start before our startpoint, if the
          * caller requested that.
          */
-       if (xfs_rmap_compare(rec, &info->low) < 0) {
-               rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+       if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
+               rec_daddr += len_daddr;
                 if (info->next_daddr < rec_daddr)
                         info->next_daddr = rec_daddr;
                 return 0;
@@ -280,7 +307,7 @@ xfs_getfsmap_helper(
  
                 info->head->fmh_entries++;
  
-               rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+               rec_daddr += len_daddr;
                 if (info->next_daddr < rec_daddr)
                         info->next_daddr = rec_daddr;
                 return 0;
@@ -320,7 +347,7 @@ xfs_getfsmap_helper(
         if (error)
                 return error;
         fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
-       fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+       fmr.fmr_length = len_daddr;
         if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
                 fmr.fmr_flags |= FMR_OF_PREALLOC;
         if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
@@ -337,7 +364,7 @@ xfs_getfsmap_helper(
  
         xfs_getfsmap_format(mp, &fmr, info);
  out:
-       rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+       rec_daddr += len_daddr;
         if (info->next_daddr < rec_daddr)
                 info->next_daddr = rec_daddr;
         return 0;
@@ -358,7 +385,7 @@ xfs_getfsmap_datadev_helper(
         fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
         rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
  
-       return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
+       return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);
  }
  
  /* Transform a bnobt irec into a fsmap */
@@ -382,7 +409,7 @@ xfs_getfsmap_datadev_bnobt_helper(
         irec.rm_offset = 0;
         irec.rm_flags = 0;
  
-       return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
+       return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);
  }
  
  /* Set rmap flags based on the getfsmap flags */
@@ -409,31 +436,25 @@ xfs_getfsmap_logdev(
  {
         struct xfs_mount                *mp = tp->t_mountp;
         struct xfs_rmap_irec            rmap;
-       int                             error;
+       xfs_daddr_t                     rec_daddr, len_daddr;
+       xfs_fsblock_t                   start_fsb, end_fsb;
+       uint64_t                        eofs;
  
-       /* Set up search keys */
-       info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
-       info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
-       error = xfs_fsmap_owner_to_rmap(&info->low, keys);
-       if (error)
-               return error;
-       info->low.rm_blockcount = 0;
-       xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+       eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+       if (keys[0].fmr_physical >= eofs)
+               return 0;
+       start_fsb = XFS_BB_TO_FSBT(mp,
+                               keys[0].fmr_physical + keys[0].fmr_length);
+       end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
  
-       error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1);
-       if (error)
-               return error;
-       info->high.rm_startblock = -1U;
-       info->high.rm_owner = ULLONG_MAX;
-       info->high.rm_offset = ULLONG_MAX;
-       info->high.rm_blockcount = 0;
-       info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
-       info->missing_owner = XFS_FMR_OWN_FREE;
+       /* Adjust the low key if we are continuing from where we left off. */
+       if (keys[0].fmr_length > 0)
+               info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
  
-       trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
-       trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
+       trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb);
+       trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb);
  
-       if (keys[0].fmr_physical > 0)
+       if (start_fsb > 0)
                 return 0;
  
         /* Fabricate an rmap entry for the external log device. */
@@ -443,7 +464,9 @@ xfs_getfsmap_logdev(
         rmap.rm_offset = 0;
         rmap.rm_flags = 0;
  
-       return xfs_getfsmap_helper(tp, info, &rmap, 0);
+       rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock);
+       len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount);
+       return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);
  }
  
  #ifdef CONFIG_XFS_RT
@@ -457,72 +480,58 @@ xfs_getfsmap_rtdev_rtbitmap_helper(
  {
         struct xfs_getfsmap_info        *info = priv;
         struct xfs_rmap_irec            irec;
-       xfs_daddr_t                     rec_daddr;
+       xfs_rtblock_t                   rtbno;
+       xfs_daddr_t                     rec_daddr, len_daddr;
+
+       rtbno = rec->ar_startext * mp->m_sb.sb_rextsize;
+       rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
+       irec.rm_startblock = rtbno;
+
+       rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize;
+       len_daddr = XFS_FSB_TO_BB(mp, rtbno);
+       irec.rm_blockcount = rtbno;
  
-       irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize;
-       rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock);
-       irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize;
         irec.rm_owner = XFS_RMAP_OWN_NULL;      /* "free" */
         irec.rm_offset = 0;
         irec.rm_flags = 0;
  
-       return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+       return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);
  }
  
-/* Execute a getfsmap query against the realtime device. */
+/* Execute a getfsmap query against the realtime device rtbitmap. */
  STATIC int
-__xfs_getfsmap_rtdev(
+xfs_getfsmap_rtdev_rtbitmap(
         struct xfs_trans                *tp,
         const struct xfs_fsmap          *keys,
-       int                             (*query_fn)(struct xfs_trans *,
-                                                   struct xfs_getfsmap_info *),
         struct xfs_getfsmap_info        *info)
  {
+
+       struct xfs_rtalloc_rec          alow = { 0 };
+       struct xfs_rtalloc_rec          ahigh = { 0 };
         struct xfs_mount                *mp = tp->t_mountp;
-       xfs_fsblock_t                   start_fsb;
-       xfs_fsblock_t                   end_fsb;
+       xfs_rtblock_t                   start_rtb;
+       xfs_rtblock_t                   end_rtb;
         uint64_t                        eofs;
-       int                             error = 0;
+       int                             error;
  
-       eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+       eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize);
         if (keys[0].fmr_physical >= eofs)
                 return 0;
-       start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
-       end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
+       start_rtb = XFS_BB_TO_FSBT(mp,
+                               keys[0].fmr_physical + keys[0].fmr_length);
+       end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
  
-       /* Set up search keys */
-       info->low.rm_startblock = start_fsb;
-       error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
-       if (error)
-               return error;
-       info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
-       info->low.rm_blockcount = 0;
-       xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
-
-       info->high.rm_startblock = end_fsb;
-       error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
-       if (error)
-               return error;
-       info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
-       info->high.rm_blockcount = 0;
-       xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
-
-       trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
-       trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
+       info->missing_owner = XFS_FMR_OWN_UNKNOWN;
  
-       return query_fn(tp, info);
-}
+       /* Adjust the low key if we are continuing from where we left off. */
+       if (keys[0].fmr_length > 0) {
+               info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
+               if (info->low_daddr >= eofs)
+                       return 0;
+       }
  
-/* Actually query the realtime bitmap. */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap_query(
-       struct xfs_trans                *tp,
-       struct xfs_getfsmap_info        *info)
-{
-       struct xfs_rtalloc_rec          alow = { 0 };
-       struct xfs_rtalloc_rec          ahigh = { 0 };
-       struct xfs_mount                *mp = tp->t_mountp;
-       int                             error;
+       trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb);
+       trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);
  
         xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
  
@@ -530,8 +539,8 @@ xfs_getfsmap_rtdev_rtbitmap_query(
          * Set up query parameters to return free rtextents covering the range
          * we want.
          */
-       alow.ar_startext = info->low.rm_startblock;
-       ahigh.ar_startext = info->high.rm_startblock;
+       alow.ar_startext = start_rtb;
+       ahigh.ar_startext = end_rtb;
         do_div(alow.ar_startext, mp->m_sb.sb_rextsize);
         if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))
                 ahigh.ar_startext++;
@@ -554,18 +563,6 @@ err:
         xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
         return error;
  }
-
-/* Execute a getfsmap query against the realtime device rtbitmap. */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap(
-       struct xfs_trans                *tp,
-       const struct xfs_fsmap          *keys,
-       struct xfs_getfsmap_info        *info)
-{
-       info->missing_owner = XFS_FMR_OWN_UNKNOWN;
-       return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
-                       info);
-}
  #endif /* CONFIG_XFS_RT */
  
  /* Execute a getfsmap query against the regular data device. */
@@ -606,9 +603,27 @@ __xfs_getfsmap_datadev(
         error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
         if (error)
                 return error;
-       info->low.rm_blockcount = 0;
+       info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
         xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
  
+       /* Adjust the low key if we are continuing from where we left off. */
+       if (info->low.rm_blockcount == 0) {
+               /* empty */
+       } else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) ||
+                  (info->low.rm_flags & (XFS_RMAP_ATTR_FORK |
+                                         XFS_RMAP_BMBT_BLOCK |
+                                         XFS_RMAP_UNWRITTEN))) {
+               info->low.rm_startblock += info->low.rm_blockcount;
+               info->low.rm_owner = 0;
+               info->low.rm_offset = 0;
+
+               start_fsb += info->low.rm_blockcount;
+               if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
+                       return 0;
+       } else {
+               info->low.rm_offset += info->low.rm_blockcount;
+       }
+
         info->high.rm_startblock = -1U;
         info->high.rm_owner = ULLONG_MAX;
         info->high.rm_offset = ULLONG_MAX;
@@ -659,12 +674,8 @@ __xfs_getfsmap_datadev(
                  * Set the AG low key to the start of the AG prior to
                  * moving on to the next AG.
                  */
-               if (pag->pag_agno == start_ag) {
-                       info->low.rm_startblock = 0;
-                       info->low.rm_owner = 0;
-                       info->low.rm_offset = 0;
-                       info->low.rm_flags = 0;
-               }
+               if (pag->pag_agno == start_ag)
+                       memset(&info->low, 0, sizeof(info->low));
  
                 /*
                  * If this is the last AG, report any gap at the end of it
@@ -791,6 +802,19 @@ xfs_getfsmap_check_keys(
         struct xfs_fsmap                *low_key,
         struct xfs_fsmap                *high_key)
  {
+       if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
+               if (low_key->fmr_offset)
+                       return false;
+       }
+       if (high_key->fmr_flags != -1U &&
+           (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
+                                   FMR_OF_EXTENT_MAP))) {
+               if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
+                       return false;
+       }
+       if (high_key->fmr_length && high_key->fmr_length != -1ULL)
+               return false;
+
         if (low_key->fmr_device > high_key->fmr_device)
                 return false;
         if (low_key->fmr_device < high_key->fmr_device)
@@ -834,15 +858,15 @@ xfs_getfsmap_check_keys(
   * ----------------
   * There are multiple levels of keys and counters at work here:
   * xfs_fsmap_head.fmh_keys     -- low and high fsmap keys passed in;
- *                                these reflect fs-wide sector addrs.
+ *                                these reflect fs-wide sector addrs.
   * dkeys                       -- fmh_keys used to query each device;
- *                                these are fmh_keys but w/ the low key
- *                                bumped up by fmr_length.
+ *                                these are fmh_keys but w/ the low key
+ *                                bumped up by fmr_length.
   * xfs_getfsmap_info.next_daddr        -- next disk addr we expect to see; this
   *                                is how we detect gaps in the fsmap
                                    records and report them.
   * xfs_getfsmap_info.low/high  -- per-AG low/high keys computed from
- *                                dkeys; used to query the metadata.
+ *                                dkeys; used to query the metadata.
   */
  int
  xfs_getfsmap(
@@ -863,6 +887,8 @@ xfs_getfsmap(
         if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
             !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
                 return -EINVAL;
+       if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
+               return -EINVAL;
  
         use_rmap = xfs_has_rmapbt(mp) &&
                    has_capability_noaudit(current, CAP_SYS_ADMIN);
@@ -901,26 +927,15 @@ xfs_getfsmap(
          * blocks could be mapped to several other files/offsets.
          * According to rmapbt record ordering, the minimal next
          * possible record for the block range is the next starting
-        * offset in the same inode. Therefore, bump the file offset to
-        * continue the search appropriately.  For all other low key
-        * mapping types (attr blocks, metadata), bump the physical
-        * offset as there can be no other mapping for the same physical
-        * block range.
+        * offset in the same inode. Therefore, each fsmap backend bumps
+        * the file offset to continue the search appropriately.  For
+        * all other low key mapping types (attr blocks, metadata), each
+        * fsmap backend bumps the physical offset as there can be no
+        * other mapping for the same physical block range.
          */
         dkeys[0] = head->fmh_keys[0];
-       if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
-               dkeys[0].fmr_physical += dkeys[0].fmr_length;
-               dkeys[0].fmr_owner = 0;
-               if (dkeys[0].fmr_offset)
-                       return -EINVAL;
-       } else
-               dkeys[0].fmr_offset += dkeys[0].fmr_length;
-       dkeys[0].fmr_length = 0;
         memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
  
-       if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1]))
-               return -EINVAL;
-
         info.next_daddr = head->fmh_keys[0].fmr_physical +
                           head->fmh_keys[0].fmr_length;
         info.fsmap_recs = fsmap_recs;
@@ -960,6 +975,8 @@ xfs_getfsmap(
                 info.dev = handlers[i].dev;
                 info.last = false;
                 info.pag = NULL;
+               info.low_daddr = -1ULL;
+               info.low.rm_blockcount = 0;
                 error = handlers[i].fn(tp, dkeys, &info);
                 if (error)
                         break;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index fc61cc0..79004d1 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -639,7 +639,6 @@ xfs_log_mount(
         int             num_bblks)
  {
         struct xlog     *log;
-       bool            fatal = xfs_has_crc(mp);
         int             error = 0;
         int             min_logfsbs;
  
@@ -663,53 +662,37 @@ xfs_log_mount(
         mp->m_log = log;
  
         /*
-        * Validate the given log space and drop a critical message via syslog
-        * if the log size is too small that would lead to some unexpected
-        * situations in transaction log space reservation stage.
+        * Now that we have set up the log and its internal geometry
+        * parameters, we can validate the given log space and drop a critical
+        * message via syslog if the log size is too small. A log that is too
+        * small can lead to unexpected situations in transaction log space
+        * reservation stage. The superblock verifier has already validated all
+        * the other log geometry constraints, so we don't have to check those
+        * here.
          *
-        * Note: we can't just reject the mount if the validation fails.  This
-        * would mean that people would have to downgrade their kernel just to
-        * remedy the situation as there is no way to grow the log (short of
-        * black magic surgery with xfs_db).
+        * Note: For v4 filesystems, we can't just reject the mount if the
+        * validation fails.  This would mean that people would have to
+        * downgrade their kernel just to remedy the situation as there is no
+        * way to grow the log (short of black magic surgery with xfs_db).
          *
-        * We can, however, reject mounts for CRC format filesystems, as the
+        * We can, however, reject mounts for V5 format filesystems, as the
          * mkfs binary being used to make the filesystem should never create a
          * filesystem with a log that is too small.
          */
         min_logfsbs = xfs_log_calc_minimum_size(mp);
-
         if (mp->m_sb.sb_logblocks < min_logfsbs) {
                 xfs_warn(mp,
                 "Log size %d blocks too small, minimum size is %d blocks",
                          mp->m_sb.sb_logblocks, min_logfsbs);
-               error = -EINVAL;
-       } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
-               xfs_warn(mp,
-               "Log size %d blocks too large, maximum size is %lld blocks",
-                        mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
-               error = -EINVAL;
-       } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
-               xfs_warn(mp,
-               "log size %lld bytes too large, maximum size is %lld bytes",
-                        XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
-                        XFS_MAX_LOG_BYTES);
-               error = -EINVAL;
-       } else if (mp->m_sb.sb_logsunit > 1 &&
-                  mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
-               xfs_warn(mp,
-               "log stripe unit %u bytes must be a multiple of block size",
-                        mp->m_sb.sb_logsunit);
-               error = -EINVAL;
-               fatal = true;
-       }
-       if (error) {
+
                 /*
                  * Log check errors are always fatal on v5; or whenever bad
                  * metadata leads to a crash.
                  */
-               if (fatal) {
+               if (xfs_has_crc(mp)) {
                         xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
                         ASSERT(0);
+                       error = -EINVAL;
                         goto out_free_log;
                 }
                 xfs_crit(mp, "Log size out of supported range.");
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c

index c4078d0..4a9bbd3 100644 (file)
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -114,7 +114,8 @@ xfs_dax_notify_ddev_failure(
         int                     error = 0;
         xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, daddr);
         xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(mp, fsbno);
-       xfs_fsblock_t           end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen);
+       xfs_fsblock_t           end_fsbno = XFS_DADDR_TO_FSB(mp,
+                                                            daddr + bblen - 1);
         xfs_agnumber_t          end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
  
         error = xfs_trans_alloc_empty(mp, &tp);
@@ -210,7 +211,7 @@ xfs_dax_notify_failure(
         ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
  
         /* Ignore the range out of filesystem area */
-       if (offset + len < ddev_start)
+       if (offset + len - 1 < ddev_start)
                 return -ENXIO;
         if (offset > ddev_end)
                 return -ENXIO;
@@ -222,8 +223,8 @@ xfs_dax_notify_failure(
                 len -= ddev_start - offset;
                 offset = 0;
         }
-       if (offset + len > ddev_end)
-               len -= ddev_end - offset;
+       if (offset + len - 1 > ddev_end)
+               len = ddev_end - offset + 1;
  
         return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
                         mf_flags);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c

index abcc559..eb91024 100644 (file)
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks(
                                         del.br_blockcount);
  
                         error = xfs_free_extent_later(*tpp, del.br_startblock,
-                                         del.br_blockcount, NULL);
+                                       del.br_blockcount, NULL,
+                                       XFS_AG_RESV_NONE);
                         if (error)
                                 break;
  
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index 4db6692..f3cc204 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3623,6 +3623,31 @@ DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
  DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
  DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
  
+DECLARE_EVENT_CLASS(xfs_fsmap_linear_class,
+       TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno),
+       TP_ARGS(mp, keydev, bno),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(dev_t, keydev)
+               __field(xfs_fsblock_t, bno)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->keydev = new_decode_dev(keydev);
+               __entry->bno = bno;
+       ),
+       TP_printk("dev %d:%d keydev %d:%d bno 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 MAJOR(__entry->keydev), MINOR(__entry->keydev),
+                 __entry->bno)
+)
+#define DEFINE_FSMAP_LINEAR_EVENT(name) \
+DEFINE_EVENT(xfs_fsmap_linear_class, name, \
+       TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \
+       TP_ARGS(mp, keydev, bno))
+DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear);
+DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear);
+
  DECLARE_EVENT_CLASS(xfs_getfsmap_class,
         TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
         TP_ARGS(mp, fsmap),
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c

index 7d4109a..1098452 100644 (file)
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -823,7 +823,7 @@ xfs_trans_ail_update_bulk(
                         trace_xfs_ail_insert(lip, 0, lsn);
                 }
                 lip->li_lsn = lsn;
-               list_add(&lip->li_ail, &tmp);
+               list_add_tail(&lip->li_ail, &tmp);
         }
  
         if (!list_empty(&tmp))
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 5 Jul 2023 21:08:03 +0000 (14:08 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 5 Jul 2023 21:08:03 +0000 (14:08 -0700)
fs/xfs/libxfs/xfs_ag.c		patch \| blob \| history
fs/xfs/libxfs/xfs_alloc.c		patch \| blob \| history
fs/xfs/libxfs/xfs_alloc.h		patch \| blob \| history
fs/xfs/libxfs/xfs_attr_leaf.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_ialloc.c		patch \| blob \| history
fs/xfs/libxfs/xfs_ialloc_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_refcount.c		patch \| blob \| history
fs/xfs/libxfs/xfs_refcount_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_rmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_sb.c		patch \| blob \| history
fs/xfs/xfs_extent_busy.c		patch \| blob \| history
fs/xfs/xfs_extent_busy.h		patch \| blob \| history
fs/xfs/xfs_extfree_item.c		patch \| blob \| history
fs/xfs/xfs_fsmap.c		patch \| blob \| history
fs/xfs/xfs_log.c		patch \| blob \| history
fs/xfs/xfs_notify_failure.c		patch \| blob \| history
fs/xfs/xfs_reflink.c		patch \| blob \| history
fs/xfs/xfs_trace.h		patch \| blob \| history
fs/xfs/xfs_trans_ail.c		patch \| blob \| history