xfs: merge xfs_reclaim_inodes_ag into xfs_inode_walk_ag
author: Darrick J. Wong <djwong@kernel.org>
Mon, 31 May 2021 18:32:02 +0000 (11:32 -0700)
committer: Darrick J. Wong <djwong@kernel.org>
Thu, 3 Jun 2021 22:56:04 +0000 (15:56 -0700)
Merge these two inode walk loops together, since they're pretty similar
now.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
fs/xfs/xfs_icache.c
fs/xfs/xfs_icache.h
fs/xfs/xfs_trace.h

index 0c40c39..1223921 100644 (file)
@@ -43,6 +43,7 @@ enum xfs_icwalk_goal {
 
        /* Goals directly associated with tagged inodes. */
        XFS_ICWALK_BLOCKGC      = XFS_ICI_BLOCKGC_TAG,
+       XFS_ICWALK_RECLAIM      = XFS_ICI_RECLAIM_TAG,
 };
 
 #define XFS_ICWALK_NULL_TAG    (-1U)
@@ -67,9 +68,13 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
 #define XFS_ICWALK_FLAG_DROP_GDQUOT    (1U << 30)
 #define XFS_ICWALK_FLAG_DROP_PDQUOT    (1U << 29)
 
+/* Stop scanning after icw_scan_limit inodes. */
+#define XFS_ICWALK_FLAG_SCAN_LIMIT     (1U << 28)
+
 #define XFS_ICWALK_PRIVATE_FLAGS       (XFS_ICWALK_FLAG_DROP_UDQUOT | \
                                         XFS_ICWALK_FLAG_DROP_GDQUOT | \
-                                        XFS_ICWALK_FLAG_DROP_PDQUOT)
+                                        XFS_ICWALK_FLAG_DROP_PDQUOT | \
+                                        XFS_ICWALK_FLAG_SCAN_LIMIT)
 
 /*
  * Allocate and initialise an xfs_inode.
@@ -760,17 +765,6 @@ xfs_icache_inode_is_allocated(
        return 0;
 }
 
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- *
- * XXX: This will be moved closer to xfs_icwalk* once we get rid of the
- * separate reclaim walk functions.
- */
-#define XFS_LOOKUP_BATCH       32
-
 #ifdef CONFIG_XFS_QUOTA
 /* Decide if we want to grab this inode to drop its dquots. */
 static bool
@@ -880,7 +874,7 @@ xfs_dqrele_all_inodes(
  * Return true if we grabbed it, false otherwise.
  */
 static bool
-xfs_reclaim_inode_grab(
+xfs_reclaim_igrab(
        struct xfs_inode        *ip)
 {
        ASSERT(rcu_read_lock_held());
@@ -990,108 +984,13 @@ out:
        xfs_iflags_clear(ip, XFS_IRECLAIM);
 }
 
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- *
- * Returns non-zero if any AGs or inodes were skipped in the reclaim pass
- * so that callers that want to block until all dirty inodes are written back
- * and reclaimed can sanely loop.
- */
-static void
-xfs_reclaim_inodes_ag(
-       struct xfs_mount        *mp,
-       int                     *nr_to_scan)
-{
-       struct xfs_perag        *pag;
-       xfs_agnumber_t          ag = 0;
-
-       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-               unsigned long   first_index = 0;
-               int             done = 0;
-               int             nr_found = 0;
-
-               ag = pag->pag_agno + 1;
-
-               first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
-               do {
-                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-                       int     i;
-
-                       rcu_read_lock();
-                       nr_found = radix_tree_gang_lookup_tag(
-                                       &pag->pag_ici_root,
-                                       (void **)batch, first_index,
-                                       XFS_LOOKUP_BATCH,
-                                       XFS_ICI_RECLAIM_TAG);
-                       if (!nr_found) {
-                               done = 1;
-                               rcu_read_unlock();
-                               break;
-                       }
-
-                       /*
-                        * Grab the inodes before we drop the lock. if we found
-                        * nothing, nr == 0 and the loop will be skipped.
-                        */
-                       for (i = 0; i < nr_found; i++) {
-                               struct xfs_inode *ip = batch[i];
-
-                               if (done || !xfs_reclaim_inode_grab(ip))
-                                       batch[i] = NULL;
-
-                               /*
-                                * Update the index for the next lookup. Catch
-                                * overflows into the next AG range which can
-                                * occur if we have inodes in the last block of
-                                * the AG and we are currently pointing to the
-                                * last inode.
-                                *
-                                * Because we may see inodes that are from the
-                                * wrong AG due to RCU freeing and
-                                * reallocation, only update the index if it
-                                * lies in this AG. It was a race that lead us
-                                * to see this inode, so another lookup from
-                                * the same index will not find it again.
-                                */
-                               if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-                                                               pag->pag_agno)
-                                       continue;
-                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-                               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-                                       done = 1;
-                       }
-
-                       /* unlock now we've grabbed the inodes. */
-                       rcu_read_unlock();
-
-                       for (i = 0; i < nr_found; i++) {
-                               if (batch[i])
-                                       xfs_reclaim_inode(batch[i], pag);
-                       }
-
-                       *nr_to_scan -= XFS_LOOKUP_BATCH;
-                       cond_resched();
-               } while (nr_found && !done && *nr_to_scan > 0);
-
-               if (done)
-                       first_index = 0;
-               WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
-               xfs_perag_put(pag);
-       }
-}
-
 void
 xfs_reclaim_inodes(
        struct xfs_mount        *mp)
 {
-       int             nr_to_scan = INT_MAX;
-
        while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
                xfs_ail_push_all_sync(mp->m_ail);
-               xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+               xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
        }
 }
 
@@ -1107,11 +1006,16 @@ xfs_reclaim_inodes_nr(
        struct xfs_mount        *mp,
        int                     nr_to_scan)
 {
+       struct xfs_eofblocks    eofb = {
+               .eof_flags      = XFS_ICWALK_FLAG_SCAN_LIMIT,
+               .icw_scan_limit = nr_to_scan,
+       };
+
        /* kick background reclaimer and push the AIL */
        xfs_reclaim_work_queue(mp);
        xfs_ail_push_all(mp->m_ail);
 
-       xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+       xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &eofb);
        return 0;
 }
 
@@ -1221,9 +1125,8 @@ xfs_reclaim_worker(
 {
        struct xfs_mount *mp = container_of(to_delayed_work(work),
                                        struct xfs_mount, m_reclaim_work);
-       int             nr_to_scan = INT_MAX;
 
-       xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+       xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
        xfs_reclaim_work_queue(mp);
 }
 
@@ -1694,6 +1597,15 @@ xfs_blockgc_free_quota(
 /* XFS Inode Cache Walking Code */
 
 /*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH       32
+
+
+/*
  * Decide if we want to grab this inode in anticipation of doing work towards
  * the goal.
  */
@@ -1707,6 +1619,8 @@ xfs_icwalk_igrab(
                return xfs_dqrele_igrab(ip);
        case XFS_ICWALK_BLOCKGC:
                return xfs_blockgc_igrab(ip);
+       case XFS_ICWALK_RECLAIM:
+               return xfs_reclaim_igrab(ip);
        default:
                return false;
        }
@@ -1720,6 +1634,7 @@ static inline int
 xfs_icwalk_process_inode(
        enum xfs_icwalk_goal    goal,
        struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
        struct xfs_eofblocks    *eofb)
 {
        int                     error = 0;
@@ -1731,6 +1646,9 @@ xfs_icwalk_process_inode(
        case XFS_ICWALK_BLOCKGC:
                error = xfs_blockgc_scan_inode(ip, eofb);
                break;
+       case XFS_ICWALK_RECLAIM:
+               xfs_reclaim_inode(ip, pag);
+               break;
        }
        return error;
 }
@@ -1755,7 +1673,10 @@ xfs_icwalk_ag(
 restart:
        done = false;
        skipped = 0;
-       first_index = 0;
+       if (goal == XFS_ICWALK_RECLAIM)
+               first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
+       else
+               first_index = 0;
        nr_found = 0;
        do {
                struct xfs_inode *batch[XFS_LOOKUP_BATCH];
@@ -1776,6 +1697,7 @@ restart:
                                        XFS_LOOKUP_BATCH, tag);
 
                if (!nr_found) {
+                       done = true;
                        rcu_read_unlock();
                        break;
                }
@@ -1815,7 +1737,8 @@ restart:
                for (i = 0; i < nr_found; i++) {
                        if (!batch[i])
                                continue;
-                       error = xfs_icwalk_process_inode(goal, batch[i], eofb);
+                       error = xfs_icwalk_process_inode(goal, batch[i], pag,
+                                       eofb);
                        if (error == -EAGAIN) {
                                skipped++;
                                continue;
@@ -1830,8 +1753,19 @@ restart:
 
                cond_resched();
 
+               if (eofb && (eofb->eof_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
+                       eofb->icw_scan_limit -= XFS_LOOKUP_BATCH;
+                       if (eofb->icw_scan_limit <= 0)
+                               break;
+               }
        } while (nr_found && !done);
 
+       if (goal == XFS_ICWALK_RECLAIM) {
+               if (done)
+                       first_index = 0;
+               WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
+       }
+
        if (skipped) {
                delay(1);
                goto restart;
index 3ec00f1..b6ab106 100644 (file)
@@ -15,6 +15,7 @@ struct xfs_eofblocks {
        kgid_t          eof_gid;
        prid_t          eof_prid;
        __u64           eof_min_file_size;
+       int             icw_scan_limit;
 };
 
 /*
index 808ae33..1377b1e 100644 (file)
@@ -3898,6 +3898,7 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
                __field(uint32_t, gid)
                __field(prid_t, prid)
                __field(__u64, min_file_size)
+               __field(int, scan_limit)
                __field(unsigned long, caller_ip)
        ),
        TP_fast_assign(
@@ -3909,15 +3910,17 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
                                                eofb->eof_gid) : 0;
                __entry->prid = eofb ? eofb->eof_prid : 0;
                __entry->min_file_size = eofb ? eofb->eof_min_file_size : 0;
+               __entry->scan_limit = eofb ? eofb->icw_scan_limit : 0;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu caller %pS",
+       TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %d caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->flags,
                  __entry->uid,
                  __entry->gid,
                  __entry->prid,
                  __entry->min_file_size,
+                 __entry->scan_limit,
                  (char *)__entry->caller_ip)
 );
 #define DEFINE_EOFBLOCKS_EVENT(name)   \