xfs: scrub should use ECHRNG to signal that the drain is needed
author Darrick J. Wong <djwong@kernel.org>
Wed, 12 Apr 2023 02:00:00 +0000 (19:00 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Wed, 12 Apr 2023 02:00:00 +0000 (19:00 -0700)
In the previous patch, we added jump labels to the intent drain code so
that regular filesystem operations need not pay the price of checking
for someone (scrub) waiting on intents to drain from some part of the
filesystem when that someone isn't running.

However, I observed that xfs/285 now spends a lot more time pushing the
AIL from the inode btree scrubber than it used to.  This is because the
inobt scrubber will try to push the AIL to get logged inode cores
written to the filesystem when it sees a weird discrepancy between the
ondisk inode and the inobt records.  This AIL push is triggered when the
setup function sees TRY_HARDER is set; and the requisite EDEADLOCK
return is initiated when the discrepancy is seen.

The solution to this performance slowdown is to use a different result
code (ECHRNG) for scrub code to signal that it needs to wait for
deferred intent work items to drain out of some part of the filesystem.
When this happens, set a new scrub state flag (XCHK_NEED_DRAIN) so that
setup functions will activate the jump label.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
fs/xfs/scrub/btree.c
fs/xfs/scrub/common.c
fs/xfs/scrub/common.h
fs/xfs/scrub/dabtree.c
fs/xfs/scrub/repair.c
fs/xfs/scrub/scrub.c
fs/xfs/scrub/scrub.h
fs/xfs/scrub/trace.h

index e54c1cf..626282d 100644 (file)
@@ -36,6 +36,7 @@ __xchk_btree_process_error(
 
        switch (*error) {
        case -EDEADLOCK:
+       case -ECHRNG:
                /* Used to restart an op with deadlock avoidance. */
                trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
                break;
index 87649fa..dcfe660 100644 (file)
@@ -75,6 +75,7 @@ __xchk_process_error(
        case 0:
                return true;
        case -EDEADLOCK:
+       case -ECHRNG:
                /* Used to restart an op with deadlock avoidance. */
                trace_xchk_deadlock_retry(
                                sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
@@ -130,6 +131,7 @@ __xchk_fblock_process_error(
        case 0:
                return true;
        case -EDEADLOCK:
+       case -ECHRNG:
                /* Used to restart an op with deadlock avoidance. */
                trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
                break;
@@ -488,7 +490,7 @@ xchk_perag_drain_and_lock(
                }
 
                if (!(sc->flags & XCHK_FSGATES_DRAIN))
-                       return -EDEADLOCK;
+                       return -ECHRNG;
                error = xfs_perag_intent_drain(sa->pag);
                if (error == -ERESTARTSYS)
                        error = -EINTR;
index 4714e8a..83b1a39 100644 (file)
@@ -161,7 +161,7 @@ void xchk_start_reaping(struct xfs_scrub *sc);
  */
 static inline bool xchk_need_intent_drain(struct xfs_scrub *sc)
 {
-       return sc->flags & XCHK_TRY_HARDER;
+       return sc->flags & XCHK_NEED_DRAIN;
 }
 
 void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);
index c392c07..82b150d 100644 (file)
@@ -39,6 +39,7 @@ xchk_da_process_error(
 
        switch (*error) {
        case -EDEADLOCK:
+       case -ECHRNG:
                /* Used to restart an op with deadlock avoidance. */
                trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
                break;
index b800341..ab07583 100644 (file)
@@ -60,6 +60,9 @@ xrep_attempt(
                sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
                sc->flags |= XREP_ALREADY_FIXED;
                return -EAGAIN;
+       case -ECHRNG:
+               sc->flags |= XCHK_NEED_DRAIN;
+               return -EAGAIN;
        case -EDEADLOCK:
                /* Tell the caller to try again having grabbed all the locks. */
                if (!(sc->flags & XCHK_TRY_HARDER)) {
index bd5d435..787a909 100644 (file)
@@ -510,6 +510,8 @@ retry_op:
        error = sc->ops->setup(sc);
        if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
                goto try_harder;
+       if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
+               goto need_drain;
        if (error)
                goto out_teardown;
 
@@ -517,6 +519,8 @@ retry_op:
        error = sc->ops->scrub(sc);
        if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
                goto try_harder;
+       if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
+               goto need_drain;
        if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
                goto out_teardown;
 
@@ -575,6 +579,12 @@ out:
                error = 0;
        }
        return error;
+need_drain:
+       error = xchk_teardown(sc, 0);
+       if (error)
+               goto out_sc;
+       sc->flags |= XCHK_NEED_DRAIN;
+       goto retry_op;
 try_harder:
        /*
         * Scrubbers return -EDEADLOCK to mean 'try harder'.  Tear down
index 4fdb601..d85c3b8 100644 (file)
@@ -98,6 +98,7 @@ struct xfs_scrub {
 #define XCHK_TRY_HARDER                (1 << 0)  /* can't get resources, try again */
 #define XCHK_REAPING_DISABLED  (1 << 1)  /* background block reaping paused */
 #define XCHK_FSGATES_DRAIN     (1 << 2)  /* defer ops draining enabled */
+#define XCHK_NEED_DRAIN                (1 << 3)  /* scrub needs to drain defer ops */
 #define XREP_ALREADY_FIXED     (1 << 31) /* checking our repair work */
 
 /*
index 304c551..68efd6f 100644 (file)
@@ -100,6 +100,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
        { XCHK_TRY_HARDER,                      "try_harder" }, \
        { XCHK_REAPING_DISABLED,                "reaping_disabled" }, \
        { XCHK_FSGATES_DRAIN,                   "fsgates_drain" }, \
+       { XCHK_NEED_DRAIN,                      "need_drain" }, \
        { XREP_ALREADY_FIXED,                   "already_fixed" }
 
 DECLARE_EVENT_CLASS(xchk_class,