xfs: avoid unnecessary runtime sibling pointer endian conversions
authorDave Chinner <dchinner@redhat.com>
Fri, 27 May 2022 00:20:45 +0000 (10:20 +1000)
committerDave Chinner <david@fromorbit.com>
Fri, 27 May 2022 00:20:45 +0000 (10:20 +1000)
Commit dc04db2aa7c9 has caused a small aim7 regression, showing a
small increase in CPU usage in __xfs_btree_check_sblock() as a
result of the extra checking.

This is likely due to the endian conversion of the sibling poitners
being unconditional instead of relying on the compiler to endian
convert the NULL pointer at compile time and avoiding the runtime
conversion for this common case.

Rework the checks so that endian conversion of the sibling pointers
is only done if they are not null as the original code did.

.... and these need to be "inline" because the compiler completely
fails to inline them automatically like it should be doing.

$ size fs/xfs/libxfs/xfs_btree.o*
   text    data     bss     dec     hex filename
  51874     240       0   52114    cb92 fs/xfs/libxfs/xfs_btree.o.orig
  51562     240       0   51802    ca5a fs/xfs/libxfs/xfs_btree.o.inline

Just when you think the tools have advanced sufficiently we don't
have to care about stuff like this anymore, along comes a reminder
that *our tools still suck*.

Fixes: dc04db2aa7c9 ("xfs: detect self referencing btree sibling pointers")
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
fs/xfs/libxfs/xfs_btree.c

index 2aa300f..786ec1c 100644 (file)
@@ -51,16 +51,31 @@ xfs_btree_magic(
        return magic;
 }
 
-static xfs_failaddr_t
+/*
+ * These sibling pointer checks are optimised for null sibling pointers. This
+ * happens a lot, and we don't need to byte swap at runtime if the sibling
+ * pointer is NULL.
+ *
+ * These are explicitly marked at inline because the cost of calling them as
+ * functions instead of inlining them is about 36 bytes extra code per call site
+ * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
+ * two sibling check functions reduces the compiled code size by over 300
+ * bytes.
+ */
+static inline xfs_failaddr_t
 xfs_btree_check_lblock_siblings(
        struct xfs_mount        *mp,
        struct xfs_btree_cur    *cur,
        int                     level,
        xfs_fsblock_t           fsb,
-       xfs_fsblock_t           sibling)
+       __be64                  dsibling)
 {
-       if (sibling == NULLFSBLOCK)
+       xfs_fsblock_t           sibling;
+
+       if (dsibling == cpu_to_be64(NULLFSBLOCK))
                return NULL;
+
+       sibling = be64_to_cpu(dsibling);
        if (sibling == fsb)
                return __this_address;
        if (level >= 0) {
@@ -74,17 +89,21 @@ xfs_btree_check_lblock_siblings(
        return NULL;
 }
 
-static xfs_failaddr_t
+static inline xfs_failaddr_t
 xfs_btree_check_sblock_siblings(
        struct xfs_mount        *mp,
        struct xfs_btree_cur    *cur,
        int                     level,
        xfs_agnumber_t          agno,
        xfs_agblock_t           agbno,
-       xfs_agblock_t           sibling)
+       __be32                  dsibling)
 {
-       if (sibling == NULLAGBLOCK)
+       xfs_agblock_t           sibling;
+
+       if (dsibling == cpu_to_be32(NULLAGBLOCK))
                return NULL;
+
+       sibling = be32_to_cpu(dsibling);
        if (sibling == agbno)
                return __this_address;
        if (level >= 0) {
@@ -136,10 +155,10 @@ __xfs_btree_check_lblock(
                fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
 
        fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
-                       be64_to_cpu(block->bb_u.l.bb_leftsib));
+                       block->bb_u.l.bb_leftsib);
        if (!fa)
                fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
-                               be64_to_cpu(block->bb_u.l.bb_rightsib));
+                               block->bb_u.l.bb_rightsib);
        return fa;
 }
 
@@ -204,10 +223,10 @@ __xfs_btree_check_sblock(
        }
 
        fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno,
-                       be32_to_cpu(block->bb_u.s.bb_leftsib));
+                       block->bb_u.s.bb_leftsib);
        if (!fa)
                fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno,
-                                agbno, be32_to_cpu(block->bb_u.s.bb_rightsib));
+                                agbno, block->bb_u.s.bb_rightsib);
        return fa;
 }
 
@@ -4523,10 +4542,10 @@ xfs_btree_lblock_verify(
        /* sibling pointer verification */
        fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
        fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
-                       be64_to_cpu(block->bb_u.l.bb_leftsib));
+                       block->bb_u.l.bb_leftsib);
        if (!fa)
                fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
-                               be64_to_cpu(block->bb_u.l.bb_rightsib));
+                               block->bb_u.l.bb_rightsib);
        return fa;
 }
 
@@ -4580,10 +4599,10 @@ xfs_btree_sblock_verify(
        agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
        agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
        fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
-                       be32_to_cpu(block->bb_u.s.bb_leftsib));
+                       block->bb_u.s.bb_leftsib);
        if (!fa)
                fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
-                               be32_to_cpu(block->bb_u.s.bb_rightsib));
+                               block->bb_u.s.bb_rightsib);
        return fa;
 }