xfs: track metadata health status
author: Darrick J. Wong <darrick.wong@oracle.com>
Fri, 12 Apr 2019 14:40:25 +0000 (07:40 -0700)
committer: Darrick J. Wong <darrick.wong@oracle.com>
Mon, 15 Apr 2019 01:15:57 +0000 (18:15 -0700)
Add the necessary in-core metadata fields to keep track of which parts
of the filesystem have been observed and which parts were observed to be
unhealthy, and print a warning at unmount time if we have unfixed
problems.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
fs/xfs/Makefile
fs/xfs/libxfs/xfs_health.h [new file with mode: 0644]
fs/xfs/xfs_health.c [new file with mode: 0644]
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_trace.h

index 7f96bda..786379c 100644 (file)
@@ -73,6 +73,7 @@ xfs-y                         += xfs_aops.o \
                                   xfs_fsmap.o \
                                   xfs_fsops.o \
                                   xfs_globals.o \
+                                  xfs_health.o \
                                   xfs_icache.o \
                                   xfs_ioctl.o \
                                   xfs_iomap.o \
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
new file mode 100644 (file)
index 0000000..9505356
--- /dev/null
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_HEALTH_H__
+#define __XFS_HEALTH_H__
+
+/*
+ * In-Core Filesystem Health Assessments
+ * =====================================
+ *
+ * We'd like to be able to summarize the current health status of the
+ * filesystem so that the administrator knows when it's necessary to schedule
+ * some downtime for repairs.  Until then, we would also like to avoid abrupt
+ * shutdowns due to corrupt metadata.
+ *
+ * The online scrub feature evaluates the health of all filesystem metadata.
+ * When scrub detects corruption in a piece of metadata it will set the
+ * corresponding sickness flag, and repair will clear it if successful.  If
+ * problems remain at unmount time, we can also request manual intervention by
+ * logging a notice to run xfs_repair.
+ *
+ * Each health tracking group uses a pair of fields for reporting.  The
+ * "checked" field tells us if a given piece of metadata has ever been examined,
+ * and the "sick" field tells us if that piece was found to need repairs.
+ * Therefore we can conclude that for a given sick flag value:
+ *
+ *  - checked && sick  => metadata needs repair
+ *  - checked && !sick => metadata is ok
+ *  - !checked         => has not been examined since mount
+ */
+
+struct xfs_mount;
+struct xfs_perag;
+struct xfs_inode;
+
+/* Observable health issues for metadata spanning the entire filesystem. */
+#define XFS_SICK_FS_COUNTERS   (1 << 0)  /* summary counters */
+#define XFS_SICK_FS_UQUOTA     (1 << 1)  /* user quota */
+#define XFS_SICK_FS_GQUOTA     (1 << 2)  /* group quota */
+#define XFS_SICK_FS_PQUOTA     (1 << 3)  /* project quota */
+
+/* Observable health issues for realtime volume metadata. */
+#define XFS_SICK_RT_BITMAP     (1 << 0)  /* realtime bitmap */
+#define XFS_SICK_RT_SUMMARY    (1 << 1)  /* realtime summary */
+
+/* Observable health issues for AG metadata. */
+#define XFS_SICK_AG_SB         (1 << 0)  /* superblock */
+#define XFS_SICK_AG_AGF                (1 << 1)  /* AGF header */
+#define XFS_SICK_AG_AGFL       (1 << 2)  /* AGFL header */
+#define XFS_SICK_AG_AGI                (1 << 3)  /* AGI header */
+#define XFS_SICK_AG_BNOBT      (1 << 4)  /* free space by block */
+#define XFS_SICK_AG_CNTBT      (1 << 5)  /* free space by length */
+#define XFS_SICK_AG_INOBT      (1 << 6)  /* inode index */
+#define XFS_SICK_AG_FINOBT     (1 << 7)  /* free inode index */
+#define XFS_SICK_AG_RMAPBT     (1 << 8)  /* reverse mappings */
+#define XFS_SICK_AG_REFCNTBT   (1 << 9)  /* reference counts */
+
+/* Observable health issues for inode metadata. */
+#define XFS_SICK_INO_CORE      (1 << 0)  /* inode core */
+#define XFS_SICK_INO_BMBTD     (1 << 1)  /* data fork */
+#define XFS_SICK_INO_BMBTA     (1 << 2)  /* attr fork */
+#define XFS_SICK_INO_BMBTC     (1 << 3)  /* cow fork */
+#define XFS_SICK_INO_DIR       (1 << 4)  /* directory */
+#define XFS_SICK_INO_XATTR     (1 << 5)  /* extended attributes */
+#define XFS_SICK_INO_SYMLINK   (1 << 6)  /* symbolic link remote target */
+#define XFS_SICK_INO_PARENT    (1 << 7)  /* parent pointers */
+
+/* Primary evidence of health problems: every flag defined for each group. */
+#define XFS_SICK_FS_PRIMARY    (XFS_SICK_FS_COUNTERS | \
+                                XFS_SICK_FS_UQUOTA | \
+                                XFS_SICK_FS_GQUOTA | \
+                                XFS_SICK_FS_PQUOTA)
+
+#define XFS_SICK_RT_PRIMARY    (XFS_SICK_RT_BITMAP | \
+                                XFS_SICK_RT_SUMMARY)
+
+#define XFS_SICK_AG_PRIMARY    (XFS_SICK_AG_SB | \
+                                XFS_SICK_AG_AGF | \
+                                XFS_SICK_AG_AGFL | \
+                                XFS_SICK_AG_AGI | \
+                                XFS_SICK_AG_BNOBT | \
+                                XFS_SICK_AG_CNTBT | \
+                                XFS_SICK_AG_INOBT | \
+                                XFS_SICK_AG_FINOBT | \
+                                XFS_SICK_AG_RMAPBT | \
+                                XFS_SICK_AG_REFCNTBT)
+
+#define XFS_SICK_INO_PRIMARY   (XFS_SICK_INO_CORE | \
+                                XFS_SICK_INO_BMBTD | \
+                                XFS_SICK_INO_BMBTA | \
+                                XFS_SICK_INO_BMBTC | \
+                                XFS_SICK_INO_DIR | \
+                                XFS_SICK_INO_XATTR | \
+                                XFS_SICK_INO_SYMLINK | \
+                                XFS_SICK_INO_PARENT)
+
+/* These must be provided by the xfs implementation (fs/xfs/xfs_health.c). */
+
+void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask);
+void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask);
+void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
+               unsigned int *checked);
+
+void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask);
+void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask);
+void xfs_rt_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
+               unsigned int *checked);
+
+void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask);
+void xfs_ag_mark_healthy(struct xfs_perag *pag, unsigned int mask);
+void xfs_ag_measure_sickness(struct xfs_perag *pag, unsigned int *sick,
+               unsigned int *checked);
+
+void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask);
+void xfs_inode_mark_healthy(struct xfs_inode *ip, unsigned int mask);
+void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
+               unsigned int *checked);
+
+/* Now some helpers. */
+
+static inline bool
+xfs_fs_has_sickness(struct xfs_mount *mp, unsigned int mask)
+{
+       unsigned int    sick, checked; /* checked is sampled but not used */
+
+       xfs_fs_measure_sickness(mp, &sick, &checked);
+       return sick & mask; /* true if any per-fs bit in @mask is sick */
+}
+
+static inline bool
+xfs_rt_has_sickness(struct xfs_mount *mp, unsigned int mask)
+{
+       unsigned int    sick, checked; /* checked is sampled but not used */
+
+       xfs_rt_measure_sickness(mp, &sick, &checked);
+       return sick & mask; /* true if any rt bit in @mask is sick */
+}
+
+static inline bool
+xfs_ag_has_sickness(struct xfs_perag *pag, unsigned int mask)
+{
+       unsigned int    sick, checked; /* checked is sampled but not used */
+
+       xfs_ag_measure_sickness(pag, &sick, &checked);
+       return sick & mask; /* true if any per-ag bit in @mask is sick */
+}
+
+static inline bool
+xfs_inode_has_sickness(struct xfs_inode *ip, unsigned int mask)
+{
+       unsigned int    sick, checked; /* checked is sampled but not used */
+
+       xfs_inode_measure_sickness(ip, &sick, &checked);
+       return sick & mask; /* true if any inode bit in @mask is sick */
+}
+
+static inline bool
+xfs_fs_is_healthy(struct xfs_mount *mp)
+{
+       return !xfs_fs_has_sickness(mp, -1U); /* -1U: check all bits */
+}
+
+static inline bool
+xfs_rt_is_healthy(struct xfs_mount *mp)
+{
+       return !xfs_rt_has_sickness(mp, -1U); /* -1U: check all bits */
+}
+
+static inline bool
+xfs_ag_is_healthy(struct xfs_perag *pag)
+{
+       return !xfs_ag_has_sickness(pag, -1U); /* -1U: check all bits */
+}
+
+static inline bool
+xfs_inode_is_healthy(struct xfs_inode *ip)
+{
+       return !xfs_inode_has_sickness(ip, -1U); /* -1U: check all bits */
+}
+
+#endif /* __XFS_HEALTH_H__ */
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
new file mode 100644 (file)
index 0000000..941f330
--- /dev/null
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+#include "xfs_health.h"
+
+/* Flag the per-fs metadata in @mask as both sick and checked. */
+void
+xfs_fs_mark_sick(
+       struct xfs_mount        *mp,
+       unsigned int            mask)
+{
+       ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY)); /* known per-fs flags only */
+       trace_xfs_fs_mark_sick(mp, mask);
+
+       spin_lock(&mp->m_sb_lock);
+       mp->m_fs_sick |= mask;
+       mp->m_fs_checked |= mask;
+       spin_unlock(&mp->m_sb_lock);
+}
+
+/* Clear the per-fs sick bits in @mask and record them as checked. */
+void
+xfs_fs_mark_healthy(
+       struct xfs_mount        *mp,
+       unsigned int            mask)
+{
+       ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY)); /* known per-fs flags only */
+       trace_xfs_fs_mark_healthy(mp, mask);
+
+       spin_lock(&mp->m_sb_lock);
+       mp->m_fs_sick &= ~mask;
+       mp->m_fs_checked |= mask;
+       spin_unlock(&mp->m_sb_lock);
+}
+
+/* Snapshot the per-fs sick and checked masks while holding m_sb_lock. */
+void
+xfs_fs_measure_sickness(
+       struct xfs_mount        *mp,
+       unsigned int            *sick,
+       unsigned int            *checked)
+{
+       spin_lock(&mp->m_sb_lock);
+       *sick = mp->m_fs_sick;
+       *checked = mp->m_fs_checked;
+       spin_unlock(&mp->m_sb_lock);
+}
+
+/* Flag the realtime metadata in @mask as both sick and checked. */
+void
+xfs_rt_mark_sick(
+       struct xfs_mount        *mp,
+       unsigned int            mask)
+{
+       ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY)); /* known rt flags only */
+       trace_xfs_rt_mark_sick(mp, mask);
+
+       spin_lock(&mp->m_sb_lock);
+       mp->m_rt_sick |= mask;
+       mp->m_rt_checked |= mask;
+       spin_unlock(&mp->m_sb_lock);
+}
+
+/* Clear the realtime sick bits in @mask and record them as checked. */
+void
+xfs_rt_mark_healthy(
+       struct xfs_mount        *mp,
+       unsigned int            mask)
+{
+       ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY)); /* known rt flags only */
+       trace_xfs_rt_mark_healthy(mp, mask);
+
+       spin_lock(&mp->m_sb_lock);
+       mp->m_rt_sick &= ~mask;
+       mp->m_rt_checked |= mask;
+       spin_unlock(&mp->m_sb_lock);
+}
+
+/* Snapshot the realtime sick and checked masks while holding m_sb_lock. */
+void
+xfs_rt_measure_sickness(
+       struct xfs_mount        *mp,
+       unsigned int            *sick,
+       unsigned int            *checked)
+{
+       spin_lock(&mp->m_sb_lock);
+       *sick = mp->m_rt_sick;
+       *checked = mp->m_rt_checked;
+       spin_unlock(&mp->m_sb_lock);
+}
+
+/* Flag the per-ag metadata in @mask as both sick and checked. */
+void
+xfs_ag_mark_sick(
+       struct xfs_perag        *pag,
+       unsigned int            mask)
+{
+       ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY)); /* known per-ag flags only */
+       trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask);
+
+       spin_lock(&pag->pag_state_lock);
+       pag->pag_sick |= mask;
+       pag->pag_checked |= mask;
+       spin_unlock(&pag->pag_state_lock);
+}
+
+/* Clear the per-ag sick bits in @mask and record them as checked. */
+void
+xfs_ag_mark_healthy(
+       struct xfs_perag        *pag,
+       unsigned int            mask)
+{
+       ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY)); /* known per-ag flags only */
+       trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);
+
+       spin_lock(&pag->pag_state_lock);
+       pag->pag_sick &= ~mask;
+       pag->pag_checked |= mask;
+       spin_unlock(&pag->pag_state_lock);
+}
+
+/* Snapshot the per-ag sick and checked masks under pag_state_lock. */
+void
+xfs_ag_measure_sickness(
+       struct xfs_perag        *pag,
+       unsigned int            *sick,
+       unsigned int            *checked)
+{
+       spin_lock(&pag->pag_state_lock);
+       *sick = pag->pag_sick;
+       *checked = pag->pag_checked;
+       spin_unlock(&pag->pag_state_lock);
+}
+
+/* Flag the inode metadata in @mask as both sick and checked. */
+void
+xfs_inode_mark_sick(
+       struct xfs_inode        *ip,
+       unsigned int            mask)
+{
+       ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY)); /* known inode flags only */
+       trace_xfs_inode_mark_sick(ip, mask);
+
+       spin_lock(&ip->i_flags_lock);
+       ip->i_sick |= mask;
+       ip->i_checked |= mask;
+       spin_unlock(&ip->i_flags_lock);
+}
+
+/* Clear the inode sick bits in @mask and record them as checked. */
+void
+xfs_inode_mark_healthy(
+       struct xfs_inode        *ip,
+       unsigned int            mask)
+{
+       ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY)); /* known inode flags only */
+       trace_xfs_inode_mark_healthy(ip, mask);
+
+       spin_lock(&ip->i_flags_lock);
+       ip->i_sick &= ~mask;
+       ip->i_checked |= mask;
+       spin_unlock(&ip->i_flags_lock);
+}
+
+/* Snapshot the inode's sick and checked masks under i_flags_lock. */
+void
+xfs_inode_measure_sickness(
+       struct xfs_inode        *ip,
+       unsigned int            *sick,
+       unsigned int            *checked)
+{
+       spin_lock(&ip->i_flags_lock);
+       *sick = ip->i_sick;
+       *checked = ip->i_checked;
+       spin_unlock(&ip->i_flags_lock);
+}
index 245483c..f93924d 100644 (file)
@@ -70,6 +70,8 @@ xfs_inode_alloc(
        ip->i_flags = 0;
        ip->i_delayed_blks = 0;
        memset(&ip->i_d, 0, sizeof(ip->i_d));
+       ip->i_sick = 0;
+       ip->i_checked = 0;
 
        return ip;
 }
@@ -446,6 +448,8 @@ xfs_iget_cache_hit(
                ip->i_flags |= XFS_INEW;
                xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
                inode->i_state = I_NEW;
+               ip->i_sick = 0;
+               ip->i_checked = 0;
 
                ASSERT(!rwsem_is_locked(&inode->i_rwsem));
                init_rwsem(&inode->i_rwsem);
index e62074a..7bb1961 100644 (file)
@@ -45,6 +45,14 @@ typedef struct xfs_inode {
        mrlock_t                i_lock;         /* inode lock */
        mrlock_t                i_mmaplock;     /* inode mmap IO lock */
        atomic_t                i_pincount;     /* inode pin count */
+
+       /*
+        * Bitsets of inode metadata that have been checked and/or are sick.
+        * Callers must hold i_flags_lock before accessing these fields.
+        */
+       uint16_t                i_checked;
+       uint16_t                i_sick;
+
        spinlock_t              i_flags_lock;   /* inode i_flags lock */
        /* Miscellaneous state. */
        unsigned long           i_flags;        /* see defined flags below */
index fd63b0b..6581381 100644 (file)
@@ -231,6 +231,7 @@ xfs_initialize_perag(
                error = xfs_iunlink_init(pag);
                if (error)
                        goto out_hash_destroy;
+               spin_lock_init(&pag->pag_state_lock);
        }
 
        index = xfs_set_inode_alloc(mp, agcount);
index 110f927..cf7facc 100644 (file)
@@ -60,6 +60,20 @@ struct xfs_error_cfg {
 typedef struct xfs_mount {
        struct super_block      *m_super;
        xfs_tid_t               m_tid;          /* next unused tid for fs */
+
+       /*
+        * Bitsets of per-fs metadata that have been checked and/or are sick.
+        * Callers must hold m_sb_lock to access these two fields.
+        */
+       uint8_t                 m_fs_checked;
+       uint8_t                 m_fs_sick;
+       /*
+        * Bitsets of rt metadata that have been checked and/or are sick.
+        * Callers must hold m_sb_lock to access these two fields.
+        */
+       uint8_t                 m_rt_checked;
+       uint8_t                 m_rt_sick;
+
        struct xfs_ail          *m_ail;         /* fs active log item list */
 
        struct xfs_sb           m_sb;           /* copy of fs superblock */
@@ -369,6 +383,15 @@ typedef struct xfs_perag {
        xfs_agino_t     pagl_pagino;
        xfs_agino_t     pagl_leftrec;
        xfs_agino_t     pagl_rightrec;
+
+       /*
+        * Bitsets of per-ag metadata that have been checked and/or are sick.
+        * Callers should hold pag_state_lock before accessing these fields.
+        */
+       uint16_t        pag_checked;
+       uint16_t        pag_sick;
+       spinlock_t      pag_state_lock;
+
        spinlock_t      pagb_lock;      /* lock for pagb_tree */
        struct rb_root  pagb_tree;      /* ordered tree of busy extents */
        unsigned int    pagb_gen;       /* generation count for pagb_tree */
index 47fb07d..f079841 100644 (file)
@@ -3440,6 +3440,79 @@ DEFINE_AGINODE_EVENT(xfs_iunlink);
 DEFINE_AGINODE_EVENT(xfs_iunlink_remove);
 DEFINE_AG_EVENT(xfs_iunlink_map_prev_fallback);
 
+DECLARE_EVENT_CLASS(xfs_fs_corrupt_class, /* records dev and flag mask */
+       TP_PROTO(struct xfs_mount *mp, unsigned int flags),
+       TP_ARGS(mp, flags),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d flags 0x%x",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->flags)
+);
+#define DEFINE_FS_CORRUPT_EVENT(name)  \
+DEFINE_EVENT(xfs_fs_corrupt_class, name,       \
+       TP_PROTO(struct xfs_mount *mp, unsigned int flags), \
+       TP_ARGS(mp, flags))
+DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_sick);
+DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_healthy);
+DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_sick);
+DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_healthy);
+
+DECLARE_EVENT_CLASS(xfs_ag_corrupt_class, /* records dev, agno, flag mask */
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int flags),
+       TP_ARGS(mp, agno, flags),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(unsigned int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d agno %u flags 0x%x",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno, __entry->flags)
+);
+#define DEFINE_AG_CORRUPT_EVENT(name)  \
+DEFINE_EVENT(xfs_ag_corrupt_class, name,       \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                unsigned int flags), \
+       TP_ARGS(mp, agno, flags))
+DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_sick);
+DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_healthy);
+
+DECLARE_EVENT_CLASS(xfs_inode_corrupt_class, /* records dev, ino, flag mask */
+       TP_PROTO(struct xfs_inode *ip, unsigned int flags),
+       TP_ARGS(ip, flags),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(unsigned int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = ip->i_mount->m_super->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx flags 0x%x",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino, __entry->flags)
+);
+#define DEFINE_INODE_CORRUPT_EVENT(name)       \
+DEFINE_EVENT(xfs_inode_corrupt_class, name,    \
+       TP_PROTO(struct xfs_inode *ip, unsigned int flags), \
+       TP_ARGS(ip, flags))
+DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
+DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH