xfs: log item flags are racy
authorDave Chinner <dchinner@redhat.com>
Wed, 9 May 2018 14:47:34 +0000 (07:47 -0700)
committerDarrick J. Wong <darrick.wong@oracle.com>
Thu, 10 May 2018 15:56:41 +0000 (08:56 -0700)
The log item flags contain a field that is protected by the AIL
lock - the XFS_LI_IN_AIL flag. We use non-atomic RMW operations to
set and clear these flags, but most of the updates and checks are
not done with the AIL lock held and so are susceptible to update
races.

Fix this by changing the log item flags to use atomic bitops rather
than be reliant on the AIL lock for update serialisation.

Signed-Off-By: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
19 files changed:
fs/xfs/xfs_bmap_item.c
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot_item.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icreate_item.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_log.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_refcount_item.c
fs/xfs/xfs_rmap_item.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_priv.h

index 2203465..618bb71 100644 (file)
@@ -160,7 +160,7 @@ STATIC void
 xfs_bui_item_unlock(
        struct xfs_log_item     *lip)
 {
-       if (lip->li_flags & XFS_LI_ABORTED)
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
                xfs_bui_release(BUI_ITEM(lip));
 }
 
@@ -305,7 +305,7 @@ xfs_bud_item_unlock(
 {
        struct xfs_bud_log_item *budp = BUD_ITEM(lip);
 
-       if (lip->li_flags & XFS_LI_ABORTED) {
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
                xfs_bui_release(budp->bud_buip);
                kmem_zone_free(xfs_bud_zone, budp);
        }
index 82ad270..df62082 100644 (file)
@@ -568,13 +568,15 @@ xfs_buf_item_unlock(
 {
        struct xfs_buf_log_item *bip = BUF_ITEM(lip);
        struct xfs_buf          *bp = bip->bli_buf;
-       bool                    aborted = !!(lip->li_flags & XFS_LI_ABORTED);
+       bool                    aborted;
        bool                    hold = !!(bip->bli_flags & XFS_BLI_HOLD);
        bool                    dirty = !!(bip->bli_flags & XFS_BLI_DIRTY);
 #if defined(DEBUG) || defined(XFS_WARN)
        bool                    ordered = !!(bip->bli_flags & XFS_BLI_ORDERED);
 #endif
 
+       aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags);
+
        /* Clear the buffer's association with this transaction. */
        bp->b_transp = NULL;
 
index d0880c1..4ca9c39 100644 (file)
@@ -913,9 +913,9 @@ xfs_qm_dqflush_done(
         * since it's cheaper, and then we recheck while
         * holding the lock before removing the dquot from the AIL.
         */
-       if ((lip->li_flags & XFS_LI_IN_AIL) &&
+       if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
            ((lip->li_lsn == qip->qli_flush_lsn) ||
-            (lip->li_flags & XFS_LI_FAILED))) {
+            test_bit(XFS_LI_FAILED, &lip->li_flags))) {
 
                /* xfs_trans_ail_delete() drops the AIL lock. */
                spin_lock(&ailp->ail_lock);
@@ -926,8 +926,7 @@ xfs_qm_dqflush_done(
                         * Clear the failed state since we are about to drop the
                         * flush lock
                         */
-                       if (lip->li_flags & XFS_LI_FAILED)
-                               xfs_clear_li_failed(lip);
+                       xfs_clear_li_failed(lip);
                        spin_unlock(&ailp->ail_lock);
                }
        }
index 4b331e3..57df981 100644 (file)
@@ -173,7 +173,7 @@ xfs_qm_dquot_logitem_push(
         * The buffer containing this item failed to be written back
         * previously. Resubmit the buffer for IO
         */
-       if (lip->li_flags & XFS_LI_FAILED) {
+       if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
                if (!xfs_buf_trylock(bp))
                        return XFS_ITEM_LOCKED;
 
index b5b1e56..70b7d48 100644 (file)
@@ -168,7 +168,7 @@ STATIC void
 xfs_efi_item_unlock(
        struct xfs_log_item     *lip)
 {
-       if (lip->li_flags & XFS_LI_ABORTED)
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
                xfs_efi_release(EFI_ITEM(lip));
 }
 
@@ -402,7 +402,7 @@ xfs_efd_item_unlock(
 {
        struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
 
-       if (lip->li_flags & XFS_LI_ABORTED) {
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
                xfs_efi_release(efdp->efd_efip);
                xfs_efd_item_free(efdp);
        }
index 8178999..9deff13 100644 (file)
@@ -107,7 +107,8 @@ xfs_inode_free_callback(
                xfs_idestroy_fork(ip, XFS_COW_FORK);
 
        if (ip->i_itemp) {
-               ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
+               ASSERT(!test_bit(XFS_LI_IN_AIL,
+                                &ip->i_itemp->ili_item.li_flags));
                xfs_inode_item_destroy(ip);
                ip->i_itemp = NULL;
        }
index 865ad13..a99a0f8 100644 (file)
@@ -91,7 +91,7 @@ xfs_icreate_item_unlock(
 {
        struct xfs_icreate_item *icp = ICR_ITEM(lip);
 
-       if (icp->ic_item.li_flags & XFS_LI_ABORTED)
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
                kmem_zone_free(xfs_icreate_zone, icp);
        return;
 }
index d9b9160..42781ba 100644 (file)
@@ -498,7 +498,7 @@ again:
                if (!try_lock) {
                        for (j = (i - 1); j >= 0 && !try_lock; j--) {
                                lp = (xfs_log_item_t *)ips[j]->i_itemp;
-                               if (lp && (lp->li_flags & XFS_LI_IN_AIL))
+                               if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
                                        try_lock++;
                        }
                }
@@ -598,7 +598,7 @@ xfs_lock_two_inodes(
         * and try again.
         */
        lp = (xfs_log_item_t *)ip0->i_itemp;
-       if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+       if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
                if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
                        xfs_iunlock(ip0, ip0_mode);
                        if ((++attempts % 5) == 0)
index 34b91b7..3e5b857 100644 (file)
@@ -518,7 +518,7 @@ xfs_inode_item_push(
         * The buffer containing this item failed to be written back
         * previously. Resubmit the buffer for IO.
         */
-       if (lip->li_flags & XFS_LI_FAILED) {
+       if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
                if (!xfs_buf_trylock(bp))
                        return XFS_ITEM_LOCKED;
 
@@ -729,14 +729,14 @@ xfs_iflush_done(
                 */
                iip = INODE_ITEM(blip);
                if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
-                   (blip->li_flags & XFS_LI_FAILED))
+                   test_bit(XFS_LI_FAILED, &blip->li_flags))
                        need_ail++;
        }
 
        /* make sure we capture the state of the initial inode. */
        iip = INODE_ITEM(lip);
        if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
-           lip->li_flags & XFS_LI_FAILED)
+           test_bit(XFS_LI_FAILED, &lip->li_flags))
                need_ail++;
 
        /*
@@ -803,7 +803,7 @@ xfs_iflush_abort(
        xfs_inode_log_item_t    *iip = ip->i_itemp;
 
        if (iip) {
-               if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
+               if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) {
                        xfs_trans_ail_remove(&iip->ili_item,
                                             stale ? SHUTDOWN_LOG_IO_ERROR :
                                                     SHUTDOWN_CORRUPT_INCORE);
index 2fcd9ed..e427864 100644 (file)
@@ -2132,7 +2132,7 @@ xlog_print_trans(
 
                xfs_warn(mp, "log item: ");
                xfs_warn(mp, "  type    = 0x%x", lip->li_type);
-               xfs_warn(mp, "  flags   = 0x%x", lip->li_flags);
+               xfs_warn(mp, "  flags   = 0x%lx", lip->li_flags);
                if (!lv)
                        continue;
                xfs_warn(mp, "  niovecs = %d", lv->lv_niovecs);
index c72a8da..62764f3 100644 (file)
@@ -173,7 +173,7 @@ xfs_qm_dqpurge(
 
        ASSERT(atomic_read(&dqp->q_pincount) == 0);
        ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
-              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+               !test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
 
        xfs_dqfunlock(dqp);
        xfs_dqunlock(dqp);
index 15c9393..e5866b7 100644 (file)
@@ -159,7 +159,7 @@ STATIC void
 xfs_cui_item_unlock(
        struct xfs_log_item     *lip)
 {
-       if (lip->li_flags & XFS_LI_ABORTED)
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
                xfs_cui_release(CUI_ITEM(lip));
 }
 
@@ -310,7 +310,7 @@ xfs_cud_item_unlock(
 {
        struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
 
-       if (lip->li_flags & XFS_LI_ABORTED) {
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
                xfs_cui_release(cudp->cud_cuip);
                kmem_zone_free(xfs_cud_zone, cudp);
        }
index 06a0784..e5b5b3e 100644 (file)
@@ -158,7 +158,7 @@ STATIC void
 xfs_rui_item_unlock(
        struct xfs_log_item     *lip)
 {
-       if (lip->li_flags & XFS_LI_ABORTED)
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
                xfs_rui_release(RUI_ITEM(lip));
 }
 
@@ -331,7 +331,7 @@ xfs_rud_item_unlock(
 {
        struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
 
-       if (lip->li_flags & XFS_LI_ABORTED) {
+       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
                xfs_rui_release(rudp->rud_ruip);
                kmem_zone_free(xfs_rud_zone, rudp);
        }
index 2489225..989708d 100644 (file)
@@ -442,7 +442,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
                __field(int, bli_refcount)
                __field(unsigned, bli_flags)
                __field(void *, li_desc)
-               __field(unsigned, li_flags)
+               __field(unsigned long, li_flags)
        ),
        TP_fast_assign(
                __entry->dev = bip->bli_buf->b_target->bt_dev;
@@ -1018,7 +1018,7 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
                __field(dev_t, dev)
                __field(void *, lip)
                __field(uint, type)
-               __field(uint, flags)
+               __field(unsigned long, flags)
                __field(xfs_lsn_t, lsn)
        ),
        TP_fast_assign(
@@ -1070,7 +1070,7 @@ DECLARE_EVENT_CLASS(xfs_ail_class,
                __field(dev_t, dev)
                __field(void *, lip)
                __field(uint, type)
-               __field(uint, flags)
+               __field(unsigned long, flags)
                __field(xfs_lsn_t, old_lsn)
                __field(xfs_lsn_t, new_lsn)
        ),
index 06adb1a..83f2032 100644 (file)
@@ -792,7 +792,7 @@ xfs_trans_free_items(
                if (commit_lsn != NULLCOMMITLSN)
                        lip->li_ops->iop_committing(lip, commit_lsn);
                if (abort)
-                       lip->li_flags |= XFS_LI_ABORTED;
+                       set_bit(XFS_LI_ABORTED, &lip->li_flags);
                lip->li_ops->iop_unlock(lip);
 
                xfs_trans_free_item_desc(lidp);
@@ -863,7 +863,7 @@ xfs_trans_committed_bulk(
                xfs_lsn_t               item_lsn;
 
                if (aborted)
-                       lip->li_flags |= XFS_LI_ABORTED;
+                       set_bit(XFS_LI_ABORTED, &lip->li_flags);
                item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
 
                /* item_lsn of -1 means the item needs no further processing */
index 834388c..ca44903 100644 (file)
@@ -48,7 +48,7 @@ typedef struct xfs_log_item {
        struct xfs_mount                *li_mountp;     /* ptr to fs mount */
        struct xfs_ail                  *li_ailp;       /* ptr to AIL */
        uint                            li_type;        /* item type */
-       uint                            li_flags;       /* misc flags */
+       unsigned long                   li_flags;       /* misc flags */
        struct xfs_buf                  *li_buf;        /* real buffer pointer */
        struct list_head                li_bio_list;    /* buffer item list */
        void                            (*li_cb)(struct xfs_buf *,
@@ -64,14 +64,19 @@ typedef struct xfs_log_item {
        xfs_lsn_t                       li_seq;         /* CIL commit seq */
 } xfs_log_item_t;
 
-#define        XFS_LI_IN_AIL   0x1
-#define        XFS_LI_ABORTED  0x2
-#define        XFS_LI_FAILED   0x4
+/*
+ * li_flags use the (set/test/clear)_bit atomic interfaces because updates can
+ * race with each other and we don't want to have to use the AIL lock to
+ * serialise all updates.
+ */
+#define        XFS_LI_IN_AIL   0
+#define        XFS_LI_ABORTED  1
+#define        XFS_LI_FAILED   2
 
 #define XFS_LI_FLAGS \
-       { XFS_LI_IN_AIL,        "IN_AIL" }, \
-       { XFS_LI_ABORTED,       "ABORTED" }, \
-       { XFS_LI_FAILED,        "FAILED" }
+       { (1 << XFS_LI_IN_AIL),         "IN_AIL" }, \
+       { (1 << XFS_LI_ABORTED),        "ABORTED" }, \
+       { (1 << XFS_LI_FAILED),         "FAILED" }
 
 struct xfs_item_ops {
        void (*iop_size)(xfs_log_item_t *, int *, int *);
index d4a2445..50611d2 100644 (file)
@@ -46,7 +46,7 @@ xfs_ail_check(
        /*
         * Check the next and previous entries are valid.
         */
-       ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
+       ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
        prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
        if (&prev_lip->li_ail != &ailp->ail_head)
                ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
@@ -684,7 +684,7 @@ xfs_trans_ail_update_bulk(
 
        for (i = 0; i < nr_items; i++) {
                struct xfs_log_item *lip = log_items[i];
-               if (lip->li_flags & XFS_LI_IN_AIL) {
+               if (test_and_set_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
                        /* check if we really need to move the item */
                        if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
                                continue;
@@ -694,7 +694,6 @@ xfs_trans_ail_update_bulk(
                        if (mlip == lip)
                                mlip_changed = 1;
                } else {
-                       lip->li_flags |= XFS_LI_IN_AIL;
                        trace_xfs_ail_insert(lip, 0, lsn);
                }
                lip->li_lsn = lsn;
@@ -725,7 +724,7 @@ xfs_ail_delete_one(
        trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
        xfs_ail_delete(ailp, lip);
        xfs_clear_li_failed(lip);
-       lip->li_flags &= ~XFS_LI_IN_AIL;
+       clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
        lip->li_lsn = 0;
 
        return mlip == lip;
@@ -761,7 +760,7 @@ xfs_trans_ail_delete(
        struct xfs_mount        *mp = ailp->ail_mount;
        bool                    mlip_changed;
 
-       if (!(lip->li_flags & XFS_LI_IN_AIL)) {
+       if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
                spin_unlock(&ailp->ail_lock);
                if (!XFS_FORCED_SHUTDOWN(mp)) {
                        xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
index a5d9dfc..0081e9b 100644 (file)
@@ -442,7 +442,7 @@ xfs_trans_brelse(
                ASSERT(bp->b_pincount == 0);
 ***/
                ASSERT(atomic_read(&bip->bli_refcount) == 0);
-               ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
+               ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
                ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
                xfs_buf_item_relse(bp);
        }
index be24b0c..43f7732 100644 (file)
@@ -119,7 +119,7 @@ xfs_trans_ail_remove(
 
        spin_lock(&ailp->ail_lock);
        /* xfs_trans_ail_delete() drops the AIL lock */
-       if (lip->li_flags & XFS_LI_IN_AIL)
+       if (test_bit(XFS_LI_IN_AIL, &lip->li_flags))
                xfs_trans_ail_delete(ailp, lip, shutdown_type);
        else
                spin_unlock(&ailp->ail_lock);
@@ -171,11 +171,10 @@ xfs_clear_li_failed(
 {
        struct xfs_buf  *bp = lip->li_buf;
 
-       ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+       ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
        lockdep_assert_held(&lip->li_ailp->ail_lock);
 
-       if (lip->li_flags & XFS_LI_FAILED) {
-               lip->li_flags &= ~XFS_LI_FAILED;
+       if (test_and_clear_bit(XFS_LI_FAILED, &lip->li_flags)) {
                lip->li_buf = NULL;
                xfs_buf_rele(bp);
        }
@@ -188,9 +187,8 @@ xfs_set_li_failed(
 {
        lockdep_assert_held(&lip->li_ailp->ail_lock);
 
-       if (!(lip->li_flags & XFS_LI_FAILED)) {
+       if (!test_and_set_bit(XFS_LI_FAILED, &lip->li_flags)) {
                xfs_buf_hold(bp);
-               lip->li_flags |= XFS_LI_FAILED;
                lip->li_buf = bp;
        }
 }