xfs: fix per-cpu CIL structure aggregation racing with dying cpus

author Darrick J. Wong <djwong@kernel.org>
Mon, 11 Sep 2023 15:39:02 +0000 (08:39 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Mon, 11 Sep 2023 15:39:02 +0000 (08:39 -0700)
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index eccbfb9..ebc70aa 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -124,7 +124,7 @@ xlog_cil_push_pcp_aggregate(
         struct xlog_cil_pcp     *cilpcp;
         int                     cpu;
  
-       for_each_online_cpu(cpu) {
+       for_each_cpu(cpu, &ctx->cil_pcpmask) {
                 cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
  
                 ctx->ticket->t_curr_res += cilpcp->space_reserved;
@@ -165,7 +165,13 @@ xlog_cil_insert_pcp_aggregate(
         if (!test_and_clear_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags))
                 return;
  
-       for_each_online_cpu(cpu) {
+       /*
+        * We can race with other cpus setting cil_pcpmask.  However, we've
+        * atomically cleared PCP_SPACE which forces other threads to add to
+        * the global space used count.  cil_pcpmask is a superset of cilpcp
+        * structures that could have a nonzero space_used.
+        */
+       for_each_cpu(cpu, &ctx->cil_pcpmask) {
                 int     old, prev;
  
                 cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
@@ -554,6 +560,7 @@ xlog_cil_insert_items(
         int                     iovhdr_res = 0, split_res = 0, ctx_res = 0;
         int                     space_used;
         int                     order;
+       unsigned int            cpu_nr;
         struct xlog_cil_pcp     *cilpcp;
  
         ASSERT(tp);
@@ -577,7 +584,12 @@ xlog_cil_insert_items(
          * can't be scheduled away between split sample/update operations that
          * are done without outside locking to serialise them.
          */
-       cilpcp = get_cpu_ptr(cil->xc_pcp);
+       cpu_nr = get_cpu();
+       cilpcp = this_cpu_ptr(cil->xc_pcp);
+
+       /* Tell the future push that there was work added by this CPU. */
+       if (!cpumask_test_cpu(cpu_nr, &ctx->cil_pcpmask))
+               cpumask_test_and_set_cpu(cpu_nr, &ctx->cil_pcpmask);
  
         /*
          * We need to take the CIL checkpoint unit reservation on the first
@@ -663,7 +675,7 @@ xlog_cil_insert_items(
                         continue;
                 list_add_tail(&lip->li_cil, &cilpcp->log_items);
         }
-       put_cpu_ptr(cilpcp);
+       put_cpu();
  
         /*
          * If we've overrun the reservation, dump the tx details before we move
@@ -1791,38 +1803,6 @@ out_shutdown:
  }
  
  /*
- * Move dead percpu state to the relevant CIL context structures.
- *
- * We have to lock the CIL context here to ensure that nothing is modifying
- * the percpu state, either addition or removal. Both of these are done under
- * the CIL context lock, so grabbing that exclusively here will ensure we can
- * safely drain the cilpcp for the CPU that is dying.
- */
-void
-xlog_cil_pcp_dead(
-       struct xlog             *log,
-       unsigned int            cpu)
-{
-       struct xfs_cil          *cil = log->l_cilp;
-       struct xlog_cil_pcp     *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
-       struct xfs_cil_ctx      *ctx;
-
-       down_write(&cil->xc_ctx_lock);
-       ctx = cil->xc_ctx;
-       if (ctx->ticket)
-               ctx->ticket->t_curr_res += cilpcp->space_reserved;
-       cilpcp->space_reserved = 0;
-
-       if (!list_empty(&cilpcp->log_items))
-               list_splice_init(&cilpcp->log_items, &ctx->log_items);
-       if (!list_empty(&cilpcp->busy_extents))
-               list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents);
-       atomic_add(cilpcp->space_used, &ctx->space_used);
-       cilpcp->space_used = 0;
-       up_write(&cil->xc_ctx_lock);
-}
-
-/*
   * Perform initial CIL structure initialisation.
   */
  int
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h

index 1bd2963..af87648 100644 (file)
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -231,6 +231,12 @@ struct xfs_cil_ctx {
         struct work_struct      discard_endio_work;
         struct work_struct      push_work;
         atomic_t                order_id;
+
+       /*
+        * CPUs that could have added items to the percpu CIL data.  Access is
+        * coordinated with xc_ctx_lock.
+        */
+       struct cpumask          cil_pcpmask;
  };
  
  /*
@@ -278,9 +284,6 @@ struct xfs_cil {
         wait_queue_head_t       xc_push_wait;   /* background push throttle */
  
         void __percpu           *xc_pcp;        /* percpu CIL structures */
-#ifdef CONFIG_HOTPLUG_CPU
-       struct list_head        xc_pcp_list;
-#endif
  } ____cacheline_aligned_in_smp;
  
  /* xc_flags bit values */
@@ -705,9 +708,4 @@ xlog_kvmalloc(
         return p;
  }
  
-/*
- * CIL CPU dead notifier
- */
-void xlog_cil_pcp_dead(struct xlog *log, unsigned int cpu);
-
  #endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 1f77014..ed29a50 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2337,7 +2337,6 @@ xfs_cpu_dead(
         list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
                 spin_unlock(&xfs_mount_list_lock);
                 xfs_inodegc_cpu_dead(mp, cpu);
-               xlog_cil_pcp_dead(mp->m_log, cpu);
                 spin_lock(&xfs_mount_list_lock);
         }
         spin_unlock(&xfs_mount_list_lock);
author	Darrick J. Wong <djwong@kernel.org>
	Mon, 11 Sep 2023 15:39:02 +0000 (08:39 -0700)
committer	Darrick J. Wong <djwong@kernel.org>
	Mon, 11 Sep 2023 15:39:02 +0000 (08:39 -0700)
fs/xfs/xfs_log_cil.c		patch | blob | history
fs/xfs/xfs_log_priv.h		patch | blob | history
fs/xfs/xfs_super.c		patch | blob | history