jbd2: recheck chechpointing non-dirty buffer

author Zhang Yi <yi.zhang@huawei.com>

Tue, 6 Jun 2023 13:59:23 +0000 (21:59 +0800)

committer Theodore Ts'o <tytso@mit.edu>

Tue, 11 Jul 2023 03:09:21 +0000 (23:09 -0400)
author Zhang Yi <yi.zhang@huawei.com>
Tue, 6 Jun 2023 13:59:23 +0000 (21:59 +0800)
committer Theodore Ts'o <tytso@mit.edu>
Tue, 11 Jul 2023 03:09:21 +0000 (23:09 -0400)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c

index 51bd38d..25e3c20 100644 (file)
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -58,28 +58,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
  }
  
  /*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
-       transaction_t *transaction = jh->b_cp_transaction;
-
-       __buffer_unlink_first(jh);
-
-       if (!transaction->t_checkpoint_io_list) {
-               jh->b_cpnext = jh->b_cpprev = jh;
-       } else {
-               jh->b_cpnext = transaction->t_checkpoint_io_list;
-               jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
-               jh->b_cpprev->b_cpnext = jh;
-               jh->b_cpnext->b_cpprev = jh;
-       }
-       transaction->t_checkpoint_io_list = jh;
-}
-
-/*
   * Check a checkpoint buffer could be release or not.
   *
   * Requires j_list_lock
@@ -183,6 +161,7 @@ __flush_batch(journal_t *journal, int *batch_count)
                 struct buffer_head *bh = journal->j_chkpt_bhs[i];
                 BUFFER_TRACE(bh, "brelse");
                 __brelse(bh);
+               journal->j_chkpt_bhs[i] = NULL;
         }
         *batch_count = 0;
  }
@@ -242,6 +221,11 @@ restart:
                 jh = transaction->t_checkpoint_list;
                 bh = jh2bh(jh);
  
+               /*
+                * The buffer may be writing back, or flushing out in the
+                * last couple of cycles, or re-adding into a new transaction,
+                * need to check it again until it's unlocked.
+                */
                 if (buffer_locked(bh)) {
                         get_bh(bh);
                         spin_unlock(&journal->j_list_lock);
@@ -287,28 +271,32 @@ restart:
                 }
                 if (!buffer_dirty(bh)) {
                         BUFFER_TRACE(bh, "remove from checkpoint");
-                       if (__jbd2_journal_remove_checkpoint(jh))
-                               /* The transaction was released; we're done */
+                       /*
+                        * If the transaction was released or the checkpoint
+                        * list was empty, we're done.
+                        */
+                       if (__jbd2_journal_remove_checkpoint(jh) ||
+                           !transaction->t_checkpoint_list)
                                 goto out;
-                       continue;
+               } else {
+                       /*
+                        * We are about to write the buffer, it could be
+                        * raced by some other transaction shrink or buffer
+                        * re-log logic once we release the j_list_lock,
+                        * leave it on the checkpoint list and check status
+                        * again to make sure it's clean.
+                        */
+                       BUFFER_TRACE(bh, "queue");
+                       get_bh(bh);
+                       J_ASSERT_BH(bh, !buffer_jwrite(bh));
+                       journal->j_chkpt_bhs[batch_count++] = bh;
+                       transaction->t_chp_stats.cs_written++;
+                       transaction->t_checkpoint_list = jh->b_cpnext;
                 }
-               /*
-                * Important: we are about to write the buffer, and
-                * possibly block, while still holding the journal
-                * lock.  We cannot afford to let the transaction
-                * logic start messing around with this buffer before
-                * we write it to disk, as that would break
-                * recoverability.
-                */
-               BUFFER_TRACE(bh, "queue");
-               get_bh(bh);
-               J_ASSERT_BH(bh, !buffer_jwrite(bh));
-               journal->j_chkpt_bhs[batch_count++] = bh;
-               __buffer_relink_io(jh);
-               transaction->t_chp_stats.cs_written++;
+
                 if ((batch_count == JBD2_NR_BATCH) ||
-                   need_resched() ||
-                   spin_needbreak(&journal->j_list_lock))
+                   need_resched() || spin_needbreak(&journal->j_list_lock) ||
+                   jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
                         goto unlock_and_flush;
         }
  
@@ -322,38 +310,6 @@ restart:
                         goto restart;
         }
  
-       /*
-        * Now we issued all of the transaction's buffers, let's deal
-        * with the buffers that are out for I/O.
-        */
-restart2:
-       /* Did somebody clean up the transaction in the meanwhile? */
-       if (journal->j_checkpoint_transactions != transaction ||
-           transaction->t_tid != this_tid)
-               goto out;
-
-       while (transaction->t_checkpoint_io_list) {
-               jh = transaction->t_checkpoint_io_list;
-               bh = jh2bh(jh);
-               if (buffer_locked(bh)) {
-                       get_bh(bh);
-                       spin_unlock(&journal->j_list_lock);
-                       wait_on_buffer(bh);
-                       /* the journal_head may have gone by now */
-                       BUFFER_TRACE(bh, "brelse");
-                       __brelse(bh);
-                       spin_lock(&journal->j_list_lock);
-                       goto restart2;
-               }
-
-               /*
-                * Now in whatever state the buffer currently is, we
-                * know that it has been written out and so we can
-                * drop it from the list
-                */
-               if (__jbd2_journal_remove_checkpoint(jh))
-                       break;
-       }
  out:
         spin_unlock(&journal->j_list_lock);
         result = jbd2_cleanup_journal_tail(journal);
author	Zhang Yi <yi.zhang@huawei.com>
	Tue, 6 Jun 2023 13:59:23 +0000 (21:59 +0800)
committer	Theodore Ts'o <tytso@mit.edu>
	Tue, 11 Jul 2023 03:09:21 +0000 (23:09 -0400)