jbd2: add fast commit machinery
authorHarshad Shirwadkar <harshadshirwadkar@gmail.com>
Thu, 15 Oct 2020 20:37:56 +0000 (13:37 -0700)
committerTheodore Ts'o <tytso@mit.edu>
Thu, 22 Oct 2020 03:22:37 +0000 (23:22 -0400)
This commit adds the APIs needed in the JBD2 layer for fast
commits.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20201015203802.3597742-5-harshadshirwadkar@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/fast_commit.c
fs/jbd2/commit.c
fs/jbd2/journal.c
include/linux/jbd2.h

index 0dad8bd..f2d11b4 100644 (file)
@@ -8,11 +8,19 @@
  * Ext4 fast commits routines.
  */
 #include "ext4_jbd2.h"
+/*
+ * Fast commit cleanup routine. This is called after every fast commit and
+ * full commit. full is true if we are called after a full commit.
+ *
+ * This version is an empty stub: it performs no work for either kind of
+ * commit.
+ */
+static void ext4_fc_cleanup(journal_t *journal, int full)
+{
+}
 
 void ext4_fc_init(struct super_block *sb, journal_t *journal)
 {
        if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
                return;
+       journal->j_fc_cleanup_callback = ext4_fc_cleanup;
        if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) {
                pr_warn("Error while enabling fast commits, turning off.");
                ext4_clear_feature_fast_commit(sb);
index 6252b4c..fa688e1 100644 (file)
@@ -206,6 +206,30 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
        return generic_writepages(mapping, &wbc);
 }
 
+/*
+ * Submit the data buffers of @jinode.
+ *
+ * Returns 0 without doing anything when @jinode is NULL or does not have
+ * JI_WRITE_DATA set in i_flags. Otherwise emits the submit tracepoint and
+ * returns the result of jbd2_journal_submit_inode_data_buffers().
+ */
+int jbd2_submit_inode_data(struct jbd2_inode *jinode)
+{
+       if (jinode && (jinode->i_flags & JI_WRITE_DATA)) {
+               trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
+               return jbd2_journal_submit_inode_data_buffers(jinode);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(jbd2_submit_inode_data);
+
+/*
+ * Wait for writeback of the dirty range of @jinode's data to finish.
+ *
+ * Returns 0 without waiting when @jinode is NULL, JI_WAIT_DATA is not set in
+ * its i_flags, or it has no VFS inode / mapping attached. Otherwise returns
+ * the result of filemap_fdatawait_range_keep_errors() over
+ * [i_dirty_start, i_dirty_end]. @journal is not used by this function.
+ */
+int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
+{
+       struct address_space *mapping;
+
+       if (!jinode || !(jinode->i_flags & JI_WAIT_DATA))
+               return 0;
+       if (!jinode->i_vfs_inode)
+               return 0;
+
+       mapping = jinode->i_vfs_inode->i_mapping;
+       if (!mapping)
+               return 0;
+
+       return filemap_fdatawait_range_keep_errors(mapping,
+                                                  jinode->i_dirty_start,
+                                                  jinode->i_dirty_end);
+}
+EXPORT_SYMBOL(jbd2_wait_inode_data);
+
 /*
  * Submit all the data buffers of inode associated with the transaction to
  * disk.
@@ -415,6 +439,20 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        J_ASSERT(journal->j_running_transaction != NULL);
        J_ASSERT(journal->j_committing_transaction == NULL);
 
+       write_lock(&journal->j_state_lock);
+       journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
+       while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
+               DEFINE_WAIT(wait);
+
+               prepare_to_wait(&journal->j_fc_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               write_unlock(&journal->j_state_lock);
+               schedule();
+               write_lock(&journal->j_state_lock);
+               finish_wait(&journal->j_fc_wait, &wait);
+       }
+       write_unlock(&journal->j_state_lock);
+
        commit_transaction = journal->j_running_transaction;
 
        trace_jbd2_start_commit(journal, commit_transaction);
@@ -422,6 +460,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                        commit_transaction->t_tid);
 
        write_lock(&journal->j_state_lock);
+       journal->j_fc_off = 0;
        J_ASSERT(commit_transaction->t_state == T_RUNNING);
        commit_transaction->t_state = T_LOCKED;
 
@@ -1121,12 +1160,16 @@ restart_loop:
 
        if (journal->j_commit_callback)
                journal->j_commit_callback(journal, commit_transaction);
+       if (journal->j_fc_cleanup_callback)
+               journal->j_fc_cleanup_callback(journal, 1);
 
        trace_jbd2_end_commit(journal, commit_transaction);
        jbd_debug(1, "JBD2: commit %d complete, head %d\n",
                  journal->j_commit_sequence, journal->j_tail_sequence);
 
        write_lock(&journal->j_state_lock);
+       journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
+       journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
        spin_lock(&journal->j_list_lock);
        commit_transaction->t_state = T_FINISHED;
        /* Check if the transaction can be dropped now that we are finished */
@@ -1138,6 +1181,7 @@ restart_loop:
        spin_unlock(&journal->j_list_lock);
        write_unlock(&journal->j_state_lock);
        wake_up(&journal->j_wait_done_commit);
+       wake_up(&journal->j_fc_wait);
 
        /*
         * Calculate overall stats
index 4497bfb..0c7c42b 100644 (file)
@@ -159,7 +159,9 @@ static void commit_timeout(struct timer_list *t)
  *
  * 1) COMMIT:  Every so often we need to commit the current state of the
  *    filesystem to disk.  The journal thread is responsible for writing
- *    all of the metadata buffers to disk.
+ *    all of the metadata buffers to disk. If a fast commit is ongoing, the
+ *    journal thread waits until it is done and then continues from
+ *    there.
  *
  * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
  *    of the data in that part of the log has been rewritten elsewhere on
@@ -716,6 +718,75 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
        return err;
 }
 
+/*
+ * Start a fast commit.
+ *
+ * Returns 0 if a new fast commit was started; JBD2_FAST_COMMIT_ONGOING is
+ * then set in j_flags.
+ *
+ * Returns -EALREADY if no fast commit was started, either because tid has
+ * already been committed or because a fast or full commit is already in
+ * progress. In the latter case the caller sleeps on j_fc_wait until woken
+ * before -EALREADY is returned; the commit is NOT started on its behalf.
+ *
+ * Returns -EINVAL if no jbd2 commit has yet been performed.
+ */
+int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
+{
+       /*
+        * Fast commits only allowed if at least one full commit has
+        * been processed.
+        */
+       if (!journal->j_stats.ts_tid)
+               return -EINVAL;
+
+       write_lock(&journal->j_state_lock);
+
+       /*
+        * Transaction IDs wrap around, so a plain "<=" gives the wrong answer
+        * near the wrap point; tid_geq() does the sequence-aware comparison.
+        * The check is made under j_state_lock so it is evaluated together
+        * with the commit-ongoing flags tested below.
+        */
+       if (tid_geq(journal->j_commit_sequence, tid)) {
+               write_unlock(&journal->j_state_lock);
+               return -EALREADY;
+       }
+
+       if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+           (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
+               DEFINE_WAIT(wait);
+
+               /*
+                * Queue on j_fc_wait before dropping the lock: the flags are
+                * cleared under j_state_lock and the wake_up follows, so the
+                * wake-up cannot slip in between the test and the sleep.
+                */
+               prepare_to_wait(&journal->j_fc_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               write_unlock(&journal->j_state_lock);
+               schedule();
+               finish_wait(&journal->j_fc_wait, &wait);
+               return -EALREADY;
+       }
+       journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
+       write_unlock(&journal->j_state_lock);
+
+       return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_begin_commit);
+
+/*
+ * Stop a fast commit. If fallback is set, this function starts commit of
+ * TID tid before any other fast commit can start.
+ *
+ * Returns 0 when fallback is false, otherwise the result of
+ * jbd2_complete_transaction() for tid.
+ */
+static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
+{
+       /* Second argument 0: this cleanup follows a fast commit, not a full one */
+       if (journal->j_fc_cleanup_callback)
+               journal->j_fc_cleanup_callback(journal, 0);
+       write_lock(&journal->j_state_lock);
+       journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
+       /*
+        * Set the full-commit flag in the same critical section that clears
+        * the fast-commit flag, so jbd2_fc_begin_commit() never observes both
+        * flags clear in between.
+        */
+       if (fallback)
+               journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
+       write_unlock(&journal->j_state_lock);
+       /* Wake anyone sleeping on j_fc_wait for the fast commit to finish */
+       wake_up(&journal->j_fc_wait);
+       if (fallback)
+               return jbd2_complete_transaction(journal, tid);
+       return 0;
+}
+
+/*
+ * End a fast commit without falling back to a full commit. The tid argument
+ * of __jbd2_fc_end_commit() is not consulted in this mode, so 0 is passed.
+ */
+int jbd2_fc_end_commit(journal_t *journal)
+{
+       return __jbd2_fc_end_commit(journal, 0, false);
+}
+EXPORT_SYMBOL(jbd2_fc_end_commit);
+
+/*
+ * End a fast commit and fall back to a full commit of transaction tid.
+ * Returns the result of __jbd2_fc_end_commit() in fallback mode.
+ */
+int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid)
+{
+       return __jbd2_fc_end_commit(journal, tid, true);
+}
+EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
+
 /* Return 1 when transaction with given tid has already committed. */
 int jbd2_transaction_committed(journal_t *journal, tid_t tid)
 {
@@ -784,6 +855,110 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
        return jbd2_journal_bmap(journal, blocknr, retp);
 }
 
+/*
+ * Map one fast commit buffer for use by the file system.
+ *
+ * On success returns 0, stores the buffer head in *bh_out and records it in
+ * j_fc_wbuf[] at the slot that was reserved. *bh_out is set to NULL on every
+ * failure path.
+ *
+ * Returns -EINVAL when j_fc_first + j_fc_off has reached j_fc_last (no fast
+ * commit block left), the error from jbd2_journal_bmap() if the mapping
+ * fails, or -ENOMEM if __getblk() returns NULL.
+ */
+int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
+{
+       unsigned long long pblock;
+       unsigned long blocknr;
+       int ret = 0;
+       struct buffer_head *bh;
+       int fc_off;
+
+       *bh_out = NULL;
+       /* Reserve the next fast commit slot under j_state_lock */
+       write_lock(&journal->j_state_lock);
+
+       if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
+               fc_off = journal->j_fc_off;
+               blocknr = journal->j_fc_first + fc_off;
+               journal->j_fc_off++;
+       } else {
+               ret = -EINVAL;
+       }
+       write_unlock(&journal->j_state_lock);
+
+       if (ret)
+               return ret;
+
+       /*
+        * NOTE(review): on the two error returns below j_fc_off has already
+        * been advanced and j_fc_wbuf[fc_off] is not written here - confirm
+        * callers never wait on or release that slot afterwards.
+        */
+       ret = jbd2_journal_bmap(journal, blocknr, &pblock);
+       if (ret)
+               return ret;
+
+       bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
+       if (!bh)
+               return -ENOMEM;
+
+       lock_buffer(bh);
+
+       /* Hand the buffer back marked not-uptodate and dirty */
+       clear_buffer_uptodate(bh);
+       set_buffer_dirty(bh);
+       unlock_buffer(bh);
+       /* Remember the buffer so jbd2_fc_wait_bufs/release_bufs can find it */
+       journal->j_fc_wbuf[fc_off] = bh;
+
+       *bh_out = bh;
+
+       return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_get_buf);
+
+/*
+ * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
+ * for completion.
+ *
+ * The num_blks most recently handed-out slots of j_fc_wbuf[] are waited on,
+ * highest index first. Each buffer has a reference dropped and its slot
+ * cleared once the wait is over.
+ *
+ * Returns 0 if every buffer is uptodate after its wait, or -EIO as soon as
+ * one is not; slots below the failing one are then left untouched.
+ */
+int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
+{
+       struct buffer_head *bh;
+       int i, j_fc_off;
+       bool uptodate;
+
+       read_lock(&journal->j_state_lock);
+       j_fc_off = journal->j_fc_off;
+       read_unlock(&journal->j_state_lock);
+
+       /*
+        * Wait in reverse order to minimize chances of us being woken up before
+        * all IOs have completed
+        */
+       for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
+               bh = journal->j_fc_wbuf[i];
+               wait_on_buffer(bh);
+               /*
+                * Sample the I/O result while we still hold our reference:
+                * after put_bh() the buffer head may be freed, so it must not
+                * be dereferenced any more.
+                */
+               uptodate = buffer_uptodate(bh);
+               put_bh(bh);
+               journal->j_fc_wbuf[i] = NULL;
+               if (unlikely(!uptodate))
+                       return -EIO;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_wait_bufs);
+
+/*
+ * Release fast commit buffers that were allocated by jbd2_fc_get_buf.
+ *
+ * No waiting is done here: each buffer found in j_fc_wbuf[] has a reference
+ * dropped and its slot cleared. Always returns 0.
+ */
+int jbd2_fc_release_bufs(journal_t *journal)
+{
+       struct buffer_head *bh;
+       int i, j_fc_off;
+
+       read_lock(&journal->j_state_lock);
+       j_fc_off = journal->j_fc_off;
+       read_unlock(&journal->j_state_lock);
+
+       /*
+        * Walk the slots from the most recently handed-out one down to slot 0.
+        * The walk stops at the first empty slot, so any buffers stored below
+        * an empty slot are not released by this call.
+        */
+       for (i = j_fc_off - 1; i >= 0; i--) {
+               bh = journal->j_fc_wbuf[i];
+               if (!bh)
+                       break;
+               put_bh(bh);
+               journal->j_fc_wbuf[i] = NULL;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_release_bufs);
+
 /*
  * Conversion of logical to physical block numbers for the journal
  *
@@ -1142,6 +1317,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
        init_waitqueue_head(&journal->j_wait_commit);
        init_waitqueue_head(&journal->j_wait_updates);
        init_waitqueue_head(&journal->j_wait_reserved);
+       init_waitqueue_head(&journal->j_fc_wait);
        mutex_init(&journal->j_abort_mutex);
        mutex_init(&journal->j_barrier);
        mutex_init(&journal->j_checkpoint_mutex);
@@ -1495,6 +1671,7 @@ out:
 static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
 {
        journal_superblock_t *sb = journal->j_superblock;
+       bool had_fast_commit = false;
 
        BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
        lock_buffer(journal->j_sb_buffer);
@@ -1508,9 +1685,20 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
 
        sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
        sb->s_start    = cpu_to_be32(0);
+       if (jbd2_has_feature_fast_commit(journal)) {
+               /*
+                * When journal is clean, no need to commit fast commit flag and
+                * make file system incompatible with older kernels.
+                */
+               jbd2_clear_feature_fast_commit(journal);
+               had_fast_commit = true;
+       }
 
        jbd2_write_superblock(journal, write_op);
 
+       if (had_fast_commit)
+               jbd2_set_feature_fast_commit(journal);
+
        /* Log is no longer empty */
        write_lock(&journal->j_state_lock);
        journal->j_flags |= JBD2_FLUSHED;
index 008629b..a009d9b 100644 (file)
@@ -862,6 +862,13 @@ struct journal_s
        wait_queue_head_t       j_wait_reserved;
 
        /**
+        * @j_fc_wait:
+        *
+        * Wait queue to wait for completion of async fast commits.
+        */
+       wait_queue_head_t       j_fc_wait;
+
+       /**
         * @j_checkpoint_mutex:
         *
         * Semaphore for locking against concurrent checkpoints.
@@ -1232,6 +1239,15 @@ struct journal_s
         */
        struct lockdep_map      j_trans_commit_map;
 #endif
+
+       /**
+        * @j_fc_cleanup_callback:
+        *
+        * Clean-up after fast commit or full commit. JBD2 calls this function
+        * after every commit operation.
+        */
+       void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
+
 };
 
 #define jbd2_might_wait_for_commit(j) \
@@ -1316,6 +1332,8 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit,  FAST_COMMIT)
 #define JBD2_ABORT_ON_SYNCDATA_ERR     0x040   /* Abort the journal on file
                                                 * data write error in ordered
                                                 * mode */
+#define JBD2_FAST_COMMIT_ONGOING       0x100   /* Fast commit is ongoing */
+#define JBD2_FULL_COMMIT_ONGOING       0x200   /* Full commit is ongoing */
 
 /*
  * Function declarations for the journaling transaction and buffer
@@ -1574,6 +1592,15 @@ extern int jbd2_cleanup_journal_tail(journal_t *);
 
 /* Fast commit related APIs */
 int jbd2_fc_init(journal_t *journal, int num_fc_blks);
+int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
+int jbd2_fc_end_commit(journal_t *journal);
+int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid);
+int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
+int jbd2_submit_inode_data(struct jbd2_inode *jinode);
+int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
+int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
+int jbd2_fc_release_bufs(journal_t *journal);
+
 /*
  * is_journal_abort
  *