btrfs: switch to per-transaction pinned extents
author Nikolay Borisov <nborisov@suse.com>
Mon, 20 Jan 2020 14:09:18 +0000 (16:09 +0200)
committer David Sterba <dsterba@suse.com>
Mon, 23 Mar 2020 16:01:38 +0000 (17:01 +0100)
This commit flips the switch to start tracking/processing pinned extents
on a per-transaction basis. It mostly replaces all references from
btrfs_fs_info::(pinned_extents|freed_extents[]) to
btrfs_transaction::pinned_extents.

Two notable modifications warrant explicit mention. The first is changing
clean_pinned_extents to get a reference to the previously running
transaction. The second is the removal of the call to
btrfs_destroy_pinned_extent, since transactions are going to be cleaned
in btrfs_cleanup_one_transaction.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-group.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-io-tree.h
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
include/trace/events/btrfs.h

index 9fec78a..b8f39a6 100644 (file)
@@ -460,7 +460,7 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
        int ret;
 
        while (start < end) {
-               ret = find_first_extent_bit(info->pinned_extents, start,
+               ret = find_first_extent_bit(&info->excluded_extents, start,
                                            &extent_start, &extent_end,
                                            EXTENT_DIRTY | EXTENT_UPTODATE,
                                            NULL);
@@ -1248,30 +1248,42 @@ out:
        return ret;
 }
 
-static bool clean_pinned_extents(struct btrfs_block_group *bg)
+static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
+                                struct btrfs_block_group *bg)
 {
        struct btrfs_fs_info *fs_info = bg->fs_info;
+       struct btrfs_transaction *prev_trans = NULL;
        const u64 start = bg->start;
        const u64 end = start + bg->length - 1;
        int ret;
 
+       spin_lock(&fs_info->trans_lock);
+       if (trans->transaction->list.prev != &fs_info->trans_list) {
+               prev_trans = list_last_entry(&trans->transaction->list,
+                                            struct btrfs_transaction, list);
+               refcount_inc(&prev_trans->use_count);
+       }
+       spin_unlock(&fs_info->trans_lock);
+
        /*
         * Hold the unused_bg_unpin_mutex lock to avoid racing with
         * btrfs_finish_extent_commit(). If we are at transaction N, another
         * task might be running finish_extent_commit() for the previous
         * transaction N - 1, and have seen a range belonging to the block
-        * group in freed_extents[] before we were able to clear the whole
-        * block group range from freed_extents[]. This means that task can
-        * lookup for the block group after we unpinned it from freed_extents
-        * and removed it, leading to a BUG_ON() at unpin_extent_range().
+        * group in pinned_extents before we were able to clear the whole block
+        * group range from pinned_extents. This means that task can lookup for
+        * the block group after we unpinned it from pinned_extents and removed
+        * it, leading to a BUG_ON() at unpin_extent_range().
         */
        mutex_lock(&fs_info->unused_bg_unpin_mutex);
-       ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
-                               EXTENT_DIRTY);
-       if (ret)
-               goto err;
+       if (prev_trans) {
+               ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
+                                       EXTENT_DIRTY);
+               if (ret)
+                       goto err;
+       }
 
-       ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
+       ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
                                EXTENT_DIRTY);
        if (ret)
                goto err;
@@ -1380,7 +1392,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                 * We could have pending pinned extents for this block group,
                 * just delete them, we don't care about them anymore.
                 */
-               if (!clean_pinned_extents(block_group))
+               if (!clean_pinned_extents(trans, block_group))
                        goto end_trans;
 
                /*
@@ -2890,7 +2902,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                                        &cache->space_info->total_bytes_pinned,
                                        num_bytes,
                                        BTRFS_TOTAL_BYTES_PINNED_BATCH);
-                       set_extent_dirty(info->pinned_extents,
+                       set_extent_dirty(&trans->transaction->pinned_extents,
                                         bytenr, bytenr + num_bytes - 1,
                                         GFP_NOFS | __GFP_NOFAIL);
                }
index 22d0cb0..bb237d5 100644 (file)
@@ -596,8 +596,8 @@ struct btrfs_fs_info {
        /* keep track of unallocated space */
        atomic64_t free_chunk_space;
 
-       struct extent_io_tree freed_extents[2];
-       struct extent_io_tree *pinned_extents;
+       /* Track ranges which are used by log trees blocks/logged data extents */
+       struct extent_io_tree excluded_extents;
 
        /* logical->physical extent mapping */
        struct extent_map_tree mapping_tree;
index 194c98a..e1e111c 100644 (file)
@@ -2075,10 +2075,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
                        btrfs_drop_and_free_fs_root(fs_info, gang[i]);
        }
 
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_free_log_root_tree(NULL, fs_info);
-               btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents);
-       }
 }
 
 static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
@@ -2749,11 +2747,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
        fs_info->block_group_cache_tree = RB_ROOT;
        fs_info->first_logical_byte = (u64)-1;
 
-       extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
-                           IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
-       extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
-                           IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
-       fs_info->pinned_extents = &fs_info->freed_extents[0];
+       extent_io_tree_init(fs_info, &fs_info->excluded_extents,
+                           IO_TREE_FS_EXCLUDED_EXTENTS, NULL);
        set_bit(BTRFS_FS_BARRIER, &fs_info->flags);
 
        mutex_init(&fs_info->ordered_operations_mutex);
@@ -4434,16 +4429,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
 }
 
 static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
-                                      struct extent_io_tree *pinned_extents)
+                                      struct extent_io_tree *unpin)
 {
-       struct extent_io_tree *unpin;
        u64 start;
        u64 end;
        int ret;
-       bool loop = true;
 
-       unpin = pinned_extents;
-again:
        while (1) {
                struct extent_state *cached_state = NULL;
 
@@ -4468,15 +4459,6 @@ again:
                cond_resched();
        }
 
-       if (loop) {
-               if (unpin == &fs_info->freed_extents[0])
-                       unpin = &fs_info->freed_extents[1];
-               else
-                       unpin = &fs_info->freed_extents[0];
-               loop = false;
-               goto again;
-       }
-
        return 0;
 }
 
@@ -4567,8 +4549,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 
        btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages,
                                     EXTENT_DIRTY);
-       btrfs_destroy_pinned_extent(fs_info,
-                                   fs_info->pinned_extents);
+       btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
 
        cur_trans->state =TRANS_STATE_COMPLETED;
        wake_up(&cur_trans->commit_wait);
@@ -4620,7 +4601,6 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
        btrfs_destroy_all_ordered_extents(fs_info);
        btrfs_destroy_delayed_inodes(fs_info);
        btrfs_assert_delayed_root_empty(fs_info);
-       btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents);
        btrfs_destroy_all_delalloc_inodes(fs_info);
        mutex_unlock(&fs_info->transaction_kthread_mutex);
 
index cc3037f..b4a7bad 100644 (file)
@@ -36,8 +36,8 @@ struct io_failure_record;
 #define CHUNK_TRIMMED                          EXTENT_DEFRAG
 
 enum {
-       IO_TREE_FS_INFO_FREED_EXTENTS0,
-       IO_TREE_FS_INFO_FREED_EXTENTS1,
+       IO_TREE_FS_PINNED_EXTENTS,
+       IO_TREE_FS_EXCLUDED_EXTENTS,
        IO_TREE_INODE_IO,
        IO_TREE_INODE_IO_FAILURE,
        IO_TREE_RELOC_BLOCKS,
index f97e631..136fffb 100644 (file)
@@ -64,10 +64,8 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
                              u64 start, u64 num_bytes)
 {
        u64 end = start + num_bytes - 1;
-       set_extent_bits(&fs_info->freed_extents[0],
-                       start, end, EXTENT_UPTODATE);
-       set_extent_bits(&fs_info->freed_extents[1],
-                       start, end, EXTENT_UPTODATE);
+       set_extent_bits(&fs_info->excluded_extents, start, end,
+                       EXTENT_UPTODATE);
        return 0;
 }
 
@@ -79,10 +77,8 @@ void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
        start = cache->start;
        end = start + cache->length - 1;
 
-       clear_extent_bits(&fs_info->freed_extents[0],
-                         start, end, EXTENT_UPTODATE);
-       clear_extent_bits(&fs_info->freed_extents[1],
-                         start, end, EXTENT_UPTODATE);
+       clear_extent_bits(&fs_info->excluded_extents, start, end,
+                         EXTENT_UPTODATE);
 }
 
 static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
@@ -2605,7 +2601,7 @@ static int pin_down_extent(struct btrfs_trans_handle *trans,
 
        percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
                    num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
-       set_extent_dirty(fs_info->pinned_extents, bytenr,
+       set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
                         bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
        return 0;
 }
@@ -2761,11 +2757,6 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
                }
        }
 
-       if (fs_info->pinned_extents == &fs_info->freed_extents[0])
-               fs_info->pinned_extents = &fs_info->freed_extents[1];
-       else
-               fs_info->pinned_extents = &fs_info->freed_extents[0];
-
        up_write(&fs_info->commit_root_sem);
 
        btrfs_update_global_block_rsv(fs_info);
@@ -2906,10 +2897,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
        u64 end;
        int ret;
 
-       if (fs_info->pinned_extents == &fs_info->freed_extents[0])
-               unpin = &fs_info->freed_extents[1];
-       else
-               unpin = &fs_info->freed_extents[0];
+       unpin = &trans->transaction->pinned_extents;
 
        while (!TRANS_ABORTED(trans)) {
                struct extent_state *cached_state = NULL;
@@ -2921,12 +2909,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
                        mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                        break;
                }
-               if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
-                       clear_extent_bits(&fs_info->freed_extents[0], start,
+               if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
+                       clear_extent_bits(&fs_info->excluded_extents, start,
                                          end, EXTENT_UPTODATE);
-                       clear_extent_bits(&fs_info->freed_extents[1], start,
-                                         end, EXTENT_UPTODATE);
-               }
 
                if (btrfs_test_opt(fs_info, DISCARD_SYNC))
                        ret = btrfs_discard_extent(fs_info, start,
index 9d63721..bd9c4b5 100644 (file)
@@ -1086,7 +1086,7 @@ static noinline_for_stack int write_pinned_extent_entries(
         * We shouldn't have switched the pinned extents yet so this is the
         * right one
         */
-       unpin = block_group->fs_info->pinned_extents;
+       unpin = &trans->transaction->pinned_extents;
 
        start = block_group->start;
 
index 3768035..fdfdfc4 100644 (file)
@@ -336,6 +336,8 @@ loop:
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
                        IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
+       extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
+                       IO_TREE_FS_PINNED_EXTENTS, NULL);
        fs_info->generation++;
        cur_trans->transid = fs_info->generation;
        fs_info->running_transaction = cur_trans;
index 453cea7..31ae8d2 100644 (file)
@@ -71,6 +71,7 @@ struct btrfs_transaction {
         */
        struct list_head io_bgs;
        struct list_head dropped_roots;
+       struct extent_io_tree pinned_extents;
 
        /*
         * we need to make sure block group deletion doesn't race with
index f1f2b6a..bcbc763 100644 (file)
@@ -81,8 +81,8 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
 
 #define show_extent_io_tree_owner(owner)                                      \
        __print_symbolic(owner,                                                \
-               { IO_TREE_FS_INFO_FREED_EXTENTS0, "FREED_EXTENTS0" },          \
-               { IO_TREE_FS_INFO_FREED_EXTENTS1, "FREED_EXTENTS1" },          \
+               { IO_TREE_FS_PINNED_EXTENTS,      "PINNED_EXTENTS" },          \
+               { IO_TREE_FS_EXCLUDED_EXTENTS,    "EXCLUDED_EXTENTS" },        \
                { IO_TREE_INODE_IO,               "INODE_IO" },                \
                { IO_TREE_INODE_IO_FAILURE,       "INODE_IO_FAILURE" },        \
                { IO_TREE_RELOC_BLOCKS,           "RELOC_BLOCKS" },            \