btrfs: add missing discards when unpinning extents with -o discard
authorJeff Mahoney <jeffm@suse.com>
Mon, 15 Jun 2015 13:41:19 +0000 (09:41 -0400)
committerChris Mason <clm@fb.com>
Wed, 29 Jul 2015 15:15:29 +0000 (08:15 -0700)
When we clear the dirty bits in btrfs_delete_unused_bgs for extents
in the empty block group, it results in btrfs_finish_extent_commit being
unable to discard the freed extents.

The block group removal patch added an alternate path to forget extents
other than btrfs_finish_extent_commit.  As a result, any extents that
would be freed when the block group is removed aren't discarded.  In my
test run, with a large copy of mixed sized files followed by removal, it
left nearly 2/3 of extents undiscarded.

To clean up the block groups, we add the removed block group onto a list
that will be discarded after transaction commit.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Tested-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index aac314e..19ef3f3 100644 (file)
@@ -3437,6 +3437,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 group_start,
                             struct extent_map *em);
 void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
+void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
+void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
 void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root);
 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
@@ -4073,6 +4075,7 @@ __cold
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                     unsigned int line, int errno, const char *fmt, ...);
 
+const char *btrfs_decode_error(int errno);
 
 __cold
 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
index 15411ae..6b791f3 100644 (file)
@@ -6131,20 +6131,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_group_cache *block_group, *tmp;
+       struct list_head *deleted_bgs;
        struct extent_io_tree *unpin;
        u64 start;
        u64 end;
        int ret;
 
-       if (trans->aborted)
-               return 0;
-
        if (fs_info->pinned_extents == &fs_info->freed_extents[0])
                unpin = &fs_info->freed_extents[1];
        else
                unpin = &fs_info->freed_extents[0];
 
-       while (1) {
+       while (!trans->aborted) {
                mutex_lock(&fs_info->unused_bg_unpin_mutex);
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY, NULL);
@@ -6163,6 +6162,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                cond_resched();
        }
 
+       /*
+        * Transaction is finished.  We don't need the lock anymore.  We
+        * do need to clean up the block groups in case of a transaction
+        * abort.
+        */
+       deleted_bgs = &trans->transaction->deleted_bgs;
+       list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
+               u64 trimmed = 0;
+
+               ret = -EROFS;
+               if (!trans->aborted)
+                       ret = btrfs_discard_extent(root,
+                                                  block_group->key.objectid,
+                                                  block_group->key.offset,
+                                                  &trimmed);
+
+               list_del_init(&block_group->bg_list);
+               btrfs_put_block_group_trimming(block_group);
+               btrfs_put_block_group(block_group);
+
+               if (ret) {
+                       const char *errstr = btrfs_decode_error(ret);
+                       btrfs_warn(fs_info,
+                                  "Discard failed while removing blockgroup: errno=%d %s\n",
+                                  ret, errstr);
+               }
+       }
+
        return 0;
 }
 
@@ -9903,6 +9930,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         * currently running transaction might finish and a new one start,
         * allowing for new block groups to be created that can reuse the same
         * physical device locations unless we take this special care.
+        *
+        * There may also be an implicit trim operation if the file system
+        * is mounted with -odiscard. The same protections must remain
+        * in place until the extents have been discarded completely when
+        * the transaction commit has completed.
         */
        remove_em = (atomic_read(&block_group->trimming) == 0);
        /*
@@ -9977,6 +10009,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
        spin_lock(&fs_info->unused_bgs_lock);
        while (!list_empty(&fs_info->unused_bgs)) {
                u64 start, end;
+               int trimming;
 
                block_group = list_first_entry(&fs_info->unused_bgs,
                                               struct btrfs_block_group_cache,
@@ -10076,12 +10109,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                spin_unlock(&block_group->lock);
                spin_unlock(&space_info->lock);
 
+               /* DISCARD can flip during remount */
+               trimming = btrfs_test_opt(root, DISCARD);
+
+               /* Implicit trim during transaction commit. */
+               if (trimming)
+                       btrfs_get_block_group_trimming(block_group);
+
                /*
                 * Btrfs_remove_chunk will abort the transaction if things go
                 * horribly wrong.
                 */
                ret = btrfs_remove_chunk(trans, root,
                                         block_group->key.objectid);
+
+               if (ret) {
+                       if (trimming)
+                               btrfs_put_block_group_trimming(block_group);
+                       goto end_trans;
+               }
+
+               /*
+                * If we're not mounted with -odiscard, we can just forget
+                * about this block group. Otherwise we'll need to wait
+                * until transaction commit to do the actual discard.
+                */
+               if (trimming) {
+                       WARN_ON(!list_empty(&block_group->bg_list));
+                       spin_lock(&trans->transaction->deleted_bgs_lock);
+                       list_move(&block_group->bg_list,
+                                 &trans->transaction->deleted_bgs);
+                       spin_unlock(&trans->transaction->deleted_bgs_lock);
+                       btrfs_get_block_group(block_group);
+               }
 end_trans:
                btrfs_end_transaction(trans, root);
 next:
index fb5a6b1..abe3a66 100644 (file)
@@ -3272,35 +3272,23 @@ next:
        return ret;
 }
 
-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
-                          u64 *trimmed, u64 start, u64 end, u64 minlen)
+void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
 {
-       int ret;
+       atomic_inc(&cache->trimming);
+}
 
-       *trimmed = 0;
+void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
+{
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+       bool cleanup;
 
        spin_lock(&block_group->lock);
-       if (block_group->removed) {
-               spin_unlock(&block_group->lock);
-               return 0;
-       }
-       atomic_inc(&block_group->trimming);
+       cleanup = (atomic_dec_and_test(&block_group->trimming) &&
+                  block_group->removed);
        spin_unlock(&block_group->lock);
 
-       ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
-       if (ret)
-               goto out;
-
-       ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
-out:
-       spin_lock(&block_group->lock);
-       if (atomic_dec_and_test(&block_group->trimming) &&
-           block_group->removed) {
-               struct extent_map_tree *em_tree;
-               struct extent_map *em;
-
-               spin_unlock(&block_group->lock);
-
+       if (cleanup) {
                lock_chunks(block_group->fs_info->chunk_root);
                em_tree = &block_group->fs_info->mapping_tree.map_tree;
                write_lock(&em_tree->lock);
@@ -3324,10 +3312,31 @@ out:
                 * this block group have left 1 entry each one. Free them.
                 */
                __btrfs_remove_free_space_cache(block_group->free_space_ctl);
-       } else {
+       }
+}
+
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+                          u64 *trimmed, u64 start, u64 end, u64 minlen)
+{
+       int ret;
+
+       *trimmed = 0;
+
+       spin_lock(&block_group->lock);
+       if (block_group->removed) {
                spin_unlock(&block_group->lock);
+               return 0;
        }
+       btrfs_get_block_group_trimming(block_group);
+       spin_unlock(&block_group->lock);
+
+       ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
+       if (ret)
+               goto out;
 
+       ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
+out:
+       btrfs_put_block_group_trimming(block_group);
        return ret;
 }
 
index a1077e0..8da24e2 100644 (file)
@@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
-static const char *btrfs_decode_error(int errno)
+const char *btrfs_decode_error(int errno)
 {
        char *errstr = "unknown";
 
index f5021fc..44da929 100644 (file)
@@ -258,6 +258,8 @@ loop:
        mutex_init(&cur_trans->cache_write_mutex);
        cur_trans->num_dirty_bgs = 0;
        spin_lock_init(&cur_trans->dirty_bgs_lock);
+       INIT_LIST_HEAD(&cur_trans->deleted_bgs);
+       spin_lock_init(&cur_trans->deleted_bgs_lock);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
index eb09c20..edc2fbc 100644 (file)
@@ -74,6 +74,8 @@ struct btrfs_transaction {
         */
        struct mutex cache_write_mutex;
        spinlock_t dirty_bgs_lock;
+       struct list_head deleted_bgs;
+       spinlock_t deleted_bgs_lock;
        struct btrfs_delayed_ref_root delayed_refs;
        int aborted;
        int dirty_bg_run;